### new approche to hand the preprocessing 

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

class DataPreprocessor:
    def __init__(self):
        self.encoders = {
            'Goal': LabelEncoder(),
            'Category': LabelEncoder(),
            'BodyPart': LabelEncoder(),
            'Level': LabelEncoder()
        }
        self.smote = SMOTE(random_state=42)

    def fit(self, X):
        # Fit the LabelEncoders
        for column, encoder in self.encoders.items():
            encoder.fit(X[column])

    def transform(self, X):
        # Apply encoding
        for column, encoder in self.encoders.items():
            X[column + '_encoded'] = encoder.transform(X[column])
        return X[[col + '_encoded' for col in self.encoders]]

    def fit_resample(self, X, y):
        # Fit and transform, then apply SMOTE
        X_encoded = self.transform(X)
        X_resampled, y_resampled = self.smote.fit_resample(X_encoded, y)
        return X_resampled, y_resampled

    def save(self, filename='preprocessor.pkl'):
        # Save the preprocessor object
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def load(filename='preprocessor.pkl'):
        # Load a preprocessor object
        with open(filename, 'rb') as f:
            return pickle.load(f)


## Step 2: Use the Preprocessing Class in Training

In [4]:

from flask import Flask, request, jsonify
import joblib

# Load the data
data = pd.read_csv('workouts_subset.csv')

# Initialize and fit the preprocessor
preprocessor = DataPreprocessor()
preprocessor.fit(data)

# Encode and balance data
X, y = data.drop('Workout', axis=1), data['Workout']
X_resampled, y_resampled = preprocessor.fit_resample(X, y)

# Spliting the data
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Training the model
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Save model and preprocessor
joblib.dump(model, 'model_work.pkl')
preprocessor.save()


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Step 3:  Flask API to Use the Preprocessor

In [7]:
import traceback

In [16]:
from flask import Flask, request, jsonify
import pandas as pd
import joblib

# Load the model and preprocessor
model = joblib.load('model_work.pkl')
preprocessor = joblib.load('preprocessor.pkl')

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    try:
        json_data = request.get_json()
        df = pd.DataFrame([json_data])
        # Preprocess the data using the loaded preprocessor
        df_preprocessed = preprocessor.transform(df)
        # Predict using the loaded model
        prediction = model.predict(df_preprocessed)
        # Convert prediction to response, possibly decoding labels
        response = {'prediction': prediction.tolist()}
        return jsonify(response)
    except Exception as e:
        print("Error during prediction:", str(e))
        return jsonify({'error': str(e)})

if __name__ == '__main__':
    try:
        app.run(debug=False, use_reloader=False)
    except Exception as e:
        print("Caught an exception in Flask app:", e)
        traceback.print_exc()


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


In [19]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.


SyntaxError: invalid syntax (118272213.py, line 1)