In [41]:
import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import warnings 
warnings.filterwarnings('ignore',category=FutureWarning)
warnings.filterwarnings('ignore',category=DeprecationWarning)
from sklearn.multioutput import MultiOutputClassifier

In [42]:
# Names of the label columns the model is trained to predict.
target_variables = ['O_Cor', 'I_Cor', 'P_Co', 'Top_Cat', 'Low_Cat']

# Features and labels live in separate CSV files (paths are relative to the
# notebook's working directory). Only the target columns are kept from the
# labels file; any other columns it contains are dropped.
# NOTE(review): the two files are presumably row-aligned (row i of the features
# matches row i of the labels) — nothing here enforces it; confirm.
train_features = pd.read_csv('train_features.csv')
train_labels = pd.read_csv('train_labels.csv')[target_variables]

In [43]:
def print_result(results):
    """Print a plain-text summary of a finished hyper-parameter search.

    The first line reports the winning parameter set, followed by a blank
    line. After that, one line is printed per candidate with its mean test
    score and a "+/-" figure equal to twice the standard deviation of the
    test score, both rounded to three decimals.

    Args:
        results: An object exposing ``best_params_`` and a ``cv_results_``
            mapping with the keys ``'mean_test_score'``, ``'std_test_score'``
            and ``'params'`` (equal-length sequences, one entry per candidate).

    Returns:
        None. The summary is written to standard output.
    """
    # Headline: the parameter combination that was selected.
    print('BEST PARAMETERS: {}\n'.format(results.best_params_))

    # Pull the three per-candidate columns out of the results table.
    summary = results.cv_results_
    per_candidate = zip(
        summary['mean_test_score'],
        summary['std_test_score'],
        summary['params'],
    )

    # One report line per candidate; the spread shown is two standard deviations.
    for avg_score, score_std, candidate in per_candidate:
        report_line = '{} (+/- {}) for {}'.format(
            round(avg_score, 3), round(score_std * 2, 3), candidate
        )
        print(report_line)


In [44]:

# Show how many rows each table has; the two numbers should be identical.
print(train_features.shape[0])
print(train_labels.shape[0])

# Features and labels are loaded from separate CSV files, so fail fast here
# rather than relying on someone eyeballing the two printed numbers before
# the model is fitted.
assert train_features.shape[0] == train_labels.shape[0], (
    'train_features and train_labels must have the same number of rows'
)


60
60


In [64]:
# Base learner: logistic regression with the iteration cap raised to 1000
lr = LogisticRegression(max_iter=1000)

# Wrap the base learner in MultiOutputClassifier so that all the target columns
# in train_labels can be fitted and predicted through a single estimator
multi_output_classifier = MultiOutputClassifier(lr)

# Candidate values for grid search. The 'estimator__' prefix addresses the
# wrapped LogisticRegression, so this sweeps its C parameter in powers of ten
# from 0.001 to 1000.
parameters = {
    'estimator__C': [ 0.001,0.01, 0.1, 1, 10, 100, 1000]
}

# Run the grid search with 3-fold cross-validation.
# NOTE(review): no `scoring` argument is passed, so the wrapped estimator's own
# default score is used. For MultiOutputClassifier this is presumably
# exact-match accuracy across all target columns, which would explain the low
# scores reported below — confirm against the scikit-learn docs and consider an
# explicit per-target metric.
# `cv` (the fitted search object) is reused by later cells.
cv = GridSearchCV(multi_output_classifier, parameters, cv=3)
cv.fit(train_features, train_labels)

# Print the best parameters and the per-candidate score summary
print_result(cv)


BEST PARAMETERS: {'estimator__C': 10}

0.0 (+/- 0.0) for {'estimator__C': 0.001}
0.017 (+/- 0.047) for {'estimator__C': 0.01}
0.117 (+/- 0.125) for {'estimator__C': 0.1}
0.217 (+/- 0.17) for {'estimator__C': 1}
0.233 (+/- 0.262) for {'estimator__C': 10}
0.2 (+/- 0.216) for {'estimator__C': 100}
0.233 (+/- 0.189) for {'estimator__C': 1000}


In [65]:
# Preview the first rows of the feature table and of the label table as plain text.
print(train_features.head())
print(train_labels.head())


    Q1   Q2   Q3   Q4   Q5   Q6   Q7   Q8   Q9  Q10  Q11  Q12  Q13  Q14  Q15  \
0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  1.0  1.0   
1  0.0  1.0  1.0  0.0  0.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  1.0  1.0   
2  1.0  1.0  0.0  0.0  1.0  0.0  1.0  1.0  0.0  1.0  1.0  1.0  1.0  1.0  0.0   
3  1.0  1.0  1.0  1.0  1.0  1.0  0.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  0.0   
4  1.0  1.0  0.0  0.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  1.0  1.0  0.0   

   T_Cor  
0    3.0  
1    6.0  
2   10.0  
3   13.0  
4    6.0  
   O_Cor  I_Cor  P_Co  Top_Cat  Low_Cat
0    1.0    1.0   1.0      7.0      7.0
1    0.0    3.0   3.0      6.0      1.0
2    4.0    1.0   5.0      2.0      3.0
3    4.0    4.0   5.0      2.0      5.0
4    3.0    0.0   3.0      4.0      3.0


In [66]:
# Bare expression so the notebook renders the estimator chosen by the grid search.
cv.best_estimator_

In [67]:
# Serialize the estimator chosen by the grid search to 'Multi_output_LR_model.pkl'
# (path is relative to the notebook's working directory).
# NOTE(review): joblib files are pickle-based — only load this file back from a
# trusted location.
joblib.dump(cv.best_estimator_,'Multi_output_LR_model.pkl')

['Multi_output_LR_model.pkl']