In [3]:
import warnings
from time import perf_counter
from time import time

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.multioutput import MultiOutputClassifier
# Suppress FutureWarning and DeprecationWarning messages for the rest of the session.
warnings.filterwarnings(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

# Validation split: features and labels are read from CSV files in the working directory.
vali_features, vali_labels = (
    pd.read_csv(csv_path) for csv_path in ('val_features.csv', 'val_labels.csv')
)

# Test split, read the same way.
test_features, test_labels = (
    pd.read_csv(csv_path) for csv_path in ('test_features.csv', 'test_labels.csv')
)

In [3]:
# Load every saved multi-output model from its '<name>_model.pkl' file, keyed by name.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
models = {
    model_key: joblib.load('{}_model.pkl'.format(model_key))
    for model_key in (
        'Multi_output_LR',
        'Multi_output_SVM',
        'Multi_output_RF',
        'Multi_output_KNN',
    )
}

models

{'Multi_output_LR': MultiOutputClassifier(estimator=LogisticRegression(C=10, max_iter=1000)),
 'Multi_output_SVM': MultiOutputClassifier(estimator=SVC(C=10, kernel='linear')),
 'Multi_output_RF': MultiOutputClassifier(estimator=RandomForestClassifier(max_depth=4,
                                                        n_estimators=50)),
 'Multi_output_KNN': MultiOutputClassifier(estimator=KNeighborsClassifier(n_neighbors=3, p=1,
                                                      weights='distance'))}

In [4]:

def model_evaluation(name, model, features, labels):
    """Print per-label accuracy, weighted precision/recall and prediction latency.

    Parameters
    ----------
    name : str
        Text printed at the start of the report line.
    model : object
        Estimator exposing ``predict(features)``. The result is indexed as
        ``predict[:, idx]``, so it must be a 2-D array whose columns line up,
        in order, with ``labels.columns``.
    features : object
        Inputs handed unchanged to ``model.predict`` (the notebook passes a
        pandas DataFrame).
    labels : pandas.DataFrame
        True values; column number ``idx`` is scored against prediction
        column ``idx``.

    Returns
    -------
    None
        Results are only printed. Each metric is a list with one value per
        label column, rounded to 3 decimals; latency is reported in
        milliseconds, rounded to 1 decimal.
    """
    # Time only the predict call. perf_counter() is a monotonic,
    # high-resolution clock; time() follows the wall clock, which can be
    # adjusted mid-run and may be too coarse for intervals of a few
    # milliseconds.
    start = perf_counter()
    predict = model.predict(features)
    end = perf_counter()

    # Score each label column separately against its matching prediction column.
    accuracy, precision, recall = [], [], []
    for idx, label in enumerate(labels.columns):
        y_true = labels[label]
        y_pred = predict[:, idx]
        accuracy.append(round(accuracy_score(y_true, y_pred), 3))
        # zero_division=1 makes sklearn return 1 (instead of warning and
        # returning 0) for a class whose score would divide by zero.
        precision.append(round(
            precision_score(y_true, y_pred, average='weighted', zero_division=1), 3))
        recall.append(round(
            recall_score(y_true, y_pred, average='weighted', zero_division=1), 3))

    print('{} -- Accuracy: {}/Precision: {}/Recall: {}/Latency: {}ms'.format(
        name, accuracy, precision, recall, round((end - start) * 1000, 1)))


# Evaluate every loaded model on the validation split
for name, mdl in models.items():
    model_evaluation(name, mdl, vali_features, vali_labels)
    

Multi_output_LR -- Accuracy: [0.4, 0.45, 0.4, 0.65, 0.7]/Precision: [0.562, 0.55, 0.458, 0.695, 0.862]/Recall: [0.4, 0.45, 0.4, 0.65, 0.7]/Latency: 9.3ms
Multi_output_SVM -- Accuracy: [0.6, 0.55, 0.65, 0.6, 0.65]/Precision: [0.745, 0.636, 0.662, 0.66, 0.796]/Recall: [0.6, 0.55, 0.65, 0.6, 0.65]/Latency: 8.0ms
Multi_output_RF -- Accuracy: [0.5, 0.45, 0.65, 0.55, 0.45]/Precision: [0.753, 0.492, 0.752, 0.664, 0.542]/Recall: [0.5, 0.45, 0.65, 0.55, 0.45]/Latency: 39.6ms
Multi_output_KNN -- Accuracy: [0.6, 0.6, 0.55, 0.55, 0.6]/Precision: [0.808, 0.729, 0.598, 0.6, 0.738]/Recall: [0.6, 0.6, 0.55, 0.55, 0.6]/Latency: 246.5ms


In [5]:
# Score the SVM-based model on the held-out test split.
model_evaluation(
    name='support vector machines',
    model=models['Multi_output_SVM'],
    features=test_features,
    labels=test_labels,
)

support vector machines -- Accuracy: [0.65, 0.6, 0.7, 0.55, 0.45]/Precision: [0.7, 0.839, 0.752, 0.733, 0.614]/Recall: [0.65, 0.6, 0.7, 0.55, 0.45]/Latency: 9.3ms


In [4]:
# One hand-made sample: columns Q1..Q15 each hold 1, followed by T_Cor holding 15.
new_data = pd.DataFrame({
    **{'Q{}'.format(question): [1] for question in range(1, 16)},
    'T_Cor': [15],
})

In [5]:
# Show the one-row frame as the cell output to check its columns and values.
new_data

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15,T_Cor
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,15


In [9]:
# Take the SVM-based multi-output model from the loaded dict and predict the
# labels for the hand-made sample in new_data.
selected_model = models['Multi_output_SVM'] 
predicted_labels = selected_model.predict(new_data)

In [10]:
# Print the header and the predicted label array on consecutive lines.
print("Predicted Labels:", predicted_labels, sep="\n")

Predicted Labels:
[[5. 5. 5. 7. 8.]]
