In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# load dataset
df = pd.read_csv('csv_files/merged_pca.csv')

# response and feature columns
feature_cols = [col for col in df.columns if col.startswith('pca_component_')]
response_cols = [f'Q{i}' for i in range(1, 23) if f'Q{i}' in df.columns]

In [3]:
X = df[feature_cols]
Y = df[response_cols]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [5]:
model_predictions = {}

models = {
    "RandomForest": RandomForestClassifier(),
    "ExtraTrees": ExtraTreesClassifier(),
    "XGBoost": XGBClassifier(),
    "Ridge": RidgeClassifierCV(),
    "SVM": SVC(decision_function_shape='ovo')
}

In [6]:
import numpy as np

In [7]:
for model_name, model in models.items():
    # Some models require a wrapper to support multi-output
    if model_name in ["Ridge", "SVM"]:
        model = MultiOutputClassifier(model)
    elif model_name == "XGBoost":
        model = MultiOutputClassifier(XGBClassifier(objective='multi:softprob', num_class=7))
    print(f"Training {model_name} model...")
    model.fit(X_train, Y_train)
    print(f"Predicting with {model_name} model...")
    Y_pred = model.predict(X_test)
    model_predictions[model_name] = Y_pred
    print(f"{model_name} predictions complete.")

Training RandomForest model...
Predicting with RandomForest model...
RandomForest predictions complete.
Training ExtraTrees model...
Predicting with ExtraTrees model...
ExtraTrees predictions complete.
Training XGBoost model...


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  

Predicting with XGBoost model...
XGBoost predictions complete.
Training Ridge model...
Predicting with Ridge model...
Ridge predictions complete.
Training SVM model...
Predicting with SVM model...
SVM predictions complete.


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  

In [8]:
for model_name, predictions in model_predictions.items():
    print(f"\nPredictions by {model_name} model:")
    print(predictions)


Predictions by RandomForest model:
[[0 0 0 0 0 6 0 0 0 6 6 6 0 0 0 6]
 [0 0 0 0 0 6 0 0 0 6 1 6 0 0 0 1]
 [6 0 4 0 0 6 0 0 6 6 6 6 0 0 0 6]
 [6 1 6 6 0 6 3 0 6 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 0]
 [0 0 0 0 0 6 0 0 6 6 6 6 0 0 0 2]
 [5 0 6 0 0 6 0 0 0 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 3]
 [0 0 0 0 0 6 0 0 0 6 6 6 0 0 0 6]
 [6 0 6 6 6 6 6 0 6 6 6 6 0 0 2 6]
 [6 1 0 0 0 6 3 0 6 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 6]
 [6 1 6 6 0 6 0 0 6 6 6 6 0 0 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 0]
 [6 0 0 6 0 6 0 0 6 6 6 6 0 1 0 6]]

Predictions by ExtraTrees model:
[[6 0 6 0 0 6 3 0 6 6 6 6 0 1 0 6]
 [0 0 0 0 0 6 0 0 0 6 6 6 0 0 0 2]
 [6 0 6 6 0 6 0 0 6 6 6 6 0 1 0 6]
 [6 1 0 6 0 6 6 0 6 6 6 6 0 0 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 0]
 [0 0 0 0 0 6 0 0 0 6 6 6 0 0 0 6]
 [6 1 0 0 0 6 0 0 6 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 0 0 6]
 [6 0 6 6 6 6 6 0 6 6 6 6 0 1 0 6]
 [6 0 0 0 0 6 0 0 6 6 6 6 0 0 0 6]
 [6 0 0 0 0 6 0 0 0

In [9]:
Y_test

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q19,Q20,Q21,Q22
4,6,0,3,6,5,4,3,1,3,0,1,0,5,2,1,0
63,0,0,3,0,0,1,0,0,0,0,0,0,0,1,0,1
18,1,1,0,0,3,1,0,1,0,1,2,2,1,0,0,3
0,6,5,6,6,6,6,5,6,6,6,6,6,5,6,6,6
28,2,0,0,0,0,3,1,0,6,0,0,1,1,0,0,1
73,5,1,5,6,6,1,3,3,0,1,0,6,0,1,0,1
10,2,1,1,1,1,2,0,2,1,1,6,2,0,1,0,3
34,6,5,5,5,6,6,4,4,6,6,4,4,1,1,1,6
12,5,2,1,2,5,6,2,1,4,6,3,3,4,6,1,5
55,5,5,5,1,6,6,6,6,2,1,6,6,1,6,3,6


In [10]:
from sklearn.metrics import accuracy_score

Y_test_array = Y_test.values

predictions = model_predictions['ExtraTrees']

accuracies = []
for i in range(Y_test_array.shape[1]): 
    
    acc = accuracy_score(Y_test_array[:, i], predictions[:, i])
    accuracies.append(acc)
    print(f"Accuracy for Q{i+1}: {acc:.4f}")

overall_accuracy = sum(accuracies) / len(accuracies)
print(f"Overall accuracy: {overall_accuracy:.4f}")

Accuracy for Q1: 0.3333
Accuracy for Q2: 0.4000
Accuracy for Q3: 0.2000
Accuracy for Q4: 0.4667
Accuracy for Q5: 0.3333
Accuracy for Q6: 0.4667
Accuracy for Q7: 0.5333
Accuracy for Q8: 0.2667
Accuracy for Q9: 0.3333
Accuracy for Q10: 0.2667
Accuracy for Q11: 0.2667
Accuracy for Q12: 0.2000
Accuracy for Q13: 0.4000
Accuracy for Q14: 0.2667
Accuracy for Q15: 0.5333
Accuracy for Q16: 0.3333
Overall accuracy: 0.3500
