In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import RidgeClassifierCV
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# load dataset
df = pd.read_csv('csv_files/combined_data.csv')

In [3]:
test_df = pd.read_csv('csv_files/emb_test.csv')

In [4]:
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df

Unnamed: 0,USER,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,...,embedding_758,embedding_759,embedding_760,embedding_761,embedding_762,embedding_763,embedding_764,embedding_765,embedding_766,embedding_767
0,Subject1,6,5,6,6,6,6,5,6,6,...,0.100639,-0.182320,0.108933,-0.086876,0.102525,0.008190,-0.236708,-0.295113,0.385691,0.336576
1,Subject2,0,0,0,0,0,1,3,0,1,...,-0.030667,-0.153180,0.130563,-0.086958,0.084553,0.039385,-0.146900,-0.298797,0.358410,0.428483
2,Subject3,1,6,0,0,2,1,3,1,0,...,-0.063319,-0.261499,0.140101,-0.122861,0.124406,-0.017984,-0.158476,-0.172822,0.270584,0.426972
3,Subject4,2,1,4,6,1,6,4,5,4,...,0.043089,-0.207021,0.135036,-0.074184,0.155560,0.121889,-0.207565,-0.337076,0.370116,0.328862
4,Subject5,6,0,3,6,5,4,3,1,3,...,-0.044701,-0.218263,0.122511,-0.066349,0.100347,0.059433,-0.146092,-0.317965,0.378754,0.439828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,Subject70,0,0,0,0,0,0,0,0,0,...,-0.026926,-0.175980,0.062150,-0.097276,0.049301,0.054396,-0.162456,-0.193348,0.305240,0.486529
70,Subject71,6,0,6,6,0,4,0,1,6,...,-0.054233,-0.154628,0.055183,-0.080547,0.114158,0.052118,-0.208308,-0.298104,0.326223,0.433215
71,Subject72,0,0,0,0,0,0,1,1,1,...,0.084031,-0.131269,0.064756,-0.019835,0.126615,-0.004574,-0.134545,-0.273466,0.303105,0.397931
72,Subject73,3,1,2,3,3,1,3,2,4,...,-0.045603,-0.189933,0.058660,-0.076568,0.151434,0.043682,-0.155111,-0.256472,0.300448,0.380891


In [5]:
test_df = test_df.loc[:, ~test_df.columns.str.contains('^Unnamed')]
test_df

Unnamed: 0,USER,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_758,embedding_759,embedding_760,embedding_761,embedding_762,embedding_763,embedding_764,embedding_765,embedding_766,embedding_767
0,Subject1,-0.072027,0.117544,-0.024513,-0.362428,-0.537909,-0.433939,0.091954,0.537988,0.770397,...,-0.121276,-0.102062,0.232782,-0.286756,0.171106,0.321854,-0.225586,-0.273288,0.525084,0.492826
1,Subject1,-0.173974,0.194406,-0.208257,0.054306,-0.203133,-0.498073,0.240525,0.719508,0.122839,...,-0.135093,-0.344488,0.206339,0.077439,0.011982,0.288609,-0.190208,-0.127556,0.178047,0.497107
2,Subject1,-0.432588,-0.410271,-0.491965,-0.175570,-0.411142,-0.249240,0.393846,0.389656,0.208259,...,-0.244405,0.082435,0.137110,-0.047537,0.368410,0.279262,-0.004822,-0.201456,1.117711,0.285813
3,Subject1,-0.156224,-0.251372,0.015289,0.006797,-0.398336,0.035703,0.040379,0.283533,-0.103094,...,0.007751,-0.208741,0.214994,0.251831,0.502341,0.192172,-0.196032,0.023102,0.136884,0.463758
4,Subject1,-0.305068,-0.026600,0.030862,0.006466,-0.267850,-0.071505,0.352290,0.420227,-0.042371,...,0.310831,0.091647,0.160889,0.018344,0.187901,0.030385,0.136087,-0.376439,0.096566,0.565359
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7514,Subject18,0.125881,0.261106,-0.252636,-0.368598,-0.386131,-0.219428,0.211567,0.562050,0.264253,...,-0.338463,-0.307492,0.073405,-0.200391,0.000701,0.100048,-0.051348,-0.125357,0.164694,0.660239
7515,Subject18,0.172623,0.136061,-0.119321,0.035150,-0.284074,-0.361906,0.427074,0.597285,-0.162653,...,0.128674,-0.594550,0.247146,-0.028827,0.249608,0.074043,0.065057,-0.079368,0.208377,0.276980
7516,Subject18,0.082023,-0.160851,-0.193370,-0.021431,-0.547860,-0.349370,0.053164,0.462721,0.185631,...,-0.257056,-0.334448,-0.137110,0.044021,-0.106367,0.059120,-0.143163,-0.007731,0.429831,0.531334
7517,Subject18,0.244429,0.056481,-0.286747,-0.096453,-0.463076,-0.334498,0.207813,0.540986,0.136466,...,0.072153,-0.227923,0.101447,-0.068244,-0.053858,0.042077,-0.280002,-0.125584,0.176294,0.442782


In [6]:
# response and feature columns
feature_cols = [col for col in df.columns if col.startswith('embedding_')]
response_cols = [f'Q{i}' for i in range(1, 29) if f'Q{i}' in df.columns]

In [7]:
response_cols

['Q1',
 'Q2',
 'Q3',
 'Q4',
 'Q5',
 'Q6',
 'Q7',
 'Q8',
 'Q9',
 'Q10',
 'Q11',
 'Q12',
 'Q19',
 'Q20',
 'Q21',
 'Q22',
 'Q23',
 'Q24',
 'Q25',
 'Q26',
 'Q27',
 'Q28']

In [8]:
X = df[feature_cols]
Y = df[response_cols]
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

In [9]:
# grouping embeddings by user
test_df = test_df.groupby('USER').mean().reset_index()
test_df

Unnamed: 0,USER,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_758,embedding_759,embedding_760,embedding_761,embedding_762,embedding_763,embedding_764,embedding_765,embedding_766,embedding_767
0,Subject1,-0.026629,0.141997,-0.127126,-0.162734,-0.35386,-0.276038,0.366942,0.515109,0.198524,...,0.03945,-0.165443,0.148496,-0.072708,0.111795,0.103414,-0.162429,-0.265971,0.366762,0.421947
1,Subject10,-0.017783,0.133092,0.009919,-0.160283,-0.343908,-0.291799,0.364976,0.584861,0.041381,...,0.028836,-0.200854,0.133521,-0.050943,0.13753,-0.050716,-0.184917,-0.215312,0.198857,0.2531
2,Subject11,-0.030306,0.139682,0.12921,-0.031973,-0.181151,-0.290085,0.252133,0.37597,0.031028,...,0.037931,-0.157375,0.002266,-0.044821,0.031857,0.029006,-0.201016,-0.250361,0.212307,0.440712
3,Subject12,-0.1276,0.117198,-0.110783,-0.186623,-0.276635,-0.246499,0.281233,0.53477,0.102632,...,-0.049229,-0.194841,0.079158,-0.034087,0.080705,0.085071,-0.189822,-0.316781,0.373736,0.383071
4,Subject13,-0.031378,0.167198,-0.002994,-0.173719,-0.377853,-0.333249,0.351781,0.480293,0.160992,...,0.047793,-0.15642,0.122135,-0.070356,0.118598,0.024703,-0.209933,-0.288171,0.336555,0.341598
5,Subject14,-0.052167,0.054324,-0.14885,-0.142011,-0.33569,-0.221068,0.224389,0.510928,0.020726,...,-0.098069,-0.157468,0.109668,-0.100449,0.096954,-0.0049,-0.159379,-0.222649,0.340532,0.433292
6,Subject15,-0.039452,0.046812,-0.017494,-0.078385,-0.331904,-0.323551,0.372181,0.611517,0.079158,...,0.051897,-0.284822,0.152954,-0.06895,0.099132,0.007648,-0.165956,-0.248347,0.279479,0.310477
7,Subject16,-0.106793,0.05875,-0.075549,-0.229999,-0.299277,-0.146905,0.344192,0.492459,-0.015169,...,-0.008165,-0.229788,0.111585,-0.069599,0.188364,0.035425,-0.207815,-0.178656,0.321327,0.39004
8,Subject17,0.048389,0.019432,-0.053689,-0.094537,-0.3135,-0.305675,0.223033,0.546231,0.071783,...,-0.135882,-0.24444,0.100713,-0.120487,0.089209,0.023698,-0.154502,-0.200617,0.359204,0.419433
9,Subject18,-0.02209,0.036382,-0.039254,-0.101022,-0.364331,-0.304244,0.249528,0.497258,0.060148,...,-0.091185,-0.265826,0.096749,-0.063328,0.127796,-0.049289,-0.187898,-0.097981,0.245706,0.371556


In [10]:
import re
def get_num(subject):
    match = re.search(r'\d+', subject)
    return int(match.group()) if match else None

test_df['subject_number'] = test_df['USER'].apply(get_num)

test_df = test_df.sort_values('subject_number').reset_index(drop=True)

test_df = test_df.drop('subject_number', axis=1)

test_df

Unnamed: 0,USER,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_758,embedding_759,embedding_760,embedding_761,embedding_762,embedding_763,embedding_764,embedding_765,embedding_766,embedding_767
0,Subject1,-0.026629,0.141997,-0.127126,-0.162734,-0.35386,-0.276038,0.366942,0.515109,0.198524,...,0.03945,-0.165443,0.148496,-0.072708,0.111795,0.103414,-0.162429,-0.265971,0.366762,0.421947
1,Subject2,0.057415,0.060792,-0.006638,-0.217231,-0.346573,-0.296612,0.336695,0.62176,0.075066,...,-0.028857,-0.275607,0.115917,-0.081281,0.173209,0.012713,-0.218245,-0.226822,0.253924,0.382279
2,Subject3,-0.075177,0.091323,-0.114435,-0.130415,-0.282962,-0.22982,0.262876,0.524838,-0.012877,...,0.017065,-0.167342,0.081301,-0.044895,0.138002,-0.051495,-0.176809,-0.232322,0.304919,0.355751
3,Subject4,-0.248138,0.11408,-0.093014,-0.134036,-0.151076,-0.0988,0.392553,0.418402,-0.054941,...,0.221919,-0.264802,0.101415,-0.07226,0.070442,-0.114057,-0.12843,-0.208558,0.365414,0.498666
4,Subject5,-0.042907,0.030534,-0.100998,-0.186843,-0.307916,-0.208446,0.190411,0.558769,0.038363,...,-0.073373,-0.220964,0.121666,-0.117395,0.101498,-0.001176,-0.1648,-0.195089,0.311998,0.507076
5,Subject6,-0.165584,0.078739,-0.115799,-0.286452,-0.174327,-0.276113,0.356674,0.519034,0.109565,...,0.117925,-0.211147,0.062907,-0.150045,0.074687,-0.052214,-0.223175,-0.30761,0.277794,0.344039
6,Subject7,-0.110453,0.221378,-0.109618,-0.26568,-0.400976,-0.305116,0.421111,0.474304,0.229387,...,0.176837,-0.144561,0.137198,-0.087693,0.100759,0.152895,-0.144128,-0.340883,0.384446,0.377159
7,Subject8,-0.110303,0.195323,-0.011662,-0.176195,-0.328743,-0.253306,0.3687,0.474358,0.128649,...,0.082581,-0.156564,0.118931,-0.053544,0.106535,0.029654,-0.176979,-0.308765,0.367465,0.370291
8,Subject9,-0.074562,0.16899,-0.096313,-0.161565,-0.353927,-0.295757,0.384126,0.524514,0.166025,...,0.069846,-0.181443,0.141738,-0.105682,0.067223,0.066756,-0.169225,-0.338655,0.394977,0.391039
9,Subject10,-0.017783,0.133092,0.009919,-0.160283,-0.343908,-0.291799,0.364976,0.584861,0.041381,...,0.028836,-0.200854,0.133521,-0.050943,0.13753,-0.050716,-0.184917,-0.215312,0.198857,0.2531


In [11]:
test_features = [col for col in test_df.columns if col.startswith('embedding_')]
X_test = test_df[test_features]
X_test

Unnamed: 0,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,embedding_9,...,embedding_758,embedding_759,embedding_760,embedding_761,embedding_762,embedding_763,embedding_764,embedding_765,embedding_766,embedding_767
0,-0.026629,0.141997,-0.127126,-0.162734,-0.35386,-0.276038,0.366942,0.515109,0.198524,-0.324544,...,0.03945,-0.165443,0.148496,-0.072708,0.111795,0.103414,-0.162429,-0.265971,0.366762,0.421947
1,0.057415,0.060792,-0.006638,-0.217231,-0.346573,-0.296612,0.336695,0.62176,0.075066,-0.269215,...,-0.028857,-0.275607,0.115917,-0.081281,0.173209,0.012713,-0.218245,-0.226822,0.253924,0.382279
2,-0.075177,0.091323,-0.114435,-0.130415,-0.282962,-0.22982,0.262876,0.524838,-0.012877,-0.173276,...,0.017065,-0.167342,0.081301,-0.044895,0.138002,-0.051495,-0.176809,-0.232322,0.304919,0.355751
3,-0.248138,0.11408,-0.093014,-0.134036,-0.151076,-0.0988,0.392553,0.418402,-0.054941,-0.195162,...,0.221919,-0.264802,0.101415,-0.07226,0.070442,-0.114057,-0.12843,-0.208558,0.365414,0.498666
4,-0.042907,0.030534,-0.100998,-0.186843,-0.307916,-0.208446,0.190411,0.558769,0.038363,-0.219994,...,-0.073373,-0.220964,0.121666,-0.117395,0.101498,-0.001176,-0.1648,-0.195089,0.311998,0.507076
5,-0.165584,0.078739,-0.115799,-0.286452,-0.174327,-0.276113,0.356674,0.519034,0.109565,-0.323124,...,0.117925,-0.211147,0.062907,-0.150045,0.074687,-0.052214,-0.223175,-0.30761,0.277794,0.344039
6,-0.110453,0.221378,-0.109618,-0.26568,-0.400976,-0.305116,0.421111,0.474304,0.229387,-0.390365,...,0.176837,-0.144561,0.137198,-0.087693,0.100759,0.152895,-0.144128,-0.340883,0.384446,0.377159
7,-0.110303,0.195323,-0.011662,-0.176195,-0.328743,-0.253306,0.3687,0.474358,0.128649,-0.257086,...,0.082581,-0.156564,0.118931,-0.053544,0.106535,0.029654,-0.176979,-0.308765,0.367465,0.370291
8,-0.074562,0.16899,-0.096313,-0.161565,-0.353927,-0.295757,0.384126,0.524514,0.166025,-0.250446,...,0.069846,-0.181443,0.141738,-0.105682,0.067223,0.066756,-0.169225,-0.338655,0.394977,0.391039
9,-0.017783,0.133092,0.009919,-0.160283,-0.343908,-0.291799,0.364976,0.584861,0.041381,-0.198571,...,0.028836,-0.200854,0.133521,-0.050943,0.13753,-0.050716,-0.184917,-0.215312,0.198857,0.2531


In [12]:
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [13]:
model_predictions = {}

models = {
    "RandomForest": RandomForestClassifier(),
    "ExtraTrees": ExtraTreesClassifier(),
    "XGBoost": XGBClassifier(),
    "Ridge": RidgeClassifierCV(),
    "SVM": SVC(decision_function_shape='ovo')
}

In [14]:
import numpy as np

In [15]:
for model_name, model in models.items():
    # Some models require a wrapper to support multi-output
    if model_name in ["Ridge", "SVM"]:
        model = MultiOutputClassifier(model)
    elif model_name == "XGBoost":
        model = MultiOutputClassifier(XGBClassifier(objective='multi:softprob', num_class=7))
    print(f"Training {model_name} model...")
    model.fit(X, Y)
    print(f"Predicting with {model_name} model...")
    Y_pred = model.predict(X_test)
    model_predictions[model_name] = Y_pred
    print(f"{model_name} predictions complete.")

Training RandomForest model...
Predicting with RandomForest model...
RandomForest predictions complete.
Training ExtraTrees model...
Predicting with ExtraTrees model...
ExtraTrees predictions complete.
Training XGBoost model...


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  

Predicting with XGBoost model...


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_sparse(dtype):
  

XGBoost predictions complete.
Training Ridge model...
Predicting with Ridge model...
Ridge predictions complete.
Training SVM model...
Predicting with SVM model...
SVM predictions complete.


In [16]:
out = []
for model_name, predictions in model_predictions.items():
    print(f"\nPredictions by {model_name} model:")
    print(predictions)
    if model_name == 'ExtraTrees':
        out = predictions


Predictions by RandomForest model:
[[6 0 3 6 0 6 6 1 6 6 6 6 0 0 0 5 6 6 6 5 4 5]
 [6 0 0 0 0 6 0 6 6 6 6 6 0 1 0 6 6 0 6 6 4 6]
 [6 0 6 6 6 6 6 0 6 6 6 6 0 1 0 6 6 0 6 6 6 6]
 [0 0 0 0 0 6 0 0 0 6 6 6 0 0 0 1 6 0 0 6 0 0]
 [6 0 0 0 0 6 0 0 0 6 0 2 0 1 0 2 1 0 2 1 2 1]
 [6 0 6 6 0 6 0 0 6 6 6 6 0 1 0 6 6 0 6 6 6 6]
 [6 1 6 6 6 6 6 6 6 6 6 6 3 6 2 6 6 6 6 6 6 6]
 [6 1 6 6 6 6 6 6 6 6 6 6 1 6 2 6 6 6 6 6 6 6]
 [6 1 6 6 6 6 6 0 6 6 6 6 0 6 2 6 6 0 6 6 6 6]
 [6 1 6 6 6 6 6 6 6 6 6 6 1 6 0 6 6 0 6 6 6 6]
 [0 0 0 0 0 6 0 0 0 0 6 0 0 0 0 1 2 0 0 0 2 6]
 [6 0 0 6 0 6 3 0 6 6 0 0 0 0 0 0 1 0 0 1 2 0]
 [6 1 6 6 6 6 6 6 6 6 6 6 1 6 3 6 6 6 6 6 6 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 1 0 1 1 0 0 6 4 6]
 [6 0 0 6 6 6 0 6 6 6 6 6 0 1 0 6 6 0 6 6 6 6]
 [5 0 6 6 6 6 6 6 6 6 6 6 1 1 0 6 5 0 6 6 5 6]
 [0 0 0 0 0 6 0 0 0 1 6 0 0 1 0 0 2 0 4 6 2 6]
 [6 0 0 0 0 6 0 0 0 6 6 6 0 1 0 3 3 0 6 4 2 6]]

Predictions by ExtraTrees model:
[[4 3 3 6 4 6 6 6 6 6 3 6 4 5 5 6 6 6 6 5 4 5]
 [6 1 1 6 6 6 3 1 6 6 6 6 0 1 0 6 6 

In [17]:
out

array([[4, 3, 3, 6, 4, 6, 6, 6, 6, 6, 3, 6, 4, 5, 5, 6, 6, 6, 6, 5, 4, 5],
       [6, 1, 1, 6, 6, 6, 3, 1, 6, 6, 6, 6, 0, 1, 0, 6, 6, 6, 6, 6, 4, 6],
       [6, 0, 0, 6, 0, 6, 6, 0, 6, 6, 6, 6, 1, 1, 0, 6, 6, 0, 6, 6, 6, 6],
       [6, 0, 6, 0, 0, 6, 0, 0, 0, 6, 6, 6, 0, 0, 0, 6, 5, 0, 6, 3, 4, 6],
       [6, 0, 0, 0, 0, 6, 0, 0, 1, 6, 6, 6, 0, 1, 0, 2, 2, 0, 2, 6, 2, 6],
       [6, 0, 0, 6, 0, 6, 0, 0, 6, 6, 6, 6, 0, 0, 0, 6, 6, 0, 6, 6, 6, 6],
       [6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 2, 0, 6, 6, 6, 6, 6, 6, 6],
       [6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 6, 2, 5, 6, 6, 6, 6, 6, 6],
       [6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 6, 2, 6, 6, 6, 6, 6, 6, 6],
       [6, 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 6, 0, 6, 6, 0, 6, 6, 6, 6],
       [6, 0, 0, 6, 0, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 6, 6, 0, 6, 6, 6, 6],
       [6, 0, 3, 6, 0, 6, 3, 0, 6, 6, 6, 0, 0, 0, 0, 0, 6, 0, 6, 6, 4, 0],
       [6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 6, 6, 6, 6, 0, 6, 6, 6, 6],
       [6, 0, 0, 0, 0, 1,

In [18]:
out_df = pd.DataFrame(out)
out_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,4,3,3,6,4,6,6,6,6,6,...,4,5,5,6,6,6,6,5,4,5
1,6,1,1,6,6,6,3,1,6,6,...,0,1,0,6,6,6,6,6,4,6
2,6,0,0,6,0,6,6,0,6,6,...,1,1,0,6,6,0,6,6,6,6
3,6,0,6,0,0,6,0,0,0,6,...,0,0,0,6,5,0,6,3,4,6
4,6,0,0,0,0,6,0,0,1,6,...,0,1,0,2,2,0,2,6,2,6
5,6,0,0,6,0,6,0,0,6,6,...,0,0,0,6,6,0,6,6,6,6
6,6,1,6,6,6,6,6,6,6,6,...,3,2,0,6,6,6,6,6,6,6
7,6,1,6,6,6,6,6,6,6,6,...,1,6,2,5,6,6,6,6,6,6
8,6,1,6,6,6,6,6,6,6,6,...,0,6,2,6,6,6,6,6,6,6
9,6,0,6,6,6,6,6,6,6,6,...,1,6,0,6,6,0,6,6,6,6


In [19]:
users = test_df['USER']
users

0      Subject1
1      Subject2
2      Subject3
3      Subject4
4      Subject5
5      Subject6
6      Subject7
7      Subject8
8      Subject9
9     Subject10
10    Subject11
11    Subject12
12    Subject13
13    Subject14
14    Subject15
15    Subject16
16    Subject17
17    Subject18
Name: USER, dtype: object

In [20]:
out_df.insert(0, 'USER', users)

In [21]:
out_df

Unnamed: 0,USER,0,1,2,3,4,5,6,7,8,...,12,13,14,15,16,17,18,19,20,21
0,Subject1,4,3,3,6,4,6,6,6,6,...,4,5,5,6,6,6,6,5,4,5
1,Subject2,6,1,1,6,6,6,3,1,6,...,0,1,0,6,6,6,6,6,4,6
2,Subject3,6,0,0,6,0,6,6,0,6,...,1,1,0,6,6,0,6,6,6,6
3,Subject4,6,0,6,0,0,6,0,0,0,...,0,0,0,6,5,0,6,3,4,6
4,Subject5,6,0,0,0,0,6,0,0,1,...,0,1,0,2,2,0,2,6,2,6
5,Subject6,6,0,0,6,0,6,0,0,6,...,0,0,0,6,6,0,6,6,6,6
6,Subject7,6,1,6,6,6,6,6,6,6,...,3,2,0,6,6,6,6,6,6,6
7,Subject8,6,1,6,6,6,6,6,6,6,...,1,6,2,5,6,6,6,6,6,6
8,Subject9,6,1,6,6,6,6,6,6,6,...,0,6,2,6,6,6,6,6,6,6
9,Subject10,6,0,6,6,6,6,6,6,6,...,1,6,0,6,6,0,6,6,6,6


In [22]:
fixed_cols = [
    'USER', 'Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', 
    'Q10', 'Q11', 'Q12', 'Q19', 'Q20', 'Q21', 'Q22', 'Q23', 'Q24', 
    'Q25', 'Q26', 'Q27', 'Q28'
]

out_df.columns = fixed_cols

In [23]:
out_df.to_csv('output.csv')

In [24]:
out_df.to_csv('output.txt', header=None, index=None, sep=' ', mode='a')