# Data cleaning

In [133]:
import pandas as pd
import numpy as np
import random
import json
from tensorflow.keras.models import Model, load_model
# import pycaret

# Replace double quotes with single quotes for moods JSON arrays
f = open('song_data.csv','r+')
text = f.read()
text = text.replace('"["', '"[?').replace('", "', '?,?').replace('"]"', '?]"') # Changed to "[$Happy$,$Sad$]" for easy replacing later
f.seek(0)
f.write(text)
f.close()

# Importing data
df = pd.read_csv('song_data.csv', index_col=0)
df.drop('uuid', axis=1, inplace=True)
df.dropna(inplace=True) # drop rows with nan values
for col in df.columns:
    if col not in ['id','isSkipped']:
        if col == 'moods':
            df[col] = df[col].apply(lambda x:x.replace('?','"')) # Replaces mood values back to the form ["Happy","Sad"] so it can be loaded by json
        df[col] = df[col].apply(json.loads)
df['activity'] = df.apply(lambda _: '', axis=1) # empty activity column
print('Number of samples: ', df.shape[0])
df.head()

Number of samples:  550


Unnamed: 0_level_0,gyroX,gyroY,gyroZ,accelX,accelY,accelZ,optical,temp,humidity,moods,isSkipped,activity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,"[499.30572509765625, 499.53460693359375, 0.198...","[1.861572265625, 2.49481201171875, 1.022338867...","[1.24359130859375, 1.2359619140625, 1.06048583...","[1.1943359375, 1.201171875, 1.1845703125, 1.18...","[1.1455078125, 1.1591796875, 1.1630859375, 1.1...","[3.68359375, 3.654296875, 3.6748046875, 3.6650...","[139.64, 138.36, 139.64, 140.28]","[30.50567626953125, 30.50567626953125, 30.5056...","[71.3134765625, 71.3134765625, 71.3134765625]","[Atmospheric, Melancholic]",0,
2,"[498.1765747070313, 0.98419189453125, 1.579284...","[32.27996826171875, 14.7247314453125, 9.864807...","[497.9248046875, 496.368408203125, 494.9645996...","[2.9326171875, 2.9345703125, 2.728515625, 2.64...","[0.8466796875, 0.74609375, 0.865234375, 15.524...","[2.7548828125, 2.8037109375, 2.806640625, 3.34...","[123.24, 123.24, 139.32, 228.64]","[31.69403076171875, 31.69403076171875, 31.6940...","[67.05322265625, 67.05322265625, 67.0532226562...",[Melancholic],0,
3,"[15.76995849609375, 10.65826416015625, 6.87408...","[488.36517333984375, 486.5798950195313, 496.92...","[3.86810302734375, 5.0811767578125, 498.947143...","[14.0107421875, 14.3212890625, 14.232421875, 1...","[14.96484375, 15.212890625, 15.275390625, 15.1...","[3.2021484375, 3.3291015625, 3.375, 3.35644531...","[256.08, 307.84000000000003, 315.2, 301.36, 30...","[32.21771240234375, 32.21771240234375, 32.2177...","[65.850830078125, 65.850830078125, 65.85083007...","[Passionate, Melancholic]",0,
4,"[499.93896484375, 499.45068359375, 499.7482299...","[1.82342529296875, 2.74658203125, 1.8844604492...","[1.57928466796875, 1.434326171875, 1.365661621...","[1.7353515625, 1.708984375, 1.7333984375, 1.71...","[13.7841796875, 13.80078125, 13.7744140625, 13...","[2.8232421875, 2.8369140625, 2.8154296875, 2.8...","[127.08, 126.76, 125.48, 124.52]","[32.42919921875, 32.42919921875, 32.4291992187...","[64.6728515625, 64.6728515625, 64.6728515625, ...",[Elegant],1,
5,"[499.9465942382813, 0.03814697265625, 499.7482...","[2.01416015625, 1.77001953125, 1.7852783203125...","[1.1444091796875, 1.2359619140625, 1.129150390...","[0.4580078125, 0.4609375, 0.453125, 0.43847656...","[13.181640625, 13.1689453125, 13.1787109375, 1...","[2.6806640625, 2.693359375, 2.6875, 2.68652343...","[145.76, 144.48, 146.4, 144.8]","[32.42919921875, 32.42919921875, 32.4291992187...","[64.6728515625, 64.6728515625, 64.6728515625, ...","[Passionate, Melancholic]",1,


In [134]:
data = df.iloc[0,:]
display(data)
# print(data['temp'])

gyroX        [499.30572509765625, 499.53460693359375, 0.198...
gyroY        [1.861572265625, 2.49481201171875, 1.022338867...
gyroZ        [1.24359130859375, 1.2359619140625, 1.06048583...
accelX       [1.1943359375, 1.201171875, 1.1845703125, 1.18...
accelY       [1.1455078125, 1.1591796875, 1.1630859375, 1.1...
accelZ       [3.68359375, 3.654296875, 3.6748046875, 3.6650...
optical                       [139.64, 138.36, 139.64, 140.28]
temp         [30.50567626953125, 30.50567626953125, 30.5056...
humidity         [71.3134765625, 71.3134765625, 71.3134765625]
moods                               [Atmospheric, Melancholic]
isSkipped                                                    0
activity                                                      
Name: 1, dtype: object

In [135]:
# Filtering defective data

defective_ids = []
for idx,row in df.iterrows():
    # defective if temp array only has -40 values
    # defective if any humidity values are above 99.99
    if len([k for k in row['temp'] if k==-40]) == len(row['temp']) or \
    len([k for k in row['humidity'] if k>99]) == len(row['humidity']):
        defective_ids.append(idx)
    
    # if only some values are defective, keep the row, but remove defective values
    # remove -40 temp values and >99.99 humidity values
    elif (-40 in row['temp']) or len([k for k in row['humidity'] if k>99]) or (0 in row['optical']):
        df.at[idx,'temp'] = [k for k in row['temp'] if k!=-40]
        df.at[idx,'humidity'] = [k for k in row['humidity'] if k<=99]
        
    # some gyro/accel data have 40 samples
    # take the last 30 samples for these rows
    for col in df.columns[:6]:
        if len(row[col]) > 30:
            df.at[idx,col] = row[col][-30:]

filtered_df = df[~df.index.isin(defective_ids)].copy() # .copy() to avoid warning
print('%d defective rows: ' % len(defective_ids),defective_ids)

23 defective rows:  [21, 22, 33, 214, 236, 238, 245, 246, 247, 248, 249, 250, 251, 252, 386, 387, 388, 389, 390, 391, 392, 393, 394]


In [136]:
motion_model_path = 'LocalhostPredicter/savedModel_stackedLSTM'
model = load_model(motion_model_path)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 30, 128)           69120     
_________________________________________________________________
lstm_4 (LSTM)                (None, 30, 64)            49408     
_________________________________________________________________
lstm_5 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               3300      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 303       
Total params: 134,547
Trainable params: 134,547
Non-trainable params: 0
_________________________________________________________________


In [137]:
# Add activity from motion recognition model
x = [list(k) for k in filtered_df.iloc[:,:6].values]
x = np.array(x) # (num_samples, 6, 30)
x = np.array([k.T for k in x]) # reshape as (num_samples, 30, 6)

pred = model.predict(x)

activity_cats = np.array(['Running', 'Walking', 'Working']).astype(str) # hardcoded categories from 'Physical Activity Classification.ipynb'
filtered_df['activity'] = activity_cats[np.argmax(pred, axis=1)]
#filtered_df['activity'] = np.argmax(pred, axis=1)
filtered_df = filtered_df.iloc[:,6:].copy() # drop gyro and accel columns

filtered_df.sample(5)

Unnamed: 0_level_0,optical,temp,humidity,moods,isSkipped,activity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
174,"[1074.88, 1230.4, 1072, 1370.88, 1511.04]","[33.38592529296875, 33.38592529296875, 33.3859...","[74.310302734375, 74.310302734375, 74.31030273...",[Celebratory],0,Running
161,"[268.32, 268.64, 269.28000000000003, 269.28000...","[34.080810546875, 34.080810546875, 34.08081054...","[59.002685546875, 59.002685546875, 59.00268554...","[Celebratory, Athletic]",0,Working
532,"[429.6, 402.08, 396.96]","[31.53289794921875, 31.53289794921875, 31.5429...","[77.4169921875, 77.4169921875, 77.4169921875]",[Passionate],0,Working
555,"[253.92, 253.6, 253.92, 253.6, 253.92, 254.24]","[33.32550048828125, 33.32550048828125, 33.3255...","[61.07177734375, 61.07177734375, 61.0717773437...",[Celebratory],0,Working
353,"[4471.04, 4464.64, 3384.32, 2551.04]","[32.6104736328125, 32.6104736328125, 32.610473...","[80.21240234375, 80.21240234375, 80.21240234375]","[Passionate, Warm]",0,Walking


In [138]:
# Obtain mean optical, temp and humidity values

for col in filtered_df.columns:
    if col not in ['moods','isSkipped','activity']:
        filtered_df[col] = filtered_df[col].apply(np.mean)
filtered_df.head()

Unnamed: 0_level_0,optical,temp,humidity,moods,isSkipped,activity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,139.48,30.505676,71.313477,"[Atmospheric, Melancholic]",0,Working
2,153.61,31.694031,67.053223,[Melancholic],0,Working
3,297.792,32.217712,65.85083,"[Passionate, Melancholic]",0,Working
4,125.96,32.429199,64.672852,[Elegant],1,Working
5,145.36,32.429199,64.672852,"[Passionate, Melancholic]",1,Working


In [139]:
# One-hot encoding for moods

moods = []
for k in filtered_df['moods']:
    moods += list(k)
moods = np.unique(np.array(moods)).tolist()
for mood in moods:
    mood_values = filtered_df['moods'].astype(str).str.contains(mood)
    filtered_df[mood] = mood_values
filtered_df.drop('moods', axis=1, inplace=True)
print('Added one-hot encoded columns for moods:')
filtered_df.head()

Added one-hot encoded columns for moods:


Unnamed: 0_level_0,optical,temp,humidity,isSkipped,activity,Aggressive,Athletic,Atmospheric,Celebratory,Elegant,Melancholic,Passionate,Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,139.48,30.505676,71.313477,0,Working,False,False,True,False,False,True,False,False
2,153.61,31.694031,67.053223,0,Working,False,False,False,False,False,True,False,False
3,297.792,32.217712,65.85083,0,Working,False,False,False,False,False,True,True,False
4,125.96,32.429199,64.672852,1,Working,False,False,False,False,True,False,False,False
5,145.36,32.429199,64.672852,1,Working,False,False,False,False,False,True,True,False


In [140]:
# Invert mood boolean values based on "isSkipped"

ANTI_MOODS = ['Not Aggressive', 'Not Athletic', 'Not Atmospheric', 'Not Celebratory',\
             'Not Melancholic', 'Not Elegant', 'Not Passionate', 'Not Warm']

filtered_df[ANTI_MOODS] = filtered_df[['Aggressive', 'Athletic', 'Atmospheric', 'Celebratory', \
                                       'Melancholic', 'Elegant', 'Passionate', 'Warm']].copy()

for mood in moods:
    filtered_df[mood] = np.where(filtered_df['isSkipped'], 0, filtered_df[mood]) # remove isSkipped from normal decisions

for mood in ANTI_MOODS:
    filtered_df[mood] = np.where(filtered_df['isSkipped'], filtered_df[mood], 0) # and non-isSkipped from skip decisions

filtered_df.drop('isSkipped', axis=1, inplace=True)

print('Invert mood values based on "isSkipped" boolean:')
filtered_df.head()

Invert mood values based on "isSkipped" boolean:


Unnamed: 0_level_0,optical,temp,humidity,activity,Aggressive,Athletic,Atmospheric,Celebratory,Elegant,Melancholic,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,139.48,30.505676,71.313477,Working,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
2,153.61,31.694031,67.053223,Working,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
3,297.792,32.217712,65.85083,Working,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0
4,125.96,32.429199,64.672852,Working,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
5,145.36,32.429199,64.672852,Working,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0


In [141]:
# One-hot encoding for activity
activities = []
for k in filtered_df['activity']:
    activities.append(k)
activities = np.unique(np.array(activities)).tolist()
for activity in activities:
    activity_values = filtered_df['activity'].astype(str).str.contains(activity)
    activity_values = activity_values.astype(int)
    filtered_df[activity] = activity_values
filtered_df.drop('activity', axis=1, inplace=True)

filtered_df.head()

Unnamed: 0_level_0,optical,temp,humidity,Aggressive,Athletic,Atmospheric,Celebratory,Elegant,Melancholic,Passionate,...,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm,Running,Walking,Working
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,139.48,30.505676,71.313477,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
2,153.61,31.694031,67.053223,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
3,297.792,32.217712,65.85083,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,1
4,125.96,32.429199,64.672852,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
5,145.36,32.429199,64.672852,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,1


# Split into train/test datasets

In [143]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# label encoding for activity
# le = LabelEncoder()
# filtered_df['activity'] = le.fit_transform(filtered_df['activity'].values)
# display(filtered_df.head())

# split into training & testing
x = filtered_df[['optical', 'temp', 'humidity','Working', 'Running', 'Walking']]
y = filtered_df[['Aggressive', 'Athletic', 'Atmospheric', 'Celebratory', \
                 'Melancholic', 'Elegant', 'Passionate', 'Warm', \
                 'Not Aggressive', 'Not Athletic', 'Not Atmospheric', 'Not Celebratory',\
                 'Not Melancholic', 'Not Elegant', 'Not Passionate', 'Not Warm']]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

print('Training (x, y): ', x_train.shape, y_train.shape)
print('Testing (x, y): ', x_test.shape, y_test.shape)

# create dfs for training and test data for easy prediction later
train_df = x_train.join(y_train)
test_df = x_test.join(y_test)

print('\nTrain:')
display(train_df.head())
print('Test:')
display(test_df.head())

Training (x, y):  (421, 6) (421, 16)
Testing (x, y):  (106, 6) (106, 16)

Train:


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,...,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
539,433.813333,31.586609,69.521077,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
196,10368.64,34.07074,69.805908,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
338,224.5,32.610474,80.212402,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
620,14.343333,28.15918,83.361816,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
186,2018.4,33.486633,82.092285,0,1,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


Test:


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,...,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
264,44.98,29.266968,43.444824,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
333,220.896,32.76825,79.789225,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
342,980.64,31.613464,67.248535,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
18,224.12,32.69104,62.652588,1,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
518,273.04,31.220703,79.919434,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


# SVM

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import svm

# model training
# since predict_proba only works with two classes at a time,
# eight svm models are trained for each mood and stored in the dictionary "svm_moods"
svms_std = {} # key:mood, value:svm model trained on that mood
params = {'C':[0.001, 0.01, 0.1, 1, 10], 'kernel':('linear', 'poly', 'rbf', 'sigmoid'), 'decision_function_shape':('ovr', 'ovo')}
for mood in y_train.columns:
    svm_pipe_std = Pipeline([('scaler', StandardScaler()), 
                       ('svm', GridSearchCV(svm.SVC(max_iter=100000, probability=True), params)), ])
    svm_pipe_std.fit(x_train, y_train.loc[:,mood].values)
    svms_std[mood] = svm_pipe_std



In [15]:
# Define functions for prediction and evaluation of SVM model

from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score

from sklearn.preprocessing import normalize

# Predict binary values or confidence scores for moods from sensor data
# For now, input_data is a Dataframe for convenience
def svm_predict(svms, input_data, prob=True):
    pred_df = input_data.copy()
    for mood,svm in svms.items():
        if prob:
            pred = svm.predict_proba(pred_df.iloc[:,:-16].values)
            pred_df[mood] = pred[:,1]
        else:
            pred = svm.predict(pred_df.iloc[:,:-16].values)
            pred_df[mood] = pred.reshape(-1)
    return pred_df
    
# Get loss (MSE) of predicted confidence scores
# Input: DataFrames of actual and predicted moods
def evaluate(df_actual, df_pred, df_pred_proba, clf):
    evaluation_df_dict = {
        'mood': [],
        'accuracy (%s)'%clf: [],
        'precision (%s)'%clf: [],
        'recall (%s)'%clf: [],
        'mse (%s)'%clf: [],
    }
    df_moods_actual = df_actual.iloc[:,-16:]
    df_moods_pred = df_pred.iloc[:,-16:]
    
    #mse
    df_moods_actual_normed = normalize(df_actual.iloc[:,-16:], axis=1)
    df_moods_pred_proba =  normalize(df_pred_proba.iloc[:,-16:], axis=1)
    currCol = 0
    
    for mood in df_moods_actual.columns:
        y_actual, y_pred = df_moods_actual[mood].values, df_moods_pred[mood].values
        evaluation_df_dict['mood'].append(mood)
        evaluation_df_dict['accuracy (%s)'%clf].append(accuracy_score(y_actual, y_pred))
        evaluation_df_dict['precision (%s)'%clf].append(precision_score(y_actual, y_pred, zero_division=0))
        evaluation_df_dict['recall (%s)'%clf].append(recall_score(y_actual, y_pred))
        
        evaluation_df_dict['mse (%s)'%clf].append(mean_squared_error(df_moods_actual_normed[currCol], df_moods_pred_proba[currCol]))
        currCol += 1
    evaluation_df = pd.DataFrame(evaluation_df_dict)
    evaluation_df.set_index('mood', inplace=True)
    return evaluation_df

In [16]:
train_pred_svm_df = svm_predict(svms_std, train_df, prob=False)
test_pred_svm_df = svm_predict(svms_std, test_df, prob=False)
train_prob_svm_df = svm_predict(svms_std, train_df, prob=True)
test_prob_svm_df = svm_predict(svms_std, test_df, prob=True)

# Show predicted values and confidence scores for moods for random samples
print('Test data mood labels (Actual):')
#display(test_df.head())

print('Test data mood labels (Predicted):')
#display(test_pred_svm_df.head())

print('Test data mood labels (Confidence Scores):')
display(test_prob_svm_df.head())

#Postprocess, normal categories - probabilities of skip categories

NORMAL_MOODS = ['Aggressive', 'Athletic', 'Atmospheric', 'Celebratory', \
                  'Melancholic', 'Elegant', 'Passionate', 'Warm']

test_prob_svm_df[NORMAL_MOODS] = test_prob_svm_df[NORMAL_MOODS].to_numpy() - test_prob_svm_df[ANTI_MOODS].to_numpy()
test_prob_svm_df[ANTI_MOODS] = 0

print('Test data mood labels (Confidence Scores, Postprocessed):')
display(test_prob_svm_df.head())

# Show evaluation of predictions
train_eval_svm_df = evaluate(train_df, train_pred_svm_df, train_prob_svm_df, 'svm,std')
test_eval_svm_df = evaluate(test_df, test_pred_svm_df, test_prob_svm_df, 'svm,std')
display(test_eval_svm_df)
display(test_eval_svm_df.describe())

Test data mood labels (Actual):
Test data mood labels (Predicted):
Test data mood labels (Confidence Scores):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,...,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
264,44.98,29.266968,43.444824,1,0,0,0.065722,0.184385,0.093029,0.086221,...,0.121724,0.11113,0.084653,0.071044,0.100664,0.031187,0.075728,0.066068,0.095445,0.085183
333,220.896,32.76825,79.789225,1,0,0,0.040301,0.073669,0.094144,0.092918,...,0.137514,0.116,0.095773,0.064006,0.098621,0.057132,0.076558,0.072298,0.100437,0.067647
342,980.64,31.613464,67.248535,0,0,1,0.04881,0.065104,0.086131,0.092995,...,0.152732,0.109528,0.080826,0.069901,0.093183,0.051272,0.076101,0.065492,0.097481,0.107796
18,224.12,32.69104,62.652588,1,0,0,0.053075,0.116637,0.094797,0.090191,...,0.126762,0.113734,0.095042,0.067034,0.099967,0.038567,0.076329,0.068105,0.098501,0.072345
518,273.04,31.220703,79.919434,1,0,0,0.039425,0.072207,0.09299,0.092447,...,0.140822,0.115956,0.091197,0.06415,0.098246,0.059637,0.076392,0.073385,0.099898,0.070441


Test data mood labels (Confidence Scores, Postprocessed):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,...,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
264,44.98,29.266968,43.444824,1,0,0,-0.018932,0.113341,-0.007635,0.055034,...,0.02628,0.025947,0,0,0,0,0,0,0,0
333,220.896,32.76825,79.789225,1,0,0,-0.055472,0.009662,-0.004477,0.035785,...,0.037077,0.048353,0,0,0,0,0,0,0,0
342,980.64,31.613464,67.248535,0,0,1,-0.032015,-0.004797,-0.007052,0.041723,...,0.055251,0.001732,0,0,0,0,0,0,0,0
18,224.12,32.69104,62.652588,1,0,0,-0.041967,0.049603,-0.00517,0.051624,...,0.028261,0.041389,0,0,0,0,0,0,0,0
518,273.04,31.220703,79.919434,1,0,0,-0.051772,0.008057,-0.005256,0.032809,...,0.040924,0.045514,0,0,0,0,0,0,0,0


Unnamed: 0_level_0,"accuracy (svm,std)","precision (svm,std)","recall (svm,std)","mse (svm,std)"
mood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aggressive,0.95283,0.0,0.0,0.100791
Athletic,0.90566,0.0,0.0,0.125
Atmospheric,0.858491,0.0,0.0,0.125
Celebratory,0.915094,0.0,0.0,0.063104
Melancholic,0.867925,0.0,0.0,0.113854
Elegant,0.858491,0.0,0.0,0.080244
Passionate,0.915094,0.0,0.0,0.125
Warm,0.90566,0.0,0.0,0.116092
Not Aggressive,0.933962,0.0,0.0,0.091611
Not Athletic,0.981132,0.0,0.0,0.053418


Unnamed: 0,"accuracy (svm,std)","precision (svm,std)","recall (svm,std)","mse (svm,std)"
count,16.0,16.0,16.0,16.0
mean,0.92158,0.0,0.0,0.112488
std,0.036573,0.0,0.0,0.033885
min,0.858491,0.0,0.0,0.053418
25%,0.90566,0.0,0.0,0.089749
50%,0.924528,0.0,0.0,0.118912
75%,0.945755,0.0,0.0,0.125
max,0.981132,0.0,0.0,0.185079


# Random Forest

In [144]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier


NORMAL_MOODS = ['Aggressive', 'Athletic', 'Atmospheric', 'Celebratory', \
                  'Melancholic', 'Elegant', 'Passionate', 'Warm']
ANTI_MOODS = ['Not Aggressive', 'Not Athletic', 'Not Atmospheric', 'Not Celebratory',\
             'Not Melancholic', 'Not Elegant', 'Not Passionate', 'Not Warm']

params = {
    'criterion':('gini', 'entropy'),
}

# About the same, can swap back to try. DecisionTreeClassifier much faster
rf = GridSearchCV(RandomForestClassifier(max_depth=7, random_state=0), params)
#rf = GridSearchCV(DecisionTreeClassifier(max_depth=7), params)
rf.fit(x_train, y_train)
rf.best_params_

{'criterion': 'gini'}

In [149]:
# Predict binary values or confidence scores for moods from sensor data
# For now, input_data is a Dataframe for convenience
# Returns confidence scores if prob=True else binary values
def rf_predict(rf, input_data, prob=True):
    pred_df = input_data.copy()
    data = input_data.iloc[:,:-16]
    target = input_data.iloc[:,-16:]
    if prob:
        pred = np.array(rf.predict_proba(data))
    else:
        pred = np.array(rf.predict(data))
    for i in range(len(target.columns)):
        mood = target.columns[i]
        if prob:
            pred_df[mood] = pred[i,:,1] # predict_proba returns shape (n_features, n_samples, probs)
        else:
            pred_df[mood] = pred[:,i] # predict returns shape (n_samples, n_features)
    return pred_df

In [150]:
# Predict binary values or confidence scores for moods from sensor data
# For now, input_data is a Dataframe for convenience
# Returns confidence scores if prob=True else binary values

train_pred_rf_df = rf_predict(rf, train_df, prob=False)
test_pred_rf_df = rf_predict(rf, test_df, prob=False)
train_prob_rf_df = rf_predict(rf, train_df, prob=True)
test_prob_rf_df = rf_predict(rf, test_df, prob=True)

# Show predicted values and confidence scores for moods for random samples
print('Test data mood labels (Actual):')
#test_df_copy = test_df.copy()
#test_df_copy['activity'] = [activity_cats[k] for k in test_df_copy['activity'].values]
# test_df_copy['activity'] = le.inverse_transform(test_df_copy['activity'].values) # convert activity values back to strings
display(test_df.head(10))

print('Test data mood labels (Predicted):')
# test_pred_rf_df['activity'] = le.inverse_transform(test_pred_rf_df['activity'].values)
#test_pred_rf_df['activity'] = [activity_cats[k] for k in test_pred_rf_df['activity'].values]
display(test_pred_rf_df.head(10))

print('Test data mood labels (Confidence Scores):')
#test_prob_rf_df['activity'] = [activity_cats[k] for k in test_prob_rf_df['activity'].values]
# test_pred_proba_rf_df['activity'] = le.inverse_transform(test_pred_proba_rf_df['activity'].values)
pd.set_option('display.max_columns', None)
display(test_prob_rf_df.head(10))


# Show evaluation of predictions
train_eval_rf_df = evaluate(train_df, train_pred_rf_df, train_prob_rf_df, 'rf')
test_eval_rf_df = evaluate(test_df, test_pred_rf_df, test_prob_rf_df, 'rf')
display(test_eval_rf_df)
display(test_eval_rf_df.describe())


test_prob_rf_df[NORMAL_MOODS] = test_prob_rf_df[NORMAL_MOODS].to_numpy() - test_prob_rf_df[ANTI_MOODS].to_numpy()
test_prob_rf_df.drop(ANTI_MOODS, axis=1, inplace=True)

# Normalize, just for visualization (cosine measure already does it)
from sklearn.preprocessing import normalize
test_prob_rf_df[NORMAL_MOODS] = normalize(test_prob_rf_df[NORMAL_MOODS].to_numpy(), axis=1)

print('Test data mood labels (Confidence Scores, Postprocessed):')
display(test_prob_rf_df.head(20))


Test data mood labels (Actual):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,Melancholic,Elegant,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
264,44.98,29.266968,43.444824,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
333,220.896,32.76825,79.789225,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0
342,980.64,31.613464,67.248535,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18,224.12,32.69104,62.652588,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0
518,273.04,31.220703,79.919434,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16,240.84,32.677612,62.8479,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
286,25.4,31.039429,38.787842,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
292,0.8,31.039429,38.787842,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
227,26.705,31.039429,76.367188,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
165,254.52,34.080811,59.002686,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Test data mood labels (Predicted):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,Melancholic,Elegant,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
264,44.98,29.266968,43.444824,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
333,220.896,32.76825,79.789225,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
342,980.64,31.613464,67.248535,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18,224.12,32.69104,62.652588,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
518,273.04,31.220703,79.919434,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
16,240.84,32.677612,62.8479,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
286,25.4,31.039429,38.787842,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
292,0.8,31.039429,38.787842,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
227,26.705,31.039429,76.367188,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
165,254.52,34.080811,59.002686,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Test data mood labels (Confidence Scores):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,Melancholic,Elegant,Passionate,Warm,Not Aggressive,Not Athletic,Not Atmospheric,Not Celebratory,Not Melancholic,Not Elegant,Not Passionate,Not Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
264,44.98,29.266968,43.444824,1,0,0,0.013362,0.094606,0.29144,0.060121,0.178652,0.129088,0.097,0.156553,0.181103,0.007558,0.029751,0.027229,0.02194,0.010959,0.088355,0.074654
333,220.896,32.76825,79.789225,1,0,0,0.036882,0.035671,0.126944,0.027376,0.232338,0.107727,0.13375,0.118231,0.100286,0.119124,0.018895,0.069249,0.032614,0.012659,0.038007,0.037464
342,980.64,31.613464,67.248535,0,0,1,0.067435,0.056202,0.014241,0.093173,0.013469,0.020523,0.121864,0.06295,0.056132,0.031326,0.261456,0.097836,0.068839,0.048758,0.110965,0.119354
18,224.12,32.69104,62.652588,1,0,0,0.06761,0.061275,0.099951,0.063051,0.127514,0.074191,0.110876,0.119485,0.165726,0.109047,0.041102,0.031966,0.066623,0.024999,0.071925,0.042322
518,273.04,31.220703,79.919434,1,0,0,0.024006,0.0973,0.056922,0.10844,0.068245,0.143015,0.153033,0.049591,0.069141,0.03468,0.154502,0.009945,0.085933,0.098809,0.086508,0.159856
16,240.84,32.677612,62.8479,1,0,0,0.049865,0.059699,0.089998,0.060995,0.132148,0.062573,0.10488,0.097278,0.178226,0.131776,0.042651,0.032347,0.066529,0.024687,0.072729,0.04063
286,25.4,31.039429,38.787842,1,0,0,0.016994,0.033723,0.219748,0.024876,0.518289,0.055113,0.329054,0.076668,0.076004,0.021246,0.066977,0.0601,0.026626,0.010964,0.091891,0.027772
292,0.8,31.039429,38.787842,1,0,0,0.00931,0.014938,0.051103,0.008408,0.100219,0.016979,0.064301,0.02632,0.478459,0.054567,0.131091,0.104269,0.040192,0.005989,0.48814,0.019697
227,26.705,31.039429,76.367188,0,0,1,0.024872,0.072788,0.022423,0.053281,0.016786,0.022956,0.08978,0.050431,0.092822,0.079839,0.310736,0.06134,0.039148,0.221999,0.223237,0.253
165,254.52,34.080811,59.002686,1,0,0,0.020069,0.207638,0.045499,0.166623,0.05886,0.085667,0.130093,0.102024,0.056998,0.042,0.020433,0.022713,0.072041,0.121182,0.034052,0.14904


Unnamed: 0_level_0,accuracy (rf),precision (rf),recall (rf),mse (rf)
mood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aggressive,0.943396,0.0,0.0,0.084075
Athletic,0.90566,0.0,0.0,0.093483
Atmospheric,0.849057,0.0,0.0,0.109354
Celebratory,0.90566,0.0,0.0,0.067346
Melancholic,0.858491,0.333333,0.071429,0.094223
Elegant,0.858491,0.0,0.0,0.084825
Passionate,0.915094,0.0,0.0,0.109414
Warm,0.90566,0.0,0.0,0.122052
Not Aggressive,0.933962,0.0,0.0,0.112853
Not Athletic,0.981132,0.0,0.0,0.060251


Unnamed: 0,accuracy (rf),precision (rf),recall (rf),mse (rf)
count,16.0,16.0,16.0,16.0
mean,0.917453,0.020833,0.004464,0.091926
std,0.038744,0.083333,0.017857,0.019665
min,0.849057,0.0,0.0,0.060251
25%,0.903302,0.0,0.0,0.074597
50%,0.919811,0.0,0.0,0.093853
75%,0.943396,0.0,0.0,0.109369
max,0.981132,0.333333,0.071429,0.122052


Test data mood labels (Confidence Scores, Postprocessed):


Unnamed: 0_level_0,optical,temp,humidity,Working,Running,Walking,Aggressive,Athletic,Atmospheric,Celebratory,Melancholic,Elegant,Passionate,Warm
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
264,44.98,29.266968,43.444824,1,0,0,-0.432281,0.224331,0.674392,0.084765,0.403858,0.304427,0.022278,0.211059
333,220.896,32.76825,79.789225,1,0,0,-0.212494,-0.279687,0.362121,-0.140335,0.669363,0.318615,0.320878,0.270687
342,980.64,31.613464,67.248535,0,0,1,0.043018,0.094668,-0.940812,-0.017749,-0.210719,-0.10745,0.041477,-0.214656
18,224.12,32.69104,62.652588,1,0,0,-0.567066,-0.276098,0.340122,0.179656,0.351924,0.284308,0.22512,0.445967
518,273.04,31.220703,79.919434,1,0,0,-0.215093,0.298419,-0.465024,0.469383,-0.084296,0.210667,0.317029,-0.525471
16,240.84,32.677612,62.8479,1,0,0,-0.688898,-0.386829,0.254106,0.153756,0.352169,0.203331,0.172551,0.304025
286,25.4,31.039429,38.787842,1,0,0,-0.102639,0.021702,0.265722,-0.061267,0.855181,0.076791,0.412513,0.085047
292,0.8,31.039429,38.787842,1,0,0,-0.723339,-0.061099,-0.123326,-0.1478,0.092551,0.016943,-0.65348,0.010211
227,26.705,31.039429,76.367188,0,0,1,-0.15721,-0.016314,-0.667043,-0.018645,-0.051736,-0.460506,-0.308766,-0.468664
165,254.52,34.080811,59.002686,1,0,0,-0.147119,0.659878,0.09986,0.573317,-0.052515,-0.141487,0.382613,-0.187306


In [151]:
evaluate_df = pd.concat([test_eval_svm_df, test_eval_rf_df], axis=1)
display(evaluate_df)
display(evaluate_df.describe())

Unnamed: 0_level_0,"accuracy (svm,std)","precision (svm,std)","recall (svm,std)","mse (svm,std)",accuracy (rf),precision (rf),recall (rf),mse (rf)
mood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Aggressive,0.95283,0.0,0.0,0.100791,0.943396,0.0,0.0,0.084075
Athletic,0.90566,0.0,0.0,0.125,0.90566,0.0,0.0,0.093483
Atmospheric,0.858491,0.0,0.0,0.125,0.849057,0.0,0.0,0.109354
Celebratory,0.915094,0.0,0.0,0.063104,0.90566,0.0,0.0,0.067346
Melancholic,0.867925,0.0,0.0,0.113854,0.858491,0.333333,0.071429,0.094223
Elegant,0.858491,0.0,0.0,0.080244,0.858491,0.0,0.0,0.084825
Passionate,0.915094,0.0,0.0,0.125,0.915094,0.0,0.0,0.109414
Warm,0.90566,0.0,0.0,0.116092,0.90566,0.0,0.0,0.122052
Not Aggressive,0.933962,0.0,0.0,0.091611,0.933962,0.0,0.0,0.112853
Not Athletic,0.981132,0.0,0.0,0.053418,0.981132,0.0,0.0,0.060251


Unnamed: 0,"accuracy (svm,std)","precision (svm,std)","recall (svm,std)","mse (svm,std)",accuracy (rf),precision (rf),recall (rf),mse (rf)
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,0.92158,0.0,0.0,0.112488,0.917453,0.020833,0.004464,0.091926
std,0.036573,0.0,0.0,0.033885,0.038744,0.083333,0.017857,0.019665
min,0.858491,0.0,0.0,0.053418,0.849057,0.0,0.0,0.060251
25%,0.90566,0.0,0.0,0.089749,0.903302,0.0,0.0,0.074597
50%,0.924528,0.0,0.0,0.118912,0.919811,0.0,0.0,0.093853
75%,0.945755,0.0,0.0,0.125,0.943396,0.0,0.0,0.109369
max,0.981132,0.0,0.0,0.185079,0.981132,0.333333,0.071429,0.122052


## Save Model

In [153]:
import pickle
pickle.dump(rf, open('LocalhostPredicter/RandomForest', 'wb'))