In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 
import numpy as np

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle

In [3]:
df = pd.read_csv('/content/drive/MyDrive/emotion_ds/out.csv')

In [4]:
df.isnull().values.any()

False

In [5]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Happy,0.69462,0.501455,-3.254367,0.999469,0.744245,0.396398,-3.179939,0.998429,0.775747,...,-0.036378,0.0,0.829803,0.345981,0.006385,0.0,0.837651,0.335166,0.007548,0.0
1,Happy,0.694588,0.504175,-2.534289,0.999432,0.743602,0.397457,-2.490653,0.998371,0.7748,...,-0.040013,0.0,0.828951,0.34654,0.000428,0.0,0.836443,0.336774,0.001195,0.0
2,Happy,0.693829,0.505565,-2.322195,0.999407,0.741747,0.39761,-2.29119,0.99831,0.772492,...,-0.040898,0.0,0.828537,0.348232,-0.0002,0.0,0.835846,0.338181,0.00052,0.0
3,Happy,0.693892,0.505048,-2.359528,0.999396,0.741726,0.395579,-2.318423,0.998286,0.772314,...,-0.040329,0.0,0.827169,0.349005,0.000364,0.0,0.834496,0.339008,0.001077,0.0
4,Happy,0.693893,0.503199,-2.337915,0.999399,0.741634,0.393342,-2.288379,0.99831,0.771971,...,-0.039535,0.0,0.825541,0.348934,0.001577,0.0,0.832743,0.339025,0.002383,0.0


In [6]:
df['class'].value_counts()

Sad      5414
Happy    3935
Name: class, dtype: int64

In [9]:
df.rename(columns={'class': 'Class'},
          inplace=True, errors='raise')

In [10]:
import pandas as pd
import matplotlib.pyplot as plt

from imblearn.over_sampling import SMOTE

# for reproducibility purposes
seed = 100
# SMOTE number of neighbors
k = 8

#df = pd.read_csv('diabetes.csv', encoding='utf-8', engine='python')
# make a new df made of all the columns, except the target class
X = df.loc[:, df.columns != 'Class']
y = df.Class
sm = SMOTE(sampling_strategy='auto', k_neighbors=k, random_state=seed)
X_res, y_res = sm.fit_resample(X, y)



df_balance = pd.concat([pd.DataFrame(X_res), pd.DataFrame(y_res)], axis=1)

In [11]:
df_balance

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,v499,x500,y500,z500,v500,x501,y501,z501,v501,Class
0,0.694620,0.501455,-3.254367,0.999469,0.744245,0.396398,-3.179939,0.998429,0.775747,0.395627,...,0.0,0.829803,0.345981,0.006385,0.0,0.837651,0.335166,0.007548,0.0,Happy
1,0.694588,0.504175,-2.534289,0.999432,0.743602,0.397457,-2.490653,0.998371,0.774800,0.396850,...,0.0,0.828951,0.346540,0.000428,0.0,0.836443,0.336774,0.001195,0.0,Happy
2,0.693829,0.505565,-2.322195,0.999407,0.741747,0.397610,-2.291190,0.998310,0.772492,0.396936,...,0.0,0.828537,0.348232,-0.000200,0.0,0.835846,0.338181,0.000520,0.0,Happy
3,0.693892,0.505048,-2.359528,0.999396,0.741726,0.395579,-2.318423,0.998286,0.772314,0.394706,...,0.0,0.827169,0.349005,0.000364,0.0,0.834496,0.339008,0.001077,0.0,Happy
4,0.693893,0.503199,-2.337915,0.999399,0.741634,0.393342,-2.288379,0.998310,0.771971,0.392735,...,0.0,0.825541,0.348934,0.001577,0.0,0.832743,0.339025,0.002383,0.0,Happy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10823,0.457946,0.595637,-0.770073,0.999965,0.480690,0.532435,-0.735769,0.999921,0.500043,0.527417,...,0.0,0.528228,0.518510,0.001351,0.0,0.531970,0.511513,0.001626,0.0,Happy
10824,0.466500,0.488495,-1.600426,0.999635,0.498015,0.414636,-1.585081,0.999580,0.524501,0.408450,...,0.0,0.529522,0.378469,-0.008979,0.0,0.535759,0.368874,-0.010007,0.0,Happy
10825,0.393680,0.532382,-1.847011,0.999418,0.451638,0.389278,-1.889270,0.998696,0.491755,0.381427,...,0.0,0.527458,0.356188,-0.057962,0.0,0.538045,0.339365,-0.061357,0.0,Happy
10826,0.470256,0.353320,-1.160354,0.999891,0.498946,0.298835,-1.103488,0.999797,0.521836,0.297651,...,0.0,0.538829,0.275081,0.019981,0.0,0.545849,0.264189,0.020396,0.0,Happy


In [12]:
df_balance['Class'].value_counts()

Happy    5414
Sad      5414
Name: Class, dtype: int64

In [13]:
df_balance.to_csv('df_smote.csv', index=False, encoding='utf-8')

In [14]:
X = df_balance.drop('Class', axis=1) # features
y = df_balance['Class'] # target value

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1234)

In [16]:
logisticRegr = LogisticRegression()

In [17]:
logisticRegr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression()

In [18]:
score = logisticRegr.score(X_test, y_test)
print(score)

0.948291782086796


In [19]:
y_pred = logisticRegr.predict(X_test)

In [20]:
from sklearn.metrics import precision_score

In [21]:
precision_score(y_test, y_pred, pos_label='Happy')

0.9499072356215214

In [22]:
from sklearn.metrics import recall_score

In [23]:
recall_score(y_test, y_pred, pos_label='Happy')

0.9463955637707948

In [24]:
ridge_class =  RidgeClassifier()

In [25]:
ridge_class.fit(X_train, y_train)

RidgeClassifier()

In [26]:
score_ridge = ridge_class.score(X_test, y_test)
print(score_ridge)

0.9695290858725761


In [27]:
y_pred = ridge_class.predict(X_test)

In [28]:
precision_score(y_test, y_pred, pos_label='Happy')

0.9756554307116105

In [29]:
recall_score(y_test, y_pred, pos_label='Happy')

0.9630314232902033

In [30]:
randomf= RandomForestClassifier()

In [31]:
randomf.fit(X_train, y_train)

RandomForestClassifier()

In [32]:
score_randomf = randomf.score(X_test, y_test)
print(score_randomf)

0.997229916897507


In [33]:
y_pred = randomf.predict(X_test)

In [34]:
precision_score(y_test, y_pred, pos_label='Happy')

0.996309963099631

In [35]:
recall_score(y_test, y_pred, pos_label='Happy')

0.9981515711645101

In [36]:
gb = GradientBoostingClassifier()

In [37]:
gb.fit(X_train, y_train)

GradientBoostingClassifier()

In [38]:
score_gb = gb.score(X_test, y_test)
print(score_gb)

0.9889196675900277


In [39]:
y_pred = gb.predict(X_test)

In [40]:
precision_score(y_test, y_pred, pos_label='Happy')

0.9907235621521335

In [41]:
recall_score(y_test, y_pred, pos_label='Happy')

0.9870609981515711