In [11]:
import pandas as pd
from sklearn.linear_model import LogisticRegression, RidgeClassifier, PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [12]:
# Part 1: load the dataset and separate the target column from the features.
data = pd.read_csv("aus000400.csv")
# Every column except "emotion" is kept as a model input.
inputs = data.drop(columns="emotion")
# The "emotion" column is the class label the models are trained to predict.
emotions = data["emotion"]

In [13]:
# Running totals per model label: label -> (sum of validation accuracies, number of runs).
accuracies = dict()

In [14]:
# Part 2: carve the data into train / validation / test subsets.
# A fixed seed makes both splits reproducible after a kernel restart.
SPLIT_SEED = 42

# Hold out 15% as the test set, preserving the emotion class proportions.
data_in, test_in, data_out, test_out = train_test_split(
    inputs, emotions, test_size=0.15, stratify=emotions, random_state=SPLIT_SEED
)
# Split the remaining 85%: 0.85 * 0.2 = 17% validation, 68% training.
# Stratified like the first split so validation keeps the same class mix.
train_in, val_in, train_out, val_out = train_test_split(
    data_in, data_out, test_size=0.2, stratify=data_out, random_state=SPLIT_SEED
)

In [15]:
# Part 3: fit three linear baseline classifiers and score each on the validation split.
def _record_validation_accuracy(label, estimator):
    """Fit ``estimator`` on the training split and log its validation accuracy.

    The accuracy is added to the running ``(accuracy_sum, run_count)`` tuple
    stored under ``label`` in the notebook-level ``accuracies`` dict, so calling
    this again for the same label accumulates another run instead of
    overwriting the previous one.

    Parameters
    ----------
    label : str
        Key under which the score is accumulated in ``accuracies``.
    estimator : object
        Unfitted classifier exposing ``fit(X=..., y=...)`` and ``predict(X=...)``.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` computed on the validation split.
    """
    estimator.fit(X=train_in, y=train_out)
    val_predictions = estimator.predict(X=val_in)
    val_accuracy = accuracy_score(y_true=val_out, y_pred=val_predictions)
    accuracy_sum, run_count = accuracies.get(label, (0, 0))
    accuracies[label] = (accuracy_sum + val_accuracy, run_count + 1)
    return val_predictions, val_accuracy


model = LogisticRegression()
predictions, accuracy = _record_validation_accuracy("LR", model)

model2 = RidgeClassifier()
predictions2, accuracy2 = _record_validation_accuracy("RC", model2)

# PassiveAggressiveClassifier shuffles the training data; seed it so the
# score does not change between re-runs on the same split.
model3 = PassiveAggressiveClassifier(random_state=42)
predictions3, accuracy3 = _record_validation_accuracy("PAC", model3)

In [16]:
# Part 4: tune an SVM with a cross-validated grid search on the training split.
model = SVC()
# Each kernel is paired only with the hyper-parameters it uses, so that no
# configuration is cross-validated twice:
#   * "linear" does not use gamma, so it appears once without it;
#   * "poly" at SVC's default degree (3) is already part of the degree sweep.
search_grid = [
    {"kernel": ["linear"]},
    {"kernel": ["rbf", "sigmoid"], "gamma": ["auto", "scale"]},
    {"kernel": ["poly"], "degree": [1, 2, 3, 4, 5], "gamma": ["auto", "scale"]},
]
SVM_model = GridSearchCV(estimator=model, param_grid=search_grid)
SVM_model.fit(X=train_in, y=train_out)

# Score the refitted best estimator on the validation split.
predictions_SVM = SVM_model.predict(X=val_in)
SVM_accuracy = accuracy_score(y_true=val_out, y_pred=predictions_SVM)

# The winning parameter dict, rendered as a string, is the label in `accuracies`.
best_params_label = str(SVM_model.best_params_)
acc_tuple = accuracies.get(best_params_label, (0, 0))
accuracies[best_params_label] = (acc_tuple[0] + SVM_accuracy, acc_tuple[1] + 1)

# part 5
print(accuracies)

{'LR': (0.4577603143418468, 1), 'RC': (0.4656188605108055, 1), 'PAC': (0.32612966601178783, 1), "{'degree': 2, 'gamma': 'scale', 'kernel': 'poly'}": (0.449901768172888, 1)}


In [17]:
# Report each label's mean validation accuracy over the runs recorded so far.
# The loop variables are named so they do not overwrite the notebook-level
# `model` estimator, and the averaged value is called what it is (accuracy,
# not precision).
for model_label, (accuracy_sum, run_count) in accuracies.items():
    mean_accuracy = accuracy_sum / run_count
    print("accuracy of model ", model_label, "in ", run_count, " times: ", mean_accuracy)

accuracy of model  LR in  1  times:  0.4577603143418468
accuracy of model  RC in  1  times:  0.4656188605108055
accuracy of model  PAC in  1  times:  0.32612966601178783
accuracy of model  {'degree': 2, 'gamma': 'scale', 'kernel': 'poly'} in  1  times:  0.449901768172888


In [18]:
# Part 6: fit the SVM configuration that the grid-search output above reported as best.
model = SVC()
# Single-candidate grid: GridSearchCV cross-validates and refits only this
# one configuration (polynomial kernel, degree 2, gamma="scale").
optimal_params = [
    {"kernel": ["poly"], "degree": [2], "gamma": ["scale"]}
]
optimal_SVM_model = GridSearchCV(estimator=model, param_grid=optimal_params)

# A fixed seed makes both splits reproducible after a kernel restart.
SPLIT_SEED = 42

# Hold out 15% as the test set, preserving the emotion class proportions.
data_in, test_in, data_out, test_out = train_test_split(
    inputs, emotions, test_size=0.15, stratify=emotions, random_state=SPLIT_SEED
)
# Split the remaining 85%: 0.85 * 0.2 = 17% validation, 68% training.
# Stratified like the first split so validation keeps the same class mix.
train_in, val_in, train_out, val_out = train_test_split(
    data_in, data_out, test_size=0.2, stratify=data_out, random_state=SPLIT_SEED
)
optimal_SVM_model.fit(X=train_in, y=train_out)



In [19]:
# Compare the three linear classifiers over repeated random splits.
# `accuracies` maps label -> (sum of validation accuracies, number of runs);
# it is reset here so this cell gives the same totals every time it is run.
N_REPETITIONS = 50
accuracies = {}

for number in range(N_REPETITIONS):
    print(number)

    # part 2
    # The repetition index seeds the splits: every repetition sees a different
    # partition, yet the whole experiment is reproducible.
    data_in, test_in, data_out, test_out = train_test_split(
        inputs, emotions, test_size=0.15, stratify=emotions, random_state=number
    )  # test = 15%, data = 85%
    # val is 0.85 * 0.2 = 17%, train is 68%; stratified like the first split.
    train_in, val_in, train_out, val_out = train_test_split(
        data_in, data_out, test_size=0.2, stratify=data_out, random_state=number
    )

    # part 3
    model = LogisticRegression()
    model2 = RidgeClassifier()
    # PassiveAggressiveClassifier shuffles its training data, so seed it too.
    model3 = PassiveAggressiveClassifier(random_state=number)

    # Fit each candidate on the training split and add its validation
    # accuracy to the running total kept under its label.
    for label, estimator in (("LR", model), ("RC", model2), ("PAC", model3)):
        estimator.fit(X=train_in, y=train_out)
        val_predictions = estimator.predict(X=val_in)
        val_accuracy = accuracy_score(y_true=val_out, y_pred=val_predictions)
        accuracy_sum, run_count = accuracies.get(label, (0, 0))
        accuracies[label] = (accuracy_sum + val_accuracy, run_count + 1)

    # part 4 (the SVM grid search) is disabled in this loop; only the three
    # linear models are compared here.

    # part 5
    print(accuracies)

0
{'LR': (0.4381139489194499, 1), 'RC': (0.4223968565815324, 1), 'PAC': (0.28487229862475444, 1)}
1
{'LR': (0.9037328094302554, 2), 'RC': (0.8703339882121808, 2), 'PAC': (0.6424361493123772, 2)}
2
{'LR': (1.3850687622789783, 3), 'RC': (1.3418467583497053, 3), 'PAC': (1.0353634577603144, 3)}
3
{'LR': (1.848722986247544, 4), 'RC': (1.7956777996070727, 4), 'PAC': (1.3929273084479372, 4)}
4
{'LR': (2.326129666011788, 5), 'RC': (2.2416502946954813, 5), 'PAC': (1.7210216110019647, 5)}
5
{'LR': (2.764243614931238, 6), 'RC': (2.673870333988212, 6), 'PAC': (2.0471512770137528, 6)}
6
{'LR': (3.2357563850687625, 7), 'RC': (3.1159135559921416, 7), 'PAC': (2.3123772102161104, 7)}
7
{'LR': (3.705304518664047, 8), 'RC': (3.5618860510805503, 8), 'PAC': (2.6247544204322204, 8)}
8
{'LR': (4.143418467583497, 9), 'RC': (3.9941060903732812, 9), 'PAC': (3.007858546168959, 9)}
9
{'LR': (4.609037328094303, 10), 'RC': (4.432220039292731, 10), 'PAC': (3.339882121807466, 10)}
10
{'LR': (5.023575638506876, 11), '

In [20]:
# Report each label's mean validation accuracy over all recorded repetitions.
# The loop variables are named so they do not overwrite the notebook-level
# `model` / `number` variables, and the averaged value is called what it is
# (accuracy, not precision).
for model_label, (accuracy_sum, run_count) in accuracies.items():
    mean_accuracy = accuracy_sum / run_count
    print("accuracy of model ", model_label, "in ", run_count, " times: ", mean_accuracy)

accuracy of model  LR in  50  times:  0.46113948919449904
accuracy of model  RC in  50  times:  0.4426326129666013
accuracy of model  PAC in  50  times:  0.3317092337917486
