In [12]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression


In [35]:
dfo = pd.read_csv('data/obama_cleaned.csv')
dfr = pd.read_csv('data/romney_cleaned.csv')
df = pd.concat([dfo, dfr], ignore_index = True)
df.info()
pd.set_option('display.max_rows', None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tweets  11271 non-null  object
 1   class   11271 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 176.2+ KB


In [36]:
df = df.astype({'tweets' : 'string'})

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tweets  11271 non-null  string
 1   class   11271 non-null  int64 
dtypes: int64(1), string(1)
memory usage: 176.2 KB


# Pre-trained model: VADER

In [38]:
analyzer = SentimentIntensityAnalyzer()
pos = list()
neg = list()
neu = list()
for t in df['tweets']:
    p = analyzer.polarity_scores(t)
    pos.append(p['pos'])
    neg.append(p['neg'])
    neu.append(p['neu'])



In [39]:
df['pos'] = pos
df['neg'] = neg
df['neu'] = neu

In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tweets  11271 non-null  string 
 1   class   11271 non-null  int64  
 2   pos     11271 non-null  float64
 3   neg     11271 non-null  float64
 4   neu     11271 non-null  float64
dtypes: float64(3), int64(1), string(1)
memory usage: 440.4 KB


# Predict label using maximum probability


In [41]:
preds = list()
for idx, row in df.iterrows():
    if row['pos'] >= row['neu'] and row['pos'] >= row['neg']:
        preds.append(1)
    elif row['neg'] >= row['neu'] and row['neg'] > row['pos']:
        preds.append(-1)
    elif row['neu'] > row['pos'] and row['neu'] > row['neg']:
        preds.append(0)
df['pred'] = preds

In [42]:
acc = accuracy_score(df['class'], df['pred'])
prec = precision_score(df['class'], df['pred'], average = None, zero_division = np.nan)
rec = recall_score(df['class'], df['pred'], average = None)
f1 = f1_score(df['class'], df['pred'], average = None)
print("Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1:", f1)

Accuracy: 0.3469967172389318
Precision: [0.67553191 0.33095987 0.76208178]
Recall: [0.02613169 0.97867104 0.07443718]
F1: [0.05031696 0.49464446 0.13562686]


# Predict label using ML models

In [10]:
def test_model(model, parameters, X, y, n_splits):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state = 27)
    avg_accuracies = list()
    avg_precisions = list()
    avg_recalls = list()
    avg_f1s = list()
    confs = list()
    for conf in ParameterGrid(parameters):
        print('Testing', conf)
        accuracies = list()
        precisions = list()
        recalls = list()
        f1s = list()
        i = 1
        for train_index, test_index in kf.split(X):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            try:
                model.set_params(**conf)
                model.fit(X_train, y_train)
            except:
                print('Skipped', conf)
                break
            print('\tFold', i, 'of', n_splits)
            y_pred = model.predict(X_test)
            accuracies.append(accuracy_score(y_test, y_pred))
            precisions.append(precision_score(y_test, y_pred, average=None, zero_division = np.nan))
            recalls.append(recall_score(y_test, y_pred, average=None, zero_division = np.nan))
            f1s.append(f1_score(y_test, y_pred, average=None, zero_division = np.nan))
            if i == 1:
                confs.append(conf)
            i = i + 1
            
    
        if len(accuracies) != 0:  
            avg_accuracies.append(sum(accuracies)/len(accuracies))
            avg_precisions.append((sum(precisions)/len(precisions)) if len(precisions) > 0 else np.nan)
            avg_recalls.append(sum(recalls)/len(recalls) if len(recalls) > 0 else np.nan)
            avg_f1s.append(sum(f1s)/len(f1s) if len(f1s) > 0 else np.nan)
        
    results = {'Parameters' : confs,
              'Accuracy' : avg_accuracies,
              'Precision' : avg_precisions,
              'Recall' : avg_recalls,
              'F1' : avg_f1s}
    
    return pd.DataFrame.from_dict(results), model.classes_
    
        
        

In [11]:
X = df[['pos', 'neg', 'neu']]
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
y.value_counts()

-1    4860
 0    3657
 1    2754
Name: class, dtype: int64

In [12]:
params_svm = {'C' : (0.1, 1, 10, 100),
             'kernel' : ('rbf', 'poly', 'linear'),
             'degree' : (3, 5, 7),
             'gamma' : ('scale', 'auto')}
svm = SVC()
svm_results, cl_svm = test_model(svm, params_svm, X_train, y_train, 4)

Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': 'scale', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': 'scale', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': '

	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4


In [13]:
svm_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.495563,"[0.5212038628878527, 0.4223974219613062, 0.602...","[0.6914630499096732, 0.43869701141406, 0.22709...","[0.5941847338552682, 0.430069849501397, 0.3298..."
1,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.490794,"[0.5418180647637059, 0.4052622067716687, 0.598...","[0.6177928120575628, 0.5135794171743846, 0.238...","[0.5763039641512815, 0.45191276634721844, 0.34..."
2,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.471939,"[0.4592762211269936, nan, 0.587310026654289]","[0.9589315534246666, 0.0, 0.23615406204279474]","[0.6210445830395928, nan, 0.3365618982318441]"
3,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.465506,"[0.45234219964399053, nan, 0.6524877869835191]","[0.9789788486307383, 0.0, 0.17496119449830208]","[0.6187403292071448, nan, 0.2756805550806031]"
4,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.431566,"[0.43156610470275064, nan, nan]","[1.0, 0.0, 0.0]","[0.6029081726894239, nan, nan]"
5,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.471939,"[0.4592762211269936, nan, 0.587310026654289]","[0.9589315534246666, 0.0, 0.23615406204279474]","[0.6210445830395928, nan, 0.3365618982318441]"
6,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.495563,"[0.5212038628878527, 0.4223974219613062, 0.602...","[0.6914630499096732, 0.43869701141406, 0.22709...","[0.5941847338552682, 0.430069849501397, 0.3298..."
7,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.499778,"[0.5420693454557777, 0.4200907277818867, 0.539...","[0.6564747045003172, 0.4472755021798268, 0.293...","[0.5935310578805456, 0.4325806635587954, 0.380..."
8,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.471939,"[0.4592762211269936, nan, 0.587310026654289]","[0.9589315534246666, 0.0, 0.23615406204279474]","[0.6210445830395928, nan, 0.3365618982318441]"
9,"{'C': 0.1, 'degree': 5, 'gamma': 'auto', 'kern...",0.465506,"[0.45234219964399053, nan, 0.6524877869835191]","[0.9789788486307383, 0.0, 0.17496119449830208]","[0.6187403292071448, nan, 0.2756805550806031]"


In [14]:
cl_svm

array([-1,  0,  1])

In [15]:
params_rf = {'n_estimators' : (50, 100, 150),
            'criterion' : ('entropy', 'gini'),
            'max_features' : (None, 'sqrt')}
rf = RandomForestClassifier()
rf_results, cl_rf = test_model(rf, params_rf, X_train, y_train, 4)

Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 150}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 150}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'gini', 'max_features': None, 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'gini', 'max_features': None, 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold

In [16]:
rf_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'criterion': 'entropy', 'max_features': None,...",0.458407,"[0.5605625415429494, 0.38206935325165575, 0.45...","[0.4811291762261844, 0.568307418241445, 0.2747...","[0.5177637178808192, 0.45693036956357985, 0.34..."
1,"{'criterion': 'entropy', 'max_features': None,...",0.460958,"[0.5645991870966689, 0.38364600662857046, 0.45...","[0.48420170341124213, 0.5641541189355117, 0.28...","[0.5212879770053767, 0.4566769533861395, 0.348..."
2,"{'criterion': 'entropy', 'max_features': None,...",0.459627,"[0.5590515622251038, 0.38087191438483387, 0.45...","[0.4888437922444734, 0.5614023817410363, 0.275...","[0.5215600458072054, 0.45380446199417024, 0.34..."
3,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.460626,"[0.5578747027640936, 0.3838296341454986, 0.460...","[0.48470455805323626, 0.5651530224220145, 0.28...","[0.5186959659360749, 0.4571606549298697, 0.349..."
4,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.458407,"[0.5581498342989086, 0.38290170980902277, 0.44...","[0.4857198310025866, 0.5648495451918549, 0.271...","[0.5194041905936726, 0.4563752448899015, 0.337..."
5,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.45752,"[0.5590111311406494, 0.38212522478321365, 0.44...","[0.48058822994880135, 0.5659059457393453, 0.27...","[0.5168351257565571, 0.45615448729467656, 0.34..."
6,"{'criterion': 'gini', 'max_features': None, 'n...",0.454969,"[0.5574188663687172, 0.3790274877392068, 0.443...","[0.4808422248709504, 0.563853599086536, 0.2669...","[0.5162464384711618, 0.4533154150519478, 0.332..."
7,"{'criterion': 'gini', 'max_features': None, 'n...",0.458185,"[0.5629236751500005, 0.3795068502760241, 0.454...","[0.4783147766352933, 0.5661968003189374, 0.281...","[0.5171451203946877, 0.4544056267506301, 0.347..."
8,"{'criterion': 'gini', 'max_features': None, 'n...",0.458075,"[0.5611380547217102, 0.3825427515295795, 0.443...","[0.4816149396559565, 0.565835549664328, 0.2757...","[0.5183130850305477, 0.45646310382031463, 0.33..."
9,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.459516,"[0.5615153391856658, 0.38289780038232446, 0.45...","[0.4849696027835322, 0.567982595392312, 0.2730...","[0.5204016130298084, 0.45738413958188795, 0.34..."


In [17]:
cl_rf

array([-1,  0,  1])

In [18]:
params_knn = {'n_neighbors' : (1, 3, 5, 7, 9),
             'metric' : ('minkowski', 'euclidean', 'manhattan', 'cosine')}
knn = KNeighborsClassifier()
knn_results, cl_knn = test_model(knn, params_knn, X_train, y_train, 4)

Testing {'metric': 'minkowski', 'n_neighbors': 1}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 3}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 5}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 7}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 9}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 1}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 3}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 5}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 7}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 9}
	Fold 1 of 4
	Fold 2 of 4
	Fold 

In [19]:
knn_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'metric': 'minkowski', 'n_neighbors': 1}",0.411823,"[0.5288011243485464, 0.342776037574362, 0.3567...","[0.4691672064022998, 0.3929695064115487, 0.342...","[0.48595519968536793, 0.345725892303888, 0.329..."
1,"{'metric': 'minkowski', 'n_neighbors': 3}",0.450532,"[0.5013048694473518, 0.3506085385953639, 0.472...","[0.6576200619672004, 0.33469447889821546, 0.24...","[0.5569148440368162, 0.3137237406058025, 0.321..."
2,"{'metric': 'minkowski', 'n_neighbors': 5}",0.457742,"[0.5025764834091004, 0.34522601670504494, 0.52...","[0.6767954243840266, 0.34160655791627953, 0.22...","[0.5663506491313783, 0.32029839901222457, 0.31..."
3,"{'metric': 'minkowski', 'n_neighbors': 7}",0.460736,"[0.5054117747871308, 0.3598505380453758, 0.502...","[0.673631713007792, 0.3343666018890008, 0.2535...","[0.5663058512738357, 0.31863804565751686, 0.33..."
4,"{'metric': 'minkowski', 'n_neighbors': 9}",0.465284,"[0.5099822234877045, 0.34539132370635317, 0.51...","[0.702261827900526, 0.3130485842993334, 0.2534...","[0.5793577132831587, 0.2990646013889473, 0.340..."
5,"{'metric': 'euclidean', 'n_neighbors': 1}",0.411823,"[0.5288011243485464, 0.342776037574362, 0.3567...","[0.4691672064022998, 0.3929695064115487, 0.342...","[0.48595519968536793, 0.345725892303888, 0.329..."
6,"{'metric': 'euclidean', 'n_neighbors': 3}",0.450532,"[0.5013048694473518, 0.3506085385953639, 0.472...","[0.6576200619672004, 0.33469447889821546, 0.24...","[0.5569148440368162, 0.3137237406058025, 0.321..."
7,"{'metric': 'euclidean', 'n_neighbors': 5}",0.457742,"[0.5025764834091004, 0.34522601670504494, 0.52...","[0.6767954243840266, 0.34160655791627953, 0.22...","[0.5663506491313783, 0.32029839901222457, 0.31..."
8,"{'metric': 'euclidean', 'n_neighbors': 7}",0.460736,"[0.5054117747871308, 0.3598505380453758, 0.502...","[0.673631713007792, 0.3343666018890008, 0.2535...","[0.5663058512738357, 0.31863804565751686, 0.33..."
9,"{'metric': 'euclidean', 'n_neighbors': 9}",0.465284,"[0.5099822234877045, 0.34539132370635317, 0.51...","[0.702261827900526, 0.3130485842993334, 0.2534...","[0.5793577132831587, 0.2990646013889473, 0.340..."


In [20]:
cl_knn

array([-1,  0,  1])

In [21]:
params_lr = {'penalty' : ('l1', 'l2', 'elasticnet', None),
            'C' : (0.1, 1, 10, 100),
            'solver' : ('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'),
            'max_iter' : [500]}

lr = LogisticRegression()
lr_results, cl_lr = test_model(lr, params_lr, X_train, y_train, 4)

Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_i



	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 



	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_



	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}




	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'newton-cg'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'sag'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver': 'lbfgs'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver': 'lbfgs'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver



	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4
	Fold 4 of 4




In [22]:
lr_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l1', '...",0.472937,"[0.47751223568782786, nan, 0.4824690261622007]","[0.8704073029330592, 0.03679631672341854, 0.34...","[0.616068117340512, nan, 0.4041682948200644]"
1,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l1', '...",0.485248,"[0.5401394127153538, nan, 0.48763136046214944]","[0.7151740862901966, 0.28528607740635936, 0.34...","[0.5920775389615235, nan, 0.4040979502971927]"
2,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.475377,"[0.4750452308910534, nan, 0.5068303491162047]","[0.8902552066690578, 0.035813630041724616, 0.3...","[0.6184373786057753, nan, 0.395787581860259]"
3,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.476264,"[0.46950404299980086, nan, 0.5143793553300746]","[0.9237439203956603, 0.0, 0.31546708954319086]","[0.6225396930453587, nan, 0.3909475670141892]"
4,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.475377,"[0.4750452308910534, nan, 0.5068303491162047]","[0.8902552066690578, 0.035813630041724616, 0.3...","[0.6184373786057753, nan, 0.395787581860259]"
5,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.476154,"[0.46940499544395903, nan, 0.5144916010305548]","[0.9245188076788884, 0.0, 0.3136791913176284]","[0.6226278008514351, nan, 0.3895952357972349]"
6,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.475377,"[0.4750452308910534, nan, 0.5068303491162047]","[0.8902552066690578, 0.035813630041724616, 0.3...","[0.6184373786057753, nan, 0.395787581860259]"
7,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.475377,"[0.4750452308910534, nan, 0.5068303491162047]","[0.8902552066690578, 0.035813630041724616, 0.3...","[0.6184373786057753, nan, 0.395787581860259]"
8,"{'C': 0.1, 'max_iter': 500, 'penalty': None, '...",0.492347,"[0.6086790160530714, 0.40336323261111034, 0.46...","[0.5275860593068826, 0.5488243077730788, 0.356...","[0.56515639552671, 0.4648920785070749, 0.40495..."
9,"{'C': 0.1, 'max_iter': 500, 'penalty': None, '...",0.492347,"[0.6090012435271221, 0.4035153486128056, 0.467...","[0.5270763562439881, 0.5491720129191149, 0.357...","[0.5649956556799896, 0.46511694591694935, 0.40..."


In [23]:
cl_lr

array([-1,  0,  1])