In [1]:
import pandas as pd
import numpy as np
from pysentimiento import create_analyzer
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression


In [2]:
dfo = pd.read_csv('data/obama_cleaned.csv')
dfr = pd.read_csv('data/romney_cleaned.csv')
df = pd.concat([dfo, dfr], ignore_index = True)
df.info()
pd.set_option('display.max_rows', None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tweets  11271 non-null  object
 1   class   11271 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 176.2+ KB


In [3]:
df = df.astype({'tweets' : 'string'})

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tweets  11271 non-null  string
 1   class   11271 non-null  int64 
dtypes: int64(1), string(1)
memory usage: 176.2 KB


# Pre-trained model: BERTweet

In [5]:

analyzer = create_analyzer(task="sentiment", lang="en")


In [6]:
def get_prob_dictionary(p):
    start_index = p.find("{") + 1
    end_index = p.rfind("}")
    d = p[start_index:end_index]
    scores = d.split(', ')
    probs = dict()
    for i in range(0, len(scores)):
        k, v = scores[i].split(': ')
        probs[k] = float(v)
    return probs


In [7]:

pos = list()
neg = list()
neu = list()

for t in df['tweets']:
    p = get_prob_dictionary(str(analyzer.predict(t)))
    pos.append(p['POS'])
    neg.append(p['NEG'])
    neu.append(p['NEU'])


df['pos'] = pos
df['neg'] = neg
df['neu'] = neu

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11271 entries, 0 to 11270
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tweets  11271 non-null  string 
 1   class   11271 non-null  int64  
 2   pos     11271 non-null  float64
 3   neg     11271 non-null  float64
 4   neu     11271 non-null  float64
dtypes: float64(3), int64(1), string(1)
memory usage: 440.4 KB


# Predict label using maximum probability


In [9]:
preds = list()
for idx, row in df.iterrows():
    if row['pos'] >= row['neu'] and row['pos'] >= row['neg']:
        preds.append(1)
    elif row['neg'] >= row['neu'] and row['neg'] > row['pos']:
        preds.append(-1)
    elif row['neu'] > row['pos'] and row['neu'] > row['neg']:
        preds.append(0)
df['pred'] = preds

In [10]:
acc = accuracy_score(df['class'], df['pred'])
prec = precision_score(df['class'], df['pred'], average = None, zero_division = np.nan)
rec = recall_score(df['class'], df['pred'], average = None)
f1 = f1_score(df['class'], df['pred'], average = None)
print("Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1:", f1)

Accuracy: 0.6247005589566144
Precision: [0.68694362 0.51322085 0.68081761]
Recall: [0.76213992 0.55728739 0.47167756]
F1: [0.72259071 0.53434714 0.55727156]


# Predict label using ML models

In [11]:
def test_model(model, parameters, X, y, n_splits):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state = 27)
    avg_accuracies = list()
    avg_precisions = list()
    avg_recalls = list()
    avg_f1s = list()
    confs = list()
    for conf in ParameterGrid(parameters):
        print('Testing', conf)
        accuracies = list()
        precisions = list()
        recalls = list()
        f1s = list()
        i = 1
        for train_index, test_index in kf.split(X):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            try:
                model.set_params(**conf)
                model.fit(X_train, y_train)
            except:
                print('Skipped', conf)
                break
            print('\tFold', i, 'of', n_splits)
            y_pred = model.predict(X_test)
            accuracies.append(accuracy_score(y_test, y_pred))
            precisions.append(precision_score(y_test, y_pred, average=None, zero_division = np.nan))
            recalls.append(recall_score(y_test, y_pred, average=None, zero_division = np.nan))
            f1s.append(f1_score(y_test, y_pred, average=None, zero_division = np.nan))
            if i == 1:
                confs.append(conf)
            i = i + 1
            
    
        if len(accuracies) != 0:  
            avg_accuracies.append(sum(accuracies)/len(accuracies))
            avg_precisions.append((sum(precisions)/len(precisions)) if len(precisions) > 0 else np.nan)
            avg_recalls.append(sum(recalls)/len(recalls) if len(recalls) > 0 else np.nan)
            avg_f1s.append(sum(f1s)/len(f1s) if len(f1s) > 0 else np.nan)
        
    results = {'Parameters' : confs,
              'Accuracy' : avg_accuracies,
              'Precision' : avg_precisions,
              'Recall' : avg_recalls,
              'F1' : avg_f1s}
    
    return pd.DataFrame.from_dict(results), model.classes_
    
        
        

In [12]:
X = df[['pos', 'neg', 'neu']]
y = df['class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
y.value_counts()

-1    4860
 0    3657
 1    2754
Name: class, dtype: int64

In [13]:
params_svm = {'C' : (0.1, 1, 10, 100),
             'kernel' : ('rbf', 'poly', 'linear'),
             'degree' : (3, 5, 7),
             'gamma' : ('scale', 'auto')}
svm = SVC()
svm_results, cl_svm = test_model(svm, params_svm, X_train, y_train, 4)

Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'scale', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': 'scale', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': 'scale', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'degree': 5, 'gamma': '

	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'rbf'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'poly'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'degree': 7, 'gamma': 'auto', 'kernel': 'linear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4


In [14]:
svm_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.631877,"[0.6686929905644312, 0.5554441125154859, 0.638...","[0.7928513982678294, 0.4567506423202562, 0.580...","[0.7254189492718474, 0.5012033590387954, 0.608..."
1,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.634428,"[0.6755285119136286, 0.5652423971441656, 0.624...","[0.784509377788667, 0.4558548617835057, 0.6074...","[0.7256841263300873, 0.5043653099620091, 0.615..."
2,"{'C': 0.1, 'degree': 3, 'gamma': 'scale', 'ker...",0.626664,"[0.6863953052872127, 0.5138464613995553, 0.678...","[0.7685634039468109, 0.5371871822407619, 0.495...","[0.7251089487835226, 0.5252119669085269, 0.572..."
3,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.629104,"[0.6665153903977636, 0.5398755005855435, 0.657...","[0.7969531783470971, 0.4696845904984206, 0.545...","[0.7258648430199313, 0.502325898639411, 0.5959..."
4,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.614241,"[0.6910711922240975, 0.4853684614250898, 0.725...","[0.7549987231588444, 0.5930342772374732, 0.395...","[0.721528914000479, 0.5337116465062113, 0.5114..."
5,"{'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kern...",0.626664,"[0.6863953052872127, 0.5138464613995553, 0.678...","[0.7685634039468109, 0.5371871822407619, 0.495...","[0.7251089487835226, 0.5252119669085269, 0.572..."
6,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.631877,"[0.6686929905644312, 0.5554441125154859, 0.638...","[0.7928513982678294, 0.4567506423202562, 0.580...","[0.7254189492718474, 0.5012033590387954, 0.608..."
7,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.63232,"[0.6625186977057487, 0.5664688116787795, 0.635...","[0.8044906897662671, 0.4403706758193363, 0.583...","[0.7265273646315414, 0.4953110975972635, 0.608..."
8,"{'C': 0.1, 'degree': 5, 'gamma': 'scale', 'ker...",0.626664,"[0.6863953052872127, 0.5138464613995553, 0.678...","[0.7685634039468109, 0.5371871822407619, 0.495...","[0.7251089487835226, 0.5252119669085269, 0.572..."
9,"{'C': 0.1, 'degree': 5, 'gamma': 'auto', 'kern...",0.629104,"[0.6665153903977636, 0.5398755005855435, 0.657...","[0.7969531783470971, 0.4696845904984206, 0.545...","[0.7258648430199313, 0.502325898639411, 0.5959..."


In [15]:
cl_svm

array([-1,  0,  1])

In [16]:
params_rf = {'n_estimators' : (50, 100, 150),
            'criterion' : ('entropy', 'gini'),
            'max_features' : (None, 'sqrt')}
rf = RandomForestClassifier()
rf_results, cl_rf = test_model(rf, params_rf, X_train, y_train, 4)

Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': None, 'n_estimators': 150}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 150}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'gini', 'max_features': None, 'n_estimators': 50}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'criterion': 'gini', 'max_features': None, 'n_estimators': 100}
	Fold 1 of 4
	Fold 2 of 4
	Fold

In [17]:
rf_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'criterion': 'entropy', 'max_features': None,...",0.564774,"[0.6324966919692259, 0.47248353065442894, 0.54...","[0.6976670504534285, 0.4351762411918952, 0.503...","[0.6631561779384035, 0.4529236446220571, 0.523..."
1,"{'criterion': 'entropy', 'max_features': None,...",0.568101,"[0.6351283062276725, 0.4771228613920509, 0.548...","[0.6947298225837697, 0.4358546821081476, 0.521...","[0.663374277883136, 0.4554751359226386, 0.5339..."
2,"{'criterion': 'entropy', 'max_features': None,...",0.568323,"[0.6358951130894158, 0.4798927944357187, 0.541...","[0.7014332577566729, 0.43348595425152536, 0.51...","[0.6668267095203396, 0.4554304410786146, 0.526..."
3,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.570985,"[0.6342234164043281, 0.48318268250169427, 0.55...","[0.7063973669468316, 0.4369062329891243, 0.510...","[0.668249678304811, 0.45878288537136014, 0.529..."
4,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.571872,"[0.6372735740130554, 0.482893036084588, 0.5515...","[0.7031613448528341, 0.4417067636500517, 0.513...","[0.6683870659965626, 0.4613443786917838, 0.531..."
5,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.569321,"[0.6352048085737477, 0.4812966927439434, 0.547...","[0.699219978365538, 0.44065701969698073, 0.511...","[0.6655455141578948, 0.46001672440069824, 0.52..."
6,"{'criterion': 'gini', 'max_features': None, 'n...",0.565883,"[0.6301613045364894, 0.47908281866939484, 0.54...","[0.7008219240399354, 0.4289895925259583, 0.509...","[0.663472674544726, 0.4526032237736892, 0.5250..."
7,"{'criterion': 'gini', 'max_features': None, 'n...",0.565994,"[0.6338134571336904, 0.47279356658437893, 0.54...","[0.7008102696319793, 0.43062891740044473, 0.50...","[0.6654993298234069, 0.45070250013181967, 0.52..."
8,"{'criterion': 'gini', 'max_features': None, 'n...",0.566881,"[0.6329185520361991, 0.4779620953923276, 0.543...","[0.6998760898213647, 0.4320477903434402, 0.511...","[0.6645022113285434, 0.45380636760775217, 0.52..."
9,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.570763,"[0.6382300418686259, 0.48181803021463576, 0.54...","[0.7023039869161782, 0.44131875848219704, 0.51...","[0.6686053801266961, 0.46065366895776794, 0.52..."


In [18]:
cl_rf

array([-1,  0,  1])

In [19]:
params_knn = {'n_neighbors' : (1, 3, 5, 7, 9),
             'metric' : ('minkowski', 'euclidean', 'manhattan', 'cosine')}
knn = KNeighborsClassifier()
knn_results, cl_knn = test_model(knn, params_knn, X_train, y_train, 4)

Testing {'metric': 'minkowski', 'n_neighbors': 1}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 3}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 5}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 7}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'minkowski', 'n_neighbors': 9}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 1}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 3}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 5}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 7}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'metric': 'euclidean', 'n_neighbors': 9}
	Fold 1 of 4
	Fold 2 of 4
	Fold 

In [20]:
knn_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'metric': 'minkowski', 'n_neighbors': 1}",0.530945,"[0.6186520373042088, 0.44239426009181637, 0.48...","[0.6282450237158113, 0.4265040128634734, 0.498...","[0.6231231251209672, 0.43420668397385975, 0.49..."
1,"{'metric': 'minkowski', 'n_neighbors': 3}",0.563332,"[0.6002096175672755, 0.4817356545270763, 0.575...","[0.7294948843739112, 0.39577564955399536, 0.49...","[0.6584329801287998, 0.4343213893152641, 0.530..."
2,"{'metric': 'minkowski', 'n_neighbors': 5}",0.586069,"[0.6239039689723223, 0.4944539721092107, 0.615...","[0.7589815827769899, 0.4267174545290324, 0.493...","[0.6845915821544277, 0.45790311482753737, 0.54..."
3,"{'metric': 'minkowski', 'n_neighbors': 7}",0.597493,"[0.6413084276809116, 0.5091527645386387, 0.607...","[0.7623427598128775, 0.4316031375903987, 0.527...","[0.6963325223337133, 0.46686662725216194, 0.56..."
4,"{'metric': 'minkowski', 'n_neighbors': 9}",0.603039,"[0.6409614494733952, 0.517881541069953, 0.6154...","[0.7746502523371508, 0.4205637191191942, 0.542...","[0.7014156976271575, 0.463965474449575, 0.5761..."
5,"{'metric': 'euclidean', 'n_neighbors': 1}",0.530945,"[0.6186520373042088, 0.44239426009181637, 0.48...","[0.6282450237158113, 0.4265040128634734, 0.498...","[0.6231231251209672, 0.43420668397385975, 0.49..."
6,"{'metric': 'euclidean', 'n_neighbors': 3}",0.563332,"[0.6002096175672755, 0.4817356545270763, 0.575...","[0.7294948843739112, 0.39577564955399536, 0.49...","[0.6584329801287998, 0.4343213893152641, 0.530..."
7,"{'metric': 'euclidean', 'n_neighbors': 5}",0.586069,"[0.6239039689723223, 0.4944539721092107, 0.615...","[0.7589815827769899, 0.4267174545290324, 0.493...","[0.6845915821544277, 0.45790311482753737, 0.54..."
8,"{'metric': 'euclidean', 'n_neighbors': 7}",0.597493,"[0.6413084276809116, 0.5091527645386387, 0.607...","[0.7623427598128775, 0.4316031375903987, 0.527...","[0.6963325223337133, 0.46686662725216194, 0.56..."
9,"{'metric': 'euclidean', 'n_neighbors': 9}",0.603039,"[0.6409614494733952, 0.517881541069953, 0.6154...","[0.7746502523371508, 0.4205637191191942, 0.542...","[0.7014156976271575, 0.463965474449575, 0.5761..."


In [21]:
cl_knn

array([-1,  0,  1])

In [22]:
params_lr = {'penalty' : ('l1', 'l2', 'elasticnet', None),
            'C' : (0.1, 1, 10, 100),
            'solver' : ('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'),
            'max_iter' : [500]}

lr = LogisticRegression()
lr_results, cl_lr = test_model(lr, params_lr, X_train, y_train, 4)

Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_i



	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'liblinear'}
Skipped {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'liblinear'}
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cg'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 0.1, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 



	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 1, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_



Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'liblinear'}
Skipped {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'liblinear'}
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cg'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 10, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'lbfgs'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cg'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'newton-cholesky'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'sag'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l1', 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'liblinear'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'newton-cg'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'sag'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'l2', 'solver': 'saga'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver': 'lbfgs'}
Skipped {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver': 'lbfgs'}
Testing {'C': 100, 'max_iter': 500, 'penalty': 'elasticnet', 'solver



	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'newton-cholesky'}
	Fold 1 of 4
	Fold 2 of 4
	Fold 3 of 4
	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'sag'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4




	Fold 4 of 4
Testing {'C': 100, 'max_iter': 500, 'penalty': None, 'solver': 'saga'}




	Fold 1 of 4




	Fold 2 of 4




	Fold 3 of 4
	Fold 4 of 4




In [23]:
lr_results

Unnamed: 0,Parameters,Accuracy,Precision,Recall,F1
0,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l1', '...",0.627551,"[0.6750696153833136, 0.5275574181824365, 0.663...","[0.7801936176243507, 0.5008823497411132, 0.526...","[0.7237759577129088, 0.5138597572519745, 0.587..."
1,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l1', '...",0.627107,"[0.6783524858802982, 0.5223507754806074, 0.668...","[0.777345938310797, 0.5104774554090362, 0.5174...","[0.724423177843031, 0.5163292397486589, 0.5830..."
2,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627107,"[0.6797475936281707, 0.5225407480339946, 0.664...","[0.7763170833327142, 0.5115049849149413, 0.517...","[0.7247644526628306, 0.5169473304797638, 0.582..."
3,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627551,"[0.6755981405901934, 0.5272470477449311, 0.664...","[0.7799287871158762, 0.5029285559613843, 0.524...","[0.7239695032760053, 0.5147900214800234, 0.585..."
4,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627107,"[0.6797475936281707, 0.5225407480339946, 0.664...","[0.7763170833327142, 0.5115049849149413, 0.517...","[0.7247644526628306, 0.5169473304797638, 0.582..."
5,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627551,"[0.6755981405901934, 0.5272470477449311, 0.664...","[0.7799287871158762, 0.5029285559613843, 0.524...","[0.7239695032760053, 0.5147900214800234, 0.585..."
6,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627107,"[0.6797475936281707, 0.5225407480339946, 0.664...","[0.7763170833327142, 0.5115049849149413, 0.517...","[0.7247644526628306, 0.5169473304797638, 0.582..."
7,"{'C': 0.1, 'max_iter': 500, 'penalty': 'l2', '...",0.627107,"[0.6797475936281707, 0.5225407480339946, 0.664...","[0.7763170833327142, 0.5115049849149413, 0.517...","[0.7247644526628306, 0.5169473304797638, 0.582..."
8,"{'C': 0.1, 'max_iter': 500, 'penalty': None, '...",0.626996,"[0.6797562141376886, 0.5225627849653551, 0.664...","[0.7752908315044427, 0.511152180152207, 0.5196...","[0.7243269717551024, 0.516778647897884, 0.5829..."
9,"{'C': 0.1, 'max_iter': 500, 'penalty': None, '...",0.627995,"[0.6803731133652076, 0.524289026344348, 0.6653...","[0.7752908315044427, 0.5135728061326374, 0.520...","[0.7246760873885503, 0.5188557888934499, 0.583..."


In [24]:
cl_lr

array([-1,  0,  1])