In [1]:
import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score, recall_score,classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [6]:
# Read the labelled TF-IDF table directly from disk and discard the
# 'Unnamed: 0' column in the same expression, so the cell yields the same
# frame however many times it is re-run.
tf_idf = (
    pd.read_csv("../data/sentiment_analysis/tf_idf/labelled_tf_idf.csv")
    .drop(columns='Unnamed: 0')
)
tf_idf.head(2)

Unnamed: 0,conversation_id,abajo,abatido,abel,abelio,abierto,aborrecido,abrazo,abrir,absoluto,...,yeah,yi,yo,yogurt,youtubir,zapatilla,zapato,zona,zono,primary_label
0,0H9OEQs318oQ55Rn0SvK103578,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.089276,0.0,0.0,0.0,0.0,0.0,0.0,negative
1,0H9OEQs318oQ55Rn0SvK913137,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,neutral


In [7]:
# Build the modelling inputs: y is the label column, X is every remaining
# column except the conversation identifier and the label itself.
non_feature_columns = ["conversation_id", "primary_label"]
y = tf_idf["primary_label"]
X = tf_idf.drop(columns=non_feature_columns)

In [9]:
from sklearn.linear_model import LogisticRegression

# Repeated hold-out evaluation of an elastic-net logistic regression.
# Every repetition draws a fresh 90/10 train/test split, standardises the
# features, tunes C with 10-fold cross-validation on the training part and
# scores the winning model on the held-out part. One row per repetition ends
# up in `elastic_net_results`.
N_REPEATS = 40
SEED = 42  # base seed; repetition i uses SEED + i so a re-run reproduces every split and fit

# Per-repetition records (one entry appended per loop iteration).
parameters_column = []
score_column = []
accuracy_column = []
precision_column = []
recall_column = []
intercept_column = []
coef_column = []

for i in range(N_REPEATS):
    # Divide data in training and test sets (seeded so the split is reproducible)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=SEED + i
    )

    # Fit the scaler on the training part only and reuse it for the test part.
    # NOTE(review): the scaler is fitted before the cross-validation below, so
    # each validation fold has already contributed to the scaling statistics;
    # wrapping scaler + model in a Pipeline would remove that leak.
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)

    # Validate parameters: only the inverse regularisation strength C is tuned.
    model = LogisticRegression(
        penalty="elasticnet",
        solver="saga",
        l1_ratio=0.8,
        class_weight="balanced",
        max_iter=400,
        random_state=SEED + i,  # saga is stochastic; seed it so coefficients are reproducible
    )
    parameters_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}

    clf = GridSearchCV(model, parameters_grid, cv=10, n_jobs=-1)
    clf.fit(X_train_std, y_train)

    # Best parameters. With the default refit=True, best_estimator_ has already
    # been refitted on the whole training set, so it is used as-is: fitting it
    # again would make the recorded coefficients differ from the model that
    # produces the predictions below.
    best_parameters = clf.best_params_
    best_score = clf.best_score_
    best_estimator = clf.best_estimator_

    intercept = best_estimator.intercept_
    coeficiente = best_estimator.coef_.flatten()

    intercept_column.append(intercept)
    coef_column.append(coeficiente)

    print(best_parameters)
    parameters_column.append(best_parameters)
    score_column.append(best_score)

    # Prediction on the held-out split.
    # For single-label multiclass targets the micro-averaged precision and
    # recall coincide with accuracy, which is why the three printed numbers
    # are always identical.
    y_pred = best_estimator.predict(X_test_std)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='micro')
    recall = recall_score(y_test, y_pred, average='micro')
    print(accuracy, precision, recall)

    accuracy_column.append(accuracy)
    precision_column.append(precision)
    recall_column.append(recall)

# Assemble the results once, from the accumulated per-repetition lists.
# (Previously the first two columns were filled with the scalar values left
# over from the last iteration instead of the collected lists.)
elastic_net_results = pd.DataFrame({
    "best_score": score_column,
    "best_parameters": parameters_column,
    "intercept": intercept_column,
    "coefs": coef_column,
    "test_accuracy": accuracy_column,
    "test_micro_precision": precision_column,
    "test_micro_recall": recall_column,
})



{'C': 0.1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 0.1}




0.6923076923076923 0.6923076923076923 0.6923076923076923




{'C': 0.1}




0.5897435897435898 0.5897435897435898 0.5897435897435898




{'C': 0.1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 1}




0.6410256410256411 0.6410256410256411 0.6410256410256411




{'C': 0.1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 0.1}




0.7948717948717948 0.7948717948717948 0.7948717948717948




{'C': 1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 0.1}




0.717948717948718 0.717948717948718 0.717948717948718




{'C': 1}




0.7692307692307693 0.7692307692307693 0.7692307692307693




{'C': 1}




0.7948717948717948 0.7948717948717948 0.7948717948717948




{'C': 1}




0.7948717948717948 0.7948717948717948 0.7948717948717948




{'C': 1}




0.6923076923076923 0.6923076923076923 0.6923076923076923


KeyboardInterrupt: ignored