# Logistic Regression Classifier

In [1]:
import numpy as np
import pandas as pd

np.random.seed(409782)
label = "rating_label"
random_state_const = 10987

In [2]:
train = pd.read_csv(f"train_final.csv")
test = pd.read_csv(f"test_final.csv")

train_y = train.pop(label)
train_X = train
test_y = test.pop(label)
test_X = test

In [3]:
from sklearn import metrics

def report(a, b):
    reports = [
                metrics.accuracy_score(a, b), 
                metrics.precision_score(a, b, average="macro"),
                metrics.recall_score(a, b, average="macro"),
                metrics.f1_score(a, b, average="macro")
               ]
    return reports

### Model Selection

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

logistic_classifier = LogisticRegression(random_state=random_state_const, max_iter=10000)
possible_hyperparams = {'penalty': ['l2', 'none']}

grid_search = GridSearchCV(logistic_classifier, possible_hyperparams, scoring=['accuracy','f1_macro'], refit=False)
grid_search.fit(train_X, train_y)

results = pd.DataFrame(grid_search.cv_results_)
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,params,split0_test_accuracy,split1_test_accuracy,split2_test_accuracy,split3_test_accuracy,...,std_test_accuracy,rank_test_accuracy,split0_test_f1_macro,split1_test_f1_macro,split2_test_f1_macro,split3_test_f1_macro,split4_test_f1_macro,mean_test_f1_macro,std_test_f1_macro,rank_test_f1_macro
0,38.27289,8.861873,0.004383,0.001646,l2,{'penalty': 'l2'},0.705691,0.704336,0.701626,0.700542,...,0.001898,1,0.327137,0.346354,0.31765,0.301085,0.336383,0.325722,0.015579,2
1,39.419557,8.650992,0.003099,0.001433,none,{'penalty': 'none'},0.70542,0.699187,0.702981,0.700813,...,0.002103,2,0.327674,0.34598,0.326158,0.304676,0.342058,0.329309,0.014559,1


### Model Validation

In [8]:
logis = LogisticRegression(random_state=10987, max_iter=10000, penalty='none')
logis.fit(train_X, train_y)
res = logis.predict(test_X)

In [9]:
report(pred_y, test_y)

[0.7049642315196185,
 0.3529648401755386,
 0.5819706074769507,
 0.3214513234996019]

### Model Prediction

In [14]:
predict_X = pd.read_csv("predict_final.csv")
predictions = pd.Series(logis.predict(predict_X))
csv_file = pd.DataFrame(predictions, columns=[label])
csv_file.insert(0, "id", predictions.index + 1)
csv_file.to_csv("logisitic_output.csv", index=False)