# Logistic Regression Classifier

In [1]:
import numpy as np
import pandas as pd

# Name of the target column that is split off from the train/test frames.
label = "rating_label"

# Seed handed to the scikit-learn estimators via their random_state argument.
random_state_const = 10987

# Seed NumPy's global random number generator.
np.random.seed(409782)

In [2]:
# Load the train and test sets. Plain string literals are used because there
# is nothing to interpolate into the file names.
train = pd.read_csv("train_final.csv")
test = pd.read_csv("test_final.csv")

# DataFrame.pop removes the target column in place and returns it, so each
# frame keeps only the remaining columns. train_X / test_X are the same
# objects as train / test, not copies.
train_y = train.pop(label)
train_X = train
test_y = test.pop(label)
test_X = test

In [3]:
from sklearn import metrics

def report(a, b):
    """Score two label sequences against each other.

    Both arguments are forwarded positionally, ``a`` first and ``b`` second,
    to the ``sklearn.metrics`` scoring functions.

    Returns:
        A list ``[accuracy, precision, recall, f1]`` where precision, recall
        and F1 are computed with ``average="macro"``.
    """
    macro_scorers = (
        metrics.precision_score,
        metrics.recall_score,
        metrics.f1_score,
    )
    # Accuracy takes no averaging mode, so it is computed separately.
    scores = [metrics.accuracy_score(a, b)]
    for scorer in macro_scorers:
        scores.append(scorer(a, b, average="macro"))
    return scores

### Model Selection

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression

# Stage 1: randomized search over the regularization settings (penalty and C).
logistic_classifier = LogisticRegression(
    random_state=random_state_const,
    max_iter=10000,
)

# Two penalty options crossed with seven evenly spaced C values between 0.1
# and 6, i.e. 14 possible combinations.
# NOTE(review): recent scikit-learn releases expect None instead of the string
# 'none' for an unpenalized model -- confirm against the installed version.
possible_hyperparams = {
    'penalty': ['l2', 'none'],
    'C': list(np.linspace(0.1, 6, 7)),
}

# n_iter=10 evaluates ten sampled combinations. With refit=False no final
# estimator is fitted, so only cv_results_ is inspected afterwards.
grid_search = RandomizedSearchCV(
    logistic_classifier,
    possible_hyperparams,
    n_iter=10,
    scoring=['accuracy', 'f1_macro'],
    refit=False,
    random_state=623,
)
grid_search.fit(train_X, train_y)

# Tabulate the cross-validation results; the bare name displays the table
# as the cell output.
results = pd.DataFrame(grid_search.cv_results_)
results



In [None]:
from sklearn.model_selection import GridSearchCV

# Stage 2: with penalty and C fixed, compare the two class_weight options
# exhaustively.
logistic_classifier = LogisticRegression(
    penalty='l2',
    C=5.017,
    max_iter=10000,
    random_state=random_state_const,
)
possible_hyperparams = {'class_weight': [None, 'balanced']}

# refit=False: only the cross-validation scores are needed here.
grid_search = GridSearchCV(
    logistic_classifier,
    possible_hyperparams,
    scoring=['accuracy', 'f1_macro'],
    refit=False,
)
grid_search.fit(train_X, train_y)

# Tabulate the cross-validation results and display them as the cell output.
results = pd.DataFrame(grid_search.cv_results_)
results


### Model Validation

In [None]:
# Final model. The seed comes from the shared random_state_const (same value
# as the literal it replaces) so every estimator in the notebook is seeded
# from one place.
logis = LogisticRegression(
    random_state=random_state_const,
    max_iter=10000,
    penalty='l2',
    C=5.017,
)
logis.fit(train_X, train_y)

# Predicted labels for the held-out test set; reused by the evaluation cells.
res = logis.predict(test_X)

In [None]:
# Test-set scores as [accuracy, macro precision, macro recall, macro F1];
# the true labels go in as `a` and the model's predictions as `b`.
report(a=test_y, b=res)

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Raw confusion matrix: rows are the actual labels, columns the predicted ones.
cm = confusion_matrix(test_y, res)
# Row-normalize so each row shows how that actual class was spread over the
# predicted classes (each row sums to 1).
cmn = cm.astype('float') / cm.sum(axis=1, keepdims=True)
# Wrap the normalized matrix in a DataFrame so the heatmap gets labelled axes.
# NOTE(review): the tick labels assume exactly three classes ordered 3, 4, 5;
# confirm against the label values present in the data.
cm_df = pd.DataFrame(cmn,
                     index=['3', '4', '5'],
                     columns=['3', '4', '5'])
# Plot the normalized confusion matrix as an annotated heatmap.
plt.figure(figsize=(5, 4))
sns.heatmap(cm_df, annot=True)
plt.title('Confusion Matrix')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()

### Model Prediction

In [None]:
# Predict labels for the unlabeled set with the fitted model.
predict_X = pd.read_csv("predict_final.csv")
predictions = pd.Series(logis.predict(predict_X))

# Build the output table: a 1-based "id" column (row position + 1) followed
# by the predicted label column.
csv_file = predictions.to_frame(name=label)
csv_file.insert(0, "id", predictions.index + 1)

# Write without the DataFrame index; "id" already identifies each row.
csv_file.to_csv("logistic_output.csv", index=False)