# Logistic Regression Modeling

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# 1.1 Load Data

In [3]:
train = pd.read_csv("train_engineered.csv")
val = pd.read_csv("val_engineered.csv")

# 1.2 PreProcessing

In [5]:
X_train, y_train = train.drop("blueWins", axis=1), train.blueWins
X_val, y_val = val.drop("blueWins", axis=1), val.blueWins

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

pca = PCA(n_components=5, random_state=42)
X_train = pca.fit_transform(X_train)
X_val = pca.transform(X_val)

# 2.1 Hyper Parameter Tuning

In [6]:
clf = LogisticRegression()
params = {"C":[0.1, 1, 10]}

gs = GridSearchCV(clf, params)
gs.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='warn',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='warn',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='warn', n_jobs=None, param_grid={'C': [0.1, 1, 10]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [8]:
model = gs.best_estimator_

# 3.1 Evaluation

In [9]:
model.fit(X_train, y_train)
train_pred = model.predict(X_train)
val_pred = model.predict(X_val)

print("training score:", accuracy_score(y_train, train_pred))
print("validation score:", accuracy_score(y_val, val_pred))

training score: 0.7282217422606192
validation score: 0.7231516459794927


