In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import matplotlib.image as mpimg

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_curve, average_precision_score, roc_curve, roc_auc_score, f1_score
from sklearn.model_selection import GridSearchCV


In [2]:
# Relative locations of the training and test CSV files.
train_data, test_data = "data/train.csv", "data/test.csv"

In [3]:
# Load both CSV files (training set first, then test set) into DataFrames.
train, test = map(pd.read_csv, (train_data, test_data))

Preprocessing (scaling) and splitting the data

In [4]:
# Standardize the feature columns (everything except 'label') to zero mean
# and unit variance. The scaler is fitted on the features only, so the same
# fitted `scaler` can later be applied to data that has no 'label' column.
#
# The previous extra `scaler.fit(train.values)` call was removed: it fitted on
# all columns including the label and was immediately overwritten by
# `fit_transform` below.
#
# NOTE(review): the scaler is fitted before the train/test split, so hold-out
# rows influence the scaling statistics; consider fitting on the training part
# only (e.g. via a Pipeline) if an unbiased hold-out estimate matters.
scaler = StandardScaler()

df_standardized = pd.DataFrame(scaler.fit_transform(train.drop(['label'], axis=1)))
y = train['label']

In [5]:
# Hold out 30% of the rows for evaluation.
# random_state makes the split (and every score computed from it) reproducible
# across kernel restarts; stratify keeps the label proportions the same in
# both parts.
x_train, x_test, y_train, y_test = tts(
    df_standardized, y, test_size=0.3, random_state=42, stratify=y
)

Building and checking the model

In [6]:
# Baseline logistic regression; `fit` returns the estimator itself, so the
# model is created and trained in one expression.
clf = LogisticRegression(max_iter=4000).fit(x_train, y_train)

# NOTE(review): `[:, 1]` keeps only the second probability column (the second
# entry of clf.classes_); confirm that a single-class score is what is wanted.
pred_test_clf = clf.predict_proba(x_test)[:, 1]

# Hard class predictions for the hold-out rows.
predictions = clf.predict(x_test)

In [7]:
# Predicted labels of the baseline model on the hold-out set (displayed below).
prediction = clf.predict(X=x_test)
prediction

array([5, 4, 5, ..., 0, 5, 3], dtype=int64)

In [8]:
# Mean accuracy of the baseline classifier on the hold-out set.
accuracy = clf.score(X=x_test, y=y_test)
accuracy

0.9007142857142857

Adding GridSearchCV

In [9]:
# Hyper-parameter grid for the logistic regression search.
params = dict(
    penalty=["l1", "l2"],  # values chosen so they do not conflict with the solvers below
    C=[100.0, 1.0],  # spread of regularization strengths
    solver=["liblinear", "saga"],  # both solvers work with the l1 and l2 penalties
    max_iter=[1000, 5000],  # spread of iteration limits
)

In [None]:
# Exhaustive 10-fold cross-validated search over `params`.
# - `scoring` must be passed by keyword: it is keyword-only in current
#   scikit-learn, so a positional 'roc_auc' raises TypeError.
# - The target has more than two classes, so the binary-only 'roc_auc' scorer
#   cannot be used; 'roc_auc_ovr' is its one-vs-rest multiclass counterpart.
Grid = GridSearchCV(clf, params, scoring='roc_auc_ovr', cv=10)
Grid.fit(x_train, y_train)  # fit every parameter combination, then refit the best one

In [None]:
# Probabilities from the best estimator found by the grid search.
# NOTE(review): `[:, 1]` keeps only the second probability column; confirm
# that a single-class score is what is wanted here.
pred_test_grid = Grid.predict_proba(X=x_test)[:, 1]

In [None]:
# Predicted labels of the tuned model on the hold-out set (displayed below).
prediction = Grid.predict(X=x_test)
prediction

In [None]:
# `Grid.score` reports the metric passed as `scoring` (ROC AUC), not accuracy.
# Score the refitted best estimator directly so this value really is the mean
# accuracy and is comparable with the baseline model's accuracy.
accuracy = Grid.best_estimator_.score(x_test, y_test)
accuracy

Results for submission

In [None]:
# The model was trained on standardized features, so the submission data must
# go through the same fitted scaler before prediction; predicting on the raw
# `test` frame would feed the model values on a different scale.
results = Grid.predict(scaler.transform(test))

# Wrap the predicted labels in a named Series for the submission file.
results = pd.Series(results, name="Label")

In [None]:
# Build the submission table: 1-based ImageId next to the predicted Label.
# The id range is derived from the number of predictions instead of a
# hard-coded 28000, so the two columns always line up row for row.
submission = pd.concat(
    [pd.Series(range(1, len(results) + 1), name="ImageId"), results],
    axis=1,
)

submission.to_csv("logisticregression_withgrid_digits_data.csv", index=False)