In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [3]:
digits = pd.read_csv("digits.csv", header=None)
digits = digits.sample(frac=1, random_state=200).reset_index(drop=True)

In [4]:
digits.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
325,0,0,0,9,15,16,9,0,0,0,...,0,0,0,0,12,4,0,0,0,9
366,0,0,0,10,14,0,0,0,0,0,...,0,0,0,0,8,16,2,0,0,4
1130,0,1,13,16,16,12,1,0,0,12,...,0,0,1,14,16,16,11,1,0,3
791,0,1,8,16,16,16,10,0,0,8,...,0,0,1,13,12,0,0,0,0,5
461,0,0,10,13,2,0,0,0,0,0,...,0,0,0,8,14,8,11,14,1,2


In [5]:
# dimensions of dataset
digits.shape

(1797, 65)

In [6]:
print("Sample size:", digits.shape[0])
print("Number of features in dataset:", digits.shape[1])

Sample size: 1797
Number of features in dataset: 65


In [7]:
# global variables
sample_size = digits.shape[0]
num_of_features = digits.shape[1] - 1
k = 10
train_test_split_ratio = 0.2

In [8]:
X = digits.iloc[:,:-1] # features
y = digits.iloc[:,-1]  # labels

In [9]:
X.shape

(1797, 64)

---

In [11]:
# tr_x = X.iloc[0:500,:]
# tr_y = y.iloc[0:500]
# te_x = X.iloc[9:10,:]
# te_y = y.iloc[9:10]

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [13]:
# Train-test with scikit LogReg
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=train_test_split_ratio, random_state=0)

logreg = LogisticRegression()
logreg.fit(X_train, y_train)
print("Train:", logreg.score(X_train, y_train))
print("Test:", logreg.score(X_test, y_test))

Train: 1.0
Test: 0.9666666666666667


In [14]:
# Kfold with scikit LogReg
from sklearn.model_selection import KFold

kf = KFold(n_splits=k)
kf.get_n_splits(X)

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)
    print("Train:", logreg.score(X_train, y_train))
    print("Test:", logreg.score(X_test, y_test))
    print("===============================")

Train: 1.0
Test: 0.9055555555555556
Train: 1.0
Test: 0.9777777777777777
Train: 1.0
Test: 0.8777777777777778
Train: 1.0
Test: 0.9555555555555556
Train: 1.0
Test: 0.9444444444444444
Train: 1.0
Test: 0.9666666666666667
Train: 1.0
Test: 0.9555555555555556
Train: 1.0
Test: 0.9329608938547486
Train: 1.0
Test: 0.8770949720670391
Train: 1.0
Test: 0.9217877094972067


---

In [None]:
# def fit_logistic_regressor(X, y):
#     return w

In [None]:
# def get_score(w, X, y):
#     return 0

---

In [None]:
def sigmoid(X, weight):
    z = np.dot(X, weight)
    return 1 / (1 + np.exp(-z))

In [None]:
def log_likelihood(x, y, weights):
    z = np.dot(x, weights)
    ll = np.sum( y*z - np.log(1 + np.exp(z)) )
    return ll

In [None]:
def gradient_ascent(X, h, y):
    return np.dot(X.T, y - h)

def update_weight_mle(weight, learning_rate, gradient):
    return weight - learning_rate * gradient

In [None]:
num_iter = 1000
learning_rate = 0.1

X2 = X

intercept2 = np.ones((X2.shape[0], 1))
X2 = np.concatenate((intercept2, X2), axis=1)
theta2 = np.zeros(X2.shape[1])

for i in range(num_iter):
    h2 = sigmoid(X2, theta2)
    gradient2 = gradient_ascent(X2, h2, y) #np.dot(X.T, (h - y)) / y.size
    theta2 = update_weight_mle(theta2, learning_rate, gradient2)
    
print("Learning rate: {}\nIteration: {}".format(learning_rate, num_iter))

In [None]:
result2 = sigmoid(X2, theta2)

In [None]:
print("Accuracy (Maximum Likelihood Estimation):")
f2 = pd.DataFrame(result2).join(y)
f2.loc[f2[0]==y].shape[0] / f2.shape[0] * 100