# One-vs-Rest

## Import Libraries

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

## Load the Dataset


In [None]:
# Load the dataset CSV into a DataFrame.
csv_path = 'data/modifiedDigits4Classes.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded table.
df.shape

## Visualize Each Digit

In [None]:
# Feature columns are every column except the target. Excluding 'label' by
# name (rather than assuming it is the last column) matches how the target
# is referenced elsewhere in this notebook and does not depend on column order.
pixel_colnames = df.columns.drop('label')

In [None]:
# Display the column labels selected as pixel features.
pixel_colnames

In [None]:
# Pixel values of the first row (label column excluded), as an array.
first_row = df.loc[0, pixel_colnames]
image_values = first_row.values

In [None]:
# Show the first few digits side by side, one subplot per image.
n_images = 4
plt.figure(figsize=(10, 2))
for index in range(n_images):
    # Size the grid to the number of images so no subplot slot is left empty.
    plt.subplot(1, n_images, index + 1)
    image_values = df.loc[index, pixel_colnames].values
    image_label = df.loc[index, 'label']
    # Each row stores a flattened 8x8 image; restore the grid before drawing.
    plt.imshow(image_values.reshape(8, 8), cmap='gray')
    plt.title('Label: ' + str(image_label))

## Splitting Data into Training and Test Sets

In [None]:
# Separate the pixel features from the target, then split into training and
# test sets (default split proportions, fixed seed for reproducibility).
features = df[pixel_colnames]
target = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, random_state=0
)

## Standardize the Data


In [None]:
# Learn the scaling statistics from the training set only, so that nothing
# about the test set leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

# Rescale both splits using those training-set statistics.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Logistic Regression

In [None]:
# multi_class='ovr' requests the one-vs-rest scheme.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument -- confirm against the installed version before upgrading.
clf = LogisticRegression(
    solver='liblinear',
    multi_class='ovr',
    random_state=0,
)
clf.fit(X_train, y_train)

# Mean accuracy on each split.
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print('Training accuracy:', train_accuracy)
print('Test accuracy:', test_accuracy)

In [None]:
# Intercepts learned by the fitted model (one per class under one-vs-rest).
clf.intercept_

Similarly, `clf.coef_` holds 4 coefficient vectors, one per class (one for each one-vs-rest classifier).

In [None]:
# Shape of the learned coefficient array: (number of classes, number of features).
clf.coef_.shape

## Predictions

In [None]:
# Per-class probabilities for the first test sample; the class with the
# highest value is the predicted one.
# NOTE(review): the earlier note here said the second class scores highest
# for this sample -- re-check against the output after re-running.
first_test_sample = X_test[0:1]
clf.predict_proba(first_test_sample)

In [None]:
# Predicted label for the first test sample (sliced so it stays a
# one-sample batch rather than a single row).
first_test_sample = X_test[:1]
clf.predict(first_test_sample)