### Part5: One vs All Logistic Regression for Multi class classification

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv("Data/wine-quality/data.csv",delimiter=";")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,9.2,0.25,0.34,1.2,0.026,31.0,93.0,0.9916,2.93,0.37,11.3,7
1,6.6,0.2,0.27,10.9,0.038,29.0,130.0,0.99496,3.11,0.44,10.5,7
2,5.7,0.22,0.22,16.65,0.044,39.0,110.0,0.99855,3.24,0.48,9.0,6
3,7.2,0.23,0.39,14.2,0.058,49.0,192.0,0.9979,2.98,0.48,9.0,7
4,7.6,0.35,0.47,13.3,0.037,42.0,116.0,0.99822,3.04,0.5,9.2,5


In [3]:
split = int(0.8 * df.shape[0])
training_data = df[:split] # 80% of the total data
testing_data = df[split:]  # 20% of the total data

In [4]:
columns=training_data.shape[1]
X_train=training_data.iloc[:,0:columns-1]# features Sets

mu=X_train.mean()
sigma=X_train.std()

# features normalisation
X_train=(X_train-X_train.mean())/X_train.std()

Y_train=training_data.iloc[:,columns-1:columns] # outputSet
X_train.insert(0, 'Ones', 1)
print(X_train.shape)
print(Y_train.shape)
X_train.head()

(3526, 12)
(3526, 1)


Unnamed: 0,Ones,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,1,2.777966,-0.277666,0.045565,-1.04198,-0.887299,-0.257466,-1.090609,-0.840062,-1.706637,-1.037796,0.658073
1,1,-0.315107,-0.770221,-0.528229,0.899437,-0.354345,-0.375658,-0.206833,0.315732,-0.508057,-0.426652,0.002584
2,1,-1.385786,-0.573199,-0.938081,2.050276,-0.087868,0.215298,-0.68455,1.550642,0.357583,-0.077427,-1.226457
3,1,0.398679,-0.474688,0.455418,1.559919,0.533911,0.806254,1.274088,1.327051,-1.373698,-0.077427,-1.226457
4,1,0.874537,0.707444,1.111182,1.379787,-0.398758,0.392585,-0.541235,1.437126,-0.974172,0.097186,-1.062585


In [5]:
X_train = np.matrix(X_train.values)
Y_train = np.matrix(Y_train.values)

#### Sigmoid Function 
${g(z)}$=${(1+e^{-z})^{-1} }$

In [6]:
def sigmoid(z):
    g=0.0
    g=1+np.exp(-z)
    return np.power(g,-1)

In [7]:
def CostFunction(X,y,theta):
    prediction1= np.log(sigmoid(X*theta.T))
    prediction2= np.log(1-sigmoid(X*theta.T))
    prediction1=np.multiply(y,prediction1)
    prediction2=np.multiply((1-y),prediction2)
    return -(sum(prediction1+prediction2))/len(X)

In [8]:
def gradientDescent(X, y, theta, alpha, iters):
    
    Jhistory=np.zeros(iters)
    
    m=len(X)
    for i in range(iters):
        pre = (X*theta.T)-y
        #print(pre.shape)
        delta=np.dot(np.transpose(pre),X)
        theta=theta-(alpha/m)*delta
        
        
        Jhistory[i] = CostFunction(X, y, theta)
        
    return theta,Jhistory

In [9]:
alpha=.009
iters=1000
theta = np.matrix(np.array([0,0,0,0,0,0,0,0,0,0,0,0]))
finalTheta=np.zeros((11,12))
for q in range(0,11):
    Y=(Y_train==q)
    Y=1*Y
    minTheta, cost2= gradientDescent(X_train, Y, theta, alpha, iters)
    finalTheta[q]=minTheta
#print(finalTheta)

In [12]:
print(finalTheta.shape)
columns=testing_data.shape[1]
X_test=testing_data.iloc[:,0:columns-1]# features Sets
Y_test=testing_data.iloc[:,columns-1:columns] # outputSet

#feature normalisation
X_test=(X_test-mu)/sigma
X_test.insert(0, 'Ones', 1)

print(X_test.shape)
X_test.head()

(11, 12)
(882, 12)


Unnamed: 0,Ones,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
3526,1,-0.553036,1.495531,-0.118376,-1.001951,0.089783,-1.498474,-1.711641,-0.702467,-0.707821,-0.688571,0.740009
3527,1,-0.672,-1.065754,-0.118376,-1.082009,-0.26552,0.215298,-0.493463,-1.782583,0.823698,-0.775877,2.132922
3528,1,-0.790965,0.707444,-0.69217,2.400532,0.223021,-0.434753,1.03523,1.863669,-0.374882,1.144861,-0.980649
3529,1,-1.385786,-0.277666,-0.610199,1.21967,0.134196,1.013089,-0.445692,0.986504,-0.707821,-0.339345,-0.898713
3530,1,-0.315107,-0.277666,-0.200347,1.199656,0.578324,0.983541,1.011344,1.499044,2.155453,-0.164733,-0.570969


In [13]:
def prediction(test_data,threshold=0.5):
   
    z= np.dot(test_data,finalTheta.T)
    z=sigmoid(z)
    return z

In [14]:
holder=np.zeros((882,11))
holder=prediction(X_test)
output=np.argmax(holder,axis=1)
print(output.shape)

(882,)


In [15]:
output=np.matrix(output)
print(Y_test.shape)
print(output.shape)

(882, 1)
(1, 882)


In [16]:
accuracy =np.count_nonzero(Y_test==output.T)/len(Y_test)*100
accuracy

53.06122448979592

In [19]:
# from sklearn.multiclass import OneVsRestClassifier
# from sklearn.svm import LinearSVC
# z=OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_train, Y_train).predict(X_test) 