In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from scipy.special import expit as g

In [2]:
# Load the iris dataset object; later cells read its `data`, `target` and
# `feature_names` attributes.
iris_data = load_iris()

In [3]:
y = pd.DataFrame(iris_data.target)
X = pd.DataFrame(iris_data.data)
X = X.T
X.index = iris_data.feature_names

In [4]:
X = X.T

In [5]:
# Per-column summary statistics (count, mean, std, min, quartiles, max) of the features.
X.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [6]:
# 60/20/20 split: hold out 40% of the samples, then halve that hold-out into the
# cross-validation and test sets. The hold-out has its own name so that `X_test` /
# `y_test` only ever refer to the final test set.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.4, random_state=6)
X_cv, X_test, y_cv, y_test = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=5)


In [7]:
def costFunction(features,target,weights,reg_parameter=100):
    """Regularised cross-entropy cost of a binary logistic-regression model.

    Parameters
    ----------
    features : array-like, shape (m, n)
        Design matrix, one row per sample. Column 0 is treated as the bias
        column: its weight is excluded from the penalty.
    target : array-like of 0/1 labels, m entries
        Accepted as 1-D or as an (m, 1) column.
    weights : array-like, shape (n,) or (1, n)
        Model weights, bias weight first.
    reg_parameter : float, default 100
        L2 regularisation strength (lambda).

    Returns
    -------
    numpy.ndarray, shape (1, 1)
        Mean cross-entropy plus ``lambda / (2 m) * sum(w_j ** 2 for j >= 1)``.
    """
    features = np.asarray(features, dtype=float)
    weights = np.atleast_2d(np.asarray(weights, dtype=float))
    target = np.asarray(target, dtype=float).reshape(-1, 1)

    # Number of samples is the row count of the column target. The previous
    # `target.shape[1]` evaluated to 1, so the cost was never averaged.
    m = target.shape[0]

    hypothesis = g(np.matmul(features, weights.T))

    cost = -(np.matmul(target.T, np.log(hypothesis))
             + np.matmul((1 - target).T, np.log(1 - hypothesis))) / m

    # Scalar L2 penalty over every weight except the bias. The builtin `sum`
    # used before reduced only along axis 0 of the 2-D weights.
    reg_func = (reg_parameter / (2 * m)) * np.sum(weights[:, 1:] ** 2)

    # The penalty used to be computed and then dropped; it is part of the cost.
    return cost + reg_func


In [8]:
from sklearn.preprocessing import PolynomialFeatures

In [104]:
def trainClassifier(features_train,features_cv,target_train,target_cv):
    """Fit a logistic-regression classifier and evaluate it on a validation split.

    Parameters
    ----------
    features_train, target_train
        Training inputs and labels; the labels are flattened to 1-D before fitting.
    features_cv, target_cv
        Validation inputs and labels, used for the printed score and the predictions.

    Returns
    -------
    list
        ``[fitted_model, validation_predictions]``.
    """
    train_labels = np.array(target_train).ravel()

    # C is the inverse regularisation strength, so 1e20 leaves the fit effectively
    # unpenalised.
    # NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
    # releases - confirm against the installed version.
    classifier = LogisticRegression(solver='lbfgs', C=1e20, multi_class='ovr')
    classifier.fit(features_train, train_labels)

    cv_score = classifier.score(features_cv, target_cv)
    print("score :", cv_score)

    cv_predictions = classifier.predict(features_cv)
    return [classifier, cv_predictions]


In [105]:
ts_error=[]
degree = [1]
for i in degree:
    degree_processing = PolynomialFeatures(i)
    # Fit the expansion on the training split only and reuse the fitted transformer
    # for the validation split, instead of re-fitting it on validation data.
    features_train = degree_processing.fit_transform(X_train)
    features_cv = degree_processing.transform(X_cv)

# NOTE(review): training runs after the loop, so only the features of the last entry
# in `degree` are used - confirm that is intended before adding more degrees.
[lr_classifier, y_predicted] = trainClassifier(features_train,features_cv,y_train,y_cv)

score : 1.0


In [106]:
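# Disabled: per-degree cost tracking. `features_test` and `weights` are not defined
# in any cell above, so these calls cannot run as written.
#     ts_error.append(costFunction(np.array(features_test),np.array(y_test),weights))
#     ts_error.append(costFunction(np.array(features_train),np.array(y_train),weights))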

In [107]:
# Confusion matrix of the validation labels against the validation predictions.
cm = confusion_matrix(y_true=y_cv, y_pred=y_predicted)

# Fitted coefficient matrix of the classifier.
print(lr_classifier.coef_)

[[   1.01839035    1.66945802    6.80856793  -10.69982821   -4.82793531]
 [   3.89284304   -0.66591008   -2.65692943    2.28986101   -4.36726215]
 [-515.18868774   16.48733022 -213.18031505   96.61241046  626.61026608]]


In [108]:
def plot_classifier(classifier,X,y,x_col=2,y_col=3,step_size=0.01):
    """Plot a 2-D slice of a classifier's decision regions with the samples on top.

    The mesh varies columns `x_col` and `y_col` of `X`; every other column is held
    at its mean over `X`, so the classifier is queried with as many columns as `X`
    has. Previously the x grid was written into columns 0-3 and the y grid into
    column 4, which did not match the columns drawn by the scatter plot.

    Parameters
    ----------
    classifier
        Fitted estimator exposing ``predict``.
    X : array-like, shape (m, n)
        Samples in the same column layout the classifier was fitted on.
    y : array-like, m entries
        Class labels used to colour the scatter points.
    x_col, y_col : int, default 2 and 3
        Columns of `X` shown on the horizontal and vertical axes.
    step_size : float, default 0.01
        Grid spacing of the mesh along both axes.
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()

    # Plot ranges: the data extent padded by one unit on each side.
    x_min, x_max = X[:, x_col].min() - 1.0, X[:, x_col].max() + 1.0
    y_min, y_max = X[:, y_col].min() - 1.0, X[:, y_col].max() + 1.0

    x_values, y_values = np.meshgrid(np.arange(x_min, x_max, step_size),
                                     np.arange(y_min, y_max, step_size))

    # One classifier input row per mesh point: start from the column means and
    # overwrite only the two plotted columns.
    mesh_input = np.repeat(X.mean(axis=0, keepdims=True), x_values.size, axis=0)
    mesh_input[:, x_col] = x_values.ravel()
    mesh_input[:, y_col] = y_values.ravel()

    mesh_output = classifier.predict(mesh_input).reshape(x_values.shape)

    plt.figure()
    plt.pcolormesh(x_values, y_values, mesh_output, cmap=plt.cm.gray)

    plt.scatter(X[:, x_col], X[:, y_col], c=y, s=80, edgecolors='black', linewidth=1, cmap=plt.cm.Paired)
    plt.show()

In [109]:
# Heat map of the validation confusion matrix; rows are the true classes and
# columns the predicted ones.
plt.matshow(cm)
plt.title("Confusion matrix (validation split)")
plt.xlabel("Predicted class")
plt.ylabel("True class")
plt.colorbar()

<IPython.core.display.Javascript object>

<matplotlib.colorbar.Colorbar at 0x7f42075c6780>

In [97]:

# Value passed as `cv=` to the cross_val_score calls in the cells below.
num_validations =10

In [110]:
from sklearn.model_selection import cross_val_score


In [111]:
# Cross-validated accuracy. The estimator is scored on the raw feature frame `X`,
# not on the PolynomialFeatures output that was passed to trainClassifier.
labels_flat = np.array(y).ravel()
accuracy = cross_val_score(lr_classifier, X, labels_flat, scoring='accuracy', cv=num_validations)
mean_accuracy_pct = round(100*accuracy.mean(), 2)
print ("Accuracy: " + str(mean_accuracy_pct) + "%")

Accuracy: 96.67%


In [112]:
# Cross-validate the same estimator on the same data for three class-weighted
# metrics, printing each mean as a percentage in the order F1, precision, recall.
labels_flat = np.array(y).ravel()
cv_metric_scores = []
for prefix, scorer in (("F1: ", 'f1_weighted'),
                       ("Precision: ", 'precision_weighted'),
                       ("Recall: ", 'recall_weighted')):
    fold_scores = cross_val_score(lr_classifier, X, labels_flat, scoring=scorer, cv=num_validations)
    cv_metric_scores.append(fold_scores)
    print (prefix + str(round(100*fold_scores.mean(), 2)) + "%")
# Keep the per-metric fold scores under their original names.
f1, precision, recall = cv_metric_scores

F1: 96.65%
Precision: 97.0%
Recall: 96.67%


In [113]:
from sklearn.metrics import classification_report


In [116]:
# Validation labels on the x axis against the model's predictions on the y axis;
# points off the diagonal are misclassifications.
plt.figure()
plt.title("Predicted vs. true class (validation split)")
plt.xlabel("True class")
plt.ylabel("Predicted class")
plt.plot(y_cv,y_predicted,'ro')


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f42071a36d8>]

In [119]:
# Reuse the PolynomialFeatures transformer that produced the training features, so the
# test split always gets the same expansion as the data the model was fitted on
# (previously a fresh PolynomialFeatures(1) was re-fitted on the test split).
features_test = degree_processing.transform(X_test)
print(classification_report(
    y_test,
    lr_classifier.predict(features_test),
    # 'Classs 2' was a typo in the displayed label.
    target_names=['Class 1', 'Class 2', 'Class 3']
))

              precision    recall  f1-score   support

     Class 1       1.00      1.00      1.00        11
    Classs 2       0.83      1.00      0.91        10
     Class 3       1.00      0.78      0.88         9

   micro avg       0.93      0.93      0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30

