# Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.metrics import roc_curve
from keras.wrappers.scikit_learn import KerasClassifier

Using TensorFlow backend.


# Data Processing

In [2]:
headers = ['name', 'hair', 'feathers', 'eggs', 'milk', 'airborne', 'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous', 'fins', 'legs', 'tail', 'domestic', 'catsize', 'type']

In [3]:
df = pd.read_csv('zoo.data', header=None)

In [4]:
df.columns = headers

In [5]:
df.head()

Unnamed: 0,name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


In [6]:
arr = np.array(df)

In [7]:
X_with_name = arr[:, :-1]

In [8]:
X = X_with_name[:, 1:].astype('int')

In [9]:
Y = arr[:, -1].astype('int')

# Making Model

# (i) SVM Kernals 
      - Linear
      - rbf
      - poly
      - sigmoid

In [10]:
models = [
    SVC(kernel='linear'),
    SVC(kernel='rbf'),
    SVC(kernel='poly'),
    SVC(kernel='sigmoid')
]

In [11]:
cross = KFold(5)

# Training our model and then Calculating Accuracy of each SVM kernel

In [12]:
kernals={}
for model in models:
    best_score = -1
    for train, test in cross.split(X, Y):
        X_train = X[train]
        Y_train = Y[train]
        
        X_test = X[test]
        Y_test = Y[test]
        
        trained = model.fit(X_train, Y_train)
        best_score = max(best_score, trained.score(X_test, Y_test))
    kernals[model.kernel]=best_score
    print("{} - {}".format(model.kernel, best_score))

linear - 1.0
rbf - 0.9523809523809523
poly - 1.0
sigmoid - 0.8095238095238095


# (ii) Neural Network (BPNN)

In [13]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
import keras_metrics

In [14]:
model = Sequential()
model.add(Dense(12, input_dim=16, kernel_initializer='uniform', activation='relu'))
model.add(Dense(10, kernel_initializer='uniform', activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Calculating Accuracy using 

In [15]:
best_score = -1
for train, test in cross.split(X, Y):
    train, test = list(train), list(test)
    X_train = X[train]
    Y_train = Y[train]

    X_test = X[test]
    Y_test = Y[test]
    
    model.fit(X_train, to_categorical(Y_train), epochs=100, verbose=False)
    best_score = max(best_score, model.evaluate(X_test, to_categorical(Y_test, num_classes=8))[-1])



In [20]:
best_score

0.956250011920929

In [33]:
y_pred = model.predict(X_test).ravel()

In [34]:
from sklearn.metrics import precision_score

In [35]:
kernals

{'linear': 1.0,
 'rbf': 0.9523809523809523,
 'poly': 1.0,
 'sigmoid': 0.8095238095238095}

In [37]:
precision_score(Y_test,y_pred, average="macro")

ValueError: Found input variables with inconsistent numbers of samples: [20, 160]

In [38]:
y_pred.shape

(160,)

In [29]:
Y_test.shape

(20,)

In [53]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn import svm
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split

In [54]:
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,random_state=42))
y_score = classifier.fit(X_train, Y_train).decision_function(X_test)

In [69]:
precision = dict()
recall = dict()
average_precision = dict()
for i in range(0,len(headers)-1):
    precision[i], recall[i], _ = precision_recall_curve(Y_test,y_score[:, i])
    average_precision[i] = average_precision_score(Y_test, y_score[:, i])

ValueError: multiclass format is not supported

In [64]:
precision[0], recall[0], _ = precision_recall_curve(Y_test[:, 0],y_score[:, 0])

IndexError: too many indices for array

In [66]:
y_score.shape

(20, 7)

In [68]:
Y_test.shape

(20,)