

## _design a discriminator by Naive Bayes model_

- _assume that we know distribution of data is Gaussian_

###  _preprocessing and Reading the Data_

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split 
import numpy as np

iris = datasets.load_iris()

X = iris.data
y = iris.target

# store the feature matrix (X) and response vector (y) 
X = iris.data
y = iris.target
print("our dataset has " + str(X.shape[1]) + " features. for more information about data surf the web")

# splitting X and y into training and testing sets
#you can change the test size, fit model with more or less data and see results
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 


our dataset has 4 features. for more information about data surf the web


##### _print the number of train and test data and number of classes_

In [3]:
print("the number of train data is : " + str(X_train.shape[0]))
print("the number of test data is : " + str(X_test.shape[0]))
print("there are " + str(len(set(y_train))) + " different classes in the dataset")

the number of train data is : 90
the number of test data is : 60
there are 3 different classes in the dataset


### **Training a model** (_Assume that the data follows a Gaussian distribution_)

In [4]:
# Naive Bayes model implementation


class Bayes():
    
    def __init__(self):
        self.priors = None
        self.distribution_params = None
    
    def gaussian(self,x,u,var):
        return (1/np.sqrt(2*np.pi*var)) * np.exp(-((x-u)**2)/(2*var))
    
    def _compute_priors(self,yt):

        classes = np.unique(yt)
        targets = np.asarray(yt)
        self.num_classes = (len(classes))
        # compute prior probabilities
        priors = np.zeros((len(classes),))
        for i in range(len(classes)):
            priors[i] = yt[yt==classes[i]].shape[0]/targets.shape[0]

        return priors

    def _estimate_pdf_dist(self, xt,yt):
        classes = np.unique(yt)
        targets = np.asarray(yt)
        distribution_params = np.zeros((len(classes),xt.shape[1],2))
        # compute class conditional pdfs
        for i in range(len(classes)):
            x = xt[yt==classes[i]]
            for j in range(x.shape[1]):
                distribution_params[i,j,0] = np.mean(x[:,j])
                distribution_params[i,j,1] = np.var(x[:,j])
        return distribution_params
    
    def train(self,xt,yt):
        self.priors = self._compute_priors(yt)
        self.distribution_params = self._estimate_pdf_dist(xt,yt)

    def predict(self,sample):
        # compute posterior probabilities
        posteriors = np.zeros((self.num_classes,))
        for i in range(self.num_classes):
            likelihoods = np.asarray([self.gaussian(x,u,var) for x,u,var in zip(sample,self.distribution_params[i,:,0],self.distribution_params[i,:,1]) ])
            posteriors[i] = self.priors[i]* np.prod(likelihoods)
        # return argument of maximum posterior
        return np.argmax(posteriors)


model = Bayes()
model.train(X_train,y_train)

#### _testing and accuracy_

In [8]:
from sklearn.metrics import precision_recall_fscore_support

predictions = []
for i in range(X_test.shape[0]):
    predictions.append(model.predict(X_test[i,:]))
    
prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:" + str(round(prec*100,2)) + "Recall:"  +  str(round(rcall*100,2))  + "F1-score:" +str(round(f1*100,2)))

Precision:95.08  Recall:95.0  F1-score:95.0



### _classification using SVM and MLP_

In [10]:
from sklearn import svm
#train svm model

svm_model = svm.SVC()
svm_model.fit(X_train,y_train)
# predict
predictions = svm_model.predict(X_test)

# evaluate performance
prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:"+str(round(prec*100,2))+"%  Recall:"+str(round(rcall*100,2))+"%  F1-score:"+str(round(f1*100,2))+"%")

Precision:98.41%  Recall:98.33%  F1-score:98.33%


In [11]:
from sklearn.neural_network import MLPClassifier
#use two hidden layers 
#train multi layer perceptron

neural_model = MLPClassifier(hidden_layer_sizes=(256,100 ), activation='logistic',max_iter=1000)
neural_model.fit(X_train,y_train)

# predict
predictions = neural_model.predict(X_test)

# evaluate
prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:"+str(round(prec*100,2))+"%  Recall:"+str(round(rcall*100,2))+"%  F1-score:"+str(round(f1*100,2))+"%")

Precision:100.0%  Recall:100.0%  F1-score:100.0%




---



# _Repeat all steps for new dataset_

In [12]:
wine = datasets.load_wine()
# store the feature matrix (X) and response vector (y) 
X = wine.data 
y = wine.target 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1) 

### _assume that we know distribution of data is Gaussian_

In [13]:
from sklearn.naive_bayes import GaussianNB
# training the model on training set 

# write your code here :
gnb = GaussianNB()
gnb.fit(X_train,y_train)


GaussianNB(priors=None, var_smoothing=1e-09)

In [14]:
#write your code here :
predictions = gnb.predict(X_test)

prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:"+str(round(prec*100,2))+"%  Recall:"+str(round(rcall*100,2))+"%  F1-score:"+str(round(f1*100,2))+"%")

Precision:98.66%  Recall:98.61%  F1-score:98.61%


##### _classification using SVM and MLP_

In [15]:
from sklearn import svm
#train svm model

svm_model = svm.SVC()
svm_model.fit(X_train,y_train)

# predict
predictions = svm_model.predict(X_test)

# evaluate
prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:"+str(round(prec*100,2))+"%  Recall:"+str(round(rcall*100,2))+"%  F1-score:"+str(round(f1*100,2))+"%")

Precision:70.0%  Recall:70.83%  F1-score:69.76%


In [16]:
from sklearn.neural_network import MLPClassifier
#train multi layer perceptron

neural_model = MLPClassifier(hidden_layer_sizes=(256,100 ), activation='logistic',max_iter=1000)
neural_model.fit(X_train,y_train)

# predict
predictions = neural_model.predict(X_test)

# evaluate

prec,rcall,f1,_ = precision_recall_fscore_support(y_test, predictions,average = 'weighted')
print("Precision:"+str(round(prec*100,2))+"%  Recall:"+str(round(rcall*100,2))+"%  F1-score:"+str(round(f1*100,2))+"%")

Precision:94.74%  Recall:94.44%  F1-score:94.42%




---

