In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### **Loading the dataset from GitHub**

In [None]:
# Read the diabetes CSV straight from GitHub (requires network access).
url = 'https://raw.githubusercontent.com/Aqeel-0/test/build/diabetes.csv'
df = pd.read_csv(url)
# Every column header of the frame, kept for later cells.
column_names = list(df.columns.values)
# Keep the preview as the cell's last expression so the notebook actually renders it.
df.head()


In [None]:
# Features: every column except the last one.
# NOTE(review): assumes "Outcome" is the last column of the CSV — confirm.
X = df.iloc[:,:-1]
# Target: the "Outcome" column.
y = df["Outcome"]
# info() prints its summary and returns None, so call each on its own line
# instead of building a tuple that the cell would echo as "(None, None)".
y.info()
X.info()

In [None]:
def reports(y_test, y_pred):
  """Print the confusion matrix and the classification report for one prediction run.

  Parameters:
    y_test: true labels of the evaluated samples.
    y_pred: predicted labels, in the same order as y_test.

  Returns:
    None. All output is written to stdout.
  """
  # Only the metrics that are actually printed are imported.
  from sklearn.metrics import classification_report, confusion_matrix
  print("Confusion Matrix : ")
  print(confusion_matrix(y_test, y_pred))
  print("**********************************************")
  print("Classification Evaluation : ")
  # zero_division=0: an undefined precision/recall is reported as 0 without a warning.
  print(classification_report(y_test, y_pred, zero_division = 0))

### Classification using Bernoulli Naive Bayes

In [None]:
def FBouBernoulli(split, alpha_value = 1.0, binarize_value = 0.0, fit_prior_value = False):
  """Train a BernoulliNB on a hold-out split of the notebook-level X / y and print its report.

  Parameters:
    split: value passed to train_test_split as test_size.
    alpha_value: forwarded to BernoulliNB as alpha.
    binarize_value: forwarded to BernoulliNB as binarize.
    fit_prior_value: forwarded to BernoulliNB as fit_prior.

  Returns:
    None. The settings and the evaluation are printed.
  """
  from sklearn.model_selection import train_test_split
  from sklearn.naive_bayes import BernoulliNB

  # The seed is fixed, so a given `split` always produces the same partition.
  partition = train_test_split(X, y, test_size=split, random_state=44)
  features_fit, features_eval, labels_fit, labels_eval = partition

  model = BernoulliNB(alpha=alpha_value, binarize=binarize_value, fit_prior=fit_prior_value)
  model.fit(features_fit, labels_fit)
  predicted = model.predict(features_eval)

  print(f"Train-test split: {split}")
  print(f"value: alpha: {alpha_value} binarize: {binarize_value} fit_prior: {fit_prior_value}")
  print("**********************************************")

  reports(labels_eval, predicted)

## Train-Test split 0.2
# BernoulliNB with default settings, then with alpha / binarize / fit_prior overridden.
FBouBernoulli(split=0.2)
FBouBernoulli(split=0.2, alpha_value=2.0)
FBouBernoulli(split=0.2, alpha_value=2.0, binarize_value=3.5)
FBouBernoulli(split=0.2, alpha_value=3.0, binarize_value=5, fit_prior_value=True)



In [None]:
## Train-Test split 0.3
# Defaults first, then a binarize threshold of 1.5 without and with fit_prior.
FBouBernoulli(split=0.3)
FBouBernoulli(split=0.3, alpha_value=1.0, binarize_value=1.5)
FBouBernoulli(split=0.3, alpha_value=1.0, binarize_value=1.5, fit_prior_value=True)

In [None]:
## Train-Test split 0.4
# Single BernoulliNB run with default hyper-parameters.
FBouBernoulli(split=0.4)

In [None]:

## Train-Test split 0.5
# Defaults first, then a binarize threshold of 7.9 without and with fit_prior.
FBouBernoulli(split=0.5)
FBouBernoulli(split=0.5, alpha_value=1.0, binarize_value=7.9)
FBouBernoulli(split=0.5, alpha_value=1.0, binarize_value=7.9, fit_prior_value=True)

In [None]:
# Hand-entered series, one y value per test split (0.2 to 0.5).
# NOTE(review): the y values look like scores copied from the reports above — confirm their source.
x_points = np.array([0.2, 0.3, 0.4, 0.5])
y_points = np.array([69, 61, 56, 64])

# Draw on an explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(x_points, y_points)
ax.grid(True)
plt.show()

## Classification using Multinomial Naive Bayes

In [None]:
def FMultinomial(split, alpha_value = 1.0, random_state = 44):
  """Train a MultinomialNB on a hold-out split of the notebook-level X / y and print its report.

  Parameters:
    split: value passed to train_test_split as test_size.
    alpha_value: forwarded to MultinomialNB as alpha.
    random_state: seed for the train/test partition. It defaults to 44, the same
      seed FBouBernoulli uses, so results are reproducible across runs; pass None
      to get a different random partition on every call.

  Returns:
    None. The settings and the evaluation are printed.
  """
  from sklearn.naive_bayes import MultinomialNB
  from sklearn.model_selection import train_test_split
  # Seed the shuffle so a Restart & Run All reproduces the same numbers.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size = split, random_state = random_state)
  classifier = MultinomialNB(alpha = alpha_value)
  classifier.fit(X_train, y_train)
  y_pred = classifier.predict(X_test)
  print("Train-test split: " + str(split))
  print("value: alpha: "+str(alpha_value))
  print("**********************************************")
  reports(y_test, y_pred)

## Train-Test split 0.2
# MultinomialNB with the default alpha, then with alpha = 2.3.
FMultinomial(split=0.2)
FMultinomial(split=0.2, alpha_value=2.3)









# Values used for the MultinomialNB plot (splits 0.2, 0.3, 0.4, 0.5): 61 63 63 64

In [None]:
## Train-Test split 0.3
# MultinomialNB with the default alpha, then with alpha = 2.9.
FMultinomial(split=0.3)
FMultinomial(split=0.3, alpha_value=2.9)

In [None]:
## Train-Test split 0.4
# MultinomialNB with the default alpha, then with alpha = 1.1.
FMultinomial(split=0.4)
FMultinomial(split=0.4, alpha_value=1.1)

In [None]:
## Train-Test split 0.5
# MultinomialNB with the default alpha, then with alpha = 4.8.
FMultinomial(split=0.5)
FMultinomial(split=0.5, alpha_value=4.8)

In [None]:
# Hand-entered series, one y value per test split (0.2 to 0.5).
# NOTE(review): the y values look like scores copied from the reports above — confirm their source.
x_points = np.array([0.2, 0.3, 0.4, 0.5])
y_points = np.array([61, 63, 63, 64])

# Draw on an explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(x_points, y_points)
ax.grid(True)
plt.show()

### **Classification using Gaussian Naive Bayes**

In [None]:
def FGaussian(split, random_state = 44):
  """Train a GaussianNB on a hold-out split of the notebook-level X / y and print its report.

  Parameters:
    split: value passed to train_test_split as test_size.
    random_state: seed for the train/test partition. It defaults to 44, the same
      seed FBouBernoulli uses, so results are reproducible across runs; pass None
      to get a different random partition on every call.

  Returns:
    None. The split size and the evaluation are printed.
  """
  from sklearn.naive_bayes import GaussianNB
  from sklearn.model_selection import train_test_split
  # Seed the shuffle so a Restart & Run All reproduces the same numbers.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size = split, random_state = random_state)
  classifier = GaussianNB()
  classifier.fit(X_train, y_train)
  y_pred = classifier.predict(X_test)
  print("Train-test split: " + str(split))
  print("**********************************************")
  reports(y_test, y_pred)

## Train-Test split 0.2
# GaussianNB evaluated with 20% of the rows held out.
FGaussian(split=0.2)














In [None]:
## Train-Test split 0.3
# GaussianNB evaluated with 30% of the rows held out.
FGaussian(split=0.3)

In [None]:
## Train-Test split 0.4
# GaussianNB evaluated with 40% of the rows held out.
FGaussian(split=0.4)

In [None]:

## Train-Test split 0.5
# GaussianNB evaluated with 50% of the rows held out.
FGaussian(split=0.5)

In [None]:
# Hand-entered series, one y value per test split (0.2 to 0.5).
# NOTE(review): the y values look like scores copied from the reports above — confirm their source.
x_points = np.array([0.2, 0.3, 0.4, 0.5])
y_points = np.array([75, 71, 78, 76])

# Draw on an explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(x_points, y_points)
ax.grid(True)
plt.show()

### **Classification using Decision Tree**

In [None]:
def decision_tree(split, criterion_value, random_state = 44):
  """Train a DecisionTreeClassifier on a hold-out split of X / y, print its report and plot the tree.

  Parameters:
    split: value passed to train_test_split as test_size.
    criterion_value: split criterion forwarded to DecisionTreeClassifier,
      e.g. 'entropy' or 'gini'.
    random_state: seed used for both the train/test partition and the tree
      itself. It defaults to 44, the same seed FBouBernoulli uses, so results
      are reproducible; pass None for a different random outcome on every call.

  Returns:
    None. The settings and the evaluation are printed and the fitted tree is drawn.
  """
  from sklearn.model_selection import train_test_split
  from sklearn.tree import DecisionTreeClassifier
  from sklearn import tree
  # Seed the shuffle so a Restart & Run All reproduces the same numbers.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size = split, random_state = random_state)

  classifier = DecisionTreeClassifier(criterion = criterion_value,
                                      random_state = random_state)
  classifier.fit(X_train, y_train)
  y_pred = classifier.predict(X_test)
  print("Train-test split: " + str(split))
  # The label names the parameter, not one of its values (it used to say "Entropy" for gini too).
  print("Value: criterion: " + criterion_value)
  print("**********************************************")
  reports(y_test, y_pred)
  plt.figure(figsize=(12,8))
  tree.plot_tree(classifier,
                 # Only the columns the tree was trained on (excludes the target).
                 feature_names=list(X_train.columns),
                 # One name per class, in ascending order; a bare string would be
                 # indexed character by character ('o', 'u').
                 class_names=[str(label) for label in classifier.classes_],
                 filled=True)
  plt.show()



In [None]:
# Decision tree on a 0.2 test split using the entropy criterion.
decision_tree(split=0.2, criterion_value='entropy')

In [None]:
# Decision tree on a 0.2 test split using the gini criterion.
decision_tree(split=0.2, criterion_value='gini')

In [None]:
# Decision tree on a 0.3 test split using the entropy criterion.
decision_tree(split=0.3, criterion_value='entropy')

In [None]:
# Decision tree on a 0.3 test split using the gini criterion.
decision_tree(split=0.3, criterion_value='gini')

In [None]:
# Decision tree on a 0.4 test split using the entropy criterion.
decision_tree(split=0.4, criterion_value='entropy')

In [None]:
# Decision tree on a 0.4 test split using the gini criterion.
decision_tree(split=0.4, criterion_value='gini')

In [None]:
# Decision tree on a 0.5 test split using the entropy criterion.
decision_tree(split=0.5, criterion_value='entropy')

In [None]:
# Decision tree on a 0.5 test split using the gini criterion.
decision_tree(split=0.5, criterion_value='gini')