### Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

### Data Collection and Analysis

In [None]:
# Dataset: PIMA Diabetes.
# Read the local CSV file into a pandas DataFrame.
diabetes_dataset = pd.read_csv(filepath_or_buffer="diabetes.csv")

In [None]:
# pd.read_csv?   (uncomment in IPython/Jupyter to display the help for pd.read_csv)

In [None]:
# Preview the first 5 rows of the dataset.
diabetes_dataset.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded dataset
diabetes_dataset.shape

In [None]:
# summary statistics of the dataset's columns
diabetes_dataset.describe()

In [None]:
# Number of rows per target label:
#   0 -> Non-Diabetic Patient
#   1 -> Diabetic Patient
diabetes_dataset["Outcome"].value_counts()

In [None]:
# Mean of every column, computed separately for each Outcome label.
diabetes_dataset.groupby(by="Outcome").mean()

In [None]:
# Separate the features (X) from the labels (Y).
# `columns=` already targets the column axis, so no `axis` argument is needed.
X = diabetes_dataset.drop(columns='Outcome')
Y = diabetes_dataset['Outcome']

In [None]:
# show the feature matrix
print(X)

In [None]:
# show the label vector
print(Y)

### Data Standardization

In [None]:
# scaler used to standardize the feature columns
scaler = StandardScaler()

In [None]:
# learn the per-feature scaling statistics from X
# NOTE(review): X is the full dataset here and the train/test split happens
# later, so test rows influence the scaling; consider fitting on the training
# split only.
scaler.fit(X)

In [None]:
# apply the fitted scaler to all rows of X
standardirized_data = scaler.transform(X)

In [None]:
# show the standardized feature values
print(standardirized_data)

In [None]:
# From here on X holds the standardized features; Y is the 'Outcome' column.
X, Y = standardirized_data, diabetes_dataset["Outcome"]

In [None]:
# show the features and labels that go into the train/test split
print(X)
print(Y)

### Train test split

In [None]:
# Hold out 15% of the rows for testing; the fixed seed makes the shuffled
# split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.15,
    shuffle=True,
    random_state=104,
)

In [None]:
# shapes of the full feature matrix, the training part and the testing part
print(X.shape,X_train.shape,X_test.shape)

### Training the model and Model Evaluation

### Support Vector Machine classifier

In [None]:
# Support vector classifier with a linear kernel.
classifier1 = svm.SVC(
    C=1.0,
    kernel="linear",
    random_state=42,
)

# Fit the SVM on the training split.
classifier1.fit(X_train, Y_train)

In [None]:
# Accuracy of classifier1 (SVM) on the training data.
X_train_prediction = classifier1.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier1 (SVM) on the held-out testing data.
X_test_prediction = classifier1.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Random Forest classifier

In [None]:
# Small, shallow random forest: 30 trees, depth capped at 2 and at most
# 6 leaves per tree.
classifier2 = RandomForestClassifier(
    n_estimators=30,
    max_depth=2,
    max_leaf_nodes=6,
    min_samples_split=5,
    random_state=22,
)

# Fit the random forest on the training split.
classifier2.fit(X_train, Y_train)

In [None]:
# Accuracy of classifier2 (Random Forest) on the training data.
X_train_prediction = classifier2.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier2 (Random Forest) on the held-out testing data.
X_test_prediction = classifier2.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Logistic Regression

In [None]:
# logistic regression with the library's default settings
classifier3 = LogisticRegression()

# training the Logistic Regression on the training split
classifier3.fit(X_train,Y_train)

In [None]:
# Accuracy of classifier3 (Logistic Regression) on the training data.
X_train_prediction = classifier3.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier3 (Logistic Regression) on the held-out testing data.
X_test_prediction = classifier3.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Logistic Regression with Cross Validation

In [None]:
# logistic regression with built-in 5-fold cross-validation
classifier4 = LogisticRegressionCV(cv=5)

# training the Logistic Regression with Cross Validation on the training split
classifier4.fit(X_train,Y_train)

In [None]:
# Accuracy of classifier4 (Logistic Regression with CV) on the training data.
X_train_prediction = classifier4.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier4 (Logistic Regression with CV) on the held-out testing data.
X_test_prediction = classifier4.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Decision Tree classifier

In [None]:
# Shallow decision tree: Gini impurity, depth capped at 3, at least
# 4 samples in every leaf.
classifier5 = DecisionTreeClassifier(
    criterion="gini",
    max_depth=3,
    min_samples_leaf=4,
    random_state=20,
)

# Fit the decision tree on the training split.
classifier5.fit(X_train, Y_train)

In [None]:
# Accuracy of classifier5 (Decision Tree) on the training data.
X_train_prediction = classifier5.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier5 (Decision Tree) on the held-out testing data.
X_test_prediction = classifier5.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### K-Nearest Neighbors

In [None]:
# 3-nearest-neighbours classifier using Manhattan (L1) distance.
# `p` is deliberately not passed: it only parameterizes the Minkowski metric
# and has no effect when metric='manhattan'.
classifier6 = KNeighborsClassifier(n_neighbors=3, metric='manhattan')

# training the K-Nearest Neighbors on the training split
classifier6.fit(X_train, Y_train)

In [None]:
# Accuracy of classifier6 (K-Nearest Neighbors) on the training data.
X_train_prediction = classifier6.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier6 (K-Nearest Neighbors) on the held-out testing data.
X_test_prediction = classifier6.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### XGBoost classifier

In [None]:
# Gradient-boosted trees with step size eta=0.1 and gamma=10.
classifier7 = XGBClassifier(
    eta=0.1,
    gamma=10,
)

# Fit the XGBoost model on the training split.
classifier7.fit(X_train, Y_train)

In [None]:
# Accuracy of classifier7 (XGBoost) on the training data.
X_train_prediction = classifier7.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier7 (XGBoost) on the held-out testing data.
X_test_prediction = classifier7.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Naive Bayes classifier

In [None]:
# Gaussian naive Bayes with the library's default settings
classifier8 = GaussianNB()

# training the Naive Bayes classifier on the training split
classifier8.fit(X_train,Y_train)

In [None]:
# Accuracy of classifier8 (Naive Bayes) on the training data.
X_train_prediction = classifier8.predict(X_train)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data:', training_data_accuracy * 100)

In [None]:
# Accuracy of classifier8 (Naive Bayes) on the held-out testing data.
X_test_prediction = classifier8.predict(X_test)
# accuracy_score takes (y_true, y_pred): the ground-truth labels come first.
testing_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of testing data:', testing_data_accuracy * 100)

### Making a Predictive system

In [None]:
# Classify a single new record with classifier4 (Logistic Regression with CV).
# The values are assumed to be listed in the same order as the dataset's
# feature columns (every column except 'Outcome').
input_data = (6, 148, 72, 35, 0, 33.6, 0.627, 50)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshaping to (1, n_features) as we are predicting for one instance
input_data_reshape = input_data_as_numpy_array.reshape(1, -1)

# The scaler was fitted on a DataFrame, so the sample is given the same column
# names; a bare array makes scikit-learn warn about missing feature names.
feature_columns = diabetes_dataset.columns.drop('Outcome')
input_frame = pd.DataFrame(input_data_reshape, columns=feature_columns)

# standardize the input data with the already-fitted scaler
std_data = scaler.transform(input_frame)
print(std_data)

prediction = classifier4.predict(std_data)
print(prediction)

# label 0 -> non-diabetic, anything else (1) -> diabetic
if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")