In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.metrics import accuracy_score

import warnings

In [22]:
df=pd.read_csv("dataa.csv")
df

Unnamed: 0,Sample code number,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2
...,...,...,...,...,...,...,...,...,...,...,...
678,776715,3,1,1,1,3,2,1,1,1,2
679,841769,2,1,1,1,2,1,1,1,1,2
680,888820,5,10,10,3,7,3,8,10,2,4
681,897471,4,8,6,4,3,4,10,6,1,4


In [23]:
X=df.iloc[:,:-1].values
y=df.iloc[:,-1].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test=train_test_split(X,y, test_size=0.25, random_state=0)


In [24]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

In [25]:
def model_evaluation(true,predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [36]:
models = {
    "Logistic Regression": LogisticRegression(random_state=0),
    "K-Neighbors Classifier": KNeighborsClassifier(n_neighbors=5, metric= "minkowski",p=2),
    "Decision Tree": DecisionTreeClassifier(random_state=0,criterion="entropy"),
    "Random Forest Classifier": RandomForestClassifier(random_state=0,n_estimators=10,criterion="entropy"),
    "SVC":SVC(kernel="linear", random_state=0),
    "Kernel SVM":SVC(kernel="rbf", random_state=0),
    "Naive Bayes":GaussianNB()
}

model_list = []
r2_list =[]
accuracy_list=[]

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    y_pred=model.predict(X_test)
    mae, rmse, r2_square=model_evaluation(y_test, y_pred)
    acs=accuracy_score(y_test, y_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model performance for Test set')
    print("- Root Mean Squared Error: {:.4f}".format(rmse))
    print("- Mean Absolute Error: {:.4f}".format(mae))
    print("- R2 Score: {:.4f}".format(r2_square))
    print("- Accuracy Score: {:.4f}".format(acs))
    r2_list.append(r2_square)
    accuracy_list.append(acs)
    
    print('='*35)
    print('\n')

Logistic Regression
Model performance for Test set
- Root Mean Squared Error: 0.4588
- Mean Absolute Error: 0.1053
- R2 Score: 0.7753
- Accuracy Score: 0.9474


K-Neighbors Classifier
Model performance for Test set
- Root Mean Squared Error: 0.4588
- Mean Absolute Error: 0.1053
- R2 Score: 0.7753
- Accuracy Score: 0.9474


Decision Tree
Model performance for Test set
- Root Mean Squared Error: 0.4047
- Mean Absolute Error: 0.0819
- R2 Score: 0.8252
- Accuracy Score: 0.9591


Random Forest Classifier
Model performance for Test set
- Root Mean Squared Error: 0.5073
- Mean Absolute Error: 0.1287
- R2 Score: 0.7253
- Accuracy Score: 0.9357


SVC
Model performance for Test set
- Root Mean Squared Error: 0.4837
- Mean Absolute Error: 0.1170
- R2 Score: 0.7503
- Accuracy Score: 0.9415


Kernel SVM
Model performance for Test set
- Root Mean Squared Error: 0.4326
- Mean Absolute Error: 0.0936
- R2 Score: 0.8002
- Accuracy Score: 0.9532


Naive Bayes
Model performance for Test set
- Root Mean Sq

In [41]:
pd.DataFrame(list(zip(model_list, r2_list,accuracy_list)), columns=['Model Name', 'R2_Score','Accuracy Score']).sort_values(by=["R2_Score","Accuracy Score"],ascending=False)


Unnamed: 0,Model Name,R2_Score,Accuracy Score
2,Decision Tree,0.825204,0.959064
5,Kernel SVM,0.800234,0.953216
0,Logistic Regression,0.775263,0.947368
1,K-Neighbors Classifier,0.775263,0.947368
4,SVC,0.750292,0.94152
6,Naive Bayes,0.750292,0.94152
3,Random Forest Classifier,0.725321,0.935673
