In [1]:
#Importing needed python modules
import numpy as np
import pandas as pd
import warnings as wr
#Ignoring warnings
from sklearn.exceptions import UndefinedMetricWarning
wr.filterwarnings("ignore", category=UndefinedMetricWarning)

In [2]:
#Loading data into dataframe(df)
df=pd.read_csv('Prostate_cancer_data.csv')

In [3]:
print(df.head(10))#Print all data of top 10 rows
print(df.shape)#Print the row and clumn count of the data
print(df.isna().sum())#Print all columns with empty data along with sum of empty data

   id diagnosis_result  radius  ...  compactness  symmetry  fractal_dimension
0   1                M      23  ...        0.278     0.242              0.079
1   2                B       9  ...        0.079     0.181              0.057
2   3                M      21  ...        0.160     0.207              0.060
3   4                M      14  ...        0.284     0.260              0.097
4   5                M       9  ...        0.133     0.181              0.059
5   6                B      25  ...        0.170     0.209              0.076
6   7                M      16  ...        0.109     0.179              0.057
7   8                M      15  ...        0.165     0.220              0.075
8   9                M      19  ...        0.193     0.235              0.074
9  10                M      25  ...        0.240     0.203              0.082

[10 rows x 10 columns]
(100, 10)
id                   0
diagnosis_result     0
radius               0
texture              0
perimeter       

In [4]:
df=df.dropna(axis=1)#Drop the column with empty data
df=df.drop(['id'],axis=1)

In [5]:
#Encoding first column
from sklearn.preprocessing import LabelEncoder
labelencoder_X=LabelEncoder()#Calling LabelEncoder
df.iloc[:,0]=labelencoder_X.fit_transform(df.iloc[:,0].values)#Encoding the values of diagnosis column to values

In [6]:
#Splitting data for dependence
X=df.iloc[:,1:].values#Features of cancerous and non cancerous patients
Y=df.iloc[:,0].values#Whether patient has cancer or not

In [7]:
#Train-Test split
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,random_state=1)

In [8]:
#Standard scaling
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)#Scaling X_train
X_test=sc.fit_transform(X_test)#Scaling X_test

In [9]:
#Importing algorithm libraries
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [10]:
#Function for  different models
def models(X_train,Y_train):

    #Logistic regression
    log=LogisticRegression(random_state=0)
    log.fit(X_train,Y_train)

    #Decision tree
    tree=DecisionTreeClassifier(criterion='entropy',random_state=0)
    tree.fit(X_train,Y_train)

    #Random forest classifier
    forest=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    forest.fit(X_train,Y_train)

    #GaussianNB
    gnb = GaussianNB()
    gnb.fit(X_train,Y_train)

    #Printing accuracy
    print("Logistic regression:",log.score(X_train,Y_train))
    print("Decision Tree:",tree.score(X_train,Y_train))
    print("Random Forest:",forest.score(X_train,Y_train))
    print("GaussianNB:",gnb.score(X_train,Y_train))
    return log,tree,forest,gnb

In [11]:
#Testing Function for all models
print("Accuracy")
model=models(X_train,Y_train)

Accuracy
Logistic regression: 0.88
Decision Tree: 1.0
Random Forest: 0.9733333333333334
GaussianNB: 0.8666666666666667


In [12]:
#Metrics of the models
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
for i in range(len(model)):
    print("\nModel:",i+1)
    print("Classification Report")
    print(classification_report(Y_test,model[i].predict(X_test)))
    print("Accuracy Score:",accuracy_score(Y_test,model[i].predict(X_test)))


Model: 1
Classification Report
              precision    recall  f1-score   support

           0       0.86      0.60      0.71        10
           1       0.78      0.93      0.85        15

    accuracy                           0.80        25
   macro avg       0.82      0.77      0.78        25
weighted avg       0.81      0.80      0.79        25

Accuracy Score: 0.8

Model: 2
Classification Report
              precision    recall  f1-score   support

           0       0.62      0.50      0.56        10
           1       0.71      0.80      0.75        15

    accuracy                           0.68        25
   macro avg       0.67      0.65      0.65        25
weighted avg       0.67      0.68      0.67        25

Accuracy Score: 0.68

Model: 3
Classification Report
              precision    recall  f1-score   support

           0       0.82      0.90      0.86        10
           1       0.93      0.87      0.90        15

    accuracy                           0.88  