In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_excel("patient_data.xlsx")

In [3]:
data.columns

Index(['Sl.No', 'Patient #', 'Region', 'Gender', 'Age', 'Smokes',
       'No of Ciggarets per day', 'No of Hrs Sleep per Day',
       'No of Hrs Exercise per Day', 'Diet', 'Alcoholic', 'Height',
       'Complexion', 'Cancer Diagnosis Result'],
      dtype='object')

In [4]:
data = data.dropna()

In [5]:

X = data.drop(columns=['Cancer Diagnosis Result'])
y = data['Cancer Diagnosis Result']

In [6]:
X.head()

Unnamed: 0,Sl.No,Patient #,Region,Gender,Age,Smokes,No of Ciggarets per day,No of Hrs Sleep per Day,No of Hrs Exercise per Day,Diet,Alcoholic,Height,Complexion
0,1,Patient 1,NI,Male,24.0,NO,15.0,8.0,2.66,Vegetarian,Occasional,5.687164,Fair
1,2,Patient 2,SI,Male,32.0,YES,27.0,9.0,1.27,NonVegetarian,Regular,4.071704,Dark
2,3,Patient 3,SI,Male,28.0,YES,29.0,10.0,1.18,Vegetarian,Occasional,5.509791,Wheatish
4,5,Patient 5,NE,Female,58.0,NO,21.0,9.0,1.55,Vegetarian,Regular,5.636116,Fair
5,6,Patient 6,SI,Female,27.0,NO,19.0,10.0,1.95,NonVegetarian,Regular,5.466268,Brown


In [7]:
X = pd.get_dummies(X)

In [8]:
X.head()

Unnamed: 0,Sl.No,Age,No of Ciggarets per day,No of Hrs Sleep per Day,Height,Patient #_Patient 1,Patient #_Patient 10,Patient #_Patient 100,Patient #_Patient 1000,Patient #_Patient 1001,...,No of Hrs Exercise per Day_1.84hrs,No of Hrs Exercise per Day_2.32Hrs,Diet_NonVegetarian,Diet_Vegetarian,Alcoholic_Occasional,Alcoholic_Regular,Complexion_Brown,Complexion_Dark,Complexion_Fair,Complexion_Wheatish
0,1,24.0,15.0,8.0,5.687164,1,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
1,2,32.0,27.0,9.0,4.071704,0,0,0,0,0,...,0,0,1,0,0,1,0,1,0,0
2,3,28.0,29.0,10.0,5.509791,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,1
4,5,58.0,21.0,9.0,5.636116,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
5,6,27.0,19.0,10.0,5.466268,0,0,0,0,0,...,0,0,1,0,0,1,1,0,0,0


In [9]:
y.head()

0    Negative
1    Positive
2    Negative
4    Negative
5    Negative
Name: Cancer Diagnosis Result, dtype: object

In [10]:
y =y.apply(lambda x : 0 if x=='Negative' else 1)

In [11]:
y

0       0
1       1
2       0
4       0
5       0
       ..
1005    1
1006    0
1007    1
1008    0
1009    0
Name: Cancer Diagnosis Result, Length: 971, dtype: int64

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42,stratify=y)

In [13]:
import pickle

model = pickle.load(open("algo.pkl", "rb"))

In [14]:
model

<classify_algo.rajkiran_classify at 0x7fc65306c910>

In [15]:
model.fit(X_train,y_train)



In [16]:
predictions = model.predict(X_test,y_test)

              precision    recall  f1-score   support

           0       0.82      0.81      0.82       155
           1       0.83      0.84      0.83       166

    accuracy                           0.83       321
   macro avg       0.83      0.83      0.83       321
weighted avg       0.83      0.83      0.83       321



pickle.dump(favorite_color, open("save.p", "wb"))

In [17]:
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


xgb_model = XGBClassifier(random_state=42).fit(X_train,y_train)
gb_model = GradientBoostingClassifier(random_state=42).fit(X_train,y_train)
rf_model = RandomForestClassifier(random_state=42).fit(X_train,y_train)
lr_model = LogisticRegression().fit(X_train,y_train)


xgb_pred = xgb_model.predict(X_test)
gb_pred = gb_model.predict(X_test)
rf_pred = rf_model.predict(X_test)
lr_pred = lr_model.predict(X_test)



In [19]:
from sklearn.metrics import classification_report

print(classification_report(y_test,xgb_pred))
print()
print(classification_report(y_test,gb_pred))
print()
print(classification_report(y_test,rf_pred))
print()
print(classification_report(y_test,lr_pred))
print()
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.44      0.43      0.44       155
           1       0.48      0.49      0.48       166

    accuracy                           0.46       321
   macro avg       0.46      0.46      0.46       321
weighted avg       0.46      0.46      0.46       321


              precision    recall  f1-score   support

           0       0.51      0.45      0.48       155
           1       0.54      0.59      0.56       166

    accuracy                           0.52       321
   macro avg       0.52      0.52      0.52       321
weighted avg       0.52      0.52      0.52       321


              precision    recall  f1-score   support

           0       0.50      0.43      0.46       155
           1       0.53      0.60      0.56       166

    accuracy                           0.51       321
   macro avg       0.51      0.51      0.51       321
weighted avg       0.51      0.51      0.51       321


              pr

In [20]:
from sklearn.metrics import classification_report,confusion_matrix

In [23]:
print(confusion_matrix(xgb_pred,y_test))
print()
print(confusion_matrix(gb_pred,y_test))
print()
print(confusion_matrix(rf_pred,y_test))
print()
print(confusion_matrix(lr_pred,y_test))
print()
print(confusion_matrix(predictions,y_test))

[[67 85]
 [88 81]]

[[70 68]
 [85 98]]

[[66 67]
 [89 99]]

[[83 90]
 [72 76]]

[[126  27]
 [ 29 139]]
