# Using Logistic Regression:

### Importing the dataset:

This is the cleaned dataset of classifying cancer(as malignant-1 or benign-0) taken from Kaggle.com 

In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv('breast_cancer.csv')

In [3]:
df.head()

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2
3,6,8,8,1,3,4,3,7,1,2
4,4,1,1,3,2,1,3,1,1,2


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 683 entries, 0 to 682
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Clump Thickness              683 non-null    int64
 1   Uniformity of Cell Size      683 non-null    int64
 2   Uniformity of Cell Shape     683 non-null    int64
 3   Marginal Adhesion            683 non-null    int64
 4   Single Epithelial Cell Size  683 non-null    int64
 5   Bare Nuclei                  683 non-null    int64
 6   Bland Chromatin              683 non-null    int64
 7   Normal Nucleoli              683 non-null    int64
 8   Mitoses                      683 non-null    int64
 9   Class                        683 non-null    int64
dtypes: int64(10)
memory usage: 53.5 KB


In [5]:
df['Class'].unique()

array([2, 4], dtype=int64)

In [9]:
df['Class'].value_counts()

2    444
4    239
Name: Class, dtype: int64

here 2----0 (benign)  and 4------1(Malignant)

#Changing this to 0 or 1 so that logisitic regression can be used

In [8]:
pd.get_dummies(df['Class'])

Unnamed: 0,2,4
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0
...,...,...
678,1,0
679,1,0
680,0,1
681,0,1


In [10]:
df["Class"]=pd.get_dummies(df['Class'])[2]

In [11]:
df.head()

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,1
1,5,4,4,5,7,10,3,2,1,1
2,3,1,1,1,2,2,3,1,1,1
3,6,8,8,1,3,4,3,7,1,1
4,4,1,1,3,2,1,3,1,1,1


In [12]:
df['Class'].value_counts()

1    444
0    239
Name: Class, dtype: int64

Class---1 means benign  and Class 0 means benign

In [13]:
X=df.drop('Class',axis=1).values
y=df['Class'].values

In [14]:
X.shape

(683, 9)

In [15]:
y.shape

(683,)

# Splitting the data into training and testing part

In [29]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=30)

In [30]:
X_train.shape

(512, 9)

In [32]:
y_train.shape

(512,)

In [19]:
X_test.shape

(171, 9)

### Scaling the data

In [21]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X_train=scaler.fit_transform(X_test)
X_test=scaler.transform(X_test)

In [33]:
from sklearn.linear_model import LogisticRegression
model1=LogisticRegression()

In [34]:
model1.fit(X_train,y_train)

In [35]:
pred=model1.predict(X_test)

In [39]:
from sklearn.metrics import classification_report
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94        54
           1       0.97      0.97      0.97       117

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.95       171
weighted avg       0.96      0.96      0.96       171



# Using Neural Netowrking

In [46]:
from tensorflow import keras 
from keras import layers,Sequential
model2=keras.Sequential([
    keras.layers.Dense(9,input_dim=9,activation='relu'),
    keras.layers.Dense(6,activation='relu'),
    keras.layers.Dense(1,activation='sigmoid'),
    
])
model2.compile(
    optimizer='adam',
    metrics=['accuracy'],
    loss='binary_crossentropy',
)

In [57]:
model2.fit(X_train,y_train,epochs=50,batch_size=24,verbose=0)

<keras.src.callbacks.history.History at 0x2525059f150>

In [58]:
pred=model2.predict(X_test)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [59]:
pred.shape

(171, 1)

In [60]:
y_test.shape

(171,)

In [61]:
pred=pred.reshape(-1,)

In [62]:
pred.shape

(171,)

In [67]:
pred

array([8.12998414e-02, 3.75445157e-01, 5.78585919e-03, 9.82329190e-01,
       9.89720047e-01, 9.70652401e-01, 2.78891563e-01, 9.91243720e-01,
       9.90786016e-01, 5.28842330e-01, 9.89672959e-01, 9.83857095e-01,
       1.12199457e-02, 9.89720047e-01, 9.93660867e-01, 9.82329190e-01,
       9.90267456e-01, 9.81299460e-01, 7.99591660e-01, 2.70928233e-03,
       9.90620613e-01, 1.08858124e-07, 9.69372094e-01, 9.90620613e-01,
       9.22584772e-01, 9.88897026e-01, 2.13598159e-05, 9.90267456e-01,
       5.61053872e-01, 9.70687470e-05, 9.90593374e-01, 7.09878350e-07,
       6.89120069e-02, 9.92209792e-01, 9.91243720e-01, 1.05414549e-02,
       8.83006398e-03, 9.82502520e-01, 9.91246521e-01, 9.90593374e-01,
       9.93209481e-01, 9.90593374e-01, 9.87074494e-01, 9.69372094e-01,
       5.07195527e-03, 9.90168750e-01, 9.89530265e-01, 9.90267456e-01,
       4.51643544e-04, 9.77531970e-01, 2.69068085e-04, 9.03068960e-01,
       6.39486760e-02, 9.64405775e-01, 1.33378152e-03, 1.34833995e-02,
      

In [63]:
#now prediction and y_test have the same shape

In [70]:
pred_=[]
for i in pred:
    pred_.append(np.round(i))
pred=np.array(pred_)

In [71]:
pred

array([0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1.,
       1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0.,
       1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0.,
       1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0.,
       0.], dtype=float32)

In [72]:
pred.shape

(171,)

In [73]:
y_test.shape

(171,)

In [75]:
print(classification_report(y_test,pred))

              precision    recall  f1-score   support

           0       0.94      0.91      0.93        55
           1       0.96      0.97      0.97       116

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.95       171
weighted avg       0.95      0.95      0.95       171



In [87]:
from tensorflow import keras 
from keras import layers,Sequential
model2=keras.Sequential([
    keras.layers.Dense(20,input_dim=9,activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(10,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1,activation='sigmoid'),
    
])
model2.compile(
    optimizer='adam',
    metrics=['accuracy'],
    loss='binary_crossentropy',
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [88]:
model2.fit(X_train,y_train,epochs=50,batch_size=24)

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.5764 - loss: 0.9399
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5866 - loss: 0.8200
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6304 - loss: 0.6974
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6660 - loss: 0.6908
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6720 - loss: 0.6667
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6916 - loss: 0.6233
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6771 - loss: 0.6085
Epoch 8/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7451 - loss: 0.5807
Epoch 9/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x25254867190>

In [89]:
pred=model2.predict(X_test)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


In [90]:
pred=pred.reshape(-1,)

In [91]:
pred_=[]
for i in pred:
    pred_.append(np.round(i))
pred=np.array(pred_)

In [92]:
pred

array([0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1.,
       1., 1., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1.,
       1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0.,
       1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0.,
       0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1.,
       0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0.,
       1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0.,
       0.], dtype=float32)

In [94]:
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

         0.0       0.96      0.88      0.92        60
         1.0       0.94      0.98      0.96       111

    accuracy                           0.95       171
   macro avg       0.95      0.93      0.94       171
weighted avg       0.95      0.95      0.95       171

