In [69]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import tensorflow as tf

In [70]:
lung_cancer_df = pd.read_csv('./Resources/lung_cancer.csv')

In [71]:
lung_cancer_df.head()

Unnamed: 0,index,Patient Id,Age,Gender,Air Pollution,Alcohol use,Dust Allergy,OccuPational Hazards,Genetic Risk,chronic Lung Disease,...,Fatigue,Weight Loss,Shortness of Breath,Wheezing,Swallowing Difficulty,Clubbing of Finger Nails,Frequent Cold,Dry Cough,Snoring,Level
0,0,P1,33,1,2,4,5,4,3,2,...,3,4,2,2,3,1,2,3,4,Low
1,1,P10,17,1,3,1,5,3,4,2,...,1,3,7,8,6,2,1,7,2,Medium
2,2,P100,35,1,4,5,6,5,5,4,...,8,7,9,2,1,4,6,7,2,High
3,3,P1000,37,1,7,7,7,7,6,7,...,4,2,3,1,4,5,6,7,5,High
4,4,P101,46,1,6,8,7,7,7,6,...,3,2,4,1,4,2,4,2,3,High


In [72]:
lung_cancer_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 26 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   index                     1000 non-null   int64 
 1   Patient Id                1000 non-null   object
 2   Age                       1000 non-null   int64 
 3   Gender                    1000 non-null   int64 
 4   Air Pollution             1000 non-null   int64 
 5   Alcohol use               1000 non-null   int64 
 6   Dust Allergy              1000 non-null   int64 
 7   OccuPational Hazards      1000 non-null   int64 
 8   Genetic Risk              1000 non-null   int64 
 9   chronic Lung Disease      1000 non-null   int64 
 10  Balanced Diet             1000 non-null   int64 
 11  Obesity                   1000 non-null   int64 
 12  Smoking                   1000 non-null   int64 
 13  Passive Smoker            1000 non-null   int64 
 14  Chest Pain               

In [73]:
lung_cancer_df.shape

(1000, 26)

In [74]:
lung_cancer_df.columns

Index(['index', 'Patient Id', 'Age', 'Gender', 'Air Pollution', 'Alcohol use',
       'Dust Allergy', 'OccuPational Hazards', 'Genetic Risk',
       'chronic Lung Disease', 'Balanced Diet', 'Obesity', 'Smoking',
       'Passive Smoker', 'Chest Pain', 'Coughing of Blood', 'Fatigue',
       'Weight Loss', 'Shortness of Breath', 'Wheezing',
       'Swallowing Difficulty', 'Clubbing of Finger Nails', 'Frequent Cold',
       'Dry Cough', 'Snoring', 'Level'],
      dtype='object')

In [75]:
# Drop index and patient id columns
lung_cancer_df = lung_cancer_df.drop(columns=['index', 'Patient Id'], axis=1)

In [76]:
# Look at the different levels of risk
lung_cancer_df['Level'].unique()

array(['Low', 'Medium', 'High'], dtype=object)

In [77]:
# Look at the total number of 'Low', 'Medium' and 'High' risk of lung cancer
len(lung_cancer_df[lung_cancer_df['Level'] == 'Low']),len(lung_cancer_df[lung_cancer_df['Level'] == 'Medium']), len(lung_cancer_df[lung_cancer_df['Level'] == 'High'])

(303, 332, 365)

In [78]:
# Change [Low, Medium, High] level's to [0, 1, 2] respectively
lung_cancer_df.loc[lung_cancer_df['Level'] == 'Low', 'Level'] = 0
lung_cancer_df.loc[lung_cancer_df['Level'] == 'Medium', 'Level'] = 1
lung_cancer_df.loc[lung_cancer_df['Level'] == 'High', 'Level'] = 2

lung_cancer_df.head()

Unnamed: 0,Age,Gender,Air Pollution,Alcohol use,Dust Allergy,OccuPational Hazards,Genetic Risk,chronic Lung Disease,Balanced Diet,Obesity,...,Fatigue,Weight Loss,Shortness of Breath,Wheezing,Swallowing Difficulty,Clubbing of Finger Nails,Frequent Cold,Dry Cough,Snoring,Level
0,33,1,2,4,5,4,3,2,2,4,...,3,4,2,2,3,1,2,3,4,0
1,17,1,3,1,5,3,4,2,2,2,...,1,3,7,8,6,2,1,7,2,1
2,35,1,4,5,6,5,5,4,6,7,...,8,7,9,2,1,4,6,7,2,2
3,37,1,7,7,7,7,6,7,7,7,...,4,2,3,1,4,5,6,7,5,2
4,46,1,6,8,7,7,7,6,7,7,...,3,2,4,1,4,2,4,2,3,2


In [79]:
# Create target and feature data
y = lung_cancer_df['Level'].values
X = lung_cancer_df.drop(columns='Level', axis=1).values

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=48)

In [80]:
X_train

array([[25,  2,  3, ...,  3,  4,  3],
       [35,  1,  6, ...,  3,  4,  2],
       [23,  2,  4, ...,  4,  3,  1],
       ...,
       [37,  1,  7, ...,  6,  7,  5],
       [35,  1,  4, ...,  2,  4,  1],
       [45,  1,  3, ...,  6,  5,  4]], dtype=int64)

In [81]:
# Preprocess numerical data for neural network
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

X_train_scaled

array([[-0.96431682,  1.19464826, -0.37632986, ..., -0.25171498,
         0.09441247,  0.06505746],
       [-0.13671914, -0.83706647,  1.10918276, ..., -0.25171498,
         0.09441247, -0.61262437],
       [-1.12983635,  1.19464826,  0.11884101, ...,  0.29708222,
        -0.39392788, -1.29030619],
       ...,
       [ 0.0288004 , -0.83706647,  1.60435363, ...,  1.39467663,
         1.55943351,  1.4204211 ],
       [-0.13671914, -0.83706647,  0.11884101, ..., -0.80051219,
         0.09441247, -1.29030619],
       [ 0.69087854, -0.83706647, -0.37632986, ...,  1.39467663,
         0.58275282,  0.74273928]])

In [90]:
y_train

array([0, 1, 0, 2, 1, 1, 1, 0, 2, 0, 2, 1, 0, 1, 0, 0, 1, 2, 2, 1, 1, 2,
       2, 0, 0, 0, 2, 0, 2, 0, 1, 1, 1, 1, 0, 1, 0, 2, 1, 1, 1, 1, 1, 0,
       2, 0, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2,
       2, 0, 1, 0, 1, 0, 0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 0, 2, 2, 1,
       2, 0, 0, 2, 0, 1, 0, 2, 2, 0, 0, 1, 2, 2, 2, 1, 0, 0, 2, 1, 2, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 2, 0, 0, 1, 2, 0, 0, 2, 2, 2, 2, 2, 0, 1,
       2, 2, 1, 1, 2, 0, 2, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 1, 0, 2, 1,
       2, 1, 2, 2, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2, 1, 2, 0, 2, 2, 2, 0,
       1, 1, 0, 1, 2, 1, 0, 2, 1, 1, 2, 2, 1, 1, 2, 0, 0, 1, 2, 0, 1, 0,
       2, 2, 0, 0, 2, 1, 0, 1, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
       2, 0, 1, 0, 1, 1, 2, 2, 1, 2, 1, 0, 0, 2, 0, 1, 2, 0, 1, 1, 2, 2,
       1, 0, 0, 1, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 2, 2, 2, 1, 2, 2,
       1, 2, 2, 1, 1, 1, 2, 1, 1, 0, 0, 2, 2, 0, 0, 1, 0, 2, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0, 2, 1, 0,

In [88]:
# Define the deep learning model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=42, activation='relu', input_dim=X_train.shape[1]))
nn_model.add(tf.keras.layers.Dense(units=42, activation='relu'))
nn_model.add(tf.keras.layers.Dense(units=3, activation="sigmoid"))

In [89]:
# Complie the sequential model together and customize metris
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled, y_test)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).