# Lung Cancer Prediction: Interactive EDA & Modeling

This notebook performs exploratory data analysis (EDA), provides interactive per-feature insights, and trains a machine-learning model on the Lung Cancer Prediction dataset.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output


## Load Dataset

In [None]:

# Location of the CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/lung-cancer-prediction-dataset/lung_cancer.csv'

# Read the raw table, then preview its first rows as the cell output.
df = pd.read_csv(DATA_PATH)
df.head()


## Basic EDA

In [None]:

# Print a per-column summary (dtype and non-null count) to spot missing values
# and columns that will need encoding before modeling.
df.info()


## Target Distribution

In [None]:

# Draw on an explicit Axes so the chart can carry its own title and axis label
# and still makes sense when the notebook is skimmed without reading the code.
fig, ax = plt.subplots()
sns.countplot(x='LUNG_CANCER', data=df, ax=ax)
ax.set_title('Target distribution: LUNG_CANCER')
ax.set_ylabel('Number of records')
plt.show()


## Interactive EDA Insights

In [None]:

# Every column except the target can be picked for inspection.
feature_options = [name for name in df.columns if name != 'LUNG_CANCER']

# Output area that receives the printed correlation and the plot.
out = widgets.Output()

# Button that triggers the insight for the currently selected feature.
button = widgets.Button(description='Show Insight')

# Selector listing the inspectable features.
feature_dropdown = widgets.Dropdown(options=feature_options, description='Feature:')

def _numeric_for_corr(series):
    """Return a numeric version of ``series`` that is safe to pass to ``Series.corr``.

    Numeric columns are returned unchanged so the correlation uses their real
    values rather than the rank of each distinct value. Any other column is
    mapped to its category codes; pandas uses the code -1 for missing entries,
    so those are replaced by NaN and ``corr`` ignores the affected rows.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series
    codes = series.astype('category').cat.codes
    return codes.where(codes >= 0)


def on_click(b):
    """Button callback: report how the selected feature relates to LUNG_CANCER.

    Clears the output area, prints the correlation between the selected
    feature and the target, and draws a count plot of the feature split by
    target value.

    Parameters
    ----------
    b : the button instance passed in by the widget framework (unused).
    """
    with out:
        # wait=True defers clearing until new output arrives, avoiding flicker.
        clear_output(wait=True)
        col = feature_dropdown.value
        correlation = _numeric_for_corr(df[col]).corr(_numeric_for_corr(df['LUNG_CANCER']))
        print('Correlation:', correlation)
        sns.countplot(x=col, hue='LUNG_CANCER', data=df)
        plt.show()

# Register the callback so each button click runs on_click.
button.on_click(on_click)
# Render the selector, the button, and the output area as this cell's output.
display(feature_dropdown, button, out)


## Machine Learning Model

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Work on a copy so the raw frame used by the EDA cells stays untouched.
df_encoded = df.copy()
for c in df_encoded.columns:
    # Encode every non-numeric column rather than only dtype 'object': text
    # columns can be stored with a dedicated string dtype (or as 'category')
    # that does not compare equal to 'object', and would otherwise reach the
    # model as raw, unencoded values.
    if not pd.api.types.is_numeric_dtype(df_encoded[c]):
        df_encoded[c] = LabelEncoder().fit_transform(df_encoded[c])

# Features are all columns except the target; the target is the encoded label.
X = df_encoded.drop('LUNG_CANCER', axis=1)
y = df_encoded['LUNG_CANCER']

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the train and test partitions, so the reported
# metrics do not depend on how an unlucky shuffle distributes the classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# A fixed random_state makes the fitted forest reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
pred = model.predict(X_test)

# Overall accuracy plus per-class precision/recall/F1 on the held-out rows.
print("Accuracy:", accuracy_score(y_test, pred))
print(classification_report(y_test, pred))
