In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import gradio as gr

# Step 1: Load and prepare the dataset
column_names = ['ID', 'Clump_Thickness', 'Uniformity_Cell_Size', 'Uniformity_Cell_Shape',
                'Marginal_Adhesion', 'Single_Epithelial_Cell_Size', 'Bare_Nuclei',
                'Bland_Chromatin', 'Normal_Nucleoli', 'Mitoses', 'Class']

df = pd.read_csv("/content/breast-cancer-wisconsin.data", names=column_names)

# Preprocessing
df['Bare_Nuclei'].replace('?', np.nan, inplace=True)
df.dropna(inplace=True)
df['Bare_Nuclei'] = df['Bare_Nuclei'].astype(int)
df.drop('ID', axis=1, inplace=True)

X = df.drop('Class', axis=1)
y = df['Class'].map({2: 0, 4: 1})  # 0 = Benign, 1 = Malignant

# Train-test split and scaling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Step 2: Define prediction function
def predict_cancer(clump, cell_size, cell_shape, adhesion,
                   epithelial_size, nuclei, chromatin, nucleoli, mitoses):
    input_data = np.array([[clump, cell_size, cell_shape, adhesion,
                            epithelial_size, nuclei, chromatin, nucleoli, mitoses]])
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)[0]
    return "🔴 Malignant" if prediction == 1 else "🟢 Benign"

# Step 3: Create Gradio interface
inputs = [
    gr.Slider(1, 10, step=1, label="Clump Thickness"),
    gr.Slider(1, 10, step=1, label="Uniformity of Cell Size"),
    gr.Slider(1, 10, step=1, label="Uniformity of Cell Shape"),
    gr.Slider(1, 10, step=1, label="Marginal Adhesion"),
    gr.Slider(1, 10, step=1, label="Single Epithelial Cell Size"),
    gr.Slider(1, 10, step=1, label="Bare Nuclei"),
    gr.Slider(1, 10, step=1, label="Bland Chromatin"),
    gr.Slider(1, 10, step=1, label="Normal Nucleoli"),
    gr.Slider(1, 10, step=1, label="Mitoses"),
]

gr.Interface(
    fn=predict_cancer,
    inputs=inputs,
    outputs="text",
    title="🔬 Breast Cancer Prediction",
    description="Enter medical cell sample features to predict whether the tumor is benign or malignant.",
    theme="default"
).launch()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Bare_Nuclei'].replace('?', np.nan, inplace=True)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a54c44cb1bd275b312.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


