## Load The Dataset


In [2]:
import pandas as pd

# Load dataset
df = pd.read_csv("C:/Users/DELL/downloads/breast-cancer.csv")

# Basic inspection
print(df.head())
print(df.info())
print(df.describe())


         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  radius_worst  texture_worst  perimeter_worst  area_wor

## Data Preprocessing

In [7]:
# Check for missing values

print(df.isnull().sum())

# Handle missing values and encode categorical data as needed

from sklearn.preprocessing import LabelEncoder

# Encode the 'diagnosis' column (or any other categorical column in your dataset)
label_encoder = LabelEncoder()
df['diagnosis'] = label_encoder.fit_transform(df['diagnosis'])

# Check if encoding was successful
print(df['diagnosis'].head())


id                         0
diagnosis                  0
radius_mean                0
texture_mean               0
perimeter_mean             0
area_mean                  0
smoothness_mean            0
compactness_mean           0
concavity_mean             0
concave points_mean        0
symmetry_mean              0
fractal_dimension_mean     0
radius_se                  0
texture_se                 0
perimeter_se               0
area_se                    0
smoothness_se              0
compactness_se             0
concavity_se               0
concave points_se          0
symmetry_se                0
fractal_dimension_se       0
radius_worst               0
texture_worst              0
perimeter_worst            0
area_worst                 0
smoothness_worst           0
compactness_worst          0
concavity_worst            0
concave points_worst       0
symmetry_worst             0
fractal_dimension_worst    0
dtype: int64
0    1
1    1
2    1
3    1
4    1
Name: diagnosis, dtype: 

## Feature Selection

In [9]:
from sklearn.feature_selection import SelectKBest, f_classif

# Replace 'diagnosis' if your target variable has a different name
X = df.drop('diagnosis', axis=1)  # Features
y = df['diagnosis']              # Target

# Select top k features
selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)

# Get the selected features
selected_features = X.columns[selector.get_support()]
print("Selected Features:", selected_features)



Selected Features: Index(['radius_mean', 'perimeter_mean', 'area_mean', 'concavity_mean',
       'concave points_mean', 'radius_worst', 'perimeter_worst', 'area_worst',
       'concavity_worst', 'concave points_worst'],
      dtype='object')


## Grid Search CV for Model Tuning

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Define parameter grid
param_grid = {
    'hidden_layer_sizes': [(50,50), (100,)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'learning_rate': ['constant', 'adaptive']
}

# Initialize model
model = MLPClassifier(max_iter=500)

# Grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_selected, y)

print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)




Best Parameters: {'activation': 'relu', 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'adam'}
Best Score: 0.924406148113647


## Implementing an Artificial Neural Network (ANN) Model

In [11]:
from sklearn.metrics import accuracy_score

# Best model from grid search
best_model = grid_search.best_estimator_

# Train model
best_model.fit(X_selected, y)

# Predictions
y_pred = best_model.predict(X_selected)

# Evaluate
print("Accuracy:", accuracy_score(y, y_pred))


Accuracy: 0.9420035149384886


## Building a Streamlit App Locally

In [12]:
import streamlit as st
import pandas as pd

st.title("Breast Cancer Prediction")

# File uploader
uploaded_file = st.file_uploader("Upload CSV file", type="csv")

if uploaded_file:
    df = pd.read_csv(uploaded_file)
    st.write("Dataset Preview:", df.head())

    # Placeholder: Add prediction logic
    st.write("Model predictions go here!")


2024-11-25 11:38:44.200 
  command:

    streamlit run C:\Users\DELL\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py [ARGUMENTS]
