In [25]:
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

class ModelContext:
    """
    Encapsulate the lifecycle of a classification model: data preprocessing,
    training, evaluation, saving, loading and prediction.
    """

    def __init__(self, model=None, scaler=None):
        """
        Initialize the ModelContext with an optional pre-trained model and scaler.

        Args:
            model: Estimator exposing ``predict`` (optional; set by
                ``train_model`` or ``load_model`` otherwise).
            scaler: Transformer exposing ``transform`` (optional; created and
                fitted by ``preprocess_data`` when missing).
        """
        self.model = model  # Classifier used by evaluate_model / predict
        self.scaler = scaler  # Feature scaler applied before every prediction

    @staticmethod
    def _split_features_target(data, target_column, drop_columns=("target",)):
        """
        Separate the label column from the feature columns.

        Args:
            data (pd.DataFrame): Input dataset.
            target_column (str): Name of the label column (must exist).
            drop_columns (iterable of str | str | None): Extra columns to
                exclude from the features; names that are absent are ignored.

        Returns:
            (pd.DataFrame, pd.Series): Feature frame and label series.

        Raises:
            KeyError: If ``target_column`` is not a column of ``data``.
        """
        y = data[target_column]  # raises KeyError when the label is missing

        if drop_columns is None:
            drop_columns = ()
        elif isinstance(drop_columns, str):
            drop_columns = (drop_columns,)
        extras = [col for col in drop_columns if col != target_column]

        # The label must exist (strict drop); extras are best-effort so the same
        # call works on datasets that do not carry those columns.
        X = data.drop(columns=[target_column]).drop(columns=extras, errors="ignore")
        return X, y

    def preprocess_data(self, data, target_column, test_size=0.2, random_state=42,
                        drop_columns=("target",)):
        """
        Preprocess the dataset by splitting it into training and testing sets,
        and scaling the features.

        Args:
            data (pd.DataFrame): Input dataset.
            target_column (str): Name of the target column.
            test_size (float): Proportion of data to use for testing.
            random_state (int): Random seed for reproducibility.
            drop_columns (iterable of str | str | None): Additional columns to
                exclude from the features (e.g. fields derived from the label).
                Missing names are ignored. Defaults to ``("target",)`` so a
                column named ``target`` is still excluded, as this method has
                always done, without failing when no such column exists.

        Returns:
            X_train, X_test, y_train, y_test: Scaled feature sets (as returned
            by the scaler's ``transform``) and the matching label splits.

        Raises:
            KeyError: If ``target_column`` is not a column of ``data``.
        """
        X, y = self._split_features_target(data, target_column, drop_columns)

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        # Create and fit a scaler only when none was supplied; an existing
        # scaler is reused as-is. Fitting uses the training split only so that
        # test-set statistics do not leak into the scaling.
        if self.scaler is None:
            self.scaler = StandardScaler()
            self.scaler.fit(X_train)

        X_train_scaled = self.scaler.transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        return X_train_scaled, X_test_scaled, y_train, y_test

    def evaluate_ensemble_models(self, X_train, X_test, y_train, y_test):
        """
        Fit every enabled candidate classifier and print its test accuracy,
        then fit a soft-voting ensemble over the same candidates and print the
        ensemble accuracy.

        Only the MLP candidate is currently enabled, so the ensemble has a
        single member; uncomment entries below to compare more models.

        Args:
            X_train (array-like): Scaled training features.
            X_test (array-like): Scaled test features.
            y_train (array-like): Training labels.
            y_test (array-like): Test labels.
        """
        classifiers = [
            #('Logistic Regression', LogisticRegression(max_iter=5000, C=1, class_weight='balanced', penalty='l2', solver='lbfgs', random_state=42)),
            #('Random Forest', RandomForestClassifier(random_state=42)),
            #('SVC', SVC(probability=True, random_state=42)),
            #('HGB', HistGradientBoostingClassifier(random_state=42)),
            #('Bagging', BaggingClassifier(n_estimators=100, max_samples=0.8, max_features=0.8, bootstrap=True, n_jobs=-1, random_state=42)),
            #('AdaBoost', AdaBoostClassifier(algorithm='SAMME', n_estimators=50, random_state=42)),
            #('GB', GradientBoostingClassifier(n_estimators=200, learning_rate=0.05, min_samples_split=20, min_samples_leaf=5, subsample=0.8, max_features='sqrt', random_state=42)),
            ('MLP', MLPClassifier(hidden_layer_sizes=(10, 10), solver='adam', learning_rate='adaptive',
                                  max_iter=500, random_state=42)),
        ]

        print("\n🔍 Accuracy of Individual Classifiers:")
        for name, clf in classifiers:
            clf.fit(X_train, y_train)
            acc = clf.score(X_test, y_test)
            print(f"{name}: {acc:.4f}")

        # Soft voting averages predicted probabilities, so every enabled
        # candidate must support predict_proba.
        voting_clf = VotingClassifier(estimators=classifiers, voting='soft')
        voting_clf.fit(X_train, y_train)
        ensemble_acc = voting_clf.score(X_test, y_test)

        print(f"\n🌐 Voting Ensemble Accuracy: {ensemble_acc:.4f}")

    def train_model(self, X_train, y_train):
        """
        Train a multi-layer perceptron classifier on the provided training data
        and store it on ``self.model``.

        Args:
            X_train (array-like): Scaled training features.
            y_train (array-like): Training labels.
        """
        self.model = MLPClassifier(hidden_layer_sizes=(100, 50, 25), activation='relu',
                                   max_iter=1000, random_state=42)
        self.model.fit(X_train, y_train)
        print("Model training completed.")

    def evaluate_model(self, X_test, y_test):
        """
        Evaluate the trained model on the test dataset.

        Args:
            X_test (array-like): Scaled test features.
            y_test (array-like): Test labels.

        Returns:
            dict: ``{"accuracy": float, "report": str}`` where ``report`` is the
            text produced by ``classification_report``.

        Raises:
            ValueError: If no model has been trained or loaded.
        """
        if self.model is None:
            raise ValueError("No trained model found. Please train the model first.")

        y_pred = self.model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred)

        print(f"Accuracy: {accuracy:.2f}")
        print("Classification Report:\n", report)

        return {"accuracy": accuracy, "report": report}

    def save_model(self, model_path="model.pkl", scaler_path="scaler.pkl"):
        """
        Save the trained model and scaler to disk as pickle files.

        Args:
            model_path (str): Path to save the model.
            scaler_path (str): Path to save the scaler.

        Raises:
            ValueError: If no model has been trained or loaded.
        """
        if self.model is None:
            raise ValueError("No trained model found. Please train the model first.")

        with open(model_path, "wb") as f:
            pickle.dump(self.model, f)
        with open(scaler_path, "wb") as f:
            pickle.dump(self.scaler, f)

        print(f"Model saved to {model_path} and scaler saved to {scaler_path}.")

    def load_model(self, model_path="model.pkl", scaler_path="scaler.pkl"):
        """
        Load a pre-trained model and scaler from disk.

        Args:
            model_path (str): Path to load the model.
            scaler_path (str): Path to load the scaler.
        """
        # NOTE: unpickling executes code embedded in the file; only load
        # artifacts that come from a trusted source.
        with open(model_path, "rb") as f:
            self.model = pickle.load(f)
        with open(scaler_path, "rb") as f:
            self.scaler = pickle.load(f)

        print(f"Model loaded from {model_path} and scaler loaded from {scaler_path}.")

    def predict(self, new_data):
        """
        Make predictions on new data using the trained model.

        Args:
            new_data (pd.DataFrame): New input data for prediction; it is passed
                through the stored scaler before reaching the model.

        Returns:
            array-like: Predicted labels.

        Raises:
            ValueError: If the model or the scaler is missing.
        """
        if self.model is None or self.scaler is None:
            raise ValueError("Model or scaler not found. Please train or load them first.")

        new_data_scaled = self.scaler.transform(new_data)
        return self.model.predict(new_data_scaled)


# Example Usage
if __name__ == "__main__":
    # End-to-end demo: train on a labelled CSV, persist the artifacts, reload
    # them into a fresh context and score an unlabelled sample file.
    target_column = "target"  # label column of the training CSV; adjust as needed
    data = pd.read_csv("Downloads/sample_classification_dataset.csv")  # adjust path as needed

    # Steps 1-3: split + scale, fit, then report hold-out metrics.
    model_context = ModelContext()
    splits = model_context.preprocess_data(data, target_column)
    X_train, X_test, y_train, y_test = splits
    model_context.train_model(X_train, y_train)
    evaluation_metrics = model_context.evaluate_model(X_test, y_test)

    # Step 4: write model.pkl / scaler.pkl using the default paths.
    model_context.save_model()

    # Step 5: prove the artifacts round-trip by loading them into a new context.
    new_model_context = ModelContext()
    new_model_context.load_model()

    # Step 6: score new rows; a label column, if one is present, is stripped first.
    new_data = pd.read_csv("Downloads/sample.csv").drop(columns=['target'], errors='ignore')
    predictions = new_model_context.predict(new_data)
    print("Predictions:", predictions)

    # Optional: compare the candidate classifiers and the voting ensemble.
    model_context.evaluate_ensemble_models(X_train, X_test, y_train, y_test)


Model training completed.
Accuracy: 0.97
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Model saved to model.pkl and scaler saved to scaler.pkl.
Model loaded from model.pkl and scaler loaded from scaler.pkl.
Predictions: [0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0]

🔍 Accuracy of Individual Classifiers:
MLP: 0.9825

🌐 Voting Ensemble Accuracy: 0.9825


In [26]:
# Restore the fitted classifier written by the training cell
with open("model.pkl", "rb") as model_fh:
    loaded_model = pickle.load(model_fh)

# Restore the scaler that was fitted alongside it
with open("scaler.pkl", "rb") as scaler_fh:
    loaded_scaler = pickle.load(scaler_fh)

In [27]:
# Read the rows to score and strip the label column when the file carries one
# (errors='ignore' makes the drop a no-op if "target" is absent).
new_data = pd.read_csv("Downloads/sample.csv").drop(columns=["target"], errors='ignore')

In [28]:
# Apply the loaded scaler to the new rows; the result is what the loaded model
# is given in the next cell.
new_data_scaled = loaded_scaler.transform(new_data)

In [29]:
# Predict labels for the scaled rows using the model loaded from model.pkl
predictions = loaded_model.predict(new_data_scaled)

# Print the predicted labels
print("Predictions:", predictions)

Predictions: [0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0]


In [30]:
import pickle
import pandas as pd

def predict_from_file(model_path, scaler_path, data_path, drop_columns=None):
    """
    Load model and scaler, apply preprocessing to new data, and return predictions.

    Args:
        model_path (str): Path to the saved model pickle file.
        scaler_path (str): Path to the saved scaler pickle file.
        data_path (str): Path to the CSV file containing new data.
        drop_columns (list, optional): Columns to drop from the input before
            prediction. Names that are not present in the CSV are ignored.

    Returns:
        array-like: Predicted labels, as returned by the model's ``predict``.
    """
    # NOTE: unpickling executes code embedded in the file; only pass paths to
    # artifacts that come from a trusted source.
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    with open(scaler_path, "rb") as f:
        scaler = pickle.load(f)

    # Read the file the caller asked for (previously a fixed path was read and
    # ``data_path`` was silently ignored).
    new_data = pd.read_csv(data_path)

    # Drop unnecessary columns (like the target or derived features)
    if drop_columns:
        new_data = new_data.drop(columns=drop_columns, errors='ignore')

    # Apply the same scaling the model was trained with, then predict
    new_data_scaled = scaler.transform(new_data)
    return model.predict(new_data_scaled)


In [31]:
# Score the sample CSV with the persisted model and scaler. Arguments are, in
# order: model path, scaler path, data path, columns to drop before scoring.
preds = predict_from_file(
    "model.pkl",
    "scaler.pkl",
    "Downloads/sample.csv",
    ["delay_probability"],  # dropped only if the column is present
)

print("Predictions:", preds)

Predictions: [0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0]


## ✅ Step-by-Step: Save Your Code as a .py File
## 🚀 Run the App with Streamlit
## Open Command Prompt or Anaconda Prompt

In [None]:
import streamlit as st
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

# Page chrome: the same title is used for the browser tab and the page heading.
APP_TITLE = "🩺 Breast Cancer Prediction App"
APP_DESCRIPTION = """
Upload your breast cancer dataset in CSV format to predict whether a tumor is benign or malignant using a pre-trained machine learning model.
"""

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(page_title=APP_TITLE, layout="wide")

# Heading and introductory text
st.title(APP_TITLE)
st.markdown(APP_DESCRIPTION)

# The CSV upload control lives in the sidebar
sidebar = st.sidebar
sidebar.header("Upload Data")
uploaded_file = sidebar.file_uploader("Choose a CSV file", type="csv")

# Load model and scaler
@st.cache_resource
def load_model_scaler(model_path="model.pkl", scaler_path="scaler.pkl"):
    """
    Unpickle the model and the scaler and return them as ``(model, scaler)``.

    Wrapped in ``st.cache_resource`` so the files are not re-read on every
    script rerun.
    """
    artifacts = []
    # Model first, then scaler -- the same order in which they are returned.
    for artifact_path in (model_path, scaler_path):
        with open(artifact_path, "rb") as handle:
            artifacts.append(pickle.load(handle))
    return tuple(artifacts)

model, scaler = load_model_scaler()

# Label columns that must never reach the scaler/model as features.
# "diagnosis" is the name this app has always stripped; "target" is the label
# column the training code in this file removes before fitting.
LABEL_COLUMNS = ["diagnosis", "target"]

# Perform prediction
if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
        st.subheader("📄 Uploaded Data Preview")
        st.write(data.head())

        # Strip any label column that is present; absent names are ignored.
        # drop() returns a new frame, so ``data`` itself is left untouched here.
        data_features = data.drop(columns=LABEL_COLUMNS, errors="ignore")

        # Scale the data with the scaler saved at training time
        data_scaled = scaler.transform(data_features)

        # Make predictions; column 1 of predict_proba is taken as the
        # probability of the second class (assumes a binary classifier).
        predictions = model.predict(data_scaled)
        prediction_probabilities = model.predict_proba(data_scaled)[:, 1]

        # Add predictions to the dataframe
        data['Prediction'] = predictions
        data['Prediction Probability'] = prediction_probabilities

        # Map predictions to labels.
        # NOTE(review): confirm this encoding against the training data --
        # scikit-learn's bundled breast-cancer dataset, for example, uses
        # 0 = malignant and 1 = benign, the opposite of this mapping.
        data['Prediction Label'] = data['Prediction'].map({0: 'Benign', 1: 'Malignant'})

        st.subheader("✅ Prediction Results")
        st.write(data[['Prediction Label', 'Prediction Probability']])

        # Visualization: Count plot of predictions
        st.subheader("📊 Prediction Distribution")
        fig, ax = plt.subplots(figsize=(10, 10))  # 10 x 10 inch figure
        try:
            sns.countplot(x='Prediction Label', data=data, palette='Set2', ax=ax)
            ax.set_title("Count of Predicted Labels")
            st.pyplot(fig)
        finally:
            # pyplot keeps every figure alive until it is closed; the script is
            # rerun on each interaction, so close it to avoid piling them up.
            plt.close(fig)

        # Option to download
        csv = data.to_csv(index=False).encode('utf-8')
        st.download_button("📥 Download Predictions", csv, "predictions.csv", "text/csv")

    except Exception as e:
        # Top-level UI boundary: show the problem in the page instead of
        # crashing the app.
        st.error(f"An error occurred: {e}")
else:
    st.info("Please upload a CSV file to begin.")

from PIL import Image

# Open 'Capture.jpg' (path is relative to the working directory) and display it
# with a caption at a fixed width of 300.
image = Image.open('Capture.jpg')
st.image(image, caption='Breast Cancer Awareness', width=300)