In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import joblib

# Load the dataset from UCI repository.
# The file is read with header=None, so column names are supplied here:
# an ID column, the diagnosis label, then 30 feature columns.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'
columns = ['ID', 'Diagnosis'] + [f'feature_{i}' for i in range(1, 31)]
data = pd.read_csv(url, header=None, names=columns)

# Preprocess the dataset
X = data.drop(['ID', 'Diagnosis'], axis=1)
y = data['Diagnosis'].map({'M': 1, 'B': 0})  # Map 'M' to 1 and 'B' to 0 for binary classification

# Fail fast on malformed input: Series.map yields NaN for any label that is
# not 'M' or 'B', which would otherwise only surface later as an opaque
# estimator error (or not at all, for missing feature values in some setups).
if y.isna().any():
    unexpected_labels = sorted(data.loc[y.isna(), 'Diagnosis'].astype(str).unique())
    raise ValueError(f"Unexpected Diagnosis labels (expected 'M' or 'B'): {unexpected_labels}")
if X.isna().any().any():
    raise ValueError("Feature columns contain missing values; the downloaded file may be truncated or malformed")

# Split the data (fixed random_state keeps the split reproducible across runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a StandardScaler instance and scale the data.
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out split before persisting, so the saved artifacts
# come with a recorded sanity check instead of an unused test set.
test_accuracy = model.score(X_test_scaled, y_test)
print(f"Held-out accuracy: {test_accuracy:.3f}")

# Save the model and scaler (both are needed at prediction time: inputs must
# be scaled with the same fitted scaler before being passed to the model)
joblib.dump(model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [3]:
# Function for making predictions
def predict_new_data(features, model_path='model.pkl', scaler_path='scaler.pkl'):
    """Classify a single sample with the persisted scaler and model.

    Parameters
    ----------
    features : sequence of numbers
        Feature values for ONE sample, in the same order as the columns the
        scaler was fitted on.
    model_path : str, optional
        Path of the joblib-serialized classifier. Defaults to 'model.pkl'.
    scaler_path : str, optional
        Path of the joblib-serialized scaler. Defaults to 'scaler.pkl'.

    Returns
    -------
    tuple of (str, float)
        'Positive' if the predicted class is 1, otherwise 'Negative', and the
        predicted probability of class 1.

    Raises
    ------
    ValueError
        If `features` is not numeric or does not contain exactly the number
        of values the scaler was fitted on.
    """
    # NOTE: joblib.load unpickles the file and can therefore execute arbitrary
    # code; only load artifacts produced by a trusted source.
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)

    # Treat the input as exactly one sample (one row).
    row = np.asarray(features, dtype=float).reshape(1, -1)

    # Check the feature count up front: reshape(1, -1) flattens any input, so
    # a wrong-length or multi-row input would otherwise fail later with a
    # less specific message.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        expected_count = len(feature_names)
    else:
        expected_count = getattr(scaler, 'n_features_in_', None)
    if expected_count is not None and row.shape[1] != expected_count:
        raise ValueError(
            f"Expected {expected_count} feature values for a single sample, got {row.shape[1]}"
        )

    # If the scaler was fitted on a DataFrame it remembers the column names;
    # passing a bare array would then trigger a feature-name mismatch warning
    # on every call. Re-attach the names so the input matches the fit.
    if feature_names is not None:
        row = pd.DataFrame(row, columns=list(feature_names))

    # Scale the input features
    features_scaled = scaler.transform(row)

    # Make a prediction
    prediction = model.predict(features_scaled)
    probability = float(model.predict_proba(features_scaled)[0, 1])
    return ('Positive' if prediction[0] == 1 else 'Negative', probability)

# Smoke-test the prediction helper on the first row of the held-out split.
# The row is converted to a plain Python list because the helper takes the
# feature values of a single sample as a flat sequence.
sample_features = X_test.iloc[0].to_numpy().tolist()
(prediction, probability) = predict_new_data(sample_features)
print(
    f"Prediction: {prediction}, Probability: {probability:.2f}"
)

Prediction: Negative, Probability: 0.11


