# In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train a logistic-regression classifier on five features of the scikit-learn
# breast-cancer dataset, report its hold-out performance, and persist the
# fitted model together with its scaler.

# 1. Load Data
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
# Class labels exactly as encoded by the dataset (see data.target_names).
df['diagnosis'] = data.target

# 2. Select 5 features (Requirement: Part A, Section 2)
features = ['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness']
X = df[features]
y = df['diagnosis']

# 3. Preprocessing (Requirement: Mandatory Scaling)
# Split BEFORE fitting the scaler: fitting on the full dataset would let the
# test rows influence the mean/std used for scaling (data leakage) and make
# the reported metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics come from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics
# Kept so any later code that refers to X_scaled still works; it is the full
# feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# 4. Train Logistic Regression (Requirement: Part A, Section 3)
model = LogisticRegression()
model.fit(X_train, y_train)

# 5. Evaluation (Requirement: Part A, Section 5)
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# 6. Save Model and Scaler (Requirement: Part A, Section 6)
# We save as a dictionary to keep them together, so whoever loads the file
# gets the exact scaler the model was trained with.
model_path = 'model/breast_cancer_model.pkl'
# joblib.dump does not create missing directories; make sure 'model/' exists.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump({'model': model, 'scaler': scaler}, model_path)