# AI/ML Mini Project Starter Notebook

This notebook demonstrates a minimal workflow:
- Load a dataset
- Split into train/test sets and standardize the features
- Train a simple model (LogisticRegression)
- Evaluate with accuracy and a confusion matrix


In [None]:
# Imports
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's global plot styling once, up front, so later figures pick it up.
sns.set_theme(context="notebook", style="whitegrid")


In [None]:
# Load dataset
# Wrap the loader's feature matrix and label vector in pandas objects so the
# columns carry the dataset's feature names and the labels carry a name.
raw = load_breast_cancer()
X = pd.DataFrame(data=raw.data, columns=raw.feature_names)
y = pd.Series(data=raw.target, name="target")

print(f"Shapes: X={X.shape}, y={y.shape}")
# Keep the preview as the cell's final expression so it renders as a table.
X.head()


In [None]:
# Train/test split and scaling
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions comparable in both partitions; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both partitions so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(X_train_scaled.shape, X_test_scaled.shape)


In [None]:
# Train logistic regression
# Fit on the standardized training features. max_iter is set to 1000 to give
# the solver ample iterations to converge; every other hyperparameter stays at
# the library default.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train)

# Evaluate
# Predict on the held-out, identically scaled test features and report the
# fraction of predictions that match the true labels.
preds = clf.predict(X_test_scaled)
acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc:.3f}")


In [None]:
# Confusion matrix
# Create the Axes explicitly so the figure size and title are set on known objects.
fig, ax = plt.subplots(figsize=(4, 4))

# Tabulate test-set counts for each (true label, predicted label) pair and
# render them with the dataset's class names on the tick labels.
cm = confusion_matrix(y_test, preds)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=raw.target_names,
)
disp.plot(ax=ax, cmap="Blues", colorbar=False)

ax.set(title="Confusion Matrix")
fig.tight_layout()
plt.show()
