### **Load and Test-Train Split the Data**

In [None]:
from sklearn.datasets import fetch_openml
# Fetch the 'mnist_784' dataset from OpenML; as_frame=False asks for
# array-style data rather than a pandas DataFrame.
mnist = fetch_openml(name='mnist_784', as_frame=False)
X = mnist.data
y = mnist.target

# Positional split: the first 60,000 rows are used for training and every
# remaining row is held out for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

### **Train a Binary Classifier**

In [None]:
from sklearn.linear_model import SGDClassifier
# Boolean targets for the "5 vs. not-5" task: True wherever the label
# equals the string '5'.
y_train_5, y_test_5 = (y_train == '5'), (y_test == '5')

# random_state is fixed so repeated runs give the same model; fit() hands
# back the fitted estimator, so construction and training are chained.
sgd_clf = SGDClassifier(random_state=42).fit(X_train, y_train_5)

# Predict on the first sample of the full dataset; predict() is given a
# one-element list so it receives a batch containing a single sample.
some_digit = X[0]
sgd_clf.predict([some_digit])

---
### Step 1: Get Cross-Validated Predictions
* Use `cross_val_predict()` to generate out-of-sample predictions (i.e., each prediction is made on data not seen during training). Use 3-fold cross-validation to get a more reliable estimate of your model’s performance:

In [None]:
from sklearn.model_selection import cross_val_predict

# 3-fold cross-validated predictions for the training set, using the
# binary "is it a 5?" targets.
y_train_pred = cross_val_predict(
    estimator=sgd_clf,
    X=X_train,
    y=y_train_5,
    cv=3,
)

### Step 2: Compute the Confusion Matrix

* Now compare the actual labels (`y_train_5`) with the predicted ones (`y_train_pred`):

In [None]:
from sklearn.metrics import confusion_matrix

# Tabulate the true labels against the cross-validated predictions and
# print the resulting matrix.
conf_matrix = confusion_matrix(
    y_true=y_train_5,
    y_pred=y_train_pred,
)
print(conf_matrix)

### Step 3: Compute Precision and Recall

**Interpreting the scores computed in the cell below:**

- Precision ≈ 83.7%: of the images predicted to be 5s, 83.7% are actually 5s (high precision)
- Recall ≈ 65.1%: but only 65.1% of the actual 5s were detected (lower recall)

In [None]:
from sklearn.metrics import precision_score, recall_score

# Score the cross-validated predictions against the true "is it a 5?" labels.
precision = precision_score(y_true=y_train_5, y_pred=y_train_pred)
recall = recall_score(y_true=y_train_5, y_pred=y_train_pred)

# Report both metrics, precision first.
print("Precision:", precision)
print("Recall:", recall)