Title: Cross-Validation


Task 1: K-Fold Cross-Validation for House Prices<br>
Apply K-Fold Cross-Validation (K=5) to check variability in performance.

In [1]:
# Write your code here
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=1000, n_features=5, noise=0.1, random_state=42)

model = LinearRegression()

cv_scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')

print(f"Cross-Validation Scores: {cv_scores}")
print(f"Mean MSE: {-cv_scores.mean():.2f}")


Cross-Validation Scores: [-0.01102741 -0.01153942 -0.01052537 -0.01036155 -0.01120516]
Mean MSE: 0.01


Task 2: Stratified K-Fold for Imbalanced Churn Dataset<br>
Use Stratified K-Fold to ensure each class is represented.

In [2]:
# Write your code here
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, weights=[0.7, 0.3], random_state=42)

model = RandomForestClassifier(random_state=42)
skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

accuracies = []
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

print(f"Stratified K-Fold Accuracies: {accuracies}")
print(f"Mean Accuracy: {sum(accuracies)/len(accuracies):.2f}")


Stratified K-Fold Accuracies: [0.915, 0.94, 0.905, 0.925, 0.91]
Mean Accuracy: 0.92


Task 3: Leave-One-Out Cross-Validation for Iris<br>
Use LOOCV to assess model prediction for the Iris dataset.

In [None]:

from sklearn.datasets import load_iris
from sklearn.model_selection import LeaveOneOut
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

data = load_iris()
X = data.data
y = data.target

model = DecisionTreeClassifier(random_state=42)
loo = LeaveOneOut()

accuracies = []
for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

print(f"Leave-One-Out Cross-Validation Accuracies: {accuracies}")
print(f"Mean Accuracy: {sum(accuracies)/len(accuracies):.2f}")


Leave-One-Out Cross-Validation Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
Mean Accuracy: 0.94
