In [8]:
# Final Project Notebook
# Team Members: [Add names here]
# Dataset: MAGIC Gamma Telescope & California Housing Dataset

# === Import Libraries ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [None]:
df = pd.read_csv('data/magic04.data')

X = df.iloc[:, 1:]  # Features
y = df.iloc[:, 0]   # Target (House Value)

# Split the data into train, temp (val+test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)

# Split temp into validation and test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Initialize scaler
scaler = StandardScaler()

# Fit on train set, transform all sets
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Return as DataFrames if needed
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns)

# Output for confirmation
print(f"X_train_scaled shape: {X_train_scaled.shape}")
print(f"X_val_scaled shape: {X_val_scaled.shape}")
print(f"X_test_scaled shape: {X_test_scaled.shape}")



In [10]:
k_values = [3, 5, 7, 9]
classification_results = {}

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_class, y_train_class)
    y_pred_class = knn.predict(X_val_class)

    classification_results[k] = {
        'accuracy': accuracy_score(y_val_class, y_pred_class),
        'precision': precision_score(y_val_class, y_pred_class, pos_label='g'),
        'recall': recall_score(y_val_class, y_pred_class, pos_label='g'),
        'f1_score': f1_score(y_val_class, y_pred_class, pos_label='g'),
        'confusion_matrix': confusion_matrix(y_val_class, y_pred_class)
    }

# Display results
for k, metrics in classification_results.items():
    print(f"\nResults for k = {k}")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")



Results for k = 3
accuracy: 0.7522432701894317
precision: 0.7240461401952085
recall: 0.8143712574850299
f1_score: 0.7665570690465007
confusion_matrix: [[816 186]
 [311 693]]

Results for k = 5
accuracy: 0.7666999002991027
precision: 0.7358657243816255
recall: 0.8313373253493014
f1_score: 0.7806935332708529
confusion_matrix: [[833 169]
 [299 705]]

Results for k = 7
accuracy: 0.7602193419740778
precision: 0.7263249348392702
recall: 0.8343313373253493
f1_score: 0.7765908035299582
confusion_matrix: [[836 166]
 [315 689]]

Results for k = 9
accuracy: 0.7627118644067796
precision: 0.7247863247863248
recall: 0.846307385229541
f1_score: 0.7808471454880295
confusion_matrix: [[848 154]
 [322 682]]


In [11]:
# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train_reg, y_train_reg)

y_pred_lr = lr_model.predict(X_val_reg)
mse_lr = mean_squared_error(y_val_reg, y_pred_lr)
mae_lr = mean_absolute_error(y_val_reg, y_pred_lr)

# Lasso Regression
lasso_model = Lasso(alpha=1.0)
lasso_model.fit(X_train_reg, y_train_reg)

y_pred_lasso = lasso_model.predict(X_val_reg)
mse_lasso = mean_squared_error(y_val_reg, y_pred_lasso)
mae_lasso = mean_absolute_error(y_val_reg, y_pred_lasso)

# Ridge Regression
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train_reg, y_train_reg)

y_pred_ridge = ridge_model.predict(X_val_reg)
mse_ridge = mean_squared_error(y_val_reg, y_pred_ridge)
mae_ridge = mean_absolute_error(y_val_reg, y_pred_ridge)

# Store all results
regression_results = {
    'linear': (mse_lr, mae_lr),
    'lasso': (mse_lasso, mae_lasso),
    'ridge': (mse_ridge, mae_ridge)
}

# Display results
print("\nRegression Results:")
print(f"Linear Regression: MSE = {mse_lr}, MAE = {mae_lr}")
print(f"Lasso Regression: MSE = {mse_lasso}, MAE = {mae_lasso}")
print(f"Ridge Regression: MSE = {mse_ridge}, MAE = {mae_ridge}")



Regression Results:
Linear Regression: MSE = 4907211997.374585, MAE = 50790.06027105437
Lasso Regression: MSE = 4907215960.794804, MAE = 50790.32397510188
Ridge Regression: MSE = 4907226928.2478, MAE = 50790.607314504065


  model = cd_fast.enet_coordinate_descent(


In [15]:
print("\n=== Final Report ===\n")

# Classification Summary
print("### Classification Results Summary\n")
for k, metrics in classification_results.items():
    print(f"k = {k}")
    for metric, value in metrics.items():
        print(f"  {metric}: {value}")
    print()

# Regression Summary
print("### Regression Results Summary\n")
print(f"Linear Regression: MSE = {mse_lr}, MAE = {mae_lr}")
print(f"Lasso Regression: MSE = {mse_lasso}, MAE = {mae_lasso}")
print(f"Ridge Regression: MSE = {mse_ridge}, MAE = {mae_ridge}\n")




=== Final Report ===

### Classification Results Summary

**k = 3**
  accuracy: 0.7522432701894317
  precision: 0.7240461401952085
  recall: 0.8143712574850299
  f1_score: 0.7665570690465007
  confusion_matrix: [[816 186]
 [311 693]]

**k = 5**
  accuracy: 0.7666999002991027
  precision: 0.7358657243816255
  recall: 0.8313373253493014
  f1_score: 0.7806935332708529
  confusion_matrix: [[833 169]
 [299 705]]

**k = 7**
  accuracy: 0.7602193419740778
  precision: 0.7263249348392702
  recall: 0.8343313373253493
  f1_score: 0.7765908035299582
  confusion_matrix: [[836 166]
 [315 689]]

**k = 9**
  accuracy: 0.7627118644067796
  precision: 0.7247863247863248
  recall: 0.846307385229541
  f1_score: 0.7808471454880295
  confusion_matrix: [[848 154]
 [322 682]]

### Regression Results Summary

Linear Regression: MSE = 4907211997.374585, MAE = 50790.06027105437
Lasso Regression: MSE = 4907215960.794804, MAE = 50790.32397510188
Ridge Regression: MSE = 4907226928.2478, MAE = 50790.607314504065

