# Importing Required Libraries

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler


# Loading and Preprocessing Data

In [None]:
# Load the MAGIC gamma telescope data. The UCI distribution of magic04.data has
# no header row, so header=None is required; with the default (header=0) pandas
# would consume the first observation as column names and silently drop it.
df = pd.read_csv('data/magic04.data', header=None)

X = df.iloc[:, :-1]  # Features: every column except the last
y = df.iloc[:, -1]   # Target: the last column (class label)

# Hold out 30% of the rows; the remaining 70% is the training set.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)

# Split the held-out 30% evenly into validation and test sets (15% each).
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Sanity check: the three splits must account for every loaded row.
assert len(X_train) + len(X_val) + len(X_test) == len(df)

# Fit the scaler on the training split only, then reuse its statistics for the
# validation and test splits so no information leaks from held-out data.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Confirming everything is okay (size)
print(f'X_train_scaled shape: {X_train_scaled.shape}')
print(f'X_val_scaled shape: {X_val_scaled.shape}')
print(f'X_test_scaled shape: {X_test_scaled.shape}')


# K-Nearest Neighbors Classification

In [12]:
# Evaluate K-Nearest Neighbors on the validation split for several values of k.
k_values = [3, 5, 7, 9]
classification_results = {}

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    # KNN is distance-based, so it must be trained and evaluated on the
    # standardized features produced by the preprocessing cell; on raw features
    # the columns with the largest numeric range dominate the distance.
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_val_scaled)

    # 'g' is treated as the positive class for precision/recall/F1.
    # zero_division=0 reports 0 instead of warning when a denominator is empty.
    classification_results[k] = {
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, pos_label='g', zero_division=0),
        'recall': recall_score(y_val, y_pred, pos_label='g', zero_division=0),
        'f1_score': f1_score(y_val, y_pred, pos_label='g', zero_division=0),
        'confusion_matrix': confusion_matrix(y_val, y_pred)
    }

# Display results
for k, metrics in classification_results.items():
    # The leading '\n' escape separates the report for each k with a blank line.
    print(f'\nResults for k = {k}')
    for metric, value in metrics.items():
        print(f'{metric}: {value}')


# Regression Models: Linear, Lasso, and Ridge

In [13]:
# Compare ordinary least squares, Lasso and Ridge on the California housing
# data, scoring each model on the validation split.
df = pd.read_csv('data/California_Houses.csv')

y = df.iloc[:, 0]   # Target (House Value): first column
X = df.iloc[:, 1:]  # Features: every remaining column

# 70% train, then the held-out 30% is halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Standardize using statistics learned from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = [scaler.transform(split) for split in (X_train, X_val, X_test)]


def _fit_and_score(model):
    """Fit `model` on the training split and score it on the validation split.

    Returns a tuple of (validation predictions, MSE, MAE).
    """
    model.fit(X_train, y_train)
    val_predictions = model.predict(X_val)
    return (
        val_predictions,
        mean_squared_error(y_val, val_predictions),
        mean_absolute_error(y_val, val_predictions),
    )


# Linear Regression
lr_model = LinearRegression()
y_pred_lr, mse_lr, mae_lr = _fit_and_score(lr_model)

# Lasso Regression
lasso_model = Lasso(alpha=1.0)
y_pred_lasso, mse_lasso, mae_lasso = _fit_and_score(lasso_model)

# Ridge Regression
ridge_model = Ridge(alpha=1.0)
y_pred_ridge, mse_ridge, mae_ridge = _fit_and_score(ridge_model)

# Display results
print(f'Linear Regression: MSE = {mse_lr}, MAE = {mae_lr}')
print(f'Lasso Regression: MSE = {mse_lasso}, MAE = {mae_lasso}')
print(f'Ridge Regression: MSE = {mse_ridge}, MAE = {mae_ridge}')
