In [1]:

import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score


In [2]:

# Load the training and test datasets from the "dataset" folder
X_train = pd.read_csv('Dataset/X_train.csv')
X_test = pd.read_csv('Dataset/X_test.csv')
y_train = pd.read_csv('Dataset/y_train.csv')
y_test = pd.read_csv('Dataset/y_test.csv')


In [3]:

# Linear Regression
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)
y_pred_lr = linear_reg.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)


print(f"Mean Squared Error (MSE): {mse_lr:.4f}")
print(f"R-squared (R²) Score: {r2_lr:.4f}")



Mean Squared Error (MSE): 0.4119
R-squared (R²) Score: 0.4195


In [5]:

# Ridge Regression
ridge_best_alpha = 0.1  # As determined earlier
ridge_best = Ridge(alpha=ridge_best_alpha)
ridge_best.fit(X_train, y_train)
y_pred_ridge = ridge_best.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)


print(f"Mean Squared Error (MSE): {mse_ridge:.4f}")
print(f"R-squared (R²) Score: {r2_ridge:.4f}")



Mean Squared Error (MSE): 0.4119
R-squared (R²) Score: 0.4195


In [6]:

# Lasso Regression
lasso_best_alpha = 0.0001  # As determined earlier
lasso_best = Lasso(alpha=lasso_best_alpha)
lasso_best.fit(X_train, y_train)
y_pred_lasso = lasso_best.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

mse_lasso, r2_lasso

print(f"Mean Squared Error (MSE): {mse_ridge:.4f}")
print(f"R-squared (R²) Score: {r2_ridge:.4f}")


Mean Squared Error (MSE): 0.4119
R-squared (R²) Score: 0.4195


In [3]:
# Train a Random Forest Regressor with default parameters
from sklearn.ensemble import RandomForestRegressor


rf_default = RandomForestRegressor(random_state=42)
rf_default.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf_default.predict(X_test)

# Evaluate the Random Forest model
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)


print(f"Mean Squared Error (MSE): {mse_rf:.4f}")
print(f"R-squared (R²) Score: {r2_rf:.4f}")



  return fit_method(estimator, *args, **kwargs)


Mean Squared Error (MSE): 0.0008
R-squared (R²) Score: 0.9989


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, r2_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)  # Input to hidden layer
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)  # Hidden layer to hidden layer
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 1)  # Hidden layer to output layer

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Initialize the neural network and move it to the GPU
input_dim = X_train.shape[1]
model = SimpleNN(input_dim).to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move data to the GPU
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device)

# Training loop
epochs = 5
batch_size = 32
for epoch in range(epochs):
    for i in range(0, len(X_train), batch_size):
        optimizer.zero_grad()
        batch_X = X_train_tensor[i:i+batch_size]
        batch_y = y_train_tensor[i:i+batch_size]
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

# Predict on the test set
y_pred_tensor = model(X_test_tensor)
y_pred_nn = y_pred_tensor.cpu().detach().numpy()

# Evaluate the model
mse_nn = mean_squared_error(y_test, y_pred_nn)
r2_nn = r2_score(y_test, y_pred_nn)
print(f"Mean Squared Error (MSE): {mse_nn:.4f}")
print(f"R-squared (R²) Score: {r2_nn:.4f}")


Mean Squared Error (MSE): 0.0095
R-squared (R²) Score: 0.9867


In [5]:
from sklearn.ensemble import GradientBoostingRegressor

# Initialize Gradient Boosting Regressor with default parameters
gbm = GradientBoostingRegressor(random_state=42)
gbm.fit(X_train, y_train)

# Predict on the test set
y_pred_gbm = gbm.predict(X_test)

# Evaluate the Gradient Boosting model
mse_gbm = mean_squared_error(y_test, y_pred_gbm)
r2_gbm = r2_score(y_test, y_pred_gbm)

print(f"Mean Squared Error (MSE): {mse_gbm:.4f}")
print(f"R-squared (R²) Score: {r2_gbm:.4f}")

  y = column_or_1d(y, warn=True)


Mean Squared Error (MSE): 0.1854
R-squared (R²) Score: 0.7386


In [6]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score,f1_score

# Initialize and train the K-Nearest Neighbors Regressor
knn_reg = KNeighborsRegressor(n_neighbors=5)  # Using 5 neighbors as a start
knn_reg.fit(X_train, y_train)

# Predict on the test set
y_pred_knn = knn_reg.predict(X_test)

# Evaluate the model
mse_knn = mean_squared_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print(f"Mean Squared Error (MSE): {mse_knn:.4f}")
print(f"R-squared (R²) Score: {r2_knn:.4f}")

Mean Squared Error (MSE): 0.1659
R-squared (R²) Score: 0.7661


In [8]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Initialize and train the Decision Tree Regressor
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)

# Predict on the test set
y_pred_dt = dt_reg.predict(X_test)

# Evaluate the model
mse_dt = mean_squared_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)

print(f"Mean Squared Error (MSE): {mse_dt:.4f}")
print(f"R-squared (R²) Score: {r2_dt:.4f}")


Mean Squared Error (MSE): 0.0011
R-squared (R²) Score: 0.9984
