In [1]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

# --- Configuration ---------------------------------------------------------
# Values a reader may want to tune are collected here instead of being
# scattered through the cell as literals.
DATA_DIR = 'training_testing_validation_data'
TARGET_COL = 'Alpha'
PCA_VARIANCE = 0.7  # fraction of total variance the PCA projection must retain
SEED = 42  # fixed seed so the RandomForest metrics are reproducible across re-runs
RESULTS_PATH = 'final_model_results.csv'

# --- Load the datasets -----------------------------------------------------
train_data = pd.read_csv(f'{DATA_DIR}/training_set.csv')
validation_data = pd.read_csv(f'{DATA_DIR}/validation_set.csv')
test_data = pd.read_csv(f'{DATA_DIR}/test_set.csv')

# Final fit uses training + validation rows together; the test set is held
# out and only used for scoring below.
combined_data = pd.concat([train_data, validation_data])
X_combined = combined_data.drop(columns=[TARGET_COL])
y_combined = combined_data[TARGET_COL]

X_test = test_data.drop(columns=[TARGET_COL])
y_test = test_data[TARGET_COL]

# --- Define the models -----------------------------------------------------
models = {
    # random_state pins the bootstrap/feature sampling; without it every run
    # of this cell reports different RandomForest scores.
    'RandomForest': RandomForestRegressor(random_state=SEED),
    'Lasso': Lasso()
}

results = {}

# --- Train models using PCA (70% variance) ---------------------------------
# The projection is fitted on the combined training data only and then applied
# to the test set, so no test information leaks into the components.
# NOTE(review): PCA is sensitive to feature scale; this assumes the CSVs are
# already standardized -- confirm against the preprocessing step.
pca_70 = PCA(n_components=PCA_VARIANCE)
X_combined_pca_70 = pca_70.fit_transform(X_combined)
X_test_pca_70 = pca_70.transform(X_test)

for name, model in models.items():
    model.fit(X_combined_pca_70, y_combined)
    y_pred = model.predict(X_test_pca_70)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[f"{name}_PCA70"] = {'MSE': mse, 'R2': r2}

# --- Persist the results ---------------------------------------------------
# One row per model, columns MSE / R2. The original cell announced this file
# in its final message but never wrote it.
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.index.name = 'Model'
results_df.to_csv(RESULTS_PATH)

# --- Display the results ---------------------------------------------------
for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    print(f"  Mean Squared Error: {metrics['MSE']}")
    print(f"  R2 Score: {metrics['R2']}")
    print()

print("Final model training and evaluation completed. Results saved to 'final_model_results.csv'.")


Model: RandomForest_PCA70
  Mean Squared Error: 1005.5921552665295
  R2 Score: 0.5437804331634974

Model: Lasso_PCA70
  Mean Squared Error: 1000.9281512082458
  R2 Score: 0.5458964102025485

Final model training and evaluation completed. Results saved to 'final_model_results.csv'.
