In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt


# Number of cross-validation folds. This single value drives the shared
# splitter below, so editing it changes the fold count everywhere `kf` is used.
cv_folds = 10  # You can adjust this number

# Shuffled K-fold splitter reused by every cross-validated step in this
# script; the fixed seed keeps the fold assignment identical between runs.
kf = KFold(n_splits=cv_folds, shuffle=True, random_state=42)

# Load the dataset into a DataFrame.
# NOTE: despite the variable name, the file is parsed with pd.read_csv, so the
# path must point at a CSV file (a real .xlsx workbook needs pd.read_excel).
excel_file = r"your path"
df = pd.read_csv(excel_file)

# Remove rows with missing values
df_clean = df.dropna()

# Separate features and target: the last column is the target, every other
# column is a feature.
X = df_clean.iloc[:, :-1]
y = df_clean.iloc[:, -1]

# Hold out 20% of the rows as a test set (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise features: the scaler is fitted on the training rows only and the
# same transform is then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# NOTE(review): X_train_scaled was standardised on the whole training set
# before these folds are cut, so each validation fold has already contributed
# to the scaler statistics. Confirm this is acceptable for the CV scores.

# Ordinary least squares: collect per-fold R^2 with the shared splitter, then
# fit once on the full training set for the held-out evaluation.
linear_reg = LinearRegression()
linear_reg_cv_scores = cross_val_score(
    estimator=linear_reg,
    X=X_train_scaled,
    y=y_train,
    scoring='r2',
    cv=kf,
)
linear_reg.fit(X_train_scaled, y_train)

# Lasso: grid-search the regularisation strength, selecting by R^2.
lasso = Lasso()
lasso_params = dict(alpha=[0.01, 0.1, 1.0, 10.0])
lasso_grid = GridSearchCV(
    estimator=lasso,
    param_grid=lasso_params,
    scoring='r2',
    cv=kf,
)
lasso_grid.fit(X_train_scaled, y_train)

# Ridge: same candidate strengths and selection criterion as Lasso.
ridge = Ridge()
ridge_params = dict(alpha=[0.01, 0.1, 1.0, 10.0])
ridge_grid = GridSearchCV(
    estimator=ridge,
    param_grid=ridge_params,
    scoring='r2',
    cv=kf,
)
ridge_grid.fit(X_train_scaled, y_train)

# Random Forest with GridSearchCV and Cross-Validation.
# random_state is fixed so the randomised tree building is repeatable; the
# train/test split and the CV folds are already seeded with 42, and without a
# seed here the selected hyper-parameters and reported metrics would still
# change from run to run.
rf_regressor = RandomForestRegressor(random_state=42)
rf_params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [10, 15, 20, 25],
}
rf_grid = GridSearchCV(rf_regressor, rf_params, cv=kf, scoring='r2')
rf_grid.fit(X_train_scaled, y_train)

# Decision Tree Regressor with GridSearchCV and Cross-Validation.
# Seeded for the same reason: scikit-learn documents that the tree builder
# permutes features at each split, so equally good splits can otherwise be
# resolved differently between runs.
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_params = {'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]}
dt_grid = GridSearchCV(dt_regressor, dt_params, cv=kf, scoring='r2')
dt_grid.fit(X_train_scaled, y_train)

# Test-set predictions, one array per fitted model, in the fixed order
# linear / lasso / ridge / random forest / decision tree.
_fitted_models = (linear_reg, lasso_grid, ridge_grid, rf_grid, dt_grid)
linear_reg_pred, lasso_pred, ridge_pred, rf_pred, dt_pred = [
    fitted.predict(X_test_scaled) for fitted in _fitted_models
]

# Score every prediction array against the held-out targets. Each metric is
# evaluated for all five models, keeping the same model order as above.
_test_preds = (linear_reg_pred, lasso_pred, ridge_pred, rf_pred, dt_pred)

# Coefficient of determination
linear_reg_r2, lasso_r2, ridge_r2, rf_r2, dt_r2 = [
    r2_score(y_test, pred) for pred in _test_preds
]

# Mean squared error
linear_reg_mse, lasso_mse, ridge_mse, rf_mse, dt_mse = [
    mean_squared_error(y_test, pred) for pred in _test_preds
]

# Mean absolute error
linear_reg_mae, lasso_mae, ridge_mae, rf_mae, dt_mae = [
    mean_absolute_error(y_test, pred) for pred in _test_preds
]

# Test-set metrics, one row per model; transposed into the column-oriented
# dict that is later turned into the 'Model Evaluation' sheet.
_metric_rows = (
    ('Linear Regression', linear_reg_r2, linear_reg_mse, linear_reg_mae),
    ('Lasso', lasso_r2, lasso_mse, lasso_mae),
    ('Ridge', ridge_r2, ridge_mse, ridge_mae),
    ('Random Forest', rf_r2, rf_mse, rf_mae),
    ('Decision Tree', dt_r2, dt_mse, dt_mae),
)
_model_names, _r2_values, _mse_values, _mae_values = (
    list(column) for column in zip(*_metric_rows)
)
results = {
    'Model': _model_names,
    'R-squared': _r2_values,
    'MSE': _mse_values,
    'MAE': _mae_values,
}

# Per-feature coefficients of the linear models. For the grid searches the
# values come from the best estimator found by the search.
_coefficient_columns = {'Feature': X.columns}
_coefficient_columns['Linear Regression Coefficients'] = linear_reg.coef_
_coefficient_columns['Lasso Coefficients'] = lasso_grid.best_estimator_.coef_
_coefficient_columns['Ridge Coefficients'] = ridge_grid.best_estimator_.coef_
coefficients = pd.DataFrame(_coefficient_columns)

# Per-feature importances of the tree-based models (best estimator of each
# grid search).
_importance_columns = {'Feature': X.columns}
_importance_columns['Random Forest Importances'] = rf_grid.best_estimator_.feature_importances_
_importance_columns['Decision Tree Importances'] = dt_grid.best_estimator_.feature_importances_
importances = pd.DataFrame(_importance_columns)

# Save to Excel: one workbook with a sheet for the metrics, one for the linear
# coefficients and one for the tree importances.
# The destination is named once so the confirmation message below always
# reports the file that was actually written.
output_path = r"D:/programming/Parsian/ML Models/Results/decision_tree_results.xlsx"
with pd.ExcelWriter(output_path) as writer:
    pd.DataFrame(results).to_excel(writer, sheet_name='Model Evaluation', index=False)
    coefficients.to_excel(writer, sheet_name='Coefficients', index=False)
    importances.to_excel(writer, sheet_name='Importances', index=False)

print(f"Results saved to {output_path}")

# Console report: a heading followed by R-squared, MSE and MAE for each model.
# Headings after the first start with a newline so the sections are separated
# by a blank line.
_report_sections = (
    ("Linear Regression:", linear_reg_r2, linear_reg_mse, linear_reg_mae),
    ("\nLasso:", lasso_r2, lasso_mse, lasso_mae),
    ("\nRidge:", ridge_r2, ridge_mse, ridge_mae),
    ("\nRandom Forest Regressor:", rf_r2, rf_mse, rf_mae),
    ("\nDecision Tree Regressor:", dt_r2, dt_mse, dt_mae),
)
for _heading, _r2_value, _mse_value, _mae_value in _report_sections:
    print(_heading)
    print(f"R-squared: {_r2_value}")
    print(f"MSE: {_mse_value}")
    print(f"MAE: {_mae_value}")


Results saved to D:/programming/Parsian/ML Models/Results/decision_tree_results.xlsx
Linear Regression:
R-squared: 0.9999982803305351
MSE: 0.012384680824799382
MAE: 0.08960994532578354

Lasso:
R-squared: 0.9999982922868341
MSE: 0.01229857419184808
MAE: 0.08936470640315984

Ridge:
R-squared: 0.9999983275808078
MSE: 0.012044394765242742
MAE: 0.08740994561260172

Random Forest Regressor:
R-squared: 0.7863711136434794
MSE: 1538.508199685181
MAE: 24.724019729380863

Decision Tree Regressor:
R-squared: 0.8707292168083766
MSE: 930.9797158616673
MAE: 24.74595915952063
