In [ ]:
%run data_loader.ipynb

In [ ]:
# https://deap.readthedocs.io/en/master/index.html
# https://deap.readthedocs.io/en/master/tutorials/basic/part1.html


In [3]:
%pip install deap

Collecting deap
  Downloading deap-1.4.1-cp39-cp39-macosx_11_0_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.1-cp39-cp39-macosx_11_0_x86_64.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.7/104.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.1
Note: you may need to restart the kernel to use updated packages.


In [None]:
import random
import time
from math import sqrt

import numpy as np
from deap import base, creator, tools, algorithms
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Define problem to DEAP (we aim to minimize RMSE, hence weights=(-1.0,)).
# creator.create() registers the new class on the `creator` module itself, so
# re-running this cell would overwrite the existing class and emit a
# RuntimeWarning. The hasattr guards keep the cell idempotent; restart the
# kernel (or `del creator.FitnessMin` / `del creator.Individual`) to change
# the definitions.
if not hasattr(creator, "FitnessMin"):
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# One gene per column of X, drawn from {0, 1}; evalModel treats a gene equal
# to 1 as "keep this column".
toolbox.register("attr_bool", np.random.randint, 0, 2)
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bool, n=X.shape[1])
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Define the fitness function for regression
def evalModel(individual):
    """Score a feature subset by the hold-out RMSE of a random forest.

    Parameters
    ----------
    individual : sequence of 0/1 genes
        Gene ``i`` equal to 1 selects column ``i`` of the global ``X``.

    Returns
    -------
    tuple of float
        One-element tuple ``(rmse,)``. ``(inf,)`` is returned as a penalty
        when no gene is set, so an empty subset can never win a
        minimisation.
    """
    selected_columns = [i for i, bit in enumerate(individual) if bit == 1]

    # Guard first: an empty subset needs no slicing, splitting or training.
    if not selected_columns:  # Prevent training with 0 features
        return (float("inf"),)  # Return infinite RMSE as a penalty

    X_selected = X[:, selected_columns]
    # Fixed random_state: every individual is scored on the same 80/20 split.
    X_train, X_test, y_train, y_test = train_test_split(
        X_selected, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    rmse = sqrt(mean_squared_error(y_test, predictions))  # Calculate RMSE
    return (rmse,)

toolbox.register("evaluate", evalModel)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

# Parameters for GA
population_size = 50
crossover_probability = 0.5
mutation_probability = 0.2
number_of_generations = 40
ga_seed = 42

# Seed both generators so the search is reproducible: the initial population
# is drawn with np.random.randint, while DEAP's crossover / mutation /
# selection operators draw from Python's `random` module.
random.seed(ga_seed)
np.random.seed(ga_seed)

pop = toolbox.population(n=population_size)

# Keeps the single best individual seen in any generation; the final
# population alone is not guaranteed to still contain it.
hall_of_fame = tools.HallOfFame(1)

# Run the Genetic Algorithm
start_time = time.perf_counter()
result = algorithms.eaSimple(pop, toolbox, cxpb=crossover_probability,
                             mutpb=mutation_probability, ngen=number_of_generations,
                             halloffame=hall_of_fame, verbose=True)
duration = time.perf_counter() - start_time  # seconds

# Later cells read `best_chromo[-1]` as a column indexer and `best_score[-1]`
# as the reported score, so both are exposed as one-element lists.
# NOTE(review): presumably these names were meant to come from this GA run;
# confirm they are not expected from another source.
best_individual = hall_of_fame[0]
# Boolean mask, not the raw 0/1 list: indexing columns with integers 0/1
# would pick columns 0 and 1 repeatedly instead of the selected subset.
best_chromo = [np.array(best_individual, dtype=bool)]
best_score = [best_individual.fitness.values[0]]  # hold-out RMSE of the best subset




gen	nevals
0  	50    
1  	30    
2  	36    
3  	31    
4  	31    
5  	31    
6  	30    
7  	21    
8  	26    
9  	28    
10 	27    
11 	29    


In [ ]:
# Report the outcome of the GA run. Reads `best_score` (last entry is shown)
# and `duration` (formatted as seconds), which must already exist in the kernel.
final_best_score = best_score[-1]
print("Best Score:", final_best_score)
# mse = mean_squared_error(y_test, gbr_pred)
# r_two_score = r2_score(y_test, gbr_pred)
# ex_variance_score = explained_variance_score(y_test, gbr_pred)

elapsed_message = f"GA algorithm  took {duration:.2f} seconds."
print(elapsed_message)
# print(f'Mean Squared Error on Test Set: {mse}')
# print(f'r2 score: {r_two_score}')
# print(f'explained variance score: {ex_variance_score}')

In [ ]:

# Restrict both splits to the columns chosen by the GA; the last entry of
# `best_chromo` is used directly as the column indexer.
selected_features = best_chromo[-1]
x_train_genetic = x_train[:, selected_features]
x_test_genetic = x_test[:, selected_features]
n_estimators = 140  # not read anywhere in this cell

# Refit each regressor on the reduced training matrix, in the original order.
for regressor in (
    grad_boosting_regressor,
    ada_boosting_regressor,
    bagging_regressor,
    random_forest_regressor,
    linear_regr,
):
    regressor.fit(x_train_genetic, y_train)
# Kept as the final expression so the cell still displays the fitted estimator.
voting_reg.fit(x_train_genetic, y_train)

In [ ]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score

# The regressors were refit on x_train_genetic, so their predictions are
# regenerated here from x_test_genetic; otherwise the metrics below would be
# computed from prediction arrays that predate the refit (or do not exist).
# NOTE(review): the prediction-name -> model pairing is inferred from the
# variable names; confirm it matches the rest of the project.
gdb_prediction = grad_boosting_regressor.predict(x_test_genetic)
rf_prediction = random_forest_regressor.predict(x_test_genetic)
lg_prediction = linear_regr.predict(x_test_genetic)
ada_b_prediction = ada_boosting_regressor.predict(x_test_genetic)
bagging_r_prediction = bagging_regressor.predict(x_test_genetic)
voting_reg_prediction = voting_reg.predict(x_test_genetic)

# One row per model: label, MSE, R^2, explained variance (same order and
# format as the header line).
print('mean squared error', 'r2 score', 'explained variance score')
for label, prediction in (
    ('GradientBoostingRegressor', gdb_prediction),
    ('RandomForestRegressor', rf_prediction),
    ('LinearRegression', lg_prediction),
    ('AdaBoostRegressor', ada_b_prediction),
    ('BaggingRegressor', bagging_r_prediction),
    ('VotingRegressor', voting_reg_prediction),
):
    print(label,
          mean_squared_error(y_test, prediction),
          r2_score(y_test, prediction),
          explained_variance_score(y_test, prediction))