In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from deap import base, creator, tools, algorithms
from sklearn.neural_network import MLPClassifier

In [2]:
# Load the dataset
# df = pd.read_excel('../../dataset/Dataset_IoV.xlsx')  
df = pd.read_csv('..//..//dataset//preliminary_dataset.csv')  


# Separating features and target
X = df[['Number of Lanes', 'Status of driver', 'Nature of environment', 'Velocity of vehicle', 'Distance between vehicles', 'Breaking capability']]
y = df['Collussion']
# scaler=RobustScaler()
# X_scaled = scaler.fit_transform(X)

# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)
X_scaled = X

# Reshape input to be 3D [samples, timesteps, features] for LSTM
# X_scaled = np.reshape(X_scaled, (X_scaled.shape[0], 1, X_scaled.shape[1]))

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [3]:
print(X_train.shape, y_train.shape,X_scaled.shape)

(32000, 6) (32000,) (40000, 6)


# Model

In [4]:
# BPANN model (1 hidden layer)
def build_bpann():
    model = MLPClassifier(hidden_layer_sizes=(50,), activation='relu', solver='adam', max_iter=50, batch_size=64)
    return model

def fitness_function(individual):
    # Convert the individual values to integers
    hidden_layer_size = int(individual[0])  # Ensure it's an integer
    model = MLPClassifier(hidden_layer_sizes=(hidden_layer_size,), activation='relu', solver='adam', max_iter=200, tol=1e-4, early_stopping=True)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse,


In [5]:
# Genetic Algorithm setup
if not hasattr(creator, "FitnessMin"):
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # Minimize MSE

if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("attr_int", np.random.randint, 1, 5)  # Random hidden layer size
# Modify the individual length by adding more parameters (hidden layers)
toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.attr_int, toolbox.attr_int, toolbox.attr_int), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=100, sigma=50, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_function)

# Create initial population
population = toolbox.population(n=10)

# Algorithm parameters
generations = 5
cx_prob = 0.7
mut_prob = 0.2

In [6]:
# Custom progress tracking within the evolutionary loop
for generation in range(generations):
    print(f"Generation {generation + 1}/{generations}")

    # Evaluate the population
    fitnesses = list(map(toolbox.evaluate, population))
    
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    # Select the next generation
    offspring = toolbox.select(population, len(population))
    offspring = list(map(toolbox.clone, offspring))

    # Apply crossover and mutation
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
        if len(child1) > 1 and np.random.rand() < cx_prob:
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values

    for mutant in offspring:
        if np.random.rand() < mut_prob:
            toolbox.mutate(mutant)
            del mutant.fitness.values

    # Evaluate the fitness of the new offspring
    invalid_individuals = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = list(map(toolbox.evaluate, invalid_individuals))

    for ind, fit in zip(invalid_individuals, fitnesses):
        ind.fitness.values = fit

    # Replace the old population with the new offspring
    population[:] = offspring

    # Print best fitness for current generation
    best_individual = tools.selBest(population, 1)[0]
    print(f"Best Fitness (MSE): {best_individual.fitness.values[0]:.5f}")


Generation 1/5
Best Fitness (MSE): 0.01787
Generation 2/5
Best Fitness (MSE): 0.01787
Generation 3/5
Best Fitness (MSE): 0.01725
Generation 4/5
Best Fitness (MSE): 0.01787
Generation 5/5
Best Fitness (MSE): 0.01675


In [8]:
best_individual = tools.selBest(population, 1)[0]

# Use the best individual to train and evaluate the final BPANN model
best_model = MLPClassifier(hidden_layer_sizes=(int(best_individual[0]), int(best_individual[1]), int(best_individual[2])), activation='relu', solver='adam', max_iter=50)
best_model.fit(X_train, y_train)



In [9]:
from sklearn.metrics import mean_squared_error
import numpy as np
# Make predictions
y_pred_best  = best_model.predict(X_test)

In [10]:
# Calculate MSE and RMSE for the best model
mse_best = mean_squared_error(y_test, y_pred_best)
rmse_best = np.sqrt(mse_best)

print(f'Mean Squared Error for BPANN-GA (MSE): {mse_best:.16f}')
print(f'Root Mean Squared Error for BPANN-GA (RMSE): {rmse_best:.16f}')

Mean Squared Error for BPANN-GA (MSE): 0.0177500000000000
Root Mean Squared Error for BPANN-GA (RMSE): 0.1332291259447423
