In [None]:
import numpy as np
import random
import pandas as pd
from keras.layers import Input, Dense, Dropout, Bidirectional, SimpleRNN, Reshape
from keras.models import Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Location of the insurance dataset, kept in one named constant so the
# notebook can be pointed at another copy without editing the load call.
DATA_PATH = '/content/insurance.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Integer-encode the categorical columns. Every column gets its own fitted
# encoder, stored in `label_encoders`, so the original string labels can be
# recovered later via `label_encoders[column].inverse_transform(...)`.
label_encoders = {}
for column in ("sex", "smoker", "region"):
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le
# After the loop `le` stays bound to the `region` encoder, for any later
# cell that refers to it by that name.

In [None]:
# Preview the first rows to check that sex, smoker and region are now integer codes.
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,3,16884.924
1,18,1,33.77,1,0,2,1725.5523
2,28,1,33.0,3,0,2,4449.462
3,33,1,22.705,0,0,1,21984.47061
4,32,1,28.88,0,0,1,3866.8552


In [None]:
# Take a copy of the encoded frame; the feature/target split is built from `df`.
df = data.copy()

In [None]:
# Separate the feature columns from the `charges` target.
x = df.drop(columns=["charges"])
y = df["charges"]

In [None]:
# Bin `charges` into 4 equal-width intervals labelled 0-3. The bins are
# computed from `df["charges"]` rather than from `y` itself, so re-running
# this cell (even after `y` has been rebound further down) gives the same result.
y = pd.cut(df["charges"], bins=4, labels=[0, 1, 2, 3])

In [None]:
# Number of target labels (one per row).
y.shape

(1338,)

In [None]:
# Feature matrix dimensions: (rows, feature columns).
x.shape

(1338, 6)

In [None]:
# Preview the feature columns (everything except `charges`).
x.head()

Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,0,27.9,0,1,3
1,18,1,33.77,1,0,2
2,28,1,33.0,3,0,2
3,33,1,22.705,0,0,1
4,32,1,28.88,0,0,1


In [None]:
# Preview the binned class labels.
y.head()

0    1
1    0
2    0
3    1
4    0
Name: charges, dtype: category
Categories (4, int64): [0 < 1 < 2 < 3]

In [None]:
# One-hot encode the class labels for the softmax / categorical-crossentropy models.
# NOTE(review): this rebinds `y`, so running the cell a second time would encode
# the already-encoded matrix again; re-run from the binning cell instead.
y = to_categorical(y)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=0,
)

In [None]:
# Baseline: a 25-tree random forest fitted on the training split and scored
# on the held-out split.
print(
    "Accuracy:",
    accuracy_score(
        y_test,
        RandomForestClassifier(n_estimators=25).fit(x_train, y_train).predict(x_test),
    ),
)

Accuracy: 0.8992537313432836


In [None]:
# Define the search space for the Random Forest Classifier
rfc_search_space = {
    'n_estimators': range(10, 101),
    'criterion': ['gini', 'entropy'],
    'max_depth': range(2, 51),
    'min_samples_split': range(2, 11),
    'min_samples_leaf': range(1, 11),
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False]
}

# NOTE(review): y_train / y_test are one-hot matrices here, so scikit-learn
# handles them as multilabel targets and accuracy_score reports exact-row
# matches. Confirm this is intended rather than integer class labels.

# Randomized search with 5-fold cross-validation. Both the sampler and the
# forest are seeded so the selected hyperparameters and the reported score
# are reproducible between runs.
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=0),
    param_distributions=rfc_search_space,
    n_iter=100,
    cv=5,
    random_state=0,
)
random_search.fit(x_train, y_train)

# Best hyperparameters found by the search
best_params = random_search.best_params_

# With the default refit=True the search has already refitted the winning
# configuration on the whole training set, so reuse that model instead of
# training an identical configuration a second time.
rfc = random_search.best_estimator_

# Make predictions on the testing set
y_pred = rfc.predict(x_test)

# Evaluate the accuracy of the Random Forest Classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9216417910447762


In [None]:
# Search space: candidate values sampled independently for every hidden layer.
# layer_choices   - possible numbers of hidden Dense+Dropout layers
# unit_choices    - possible unit counts for a Dense layer
# dropout_choices - possible dropout rates applied after a Dense layer
layer_choices = [2, 3, 4, 5]
unit_choices = [1, 2, 4]
dropout_choices = [0.25, 0.5]

# Hyperparameters of the genetic algorithm
# population_size - number of architectures in the initial population
# num_generations - number of evaluate/breed cycles
# mutation_rate   - per-layer probability of resampling units and dropout
# crossover_rate  - probability that a selected parent pair is recombined
population_size = 250
num_generations = 25
mutation_rate = 0.25
crossover_rate = 0.75

In [None]:
def generate_random_architecture():
    """Sample a random network description from the module-level search space.

    Returns:
        A list with one ``[units, dropout]`` entry per hidden layer. The layer
        count is drawn from ``layer_choices``; each entry's values are drawn
        from ``unit_choices`` and ``dropout_choices`` respectively.
    """
    depth = random.choice(layer_choices)
    return [
        [random.choice(unit_choices), random.choice(dropout_choices)]
        for _ in range(depth)
    ]

def create_model(architecture, input_dim=6, num_classes=4):
    """Build an uncompiled feed-forward classifier from an architecture description.

    Args:
        architecture: Sequence of ``[units, dropout]`` pairs, one per hidden
            layer, applied in order. Each pair becomes a ReLU ``Dense`` layer
            followed by a ``Dropout`` layer.
        input_dim: Number of input features. Defaults to 6, the width used by
            existing callers.
        num_classes: Number of units in the softmax output layer. Defaults to 4.

    Returns:
        A Keras ``Model`` mapping an ``(input_dim,)`` input to ``num_classes``
        softmax outputs.
    """
    inputs = Input(shape=(input_dim,))
    hidden = inputs
    for units, dropout in architecture:
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(dropout)(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=inputs, outputs=outputs)

def evaluate_architecture(architecture, X_train, y_train, X_val, y_val):
    """Train a fresh model for ``architecture`` and return its validation accuracy.

    The model is compiled with Adam (learning rate 0.001) and categorical
    cross-entropy, trained for 10 epochs in shuffled batches of 256, and then
    scored on ``(X_val, y_val)``.

    Returns:
        The accuracy metric reported by ``evaluate`` on the validation data.
    """
    candidate = create_model(architecture)
    candidate.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    candidate.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=10,
        batch_size=256,
        shuffle=True,
        verbose=0,
    )
    _val_loss, val_accuracy = candidate.evaluate(X_val, y_val, verbose=0)
    return val_accuracy

def create_new_population(population, fitness_scores, crossover_prob=None,
                          mutation_prob=None, unit_options=None,
                          dropout_options=None):
    """Breed the next generation using fitness-proportional selection.

    Parent pairs are drawn with probability proportional to their fitness,
    optionally recombined by single-point crossover, and each layer of every
    child is then resampled with probability ``mutation_prob``.

    Args:
        population: List of architectures, each a list of ``[units, dropout]``
            layers.
        fitness_scores: One non-negative score per architecture, in the same
            order as ``population``.
        crossover_prob: Probability of recombining a parent pair. Defaults to
            the module-level ``crossover_rate``.
        mutation_prob: Per-layer mutation probability. Defaults to the
            module-level ``mutation_rate``.
        unit_options: Values a mutated layer's unit count is drawn from.
            Defaults to the module-level ``unit_choices``.
        dropout_options: Values a mutated layer's dropout is drawn from.
            Defaults to the module-level ``dropout_choices``.

    Returns:
        A new list with exactly ``len(population)`` architectures. Every child
        owns its layer lists, so neither ``population`` nor sibling children
        are modified by mutation.
    """
    if crossover_prob is None:
        crossover_prob = crossover_rate
    if mutation_prob is None:
        mutation_prob = mutation_rate
    if unit_options is None:
        unit_options = unit_choices
    if dropout_options is None:
        dropout_options = dropout_choices

    target_size = len(population)
    if target_size == 0:
        return []

    total_fitness = sum(fitness_scores)
    if total_fitness > 0:
        selection_probs = [score / total_fitness for score in fitness_scores]
    else:
        # Every candidate scored zero: fall back to uniform selection instead
        # of dividing by zero.
        selection_probs = None

    next_population = []
    # Each round yields two children, so stop as soon as the generation is
    # full; otherwise the population would double every generation.
    while len(next_population) < target_size:
        parent1 = population[np.random.choice(target_size, p=selection_probs)]
        parent2 = population[np.random.choice(target_size, p=selection_probs)]

        # A cut point needs at least two layers in parent1.
        if len(parent1) > 1 and random.random() < crossover_prob:
            cut = random.randint(1, len(parent1) - 1)
            offspring = [
                parent1[:cut] + parent2[cut:],
                parent2[:cut] + parent1[cut:],
            ]
        else:
            offspring = [parent1, parent2]

        for genome in offspring:
            if len(next_population) >= target_size:
                break
            # Copy each layer before mutating: slices and un-crossed parents
            # still reference the parents' layer lists.
            child = [list(layer) for layer in genome]
            for layer in child:
                if random.random() < mutation_prob:
                    layer[0] = random.choice(unit_options)
                    layer[1] = random.choice(dropout_options)
            next_population.append(child)
    return next_population

In [None]:
# Run GA to search for best architecture
# Fitness is measured on a validation split carved out of the training data.
# Selecting architectures on the test set would leak it into model selection
# and leave no unbiased data for the final score.
x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, train_size=0.8, random_state=0)

best_architecture = None
best_fitness = 0.0
population = [generate_random_architecture() for _ in range(population_size)]
for generation in range(num_generations):
    fitness_scores = [evaluate_architecture(architecture, x_fit, y_fit, x_val, y_val) for architecture in population]
    best_architecture_idx = int(np.argmax(fitness_scores))
    if fitness_scores[best_architecture_idx] > best_fitness:
        # Keep an independent copy: breeding may mutate layer lists in place,
        # which would otherwise silently change the recorded winner.
        best_architecture = [list(layer) for layer in population[best_architecture_idx]]
        best_fitness = fitness_scores[best_architecture_idx]
    print(f"Generation {generation + 1} out of {num_generations}: Best Fitness = {best_fitness:.4f}")
    # Cap the population at its configured size so the cost per generation
    # stays constant.
    population = create_new_population(population, fitness_scores)[:population_size]

Generation 1 out of 25: Best Fitness = 0.7463
Generation 2 out of 25: Best Fitness = 0.7463
Generation 3 out of 25: Best Fitness = 0.7463
