# FIS Optimization Experiment: Genetic Algorithm

In [1]:
import sys
import os

current_dir = os.getcwd()

project_root = os.path.abspath(os.path.join(current_dir, '../'))

if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import time
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, classification_report

from optimizations.ga.ga import GAFISOptimizer
from fis.fis_model import FISChatbot

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
DATA_DIR = os.path.join(project_root, "..", "data", "processed")

In [4]:
X_train = np.load(os.path.join(DATA_DIR, "X_train.npy"))
X_test = np.load(os.path.join(DATA_DIR, "X_test.npy"))
y_train = np.load(os.path.join(DATA_DIR, "y_train.npy"))
y_test = np.load(os.path.join(DATA_DIR, "y_test.npy"))

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape:  {X_test.shape}")
print(f"y_test shape:  {y_test.shape}")

X_train shape: (12800, 6)
y_train shape: (12800,)
X_test shape:  (3200, 6)
y_test shape:  (3200,)


In [5]:
# Cell: Run Experiments

N_EXPERIMENTS = 5       
POPULATION_SIZE = 20    
GENERATIONS = 30        
BATCH_SIZE = 200
EARLY_STOPPING = 10     # Stop if no improvement

results = []
all_histories = []

print(f"{'='*60}")
print(f"GA-FIS Optimization")
print(f"{'='*60}")
print(f"Config: {N_EXPERIMENTS} runs, Pop={POPULATION_SIZE}, Gen={GENERATIONS}")
print(f"Batch={BATCH_SIZE}, Early Stop={EARLY_STOPPING}")
print(f"{'='*60}\n")

for i in range(N_EXPERIMENTS):
    print(f"\n[Run {i+1}/{N_EXPERIMENTS}]")
    
    optimizer = GAFISOptimizer(X_train, y_train, batch_size=BATCH_SIZE)
    
    start_time = time.time()
    best_fitness = optimizer.run(
        num_generations=GENERATIONS, 
        population_size=POPULATION_SIZE,
        early_stopping_rounds=EARLY_STOPPING
    )
    exec_time = time.time() - start_time
    
    best_model = optimizer.get_optimized_model()
    y_pred_opt = best_model.predict_batch(X_test)
    
    acc = accuracy_score(y_test, y_pred_opt)
    f1 = f1_score(y_test, y_pred_opt, average='weighted')
    
    print(f"  >> Test Acc: {acc:.4f} | F1: {f1:.4f} | Time: {exec_time:.2f}s")
    
    results.append({
        "run_id": i + 1,
        "accuracy": acc,
        "f1_weighted": f1,
        "execution_time": exec_time,
        "best_fitness_train": best_fitness
    })
    all_histories.append(optimizer.get_history())

GA-FIS Optimization
Config: 5 runs, Pop=20, Gen=30
Batch=200, Early Stop=10


[Run 1/5]
  GA Config: Pop=20, Gen=30, Batch=200, Chromosome=18 genes
    Gen 5/30 | Best: 0.3586 | Avg: 0.3013
    Gen 10/30 | Best: 0.3788 | Avg: 0.3093
    Gen 15/30 | Best: 0.3838 | Avg: 0.3068
    Gen 20/30 | Best: 0.3838 | Avg: 0.2972
    Gen 25/30 | Best: 0.4192 | Avg: 0.3061
    Gen 30/30 | Best: 0.4192 | Avg: 0.3215
  GA Complete! Best Fitness: 0.4192
  >> Test Acc: 0.5091 | F1: 0.4475 | Time: 994.91s

[Run 2/5]
  GA Config: Pop=20, Gen=30, Batch=200, Chromosome=18 genes
    Gen 5/30 | Best: 0.3737 | Avg: 0.2942
    Gen 10/30 | Best: 0.3889 | Avg: 0.3003
    Gen 15/30 | Best: 0.3889 | Avg: 0.3051
    Early stopping at gen 19 (no improvement for 10 gens)
  GA Complete! Best Fitness: 0.3889
  >> Test Acc: 0.4913 | F1: 0.4338 | Time: 640.62s

[Run 3/5]
  GA Config: Pop=20, Gen=30, Batch=200, Chromosome=18 genes
    Gen 5/30 | Best: 0.3788 | Avg: 0.3030
    Gen 10/30 | Best: 0.3838 | Avg: 0.3056
    Gen 

In [14]:
df_results = pd.DataFrame(results)

In [15]:
df_results.sort_values(by='f1_weighted', ascending=False)

Unnamed: 0,run_id,accuracy,f1_weighted,execution_time,best_fitness_train
3,4,0.509062,0.449572,573.045822,0.383838
4,5,0.509062,0.449207,459.652227,0.40404
0,1,0.509062,0.447536,994.911885,0.419192
1,2,0.49125,0.433844,640.621485,0.388889
2,3,0.487187,0.430299,477.203022,0.383838


In [12]:
best_f1 = df_results["f1_weighted"].max()
print(f"Best F1 Weighted: {best_f1:.4f}")

Best F1 Weighted: 0.4496
