In [8]:
from PyCROSL.CRO_SL import *
from PyCROSL.AbsObjectiveFunc import *
from PyCROSL.SubstrateReal import *
from PyCROSL.SubstrateInt import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

import pandas as pd
import numpy as np
import xarray as xr

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.insert(1, '../Modules/')
from optimisation import optimisation

In [9]:
# Region label; it is interpolated into the heatwave-days CSV file name
# (Output/NumberHWdays_past2k_<region>.csv) when the target data is loaded.
region="Cluj-Napoca"

In [10]:
# 1 - FEATURE SELECTION SET-UP #
# Goal: find the combinations of candidate predictors that best reproduce the HW record.
# Approach: an optimisation algorithm evaluates many predictor combinations; per the
# original author's note it optimises the F1-score over the validation period.

# Evaluation budget: used as the "Neval" stop value in the CRO-SL parameters.
num_eval = 1000
# Presumably upper bounds on predictor lag and duration -- TODO confirm where they are
# consumed (they are not passed to the objective function in the cells shown here).
max_lag = 30
max_duration = 5

# Period boundaries, written as index labels of the form "<year>-04-30".
# Training: 7002 -> 8600 (about 1600 years). Test: 8600 -> 8851 (about 250 years).
first_train, last_train = "7002-04-30", "8600-04-30"
# The test period starts on the same label at which the training period ends.
first_test = last_train
# NOTE(review): the target index built in the load-data cell stops at "8849-04-30",
# so this label is not part of it -- confirm how last_test is meant to be used.
last_test = "8851-04-30"

# Output locations: a per-individual log and the final best solution.
indiv_file = 'Output/optimisation_output.csv'
solution_file = 'Output/optimisation_output_sol.csv'

# Start the per-individual log with a header-only, space-separated file.
log_columns = ['CV', 'Test', 'Sol']
sol_data = pd.DataFrame(columns=log_columns)
sol_data.to_csv(indiv_file, sep=' ', header=log_columns, index=False)


In [11]:
# 3 - LOAD DATA #
# Candidate predictors; the first CSV column becomes the row index.
pred_dataframe = pd.read_csv('Predictors_dataset_past2k_weekly.csv', index_col=0)

# Heatwave-day counts for the selected region (column NumberHWDays).
# Original author's note: number of HW days in the past2k period, with the
# threshold taken as the 90th percentile of 8821-8850.
df = pd.read_csv(f"Output/NumberHWdays_past2k_{region}.csv")
print (df.NumberHWDays)

# The target series carries no dates of its own, so every row gets a dummy
# "<year>-04-30" label, one per year in train_years.
train_years = range(7000, 8850)
target_dates = [f"{year:04d}-04-30" for year in train_years]

# Fail early, with a readable message, if the file does not hold exactly one
# value per labelled year (the index assignment would otherwise raise a less
# specific ValueError).
if len(df) != len(target_dates):
    raise ValueError(
        f"Expected {len(target_dates)} target rows (one per year in train_years), "
        f"found {len(df)} in the NumberHWdays file for region {region!r}"
    )

df_NDQ90 = pd.DataFrame(df.NumberHWDays.values, index=target_dates, columns=['Target'])
target_dataset = df_NDQ90

# Positional indices of the training-period boundaries within the target index.
# Index.get_loc replaces int(np.argwhere(...)): the latter converts a 2-D array
# to a scalar (deprecated since NumPy 1.25) and fails with an opaque TypeError
# when the label is missing, whereas get_loc raises a KeyError naming the label.
first_train_index = int(df_NDQ90.index.get_loc(first_train))
last_train_index = int(df_NDQ90.index.get_loc(last_train))


0        2.0
1        4.0
2        1.0
3        2.0
4        2.0
        ... 
1845     9.0
1846     9.0
1847    30.0
1848     3.0
1849     1.0
Name: NumberHWDays, Length: 1850, dtype: float64


In [12]:
# Objective function. The search vector has three entries per candidate
# predictor column, hence size = 3 * number of columns in pred_dataframe.
n_predictors = pred_dataframe.shape[1]
objfunc = optimisation(
    size=3 * n_predictors,
    pred_dataframe=pred_dataframe,
    target_dataset=target_dataset,
    first_train_index=first_train_index,
    last_train_index=last_train_index,
    indiv_file=indiv_file,
)

# CRO-SL parameters, grouped by purpose and merged into one dict below.
# See the PyCROSL documentation for the exact meaning of each key.
reef_params = {
    "popSize": 100,
    "rho": 0.6,
    "Fb": 0.98,
    "Fd": 0.2,
    "Pd": 0.8,
    "k": 3,
    "K": 20,
    "group_subs": True,
}

# The run stops on the number of evaluations ("Neval" = num_eval).
# NOTE(review): the remaining limits are presumably inactive under this
# stop condition -- confirm against the PyCROSL documentation.
stopping_params = {
    "stop_cond": "Neval",
    "time_limit": 4000.0,
    "Ngen": 10000,
    "Neval": num_eval,
    "fit_target": 1000,
}

reporting_params = {
    "verbose": True,
    "v_timer": 1,
    "Njobs": 1,
}

dynamic_params = {
    "dynamic": True,
    "dyn_method": "success",
    "dyn_metric": "avg",
    "dyn_steps": 10,
    "prob_amp": 0.01,
}

params = {**reef_params, **stopping_params, **reporting_params, **dynamic_params}

# Integer-coded substrates (search operators) handed to the algorithm,
# in the same order as before.
blx_alpha = SubstrateInt("BLXalpha", {"F": 0.8})
multipoint = SubstrateInt("Multipoint")
hs = SubstrateInt("HS", {"F": 0.7, "Cr": 0.8, "Par": 0.2})
xor = SubstrateInt("Xor")
operators = [blx_alpha, multipoint, hs, xor]

In [13]:
# Build the CRO-SL optimiser from the objective function, operators and
# parameters defined above, then run it to completion.
cro_alg = CRO_SL(objfunc, operators, params)
solution, obj_value = cro_alg.optimize()

# Write the returned solution to disk with ',' as the separator
# (solution is presumably a NumPy array, given the .tofile call).
solution.tofile(solution_file, sep=',')
print(
    f"Optimization completed. Best objective value: {obj_value}",
    f"Solution saved to {solution_file}",
    sep="\n",
)

Starting optimization of Some Function
--------------------------------------

1.0813174332045854 1.0542710980245327
1.0619765654590048 1.0585823225902617
1.0744047297958659 1.0342811667720868
1.0901033997231617 1.039115793703766
1.0596120829458486 1.0370384078008963
1.0936576521315755 1.047162416007452
1.0815163241815073 1.0256486202738528
1.0729644705398496 1.0458688103809082
1.059919011859265 1.058898511964852
1.0761797348890627 1.0323232570121121
1.0943148606732394 1.0454317281629297
1.039899860870213 1.0395504835563498
1.0781823773950348 1.0256097281792085
1.0786127043509102 1.019433319057715
1.068034101220157 1.0202689886871346
1.0795674808300828 1.057749588893859
1.0559039390168443 1.0420465216772956
1.068266853853637 1.0288259726107576
1.0981308726918644 1.0569298467934114
1.0727554303622098 1.0423581724655195
1.0517322154581732 1.0801362277598547
1.0610759890733263 1.0405421116320195
1.0794759995165388 1.0385105382546025
1.062311871604954 1.0807137566030467
1.080733548418495 1