In [2]:
! pip install catboost
! pip install deap

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2
Collecting deap
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deap
Successfully installed deap-1.4.1


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.base import clone
from catboost import CatBoostClassifier
from deap import base, creator, tools
from scipy.stats import randint, uniform, loguniform
import random
import operator
import math
import multiprocessing
import time

random.seed(42)

In [4]:
data = pd.read_csv(
    'https://raw.githubusercontent.com/antbartash/australian_rain/main/data/data_transformed.csv',
    index_col=0
)
X, y = data.drop(columns=['RainTomorrow', 'RainToday']), data['RainTomorrow']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for column in ['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm']:
    X_train[column] = X_train[column].astype(np.float32).fillna(-1).apply(lambda x: str(x))
    X_test[column] = X_test[column].astype(np.float32).fillna(-1).apply(lambda x: str(x))

model = CatBoostClassifier(
    cat_features=['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm'],
    objective='Logloss',
    random_state=42, verbose=False, task_type='GPU'#, gpu_ram_part=0.15, used_ram_limit='2gb'
)

In [5]:
# PSO parameters
N = 20
w = 0.5 # inertia weight coefficient
c1 = 0.3 # cognitive coefficient
c2 = 0.5 # social coefficient
NUM_TRIALS = 15

creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Particle', list, fitness=creator.FitnessMax,
               speed=list, smin=list, smax=list, best=None)
toolbox = base.Toolbox()

In [6]:
# Register hyperparameters with their distributions
PARAM_NAMES = [
    'n_estimators', 'learning_rate', 'depth', 'l2_leaf_reg',
    'bagging_temperature', 'grow_policy', 'scale_pos_weight'
]
toolbox.register('n_estimators', randint.rvs, 10, 500)
toolbox.register('learning_rate', loguniform.rvs, 1e-6, 0.5)
toolbox.register('depth', randint.rvs, 1, 8)
toolbox.register('l2_leaf_reg', uniform.rvs, 0.0, 100.0)
toolbox.register('bagging_temperature', uniform.rvs, 0.0, 100.0)
toolbox.register('grow_policy', random.choice, ['SymmetricTree', 'Depthwise'])
toolbox.register('scale_pos_weight', uniform.rvs, 1.0, 4.0)

In [7]:
def generate(speed_bound):
    part = tools.initCycle(creator.Particle,
                           [toolbox.n_estimators,
                            toolbox.learning_rate,
                            toolbox.depth,
                            toolbox.l2_leaf_reg,
                            toolbox.bagging_temperature,
                            toolbox.grow_policy,
                            toolbox.scale_pos_weight])
    part.speed = [random.uniform(speed_bound[i]['smin'], speed_bound[i]['smax']) for i in range(len(part))]
    part.smin = [speed_bound[i]['smin'] for i in range(len(part))]
    part.smax = [speed_bound[i]['smax'] for i in range(len(part))]
    return part

# Define genes of partickes
toolbox.register('particle', generate,
                 speed_bound=[{'smin': -100, 'smax': 100},
                              {'smin': -0.1, 'smax': 0.1},
                              {'smin': -1, 'smax':1},
                              {'smin': -10, 'smax': 10},
                              {'smin': -10, 'smax': 10},
                              {'smin': -1, 'smax': 1},
                              {'smin': -0.5, 'smax': 0.5}])
toolbox.register('population', tools.initRepeat, list, toolbox.particle)

In [8]:
def updateParticle(part, best, c1, c2, w, is_int):
    for i in range(len(part)):
      if part[i] == 'SymmetricTree':
        part[i] = 0
      elif part[i] == 'Depthwise':
        part[i] = 1
      if part.best[i] == 'SymmetricTree':
        part.best[i] = 0
      elif part.best[i] == 'Depthwise':
        part.best[i] = 1
      if best[i] == 'SymmetricTree':
        best[i] = 0
      elif best[i] == 'Depthwise':
        best[i] = 1

    w = [w for _ in range(len(part))]
    u1 = (random.uniform(0, 1)*c1 for _ in range(len(part)))
    u2 = (random.uniform(0, 1)*c2 for _ in range(len(part)))
    v_u1 = map(operator.mul, u1, map(operator.sub, part.best, part))
    v_u2 = map(operator.mul, u2, map(operator.sub, best, part))
    part.speed = list(map(operator.add, map(operator.mul, w, part.speed), map(operator.add, v_u1, v_u2)))
    for i, speed in enumerate(part.speed):
        if abs(speed) < part.smin[i]:
            part.speed[i] = math.copysign(part.smin[i], speed)
        elif abs(speed) > part.smax[i]:
            part.speed[i] = math.copysign(part.smax[i], speed)
    part[:] = list(map(operator.add, part, part.speed))

    for i, pos in enumerate(part):
        if is_int[i]:
            part[i] = int(pos)

# # Register the update strategy
#     'n_estimators', 'learning_rate', 'depth', 'l2_leaf_reg',
#     'bagging_temperature', 'grow_policy', 'scale_pos_weight'
toolbox.register('update', updateParticle, c1=c1, c2=c2, w=w,
                 is_int=[True, False, True, False, False, True, False])

In [9]:
def evaluate(individual):
    # convert list of parameter values into dictionary of kwargs
    strategy_params = {k: v for k, v in zip(PARAM_NAMES, individual)}
    strategy_params['grow_policy'] = 'SymmetricTree' if strategy_params['grow_policy'] == 0 else 'Depthwise'
    if strategy_params['scale_pos_weight'] < 1:
        return [-np.inf]
    tuned_model = clone(model).set_params(**strategy_params)
    score = np.mean(cross_val_score(tuned_model, X_train, y_train, cv=3, scoring='roc_auc'))
    return score

toolbox.register('evaluate', evaluate)

In [10]:
pool = multiprocessing.Pool(1)
toolbox.register('map', pool.map)

In [None]:
pop = toolbox.population(n=N)
mean_arr = np.ndarray(NUM_TRIALS)
best_arr = np.ndarray(NUM_TRIALS)
hall_of_fame = tools.HallOfFame(maxsize=3)
print(f"Start time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")

best = None
for g in range(NUM_TRIALS):
    fitnesses = toolbox.map(toolbox.evaluate, pop)
    for part, fit in zip(pop, fitnesses):
        if type(fit) != list:
          fit = [fit]
        part.fitness.values = fit

        if not part.best or part.fitness.values > part.best.fitness.values:
            part.best= creator.Particle(part)
            part.best.fitness.values = part.fitness.values
        if not best or part.fitness.values > best.fitness.values:
            best = creator.Particle(part)
            best.fitness.values = part.fitness.values
    for part in pop:
        toolbox.update(part, best)

    hall_of_fame.update(pop)

    print(
      f"\nHALL OF FAME - generation {g} - {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}:\n"
      + "\n".join(
          [
              f"    {_}: {ind}, Fitness: {ind.fitness.values[0]}"
              for _, ind in enumerate(hall_of_fame)
          ]
      )
    )

    fitnesses = [
        ind.fitness.values[0] for ind in pop if not np.isinf(ind.fitness.values[0])
    ]
    mean_arr[g] = np.mean(fitnesses)
    best_arr[g] = np.max(fitnesses)

Start time: 2024-01-04 09:19:08

HALL OF FAME - generation 0 - 2024-01-04 09:23:24:
    0: [372, 0.044045188751503414, 1, 45.317253206279645, -3.8670775573332317, 0, 4.3798521506225985], Fitness: 0.873678800568721
    1: [348, -0.006963835199651426, 6, 49.94012037785961, -1.0570849000102234, 0, 2.5946740158628843], Fitness: 0.8629322355562015
    2: [358, 0.01949378951425093, 5, 33.22300369315022, 8.223897729117857, 0, 1.8871548673225291], Fitness: 0.8504975257966979


In [10]:
best

[415,
 0.2727151286620181,
 3,
 94.4312945901962,
 2.1183111467986904,
 0,
 3.8435038259524594]