# Fire from forest

First, we enable logging and define the constants used throughout the training.

In [1]:
import logging

# Seed handed to the experiment runners further down
RANDOM_SEED = 100
# How many subprocesses the experiments are spread over
POOL_SIZE = 4

# Write log records of level DEBUG and above to the file notebook.ipynb.log
logging.basicConfig(filename="notebook.ipynb.log", level="DEBUG")


## Data load

The data is loaded with pandas: one (train, test) pair of CSV files is read for each partition.

In [2]:
import pandas as pd

# Number of (train, test) CSV pairs read from the dataset/ directory
PARTITIONS = 4

# datasets[i] holds the [train, test] data frames of partition i.
# NOTE(review): the rendered frame below shows an "Unnamed: 0" column. If the
# CSV files really carry a saved row index, it is currently loaded as an
# ordinary column and would be treated as a feature downstream -- confirm
# against the files before changing how they are read.
datasets = []
for i in range(PARTITIONS):
    pair = [pd.read_csv(f"dataset/{ftype}_{i}.csv") for ftype in ["train", "test"]]
    datasets.append(pair)

datasets[0][0]


Unnamed: 0,rgb__mean_c0,rgb__mean_c1,rgb__mean_c2,rgb__stdev_c0,rgb__stdev_c1,rgb__stdev_c2,rgb__median_c0,rgb__median_c1,rgb__median_c2,rgb__cov_0,...,hsv__cov_0,hsv__cov_1,hsv__cov_2,hsv__cov_3,hsv__cov_4,hsv__cov_5,gray__mean,gray__stdev,gray__median,tag
0,-0.343603,-0.399791,-0.483646,0.155106,0.143023,0.015099,-0.407115,-0.510204,-0.578059,-0.108204,...,-0.181116,0.032211,-0.181116,-0.388510,0.032211,-0.388510,-0.380955,0.216305,-0.480820,OTHER
1,-0.148882,-0.023510,-0.462608,-0.170048,-0.102284,-0.205109,-0.146245,-0.036735,-0.527426,-0.496538,...,-0.180401,0.149459,-0.180401,-0.204963,0.149459,-0.204963,-0.063179,-0.071409,-0.071546,OTHER
2,-0.384243,-0.314732,-0.390246,-0.084516,-0.023385,-0.078241,-0.470356,-0.436735,-0.485232,-0.386402,...,-0.284572,0.137453,-0.284572,-0.140462,0.137453,-0.140462,-0.321888,0.026419,-0.441796,OTHER
3,-0.348830,0.000724,-0.632997,-0.155131,0.059389,-0.243767,-0.351779,-0.036735,-0.696203,-0.439717,...,-0.190342,0.176021,-0.190342,-0.328477,0.176021,-0.328477,-0.103680,0.039713,-0.128140,OTHER
4,-0.425188,-0.155956,-0.593988,-0.368570,-0.152293,-0.440119,-0.375494,-0.151020,-0.603376,-0.656547,...,-0.172915,0.170774,-0.172915,-0.161318,0.170774,-0.161318,-0.231420,-0.183313,-0.212932,OTHER
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1529,0.348918,-0.459141,-0.850878,-0.960172,-0.977257,-0.980148,0.351779,-0.469388,-0.848101,-0.990671,...,-0.196171,0.151483,-0.196171,0.004657,0.151483,0.004657,-0.293153,-0.976295,-0.300791,SMOKE
1530,0.711822,-0.237385,-0.887499,-0.167512,-0.382122,-0.799823,0.889328,-0.175510,-0.898734,-0.647891,...,-0.194703,0.181633,-0.194703,0.004211,0.181633,0.004211,-0.052989,-0.336232,0.031360,SMOKE
1531,1.000000,0.173099,-0.926741,-0.983273,-0.672096,-0.856934,0.992095,0.142857,-0.940928,-0.991329,...,-0.198950,0.151451,-0.198950,0.004869,0.151451,0.004869,0.306354,-0.748378,0.284923,SMOKE
1532,0.212767,-0.332275,-0.954472,-0.583690,-0.683132,-0.841309,0.177866,-0.379592,-0.966245,-0.900064,...,-0.196516,0.154068,-0.196516,-0.010151,0.154068,-0.010151,-0.240067,-0.658883,-0.285511,SMOKE


A label encoder is used to transform the tags of the data frames to numbers.

In [3]:
from sklearn.preprocessing import LabelEncoder

# Collect the tags that occur in any train or test frame so the encoder
# knows every class, whichever partition it appears in
known_tags = [
    tag
    for pair in datasets
    for frame in pair
    for tag in frame["tag"].unique()
]

# fit() returns the fitted encoder itself, so creation and fitting can be chained
label_encoder = LabelEncoder().fit(known_tags)

label_encoder.classes_


array(['FIRE', 'OTHER', 'SMOKE'], dtype='<U5')

A `DataExperiment` is created for each (train, test) dataframe pair.

In [4]:
from experimenter.model import DataExperiment


def to_data_experiment(train_test_df) -> DataExperiment:
    """Build a `DataExperiment` from one (train, test) pair of data frames.

    Every column except "tag" is used as a feature; the "tag" column is
    converted to integer labels with the notebook-level `label_encoder`.

    Args:
        train_test_df: A two-item sequence ``(train, test)`` of pandas data
            frames, each containing a "tag" column.

    Returns:
        A `DataExperiment` constructed from, in order: the train features,
        the encoded train tags, the test features and the encoded test tags.

    Raises:
        KeyError: If the test frame lacks one of the train feature columns.
    """
    train, test = train_test_df

    # The feature columns are determined once, from the train frame, and both
    # frames are then selected by those names. Previously the test frame was
    # filtered with a boolean mask computed from the *train* columns, which
    # silently drops the wrong column whenever the two frames do not list
    # their columns in exactly the same order.
    feature_columns = train.columns[train.columns != "tag"]

    return DataExperiment(
        train.loc[:, feature_columns].to_numpy(),
        label_encoder.transform(train["tag"]),
        test.loc[:, feature_columns].to_numpy(),
        label_encoder.transform(test["tag"]),
    )

# One DataExperiment per loaded (train, test) pair, in partition order
all_experiments = list(map(to_data_experiment, datasets))

## Experiment configuration

Now we define the hyperparameter ranges that the runner will explore.

In [14]:
from pprint import pprint
from functools import reduce
import numpy as np

# Number of distinct tags known to the label encoder
CLASSES = label_encoder.classes_.size
# Column count of the first train frame minus one (the "tag" column);
# this requires at least one dataset to have been loaded
ATTRIBUTES = datasets[0][0].columns.size - 1

# Candidate values for each hyperparameter; the "+ 1" keeps the upper bound
# inside the integer ranges
NEURONS_RANGE = np.arange(CLASSES, ATTRIBUTES + CLASSES + 1)
LAYERS_RANGE = np.arange(1, 5 + 1)
EPOCHS_RANGE = np.arange(50, 500 + 1, 10)
LEARNING_RATE_RANGE = np.linspace(0.001, 0.200, 50)
MOMENTUM_RANGE = np.linspace(0.01, 0.4, 50)

# Size of the full grid: the product of the number of candidates per range
total_experiments = reduce(
    lambda count, candidates: count * candidates.shape[0],
    (
        NEURONS_RANGE,
        LAYERS_RANGE,
        EPOCHS_RANGE,
        LEARNING_RATE_RANGE,
        MOMENTUM_RANGE,
    ),
    1,
)
print(f"Total experiments: {total_experiments}")
print(EPOCHS_RANGE.size)

Total experiments: 31625000
46


## Experimentation

A `ParallelExperimenter` is created with the `DataExperiment`s that were created in the previous step.

In [6]:
from experimenter.parallel import ParallelExperimenter
# Build the runner from every DataExperiment plus the POOL_SIZE and
# RANDOM_SEED constants defined at the top of the notebook
experimenter = ParallelExperimenter(all_experiments, POOL_SIZE, RANDOM_SEED)

The initial experimentation values are set:

In [7]:
# Starting point of the search: the first neuron and layer candidates, the
# epoch candidate a tenth of the way into its range, and the middle
# candidates for learning rate and momentum
INITIAL_NEURONS = NEURONS_RANGE[0]
INITIAL_LAYERS = LAYERS_RANGE[0]
INITIAL_EPOCHS = EPOCHS_RANGE[EPOCHS_RANGE.size // 10]
INITIAL_LEARNING_RATE = LEARNING_RATE_RANGE[LEARNING_RATE_RANGE.size // 2]
INITIAL_MOMENTUM = MOMENTUM_RANGE[MOMENTUM_RANGE.size // 2]

# The values are listed explicitly instead of being harvested from locals():
# scanning the namespace would also print any stale INITIAL* name left in the
# kernel by an earlier or deleted cell, so the output would depend on hidden
# state rather than on this cell alone.
pprint(
    {
        "INITIAL_NEURONS": INITIAL_NEURONS,
        "INITIAL_LAYERS": INITIAL_LAYERS,
        "INITIAL_EPOCHS": INITIAL_EPOCHS,
        "INITIAL_LEARNING_RATE": INITIAL_LEARNING_RATE,
        "INITIAL_MOMENTUM": INITIAL_MOMENTUM,
    },
    sort_dicts=False,
)


{'INITIAL_NEURONS': 3,
 'INITIAL_LAYERS': 1,
 'INITIAL_EPOCHS': 90,
 'INITIAL_LEARNING_RATE': 0.10253061224489797,
 'INITIAL_MOMENTUM': 0.2089795918367347}


In [8]:
# Inspect the first experiment's y_train (presumably the encoded train tags
# passed to DataExperiment -- confirm against experimenter.model)
all_experiments[0].y_train

array([1, 1, 1, ..., 2, 2, 2])

In [13]:
from experimenter.model import ModelParams
from experimenter.train import train_multiple, train_single
import dataclasses

# Baseline parameters assembled from the INITIAL_* values
initial_experiment = ModelParams(
    neurons=INITIAL_NEURONS,
    layers=INITIAL_LAYERS,
    epochs=INITIAL_EPOCHS,
    learning_rate=INITIAL_LEARNING_RATE,
    momentum=INITIAL_MOMENTUM,
)

# Not executed here: earlier drafts of this cell trained the baseline with
# train_multiple(initial_experiment, all_experiments, RANDOM_SEED) and swept
# every value of NEURONS_RANGE through experimenter.run_all([...]), printing
# both results.

# Same baseline, but with the last neuron candidate and the last epoch candidate
largest_model = dataclasses.replace(
    initial_experiment,
    neurons=NEURONS_RANGE[-1],
    epochs=EPOCHS_RANGE[-1],
)
train_multiple(largest_model, all_experiments, RANDOM_SEED)


ExperimentResult(params=ModelParams(neurons=57, layers=1, epochs=500, learning_rate=0.10253061224489797, momentum=0.2089795918367347), precisions=array([0.92207792, 0.9375    , 0.9140625 , 0.93472585]), mean=0.9270915676604727, stddev=0.009506720712325127)

In [15]:
import os

# Show the CPU count reported by the operating system
available_cpus = os.cpu_count()
print(available_cpus)

8
