In [None]:
%load_ext autoreload
%autoreload 2
import random
from pathlib import Path

import numpy as np
import pandas as pd

from src.functions import create_model, plot_data, predict_with_uncertainty, create_train_val_datasets, \
    load_stations_from_path, create_test_datasets, create_test_dataset, check_gpus

In [None]:
# Project helper imported from src.functions; presumably reports which GPUs are
# visible to the backend before training starts — TODO confirm in src/functions.
check_gpus()

In [None]:
# Notebook-wide configuration. Everything below is read by the later cells.

# Directories handed to load_stations_from_path (relative to the notebook's working directory).
TRAIN_PATH = '../data/labeled_daily/train'
TEST_PATH = '../data/labeled_daily/test'
# Window length passed to the dataset builders; also the first axis of MODEL_INPUT_SHAPE.
SEQUENCE_LENGTH = 20
# Passed to the dataset builders and used to slice the test frames before plotting.
# NOTE(review): presumably aligns each window with the label of its last row — confirm in src.functions.
TARGET_START_INDEX = SEQUENCE_LENGTH - 1
# Input columns; their count is the second axis of MODEL_INPUT_SHAPE.
FEATURE_COLUMNS = ['HS', 'day_sin', 'day_cos', 'month_sin', 'month_cos']
# Label column passed to the dataset builders.
TARGET_COLUMN = 'no_snow'
# NOTE(review): not referenced in the visible cells.
DATE_COLUMN = 'measure_date'
# Passed to create_train_val_datasets; presumably the training share of the split — TODO confirm.
SPLIT_PERCENTAGE = 0.8
# Batch size passed to every dataset builder.
DATASET_BATCH_SIZE = 64

# Active learning
# Forwarded as n_iter to predict_with_uncertainty for each candidate station.
UNCERTAINTY_ITERATIONS = 5

# Model configuration
# Architecture string interpreted by create_model; the exact grammar lives in src.functions.
# Alternative (larger) architecture kept for reference:
# MODEL_ARCHITECTURE = "128(l)-64-8(d)-1"
MODEL_ARCHITECTURE = "64(l)-8(d)-1"
MODEL_INPUT_SHAPE = (SEQUENCE_LENGTH, len(FEATURE_COLUMNS))
# NOTE(review): not referenced in the visible cells — verify whether create_model should receive it.
MODEL_DROPOUT_RATE = 0.5
# The next three values are passed straight to model.compile.
MODEL_OPTIMIZER = 'adam'
MODEL_METRICS = ['accuracy']
MODEL_LOSS = 'binary_crossentropy'
# Passed to model.fit as batch_size and epochs on every active-learning round.
MODEL_BATCH_SIZE = 64
MODEL_EPOCHS = 10

In [None]:
# Build the network from the architecture string and the
# (SEQUENCE_LENGTH, number-of-features) input shape configured above.
model = create_model(MODEL_ARCHITECTURE, MODEL_INPUT_SHAPE, logging=None)

# Attach loss, optimizer and metrics; this single model instance is reused by
# every active-learning round below.
model.compile(loss=MODEL_LOSS, optimizer=MODEL_OPTIMIZER, metrics=MODEL_METRICS)

# Index every loaded station frame by the station code found in its first row.
training_stations = {
    frame.iloc[0]['station_code']: frame
    for frame in load_stations_from_path(TRAIN_PATH)
}
testing_stations = {
    frame.iloc[0]['station_code']: frame
    for frame in load_stations_from_path(TEST_PATH)
}

# The labelled pool starts with a single, randomly chosen training station.
# NOTE(review): no seed is set in the visible cells, so the starting station
# (and everything downstream) varies between runs unless seeded elsewhere.
active_stations = [random.choice(list(training_stations))]

# Collects one record per active-learning round.
summary = []

# Active-learning loop.
#
# Each round:
#   1. builds train/validation datasets from the stations currently in the pool,
#   2. continues fitting the same `model` instance (it is not re-created, so
#      weights carry over from round to round),
#   3. evaluates on all test stations,
#   4. scores every training station not yet in the pool with
#      predict_with_uncertainty and adds the highest-scoring one to the pool.
#
# At most one round per training station is run, because the pool grows by one
# station per round.
for i in range(len(training_stations)):
    print('====================================================')
    print(f'Active Learning Iteration #{i}')
    print(f'Current Training Stations: {active_stations}')

    # mean/std come back from the training split and are forwarded to the test
    # dataset builders below (presumably normalisation statistics — confirm in
    # src.functions).
    train_dataset, val_dataset, mean, std = create_train_val_datasets(
        [training_stations[name] for name in active_stations],
        SPLIT_PERCENTAGE,
        FEATURE_COLUMNS,
        TARGET_COLUMN,
        SEQUENCE_LENGTH,
        TARGET_START_INDEX,
        DATASET_BATCH_SIZE
    )

    # NOTE(review): if train_dataset is an already-batched dataset object,
    # batch_size may be redundant here — verify against the Keras version in use.
    history = model.fit(train_dataset, epochs=MODEL_EPOCHS, batch_size=MODEL_BATCH_SIZE, validation_data=val_dataset)

    # Rebuilt every round because mean/std depend on the current pool.
    test_datasets = create_test_datasets(
        testing_stations.values(), FEATURE_COLUMNS, TARGET_COLUMN, SEQUENCE_LENGTH, TARGET_START_INDEX,
        DATASET_BATCH_SIZE, mean, std
    )

    print('Evaluation model on out of sample data...')
    # Average the per-station evaluation outputs across all test stations.
    evaluation_results = np.array([model.evaluate(dataset) for dataset in test_datasets]).mean(axis=0)

    uncertainties = {}

    print('Calculating uncertainty scores...')
    for station_name, station_df in training_stations.items():
        # Stations already in the pool are not candidates.
        if station_name in active_stations:
            continue

        test_dataset = create_test_dataset(
            station_df, FEATURE_COLUMNS, TARGET_COLUMN, SEQUENCE_LENGTH, TARGET_START_INDEX, DATASET_BATCH_SIZE, mean, std
        )

        _, uncertainty_score = predict_with_uncertainty(model, test_dataset, n_iter=UNCERTAINTY_ITERATIONS)
        uncertainties[station_name] = uncertainty_score

    # Empty string marks "no candidate left" in the summary record.
    most_uncertain_station_name = max(uncertainties, key=uncertainties.get) if uncertainties else ''

    print(f'Most uncertain {most_uncertain_station_name}')

    summary.append({
        'iteration': i,
        # Snapshot copies: active_stations is mutated below, so storing the list
        # itself would make every record show the final pool.
        'active_learning_train': list(active_stations),
        # Plain list instead of a dict view so it serialises cleanly.
        'active_learning_predict': list(uncertainties),
        'evaluation_results': evaluation_results,
        'uncertainty_scores': uncertainties,
        'most_uncertain': most_uncertain_station_name
    })

    # Nothing left to add: stop instead of appending the '' placeholder to the pool.
    if not uncertainties:
        break

    active_stations.append(most_uncertain_station_name)
   

In [None]:
# Turn the model outputs for each test dataset into boolean predictions by
# flattening them and thresholding at 0.5. `test_datasets` is the list left
# behind by the last active-learning round.
predictions = []
for station_dataset in test_datasets:
    flat_outputs = model.predict(station_dataset).reshape(-1)
    predictions.append(flat_outputs > 0.5)

# Drop the first TARGET_START_INDEX rows of every test frame before plotting
# them next to the predictions.
trimmed_test_stations = [frame[TARGET_START_INDEX:] for frame in testing_stations.values()]

plot_data(trimmed_test_stations, predictions=predictions)

In [None]:
# Persist the per-round summary and the final model.
summary = pd.DataFrame(summary)
results_dir = '../active-learning-results/2023-11-10'

# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (a no-op when it is already there).
Path(results_dir).mkdir(parents=True, exist_ok=True)

summary.to_csv(f'{results_dir}/summary.csv', index=False)
model.save(f'{results_dir}/model.keras')