---
# Linear model experiments
---

This notebook:
- Fits a linear regression model under a variety of data settings, makes predictions, and saves the results.

# Modules

In [1]:
import sys, os, datetime
sys.path.append("../src")

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from exp_helpers import format_results, result_path
from dataset import ds_generator
from config import data_configuration

In [2]:
# Load the details table. NOTE: read_pickle unpickles arbitrary objects, so it
# should only ever be pointed at a trusted file.
df = pd.read_pickle('data_details.pkl')

# Keep a row when it is NOT marked 'lack of data' or 'other dams' and IS marked
# 'network exist' and 'gauge available' (assumes these four columns are boolean
# -- TODO confirm).
is_usable = (
    ~(df['lack of data'])
    & (df['network exist'])
    & ~(df['other dams'])
    & (df['gauge available'])
)
idxs = df[is_usable].index

# Single estimator instance; the experiment cells below refit it repeatedly.
model = LinearRegression()

### Data format experiment

In [3]:
# Data format experiment: run every combination of PCA on/off and catchment
# aggregation on/off. PCA is the outer (slower-changing) flag.
for pca, agg in [(p, a) for p in (True, False) for a in (True, False)]:
    config_args = data_configuration(catchment_aggregation=agg,
                                     pca_exec=pca)

    # Formatted test predictions, keyed by the id yielded by ds_generator.
    results = {}
    for fold in ds_generator(idxs, **config_args):
        # Each yielded item is an 11-tuple: an id, `te_ds`, and (judging by the
        # names) train / validation / test splits. The validation part is not
        # used in this cell.
        (idx, te_ds,
         train_x, train_y, train_date,
         val_x, val_y, val_date,
         test_x, test_y, test_date) = fold

        # fit() returns the estimator itself, so fit and predict can be chained.
        predict = model.fit(train_x, train_y).predict(test_x)
        results[idx] = format_results(predict, test_y, te_ds, test_date)

    # Write to disk only once the whole pass for this configuration is done:
    # one pickle per id inside the directory derived from the configuration.
    dirname = result_path(config_args)
    os.makedirs(dirname, exist_ok=True)
    for k, result in results.items():
        result.to_pickle(f"{dirname}/{k}.pkl")

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

### Input data

In [9]:
# Input data experiment: toggle the snow-melt, month and past-discharge inputs
# independently (8 combinations), always with PCA enabled. The sweep order is
# snow_melt (outermost), then monthes, then past_discharge (innermost).
input_flags = (True, False)
for snow_melt, monthes, past_discharge in [
    (s, m, d) for s in input_flags for m in input_flags for d in input_flags
]:
    config_args = data_configuration(input_snmlt = snow_melt,
                                     input_discharge = past_discharge,
                                     input_month=monthes,
                                     pca_exec=True)

    # Formatted test predictions, keyed by the id yielded by ds_generator.
    results = {}
    for fold in ds_generator(idxs, **config_args):
        # Each yielded item is an 11-tuple; the validation entries are unpacked
        # but not used in this cell.
        (idx, te_ds,
         train_x, train_y, train_date,
         val_x, val_y, val_date,
         test_x, test_y, test_date) = fold

        # fit() returns the estimator itself, so fit and predict can be chained.
        predict = model.fit(train_x, train_y).predict(test_x)
        results[idx] = format_results(predict, test_y, te_ds, test_date)

    # Save results: one pickle per id in the configuration's directory,
    # written after every item for this configuration has been processed.
    dirname = result_path(config_args)
    os.makedirs(dirname, exist_ok=True)
    for k, result in results.items():
        result.to_pickle(f"{dirname}/{k}.pkl")

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

### Past discharge

In [17]:
# Past discharge experiment: sweep the `past` setting with PCA enabled.
# (`past` is presumably the length of the discharge history fed to the model
# -- confirm against data_configuration.)
for past in (1, 2, 5, 10, 20, 30):
    config_args = data_configuration(past = past,
                                     pca_exec = True)

    # Formatted test predictions, keyed by the id yielded by ds_generator.
    results = {}
    for fold in ds_generator(idxs, **config_args):
        # Each yielded item is an 11-tuple; the validation entries are unpacked
        # but not used in this cell.
        (idx, te_ds,
         train_x, train_y, train_date,
         val_x, val_y, val_date,
         test_x, test_y, test_date) = fold

        # fit() returns the estimator itself, so fit and predict can be chained.
        predict = model.fit(train_x, train_y).predict(test_x)
        results[idx] = format_results(predict, test_y, te_ds, test_date)

    # Save results: one pickle per id in the configuration's directory,
    # written after every item for this configuration has been processed.
    dirname = result_path(config_args)
    os.makedirs(dirname, exist_ok=True)
    for k, result in results.items():
        result.to_pickle(f"{dirname}/{k}.pkl")

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

### Rain observation

In [18]:
# Rain observation experiment: compare the rainfall input options, each run
# with PCA enabled.
for rain_obs in ("gauge", "msm", "gsmap", "gsmap+gauge"):
    config_args = data_configuration(rain_obs = rain_obs,
                                     pca_exec = True)

    # Formatted test predictions, keyed by the id yielded by ds_generator.
    results = {}
    for fold in ds_generator(idxs, **config_args):
        # Each yielded item is an 11-tuple; the validation entries are unpacked
        # but not used in this cell.
        (idx, te_ds,
         train_x, train_y, train_date,
         val_x, val_y, val_date,
         test_x, test_y, test_date) = fold

        # fit() returns the estimator itself, so fit and predict can be chained.
        predict = model.fit(train_x, train_y).predict(test_x)
        results[idx] = format_results(predict, test_y, te_ds, test_date)

    # Save results: one pickle per id in the configuration's directory,
    # written after every item for this configuration has been processed.
    dirname = result_path(config_args)
    os.makedirs(dirname, exist_ok=True)
    for k, result in results.items():
        result.to_pickle(f"{dirname}/{k}.pkl")

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]