# Final Results

Recalculate the metrics and plot them, this time for many more stations per event date. Essentially, this is a more robust version of [002](./002_calc_some_metrics.ipynb).

In [2]:
from pathlib import Path

from typing import TypedDict

from random import sample, seed
from utils.constants import WINDOW_SIZE, Events
from utils import load_data, calc_metrics, plot_metrics, read_metrics_file

import multiprocessing as mp

import warnings


# Silence every warning for the rest of the session (applies to all categories).
warnings.filterwarnings("ignore")

# Seed Python's `random` module; this must run before any call to `sample`.
seed(37)

# Event under study and the target number of stations per date.
EVENT: Events = "Forbush Decrease"
MAX_SAMPLES: int = 10

# Event name with the spaces removed; used to build the ./data/<event>/ paths
# and passed as the `event` argument of the metric/plot helpers.
event_replace: str = EVENT.replace(" ", "")

# Relevant dates for the event.
# Maps each date (also the data sub-directory name) to a two-item list whose
# first and second entries are passed as the min/max datetime when plotting.
datetimes = {
    "2023-04-23": ["2023-04-23 23:00:00", "2023-04-24 06:00:00"],
    "2024-03-24": ["2024-03-24 14:00:00", "2024-03-25 02:00:00"],
    "2024-05-10": ["2024-05-10 18:00:00", "2024-05-11 01:00:00"],
}

## Calculate metrics

In [3]:
# Sampling request for one date: the station names that may still be drawn
# and how many of them to draw.
StationsToChoose = TypedDict(
    "StationsToChoose",
    {
        "stations": list[str],
        "num_sample": int,
    },
)


# Every station present in the raw data file of each date: all columns of
# all.txt except "datetime".
stations: dict[str, list[str]] = {
    date: list(
        load_data(f"./data/{event_replace}/{date}/all.txt")
        .drop(columns="datetime")
        .columns
    )
    for date in datetimes
}

# Stations already calculated for each date, taken from the CSV file names in
# the date directory (the part before the first "_", upper-cased).
choosen_stations: dict[str, list[str]] = {
    date: [
        csv_path.name.strip().split("_", 1)[0].upper()
        for csv_path in Path(f"./data/{event_replace}/{date}").glob("*.csv")
    ]
    for date in datetimes
}

# Candidates that have not been calculated yet. They are sorted because set
# iteration order for strings changes between interpreter runs (hash
# randomization); sampling from an unsorted set would make `seed(...)`
# ineffective for reproducibility.
remaining_stations: dict[str, list[str]] = {
    date: sorted(set(stations[date]) - set(choosen_stations[date]))
    for date in datetimes
}

stations_to_choose: dict[str, StationsToChoose] = {
    date: {
        "stations": remaining,
        # Top up to MAX_SAMPLES, but never ask for a negative amount (more
        # than MAX_SAMPLES already calculated) nor for more stations than
        # are left; `sample` raises ValueError in both cases.
        "num_sample": min(
            len(remaining),
            max(0, MAX_SAMPLES - len(choosen_stations[date])),
        ),
    }
    for date, remaining in remaining_stations.items()
}

# Without repetition of stations already calculated
plot_stations = {
    date: sample(items["stations"], k=items["num_sample"])
    for date, items in stations_to_choose.items()
}


In [4]:
# Calculating all metrics takes roughly 5 minutes on the author's machine.

# Load each date's raw file once instead of once per station; dates without
# any station to process are skipped so their file is never read.
data_by_date = {
    date: load_data(f"./data/{event_replace}/{date}/all.txt").set_index("datetime")
    for date, date_stations in plot_stations.items()
    if date_stations
}

# One (data, station, date) task per station; the tuple layout matches the
# positional arguments that `starmap` unpacks into `calc_metrics`.
arguments = [
    (data_by_date[date], station, date)
    for date, date_stations in plot_stations.items()
    for station in date_stations
]

with mp.Pool(processes=mp.cpu_count()) as pool:
    results = pool.starmap(
        calc_metrics,
        arguments,
    )


Error: ZeroDivisionError('division by zero') -- Index: 0 & 5 -- Metric: higuchi_fd
Error: ValueError('cannot embed data of length 5 with embedding dimension 10 and lag 1, minimum required length is 10') -- Index: 0 & 5 -- Metric: lyap_r
Error: ZeroDivisionError('division by zero') -- Index: 1 & 5 -- Metric: higuchi_fd
Error: ValueError('cannot embed data of length 5 with embedding dimension 10 and lag 1, minimum required length is 10') -- Index: 1 & 5 -- Metric: lyap_r
Error: ZeroDivisionError('division by zero') -- Index: 2 & 5 -- Metric: higuchi_fd
Error: ValueError('cannot embed data of length 5 with embedding dimension 10 and lag 1, minimum required length is 10') -- Index: 2 & 5 -- Metric: lyap_r
Error: ZeroDivisionError('division by zero') -- Index: 3 & 5 -- Metric: higuchi_fd
Error: ValueError('cannot embed data of length 5 with embedding dimension 10 and lag 1, minimum required length is 10') -- Index: 3 & 5 -- Metric: lyap_r
Error: ZeroDivisionError('division by zero') -- Inde

ValueError: All arrays must be of the same length

## Plotting

In [2]:
# Stations to plot for each date: one entry per CSV file found in the
# event/date directory. The station name is the part of the file name before
# the first underscore, upper-cased.
plot_stations: dict[str, list[str]] = {
    date: [
        csv_file.name.strip().split("_", 1)[0].upper()
        for csv_file in Path(f"./data/{event_replace}/{date}").glob("*.csv")
    ]
    for date in datetimes
}

In [3]:
# Display the station names collected for each date.
plot_stations

{'2023-04-23': ['PWNK',
  'JUNG1',
  'NANM',
  'DOMB',
  'MWSN',
  'ATHN',
  'NRLK',
  'JUNG',
  'KERG',
  'TXBY'],
 '2024-03-24': ['SOPO',
  'INVK',
  'NANM',
  'KIEL2',
  'CALM',
  'PWNK',
  'DOMC',
  'DRBS',
  'JUNG',
  'KERG'],
 '2024-05-10': ['CALG',
  'MXCO',
  'CALM',
  'JUNG1',
  'BKSN',
  'KIEL2',
  'TXBY',
  'TERA',
  'APTY',
  'DOMC']}

In [6]:
def plot_metrics_wrapper(args_tuple) -> None:
    """Read the stored metrics of one station and hand them to `plot_metrics`.

    The function takes a single tuple so that it can be used directly with
    `Pool.map`.

    Args:
        args_tuple: A `(date, station, suffix)` triple. `date` must be a key
            of `datetimes`; `suffix` selects the plot variant:
            `1` plots every metric except "lepel_ziv", any other value plots
            only "lepel_ziv".

    Returns:
        None.
    """
    event_date, station_name, plot_suffix = args_tuple

    metrics_df = read_metrics_file(
        event=event_replace,
        date=event_date,
        station=station_name,
        window_size=WINDOW_SIZE,
        datetime_cols={"datetime": ""},
    )

    if suffix_is_all_but_lz := (plot_suffix == 1):
        # "*" is presumably the helper's wildcard for "all remaining columns"
        # -- TODO confirm against plot_metrics.
        metrics_df = metrics_df.drop(columns=["lepel_ziv"], errors="ignore")
    selected_metrics = ["*"] if suffix_is_all_but_lz else ["lepel_ziv"]

    plot_kwargs = dict(
        window_size=WINDOW_SIZE,
        relevant_metrics=selected_metrics,
        df=metrics_df,
        event=event_replace,
        date=event_date,
        station=station_name,
        min_datetime=datetimes[event_date][0],
        max_datetime=datetimes[event_date][1],
        freq_date_range_1="1h",
        freq_date_range_2="1h",
        save_format="pdf",
        suffix=str(plot_suffix),
    )
    plot_metrics(**plot_kwargs)


# One plotting task per (date, station, suffix); suffixes 1 and 2 are the two
# plot variants handled by plot_metrics_wrapper.
arguments_plot = [
    (event_date, station_name, plot_suffix)
    for event_date, station_names in plot_stations.items()
    for station_name in station_names
    for plot_suffix in (1, 2)
]

with mp.Pool(processes=mp.cpu_count()) as pool:
    pool.map(plot_metrics_wrapper, arguments_plot)

ConversionError: Failed to convert value(s) to axis units: 0        2023-04-23 00:00:00
1                    215.482
2                         65
3                   4.174278
4                   1.791759
                ...         
48955               2.066617
48956               5.010739
48957               1.066172
48958                0.02221
48959               0.834602
Name: y, Length: 48958, dtype: object

In [17]:
# Inspect the stored metrics of station "txby" for the earliest event date.
results = read_metrics_file(
    event=event_replace,
    date=min(datetimes),
    station="txby",
    window_size=WINDOW_SIZE,
    datetime_cols={"datetime": ""},
)
# Show the table without the "entropy" column; errors="ignore" makes this a
# no-op when the column does not exist. (The former `if True else []` toggle
# could never take its else branch, so it was removed.)
results.drop(columns=["entropy"], errors="ignore")

Unnamed: 0,datetime,value,window_shape,sampen,permutation_entropy,shannon_entropy,spectral_entropy,app_entropy,hurst,dfa,mfhurst_b,higuchi_fd,katz_fd,petrosian_fd,lepel_ziv,lyap_r,corr_dim
0,2023-04-23 00:00:00,98.133,65,1.358123,2.572659,1.887127,0.876785,0.416660,0.639141,0.732704,0.042451,2.125811,5.344989,1.055651,29,0.015150,0.841387
1,2023-04-23 00:01:00,95.600,66,1.358123,2.570985,1.878534,0.871436,0.410293,0.641933,0.819490,0.036285,2.129681,5.445491,1.055934,30,0.016170,0.829399
2,2023-04-23 00:02:00,95.333,67,1.358123,2.571731,1.876693,0.875754,0.437221,0.641933,0.806197,0.031786,2.142589,5.397593,1.054935,30,0.017392,0.833296
3,2023-04-23 00:03:00,95.200,68,1.440362,2.574979,1.868131,0.873800,0.430734,0.641933,0.818318,0.032114,2.138426,5.341083,1.055215,31,0.018922,0.849363
4,2023-04-23 00:04:00,96.133,69,1.440362,2.570886,1.859267,0.879928,0.450786,0.641933,0.864660,0.031642,2.138995,5.276668,1.054258,31,0.022410,0.869924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,2023-04-24 23:55:00,93.467,70,2.442347,2.507490,1.896201,0.870708,0.353280,0.627145,0.907899,0.024382,2.001312,3.295734,1.058124,29,-0.003241,0.915923
2876,2023-04-24 23:56:00,94.667,69,3.091042,2.513231,1.900966,0.873359,0.358769,0.356060,0.975763,0.016863,2.006938,3.322121,1.059131,29,0.004582,0.903827
2877,2023-04-24 23:57:00,93.867,68,2.944439,2.518014,1.905034,0.872340,0.327030,0.427543,0.918086,0.020689,2.005496,4.087208,1.060170,28,0.007765,0.881854
2878,2023-04-24 23:58:00,96.667,67,2.890372,2.519984,1.907048,0.878251,0.310973,0.521191,0.902125,0.017564,2.003942,4.714364,1.059989,27,0.000537,0.889806
