# Compare optimized flh with previous dataset
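- The previous dataset was extracted from the PtX Atlas.
- Use ``collect_cached_optimization_results.py`` on the server to collect the optimization results for all settings.
- Copy the collected results into the local ``optimization_results/`` directory (the location this notebook reads them from), for example by running the following from inside that directory: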

````
scp ptxboa2:/home/ptxboa/ptx-boa_offline_optimization/optimization_cache/*.csv .
````

In [None]:
import pandas as pd
from ptxboa import DEFAULT_CACHE_DIR, DEFAULT_DATA_DIR
from ptxboa.api import PtxboaAPI

In [None]:
# Build the API object that is queried for dimension tables further down.
api = PtxboaAPI(
    cache_dir=DEFAULT_CACHE_DIR,  # TODO: maybe disable in test environment?
    data_dir=DEFAULT_DATA_DIR,
)

In [None]:
# Collected optimization results (new data), all expected below
# the "optimization_results" directory.
filename_new_data_network = "optimization_results/network_statistics.csv"
filename_new_data_secondary = (
    "optimization_results/cached_optimization_data_secondary_process.csv"
)
filename_new_data_main = (
    "optimization_results/cached_optimization_data_main_process_chain.csv"
)

# Previous dataset shipped with the package data.
filename_old_data = "ptxboa/data/flh.csv"

In [None]:
# Previous dataset: the file's second column (position 1) becomes the index.
flh_old_raw = pd.read_csv(filepath_or_buffer=filename_old_data, index_col=1)

# New optimization results: three files, all read with default settings
# and in the order main -> secondary -> network.
flh_new_main, flh_new_secondary, flh_new_network = (
    pd.read_csv(csv_path)
    for csv_path in (
        filename_new_data_main,
        filename_new_data_secondary,
        filename_new_data_network,
    )
)

In [None]:
# Inspect the previous dataset exactly as it was loaded from the CSV file.
flh_old_raw

In [None]:
# Add long process names to the old flh data.
#
# The three process-code columns (process_res, process_ely, process_deriv)
# are each translated to a long name via the "process" dimension table; the
# names end up in process_name_res, process_name_ely and process_name_deriv.
# The lookup table is fetched once and reused for all three joins.
process_names = api.get_dimension("process")[["process_code", "process_name"]]

flh_old = flh_old_raw
for role in ("res", "ely", "deriv"):
    flh_old = (
        flh_old.merge(
            process_names,
            left_on=f"process_{role}",
            right_on="process_code",
            how="left",  # keep rows whose code has no entry in the lookup
        )
        # the join key from the lookup side is redundant after the merge
        .drop("process_code", axis=1)
        .rename({"process_name": f"process_name_{role}"}, axis=1)
    )

# The source region code is the part of "key" before the first comma.
flh_old["source_region_code"] = flh_old["key"].str.split(",", n=1).str.get(0)

# Expose "process_flh" under the name "process_code" so it can serve as a
# join key against tables that use that column name.
flh_old = flh_old.rename({"process_flh": "process_code"}, axis=1)
flh_old

In [None]:
# List the columns of every table that takes part in the joins.
# A bare expression is only echoed when it is the last statement of a cell,
# so each column index is printed explicitly to make all three visible.
for label, frame in (
    ("flh_new_secondary", flh_new_secondary),
    ("flh_new_main", flh_new_main),
    ("flh_old", flh_old),
):
    print(f"{label}: {list(frame.columns)}")

In [None]:
# Attach chain / res_gen / scenario to every row of the main process chain
# results, matched through the optimization hash.
#
# NOTE(review): the secondary-process file may contain several rows per
# optimization_hash - TODO confirm. Identical lookup rows are therefore
# dropped first; otherwise the left join would repeat each main row once per
# duplicate. If the lookup rows are already unique this is a no-op.
hash_info = flh_new_secondary[
    ["optimization_hash", "chain", "res_gen", "scenario"]
].drop_duplicates()

flh_all = flh_new_main.merge(hash_info, on="optimization_hash", how="left")

# Translate each chain into its ELY and DERIV entries from the "chain"
# dimension, renamed to process_ely / process_deriv.
chain_info = api.get_dimension("chain")[["chain", "ELY", "DERIV"]].rename(
    {"ELY": "process_ely", "DERIV": "process_deriv"}, axis=1
)

flh_all = flh_all.merge(chain_info, on="chain", how="left")

flh_all

In [None]:
# Left-join the previous dataset onto the new results: every new row is kept,
# and rows without a counterpart in the old data get missing values there.
join_keys = [
    "source_region_code",
    "process_code",
    "process_ely",
    "process_deriv",
]
flh_all = pd.merge(flh_all, flh_old, how="left", on=join_keys)

In [None]:
# Write the combined comparison table to disk (pandas defaults, so the row
# index is written as well).
flh_all.to_csv(path_or_buf="testfile.csv")