In [34]:
import os
import sys
# Resolve the parent of the directory that would contain
# "./quantity_search.ipynb" (i.e. the parent of the current working
# directory) and put it first on sys.path so it is searched before the
# default import locations.
main_path = os.path.abspath(os.path.join(os.path.dirname("./quantity_search.ipynb"), '..'))
sys.path.insert(0, main_path)
import numpy as np
import argparse
import pandas as pd


def add_count_and_file_Qs(df, results_file):
    """Annotate a quantity-search table with combination size and source file.

    Two columns are written onto ``df`` itself (the frame is modified in
    place and the very same object is returned):

    * ``count_Qs`` -- number of "-"-separated parts in each ``quantity`` label.
    * ``cv_results_file`` -- ``results_file`` repeated on every row.

    As a side effect, the distinct (quantity, count_Qs) pairs are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``quantity`` column holding strings.
    results_file : object
        Value stored in ``cv_results_file`` (callers in this notebook pass
        the path of the CSV the table was read from).

    Returns
    -------
    pandas.DataFrame
        ``df``, with the two columns added.
    """
    n_rows = len(df)

    # A label split on "-" always has one more piece than it has dashes.
    parts_per_row = [label.count("-") + 1 for label in df["quantity"]]
    df.loc[:, "count_Qs"] = parts_per_row

    distinct_pairs = df[["quantity", "count_Qs"]].drop_duplicates()
    print(distinct_pairs)

    df.loc[:, "cv_results_file"] = [results_file for _ in range(n_rows)]
    return df


def comb_filter_only_bests(df):
    """Keep, for every combination size >= 2, only the best quantity's rows.

    For each distinct ``count_Qs`` value of at least 2, the row with the
    highest ``cv_mean`` among rows of that size is located, and every row of
    that size sharing its ``quantity`` label is kept. Rows whose ``count_Qs``
    is below 2 are never selected.

    Sizes that have no rows are simply skipped, so gaps between sizes (or a
    table with no combinations at all) yield a smaller or empty result
    instead of an ``argmax`` error on an empty slice.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``count_Qs``, ``cv_mean`` and ``quantity`` columns.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df``, in their original order and with their
        original index labels.
    """
    counts = df["count_Qs"].to_numpy()
    scores = df["cv_mean"].to_numpy()
    quantities = df["quantity"]

    # Build the mask positionally over the full frame so it never depends on
    # aligning a subset's index against ``df``'s index.
    keep = np.zeros(len(df), dtype=bool)
    for level in np.unique(counts[counts >= 2]):
        in_level = counts == level
        best_in_level = np.argmax(scores[in_level])  # first maximum on ties
        best_quantity = quantities.to_numpy()[in_level][best_in_level]
        keep |= in_level & (quantities == best_quantity).to_numpy()

    return df[keep]


def concat_single_best(df1, df2):
    """Stack the rows of ``df1``'s top-scoring quantity on top of ``df2``.

    The row of ``df1`` with the largest ``cv_mean`` determines the winning
    ``quantity`` label; every ``df1`` row carrying that label is kept and
    concatenated in front of ``df2``. Index labels of both inputs are
    preserved as they are.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Table with ``cv_mean`` and ``quantity`` columns.
    df2 : pandas.DataFrame
        Table appended after the selected ``df1`` rows.

    Returns
    -------
    pandas.DataFrame
        Selected ``df1`` rows followed by all rows of ``df2``.
    """
    scores = df1["cv_mean"].to_numpy()
    top_row = df1.iloc[np.argmax(scores)]
    is_top_quantity = df1["quantity"] == top_row["quantity"]
    return pd.concat([df1[is_top_quantity], df2])


def _best_row_per_quantity(df, type_label):
    """Return the highest-``cv_mean`` row of each distinct quantity, tagged with ``type_label``."""
    df = df.reset_index(drop=True)
    scores = df["cv_mean"].to_numpy()

    best_positions = []
    for q in np.unique(df["quantity"]):
        # Positions of this quantity's rows *in the full frame*; argmax is
        # taken over the subset and mapped back through these positions so
        # the later ``iloc`` picks the intended row.
        group_positions = np.flatnonzero((df["quantity"] == q).to_numpy())
        best_positions.append(group_positions[np.argmax(scores[group_positions])])

    # ``assign`` returns a new frame, so no write goes through a slice.
    return df.iloc[best_positions].assign(type=type_label)


def resume_quantity_search(dfs):
    """Summarise the best configuration of every quantity per search family.

    For each input table, and for each distinct ``quantity`` label in it
    (visited in sorted order), the single row with the highest ``cv_mean``
    is kept (first one on ties). Rows coming from the first, second and
    third table are tagged ``"Single-Q"``, ``"Double-Q"`` and ``"Triple-Q"``
    respectively in a new ``type`` column.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Up to three tables, in single/double/triple order. Each needs the
        columns returned below (except ``type``). Frames beyond the third
        are ignored.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the selected rows restricted to the summary
        columns. Index labels are the row positions within each input
        table after its index was reset.
    """
    type_labels = ("Single-Q", "Double-Q", "Triple-Q")
    bests = [_best_row_per_quantity(df, label)
             for df, label in zip(dfs, type_labels)]

    res_df = pd.concat(bests)
    return res_df[["quantity", "type", "cv_mean", "cv_std", "win", "wl",
                   "alpha", "dropped", "bopf_shape", "cv_time",
                   "q_search_path", "cv_results_file"]]


def create_resume_file(C, timestamp, out_path):
    """Build the quantity-search summary table and save it as a CSV.

    For each of the "single", "double" and "triple" families, two CSV files
    located in ``out_path`` are read: ``<key>_quantity_<c>_<timestamp>`` and
    ``comb_<key>_quantity_<c>_<timestamp>`` (``<c>`` being ``C`` lower-cased).
    Only rows whose ``valid_cv`` value is truthy are kept. The best
    combinations of the "comb" table are joined with the best quantity of
    the plain table, tagged with the family's data folder in
    ``q_search_path``, and the three resulting tables are condensed by
    ``resume_quantity_search``.

    The summary is written to ``quantity_search_resume.csv`` inside
    ``out_path`` (without the index) and also returned.

    Parameters
    ----------
    C : str
        Method name used, lower-cased, in the file and folder names.
    timestamp : str
        Suffix of the result file names.
    out_path : str
        Directory containing the input files; the summary is written here.

    Returns
    -------
    pandas.DataFrame
        The summary table that was written to disk.
    """
    method = C.lower()

    def _load_valid(csv_path):
        # Read one results table, keep valid rows, add count/file columns.
        table = pd.read_csv(csv_path, index_col=None)
        table = table[table["valid_cv"]]
        return add_count_and_file_Qs(table, csv_path)

    per_family = []
    for key in ("single", "double", "triple"):
        comb_quantity_file = os.path.join(out_path, "comb_%s_quantity_%s_%s" % (key, method, timestamp))
        quantity_file = os.path.join(out_path, "%s_quantity_%s_%s" % (key, method, timestamp))
        data_folder = os.path.join(out_path, "%s_quantity_%s_data" % (key, method))

        plain_table = _load_valid(quantity_file)
        comb_table = _load_valid(comb_quantity_file)

        family_table = concat_single_best(plain_table, comb_filter_only_bests(comb_table))
        family_table.loc[:, "q_search_path"] = [data_folder] * len(family_table)
        per_family.append(family_table)

    resume_df = resume_quantity_search(per_family)
    resume_df.to_csv(os.path.join(out_path, "quantity_search_resume.csv"), index=False)

    return resume_df

In [35]:
# Run configuration: create_resume_file lower-cases compact_method and combines
# it with timestamp to build the names of the result files it reads.
compact_method = "LSA"
timestamp = "20210914-031605"
# Folder (relative to the working directory) that holds the result files;
# the summary CSV is written into the same folder.
out_path = os.path.join("..", "data", "plasticc", "MMMBOPF", "quantity_search", "lsa")
# Sanity check: prints whether that folder exists.
print(os.path.exists(out_path))

# Build the summary table and save it as quantity_search_resume.csv in out_path.
resume_df = create_resume_file(compact_method, timestamp, out_path)

True
    quantity  count_Qs
4       (Me)         1
94      (Tr)         1
184     (Va)         1
274     (Mm)         1
364     (Mn)         1
454     (Mx)         1
              quantity  count_Qs
4              (Tr-Mm)         2
94             (Tr-Va)         2
184            (Tr-Mn)         2
274            (Tr-Me)         2
364            (Tr-Mx)         2
454         (Tr-Mm-Va)         3
544         (Tr-Mm-Mn)         3
634         (Tr-Mm-Me)         3
724         (Tr-Mm-Mx)         3
814      (Tr-Mm-Mn-Va)         4
904      (Tr-Mm-Mn-Me)         4
994      (Tr-Mm-Mn-Mx)         4
1084  (Tr-Mm-Mn-Mx-Va)         5
1174  (Tr-Mm-Mn-Mx-Me)         5
    quantity  count_Qs
0     (TrMe)         1
60    (TrVa)         1
120   (TrMm)         1
180   (TrMn)         1
240   (TrMx)         1
300   (MmMe)         1
360   (MmVa)         1
420   (MmMn)         1
480   (MmMx)         1
540   (VaMe)         1
600   (VaMn)         1
660   (VaMx)         1
                        quantity  count_

In [36]:
# Display the summary table returned by create_resume_file.
resume_df

Unnamed: 0,quantity,type,cv_mean,cv_std,win,wl,alpha,dropped,bopf_shape,cv_time,q_search_path,cv_results_file
12,(Tr),Single-Q,0.283,0.036,589.853,2,4,0,144,4.174,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...
42,(Tr-Mm),Single-Q,0.374,0.036,710.551,2,4,0,287,5.58,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
79,(Tr-Mm-Mn),Single-Q,0.395,0.015,710.551,3,4,0,2098,8.031,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
100,(Tr-Mm-Mn-Mx),Single-Q,0.408,0.028,710.551,2,4,0,558,6.665,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
129,(Tr-Mm-Mn-Mx-Me),Single-Q,0.411,0.028,710.551,2,4,0,641,7.301,..\data\plasticc\MMMBOPF\quantity_search\lsa\s...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
6,(TrMm),Double-Q,0.425,0.022,91.671,1,4,0,96,3.864,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...
51,(TrMm-MmMn),Double-Q,0.456,0.026,110.428,1,4,0,192,4.652,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
94,(TrMm-MmMn-MmMx),Double-Q,0.468,0.02,91.671,1,4,0,288,5.714,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
139,(TrMm-MmMn-MmMx-TrMn),Double-Q,0.486,0.019,110.428,1,4,0,384,6.745,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...
186,(TrMm-MmMn-MmMx-TrMn-VaMn),Double-Q,0.491,0.023,193.035,1,4,0,480,7.744,..\data\plasticc\MMMBOPF\quantity_search\lsa\d...,..\data\plasticc\MMMBOPF\quantity_search\lsa\c...


In [39]:
# Show the complete cv_results_file value of the first summary row.
resume_df["cv_results_file"].to_numpy()[0]

'..\\data\\plasticc\\MMMBOPF\\quantity_search\\lsa\\single_quantity_lsa_20210914-031605'