# In [3]:
import os
import shutil
import pandas as pd
import json
import re

###### PLEASE DEFINE PATH TO YOUR ROBYN OUTPUT FOLDER AND MODEL ID

# source_directory: folder that holds "RobynModel-models.json" and the
#   "pareto_aggregated*.csv" / "pareto_hyperparameters*.csv" files read below.
# solID: ID of the model to export; it is compared (as a string) with the
#   "solID" column of those CSV files and names the output file
#   "RobynModel-<solID>.json".
# The two values below are examples; replace them with your own.
source_directory = r"C:\Users\furfirat\Desktop\Robyn_202401291649_init"
solID = "3_346_3" 

def copy_json_file(source_dir, json_file_name, new_json_file_name):
    """Copy a JSON file to a new name inside the same directory.

    Failures are reported on stdout instead of being raised, so callers that
    ignore the result behave as before; the return value tells whether the
    copy actually happened.

    Args:
        source_dir: Directory that holds the source file and receives the copy.
        json_file_name: Name of the existing JSON file inside ``source_dir``.
        new_json_file_name: Name of the copy to create inside ``source_dir``.

    Returns:
        True if the file was copied, False if the source file is missing or
        the copy failed with an OS-level error.
    """
    source_file_path = os.path.join(source_dir, json_file_name)
    destination_file_path = os.path.join(source_dir, new_json_file_name)

    # Guard clause: no need to raise an exception only to catch it again.
    if not os.path.isfile(source_file_path):
        print(f"An error occurred: Source JSON file '{json_file_name}' not found in directory '{source_dir}'.")
        return False

    try:
        shutil.copy(source_file_path, destination_file_path)
    except OSError as e:
        # OSError covers permission problems, missing paths and
        # shutil.SameFileError; programming errors are left to propagate.
        print(f"An error occurred: {e}")
        return False

    return True

# Work on a copy of the Robyn export that is named after the selected model;
# the original "RobynModel-models.json" is only read by the copy step.
json_filename = "RobynModel-models.json"
new_json_file_name = f"RobynModel-{solID}.json"
copy_json_file(source_directory, json_filename, new_json_file_name)

# The copy is read and rewritten in place, so the input path and the output
# path are the same file.
json_file_path = output_file = os.path.join(source_directory, new_json_file_name)

# Separators passed to json.dump: no space after ',' or ':'.
custom_separators = (',', ':')

# ---- Build the "summary" and "errors" sections from pareto_aggregated*.csv ----

# Use the first pareto_aggregated*.csv found in the Robyn output folder.
csv_file_path = None
for file_name in os.listdir(source_directory):
    if file_name.startswith("pareto_aggregated") and file_name.endswith(".csv"):
        csv_file_path = os.path.join(source_directory, file_name)
        break

# Stop here with a clear message; otherwise the first use of `df` below would
# fail with an unrelated NameError.
if csv_file_path is None:
    raise FileNotFoundError(
        f"No 'pareto_aggregated*.csv' file found in directory '{source_directory}'."
    )

df = pd.read_csv(csv_file_path)

# Without a solID column the rows of the selected model cannot be isolated;
# carrying on would export the rows of every model under the selected ID.
if "solID" not in df.columns:
    raise ValueError("Column 'solID' not found in the CSV file.")

# Compare as strings so the match does not depend on how pandas parsed the IDs.
# .copy() yields an independent frame, so later assignments never hit a slice.
df = df[df["solID"].astype(str) == str(solID)].copy()
if df.empty:
    raise ValueError(f"Model ID '{solID}' not found in '{csv_file_path}'.")

# Error metrics are taken from the first row of the selected model, rounded to 4 decimals.
error_metric_cols = ["rsq_train", "nrmse_train", "nrmse", "decomp.rssd", "mape"]
error_values = df.iloc[0][error_metric_cols].apply(lambda x: round(float(x), 4)).to_dict()

# Rename the CSV columns to the key names used in the JSON summary.
df = df.rename(columns={'rn': 'variable', 'xDecompPerc': 'decompPer', 'xDecompAgg': 'decompAgg', 'cpa_total': 'performance'})

column_order1 = ['variable', 'coef', 'decompPer', 'decompAgg', 'performance', 'mean_response', 'mean_spend']
column_order2 = ['variable', 'coef', 'decompPer', 'decompAgg']

df = df[column_order1].copy()

decimal_cols = ['coef', 'decompPer', 'decompAgg', 'performance', 'mean_response', 'mean_spend']
df[decimal_cols] = df[decimal_cols].round(4)

# Rows for these variables are exported with the short column set (column_order2);
# every other row keeps all columns of column_order1.
variables_to_filter = ["(Intercept)", "trend", "season", "weekday", "holiday", "monthly"]
is_filtered_variable = df['variable'].isin(variables_to_filter)
if is_filtered_variable.any():
    filtered_df1 = df[is_filtered_variable][column_order2]
    filtered_df2 = df[~is_filtered_variable]
else:
    filtered_df1 = df
    filtered_df2 = pd.DataFrame()

result1 = filtered_df1.to_dict(orient='records')
result2 = filtered_df2.to_dict(orient='records')

# Short-column rows first, then the remaining rows.
result = result1 + result2

# Replace an infinite "performance" value with 0.
for record in result:
    if "performance" in record and record["performance"] == float("inf"):
        record["performance"] = 0

exported_model = {
    "ExportedModel": {
        "select_model": [solID],
        "ts_validation": [False],
        "summary": result,
        "errors": [error_values]
    }
}

# ---- First write: add the ExportedModel section to the copied JSON ----

json_file_path = os.path.join(source_directory, new_json_file_name)

with open(json_file_path) as file:
    existing_data = json.load(file)

# `exported_model` already carries the top-level "ExportedModel" key, so it is
# unpacked here. Assigning it under a second "ExportedModel" key would nest the
# section one level too deep.
updated_data = {
    "InputCollect": existing_data["InputCollect"],
    "ModelsCollect": existing_data["ModelsCollect"],
    **exported_model
}

# The file is left exactly as json.dump writes it. With separators=(',', ':')
# the encoder never emits ", " between items, so a text-level
# replace(', ', ',') could only alter ", " inside string values, and that
# damage would survive the next json.load of this file.
with open(output_file, 'w') as file:
    json.dump(updated_data, file, indent=3, separators=custom_separators)
    
# ---- Add the selected model's hyperparameter values ("hyper_values") ----

# Use the first pareto_hyperparameters*.csv found in the Robyn output folder.
csv_file_path = None
for file_name in os.listdir(source_directory):
    if file_name.startswith("pareto_hyperparameters") and file_name.endswith(".csv"):
        csv_file_path = os.path.join(source_directory, file_name)
        break

# Stop here with a clear message; otherwise `df` would silently keep whatever
# an earlier step loaded (or be undefined).
if csv_file_path is None:
    raise FileNotFoundError(
        f"No 'pareto_hyperparameters*.csv' file found in directory '{source_directory}'."
    )

df = pd.read_csv(csv_file_path)

with open(json_file_path) as file:
    existing_data = json.load(file)

adstock_type = existing_data["InputCollect"]["adstock"]

# Compare as strings so the match does not depend on how pandas parsed the IDs.
filtered_df = df[df["solID"].astype(str) == str(solID)]
if filtered_df.empty:
    raise ValueError(f"Model ID '{solID}' not found in '{csv_file_path}'.")

# The column suffixes to export depend on the adstock setting: "_thetas" when
# it contains "geometric", "_scales" and "_shapes" otherwise.
if "geometric" in adstock_type:
    hyper_suffixes = ("_alphas", "_gammas", "_penalty", "_thetas")
else:
    hyper_suffixes = ("_alphas", "_gammas", "_penalty", "_scales", "_shapes")

columns_to_include = ["lambda", "train_size"]
columns_to_include += [col for col in filtered_df.columns if col.endswith(hyper_suffixes)]

# One single-element list per hyperparameter: first matching row, keys sorted
# case-insensitively, missing values dropped, values rounded to 4 decimals.
selected_row = filtered_df[columns_to_include].iloc[0]
hyper_values = {
    col: [round(value, 4)]
    for col, value in sorted(selected_row.items(), key=lambda item: item[0].lower())
    if pd.notna(value)
}

exported_model = {
    "ExportedModel": {
        "select_model": [solID],
        "ts_validation": [False],
        "summary": result,
        "errors": [error_values],
        "hyper_values": hyper_values
    }
}

# `existing_data` was loaded above and the file has not been written since, so
# it is reused instead of reading the file a second time.
updated_data = {
    "InputCollect": existing_data["InputCollect"],
    "ModelsCollect": existing_data["ModelsCollect"],
    **exported_model
}

# The file is left exactly as json.dump writes it. With separators=(',', ':')
# the encoder never emits ", " between items, so a text-level
# replace(', ', ',') could only alter ", " inside string values.
with open(output_file, 'w') as file:
    json.dump(updated_data, file, indent=3, separators=custom_separators)

# ---- Build "hyper_updated" from the hyperparameters stored in InputCollect ----

with open(json_file_path) as file:
    data = json.load(file)

hyperparameters = data["InputCollect"]["hyperparameters"]

# Shallow copy, so keys added below do not appear in InputCollect.
hyper_updated = dict(hyperparameters)
hyper_updated["lambda"] = [0, 1]

# First pareto_aggregated*.csv in the output folder, or None when there is none.
csv_file_path = next(
    (
        os.path.join(source_directory, file_name)
        for file_name in os.listdir(source_directory)
        if file_name.startswith("pareto_aggregated") and file_name.endswith(".csv")
    ),
    None,
)

if csv_file_path:
    df = pd.read_csv(csv_file_path)
    # NOTE(review): this is the maximum over every row of the CSV, not only the
    # rows of the selected model - confirm that this is intended.
    train_size = df["train_size"].max()
    hyper_updated["train_size"] = [train_size]

hyper_values = data.get("ExportedModel", {}).get("hyper_values", {})

# These five penalties are added first, in this fixed order, when the model has them.
for prefix in ("holiday", "monthly", "season", "trend", "weekday"):
    penalty_key = prefix + "_penalty"
    if penalty_key in hyper_values:
        hyper_updated[penalty_key] = [0, 1]

# All penalty keys of the model, ordered by where their variable appears in
# the summary. Keys already present keep their position in the dict; only new
# keys are appended in this order. A variable missing from the summary raises
# ValueError.
keys_to_reorder = [key for key in hyper_values if key.endswith("_penalty")]
variable_order = [entry["variable"] for entry in data["ExportedModel"]["summary"]]
reordered_keys = sorted(
    keys_to_reorder,
    key=lambda key: variable_order.index(key.split("_penalty")[0]),
)
hyper_updated.update((key, [0, 1]) for key in reordered_keys)

data["ExportedModel"]["hyper_updated"] = hyper_updated

# ---- Fill in the remaining ExportedModel fields and write the final JSON ----

data["ExportedModel"]["calibration_constraint"] = [0.1]
data["ExportedModel"]["cores"] = [7]

# Use the first pareto_aggregated*.csv found in the Robyn output folder.
csv_file_path = None
for file_name in os.listdir(source_directory):
    if file_name.startswith("pareto_aggregated") and file_name.endswith(".csv"):
        csv_file_path = os.path.join(source_directory, file_name)
        break

# Best effort: when the CSV is missing, "iterations" and "trials" are left out.
if csv_file_path:
    df = pd.read_csv(csv_file_path)
    max_iterations = df["iterations"].max()
    # Round the largest iteration number down to a multiple of 1000.
    max_iterations_rounded = int(max_iterations // 1000) * 1000
    max_trials = int(df["trial"].max())

    data["ExportedModel"]["iterations"] = [max_iterations_rounded]
    data["ExportedModel"]["trials"] = [max_trials]

data["ExportedModel"]["intercept_sign"] = ["non_negative"]
data["ExportedModel"]["nevergrad_algo"] = ["TwoPointsDE"]

has_penalty_key = any(key.endswith("_penalty") for key in data["ExportedModel"]["hyper_updated"])

add_penalty_factor_dict = {"add_penalty_factor": [has_penalty_key]}
# Store the flag in the export; it used to be computed but never written.
data["ExportedModel"].update(add_penalty_factor_dict)

data["ExportedModel"]["seed"] = [123]
data["ExportedModel"]["pareto_fronts"] = [3]

# NOTE(review): this is True only when the first value of every non-penalty
# hyperparameter is identical - confirm this is the intended meaning of
# "hyper_fixed".
non_penalty_keys = [key for key in hyper_updated.keys() if not key.endswith("_penalty")]
all_same_values = len(set(hyper_updated[key][0] for key in non_penalty_keys)) == 1

data["ExportedModel"]["hyper_fixed"] = [all_same_values]

# plot_folder is the output folder, with forward slashes and one trailing
# slash. It used to be built from the JSON file path, which produced
# ".../RobynModel-<solID>.json/".
json_folder_path = source_directory.replace("\\", "/").rstrip("/")

data["ExportedModel"]["plot_folder"] = [json_folder_path + "/"]

# Serialise once in memory, compact the text, then write the file a single time.
json_str = json.dumps(data, indent=3, separators=custom_separators)

# Put every JSON array on a single line.
# NOTE(review): these substitutions run on the raw text, so whitespace after
# "[" or "," and before "]" inside string values is removed as well.
json_str = re.sub(r'\[\s+', '[', json_str)
json_str = re.sub(r'\s+\]', ']', json_str)
json_str = re.sub(r',[\s\n]+(?=[^\[\]]*])', ',', json_str)

with open(output_file, 'w') as json_file:
    json_file.write(json_str)
    