# Imports

In [2]:
import os
import re
import json
import pandas as pd
import numpy as np

from src.permutation.loader import CSVLoader

from sklearn.linear_model import ElasticNet
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Feature-name lists handed to the loader. `topo_features` starts with the
# plain (unsuffixed) names and is followed by "<METRIC>:<WEIGHT>" names,
# grouped by weight and, within each weight, ordered by metric.
topo_features = [
    "RADIUS", "LENGTH", "WALL", "SHEAR", "CIRCUM", "FLOW",
    "NODES", "EDGES",
    "GRADIUS", "GDIAMETER",
    "AVG_ECCENTRICITY", "AVG_SHORTEST_PATH",
    "AVG_IN_DEGREES", "AVG_OUT_DEGREES", "AVG_DEGREE",
    "AVG_CLUSTERING", "AVG_CLOSENESS", "AVG_BETWEENNESS", "AVG_CORENESS",
] + [
    f"{metric}:{weight}"
    for weight in (
        "INVERSE_DISTANCE",
        "FLOW",
        "WALL",
        "SHEAR",
        "RADIUS",
        "PRESSURE_AVG",
        "PRESSURE_DELTA",
        "OXYGEN_AVG",
        "OXYGEN_DELTA",
    )
    for metric in (
        "GRADIUS",
        "GDIAMETER",
        "AVG_ECCENTRICITY",
        "AVG_SHORTEST_PATH",
        "AVG_CLOSENESS",
        "AVG_BETWEENNESS",
    )
]

# `spatial_features` is every topo feature plus the "*_WEIGHTED" averages.
spatial_features = [
    *topo_features,
    *(
        f"AVG_{stem}_WEIGHTED"
        for stem in (
            "ECCENTRICITY",
            "CLOSENESS",
            "CORENESS",
            "BETWEENNESS",
            "OUT_DEGREES",
            "IN_DEGREES",
            "DEGREE",
        )
    ),
]

In [3]:
def get_hparams(feature_type, context, response, model, tp=None):
    """Load the first stored hyperparameter JSON for one experiment setup.

    The search root is ``stored_results`` when ``tp`` is None and
    ``stored_results/temporal`` otherwise (both relative to the current
    working directory). An experiment folder is accepted when its name
    contains ``feature_type``, ends with ``context``, and, once split on
    ``"_"``, has ``"15"`` at index 3 and the timepoint at index 1 (``"0"``
    when ``tp`` is None, else ``str(int(tp))``). Inside an accepted folder,
    every sub-folder whose name contains ``response`` is checked for a
    ``model`` directory, and the first ``*.json`` file found there is decoded.

    Parameters
    ----------
    feature_type : str
        Substring that must appear in the experiment folder name.
    context : str
        Suffix the experiment folder name must end with.
    response : str
        Substring that must appear in the response folder name.
    model : str
        Name of the model sub-directory holding the JSON file(s).
    tp : int or float, optional
        Timepoint; when given, the ``temporal`` sub-tree is searched.

    Returns
    -------
    object or None
        The decoded JSON content of the first matching file, or None when
        no matching file exists.

    Raises
    ------
    FileNotFoundError
        If the search root itself does not exist.
    """
    root = "stored_results" if tp is None else os.path.join("stored_results", "temporal")
    expected_tp = "0" if tp is None else str(int(tp))

    # Directory listings are sorted so the "first" match is reproducible;
    # os.listdir makes no ordering guarantee.
    for exp_folder in sorted(os.listdir(root)):
        if feature_type not in exp_folder or not exp_folder.endswith(context):
            continue

        chunks = exp_folder.split("_")
        # Names with too few "_"-separated parts cannot match the expected
        # layout; skip them instead of failing on the index lookups below.
        if len(chunks) < 4:
            continue
        # NOTE(review): "15" presumably is the metric index seen in the
        # folder names (e.g. "topo_0_metric_15_C") -- confirm.
        if chunks[3] != "15" or chunks[1] != expected_tp:
            continue

        exp_path = os.path.join(root, exp_folder)
        if not os.path.isdir(exp_path):
            continue

        for resp_folder in sorted(os.listdir(exp_path)):
            if response not in resp_folder:
                continue

            full_path = os.path.join(exp_path, resp_folder, model)
            # A response folder without results for this model is skipped so
            # the search can continue with the remaining candidates.
            if not os.path.isdir(full_path):
                continue

            for file in sorted(os.listdir(full_path)):
                if not file.endswith(".json"):
                    continue

                json_path = os.path.join(full_path, file)
                print(json_path)
                # Context manager closes the handle deterministically.
                with open(json_path, "r") as handle:
                    return json.load(handle)

    return None

In [6]:
# Experiment grid consumed by the evaluation loop below.
# `feature` picks the feature list: "spatial" -> spatial_features,
# any other value -> topo_features.
feature = "topo"
response_list = [
    "ACTIVITY",
    "GROWTH",
    "SYMMETRY",
]
context_list = [
    "C",
    "CH",
]
# Only SVR is evaluated in this run; the loop also recognises the keys
# "MLR", "MLP" and "RF".
model_list = [
    "SVR",
]

In [7]:
# Evaluate every configured model for each (context, timepoint, response)
# combination using its stored hyperparameters, collect train/test R^2
# summaries, and write them to "ci.csv".

# Number of fit/score rounds per combination. With a single round the
# percentile "interval" below collapses to one value.
# NOTE(review): whether repeated rounds see different data depends on what
# CSVLoader.load_training_data / load_testing_data return per call -- confirm
# before raising this.
n_repeats = 1

# Model key -> estimator class; unknown keys are rejected explicitly instead
# of silently reusing the regressor from a previous iteration.
regressor_classes = {
    "MLR": ElasticNet,
    "MLP": MLPRegressor,
    "RF": RandomForestRegressor,
    "SVR": SVR,
}

ci_columns = ["feature", "timepoint", "context", "response", "model", "train_ci", "test_ci", "train_mean", "test_mean"]

features = spatial_features if feature == "spatial" else topo_features

# Rows are accumulated in a list and turned into a DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0 and copies the frame on each call.
records = []
for context in context_list:
    for timepoint in range(16):
        # The data file names embed the timepoint in float form (e.g. "0.0").
        tp = float(timepoint)
        data_path = f"data/ARCADE/{context}-feature_{tp}_metric_15-04032023.csv"
        for response in response_list:
            # Use the feature list selected above rather than always topo_features.
            loader = CSVLoader(data_path, features=features, response=response)
            loader.clean_data()

            for model in model_list:
                if model not in regressor_classes:
                    raise ValueError(f"Unknown model {model!r}; expected one of {sorted(regressor_classes)}")

                hparams = get_hparams(feature, context, response, model, tp)
                print(hparams)
                if hparams is None:
                    raise FileNotFoundError(
                        f"No stored hyperparameters for {feature}/{context}/{response}/{model} at timepoint {timepoint}"
                    )
                regressor = regressor_classes[model](**hparams)

                print(f"Running {model} on {context} {response} {tp}")
                train_r2s = []
                test_r2s = []
                for _ in range(n_repeats):
                    X_train, y_train = loader.load_training_data()
                    regressor.fit(X_train, y_train)
                    X_test, y_test = loader.load_testing_data()

                    train_r2s.append(regressor.score(X_train, y_train))
                    test_r2s.append(regressor.score(X_test, y_test))

                records.append({
                    "feature": feature,
                    "timepoint": timepoint,
                    "context": context,
                    "response": response,
                    "model": model,
                    # 2.5th / 97.5th percentiles of the collected R^2 values.
                    "train_ci": np.percentile(train_r2s, [2.5, 97.5]),
                    "train_mean": np.mean(train_r2s),
                    "test_ci": np.percentile(test_r2s, [2.5, 97.5]),
                    "test_mean": np.mean(test_r2s),
                })

ci_df = pd.DataFrame(records, columns=ci_columns)
ci_df.to_csv("ci.csv", index=False)

stored_results/temporal/topo_0_metric_15_C/C_0-ACTIVITY/SVR/b3809d87-3a1f-4e91-b40e-7d7e3c3a01ac.json
{'C': 0.5623413251903492, 'epsilon': 0.1778279410038923, 'kernel': 'rbf'}
Running SVR on C ACTIVITY 0.0
Epoch: 0
Training
Testing
APPENDING
stored_results/temporal/topo_0_metric_15_C/C_0-GROWTH/SVR/f5fed2a2-b60f-4164-82d7-67b19f7b31a8.json
{'C': 0.23713737056616557, 'epsilon': 0.07498942093324559, 'kernel': 'rbf'}
Running SVR on C GROWTH 0.0
Epoch: 0
Training
Testing
APPENDING
stored_results/temporal/topo_0_metric_15_C/C_0-SYMMETRY/SVR/0862635e-30d3-405e-93bc-3027c0751575.json
{'C': 0.23713737056616557, 'epsilon': 0.07498942093324559, 'kernel': 'rbf'}
Running SVR on C SYMMETRY 0.0
Epoch: 0
Training
Testing
APPENDING
stored_results/temporal/topo_1_metric_15_C/C_1-ACTIVITY/SVR/ae32e544-3b40-43fe-ba27-ef3d4e93c006.json
{'C': 0.5623413251903492, 'epsilon': 0.1778279410038923, 'kernel': 'rbf'}
Running SVR on C ACTIVITY 1.0
Epoch: 0
Training
Testing
APPENDING
stored_results/temporal/topo_1_m

In [None]:
# Report R^2 for whichever model/response/context the evaluation loop
# processed last. NOTE: this cell relies on `model`, `response`, `context`,
# `regressor` and the train/test arrays still being in the kernel namespace
# from that loop; it cannot run on a fresh kernel by itself.
summary_title = f"{model} performance on {response} ({context}) with {feature} features"
print(summary_title)
train_r2 = regressor.score(X_train, y_train)
print("Train R-squared: ", train_r2)
test_r2 = regressor.score(X_test, y_test)
print("Test R-squared: ", test_r2)

RF performance on GROWTH (C) with topo features
Train R-squared:  0.5905894890443987
Test R-squared:  0.26248403098635376
