In [1]:
# Run this cell only if you use a custom virtual environment: it adds that environment's site-packages directory to the package search path.
import sys
import os
# Move up one level so the working directory is the project directory.
# NOTE: the move is relative to the current directory, so re-running this cell
# moves up one more level each time.
os.chdir("..")
# Change this to the name of your virtual environment directory.
kernel_name = ".venv_mp"
# Build the path with os.path.join so the platform's own separator is used
# instead of hard-coded backslashes. The "Lib/site-packages" layout is the one
# used by Windows virtual environments; adjust it for other platforms.
site_packages_dir = os.path.join(os.getcwd(), kernel_name, "Lib", "site-packages")
# Guard the append so repeated runs do not add duplicate entries to sys.path.
if site_packages_dir not in sys.path:
    sys.path.append(site_packages_dir)

In [2]:
from data_reader import read_data
from node import get_node_data
import pandas as pd
import numpy as np

In [3]:
# Load the raw "gnfuv" dataset through the project's reader.
raw_data = read_data("gnfuv")
# Node identifiers "pi2" .. "pi5".
nodes = [f"pi{node_number}" for node_number in range(2, 6)]

In [4]:
from modelling import select_model_data  
from hypothesis_testing import test_hypothesis
from similar import get_similar_pairs_nodes

def get_results(data, standardised):
    """Run the "svr" and "lsvr" hypothesis tests for experiments 1 to 3.

    For each experiment, ``get_similar_pairs_nodes`` is called with ``data``
    and ``[experiment, standardised]``. Experiments for which it reports no
    similar pairs are skipped. For the others, the experiment number and its
    similar pairs are printed, ``test_hypothesis`` is run once with ``"svr"``
    and once with ``"lsvr"``, and the two result frames are stacked.

    Parameters
    ----------
    data
        Forwarded unchanged to ``get_similar_pairs_nodes``.
    standardised
        Flag forwarded to ``get_similar_pairs_nodes`` and stored in the
        ``"std"`` column of the result.

    Returns
    -------
    pandas.DataFrame
        All per-experiment frames concatenated with a fresh index, with the
        extra columns ``"kernel"`` ("rbf" for svr rows, "linear" for lsvr
        rows), ``"experiment"``, ``"std"`` and ``"asmmd"``. An empty
        DataFrame is returned when no experiment has any similar pairs.
    """
    results = []
    for experiment in range(1, 4):
        node_data, similar_pairs, similar_nodes, asmmd, mmd_scores, ocsvm_scores = get_similar_pairs_nodes(data, [experiment, standardised])
        if similar_pairs != []:
            print(f"Experiment {experiment} {similar_pairs}", end=" ")

            model_data = select_model_data(node_data, similar_nodes)
            svr_df = test_hypothesis("svr", model_data, similar_pairs, similar_nodes, mmd_scores, ocsvm_scores)
            svr_df["kernel"] = "rbf"
            lsvr_df = test_hypothesis("lsvr", model_data, similar_pairs, similar_nodes, mmd_scores, ocsvm_scores)
            # Broadcast the label over lsvr_df's own rows; sizing it from
            # svr_df fails whenever the two frames have different lengths.
            lsvr_df["kernel"] = "linear"

            df = pd.concat([svr_df, lsvr_df], ignore_index=True)
            df["experiment"] = experiment
            df["std"] = standardised
            # asmmd is repeated explicitly (not broadcast) so that a
            # non-scalar value is still stored once per row.
            df["asmmd"] = [asmmd] * len(df)
            results.append(df)

    # pd.concat raises ValueError on an empty list, so handle the case where
    # every experiment was skipped.
    if not results:
        return pd.DataFrame()
    return pd.concat(results, ignore_index=True)

In [5]:
from node import create_samples, save_samples
# Sample count handed to create_samples; the later non-standardised cell reuses it.
n_samples = 100
# First pass: build and save the samples with the standardised flag switched on.
standardised = True
std_samples = create_samples(n_samples, raw_data, standardised)
save_samples(std_samples, "other", standardised)

In [6]:
# Second pass: same sample count (n_samples from the previous cell), with the
# standardised flag switched off. Note that this rebinds the global `standardised`.
standardised = False
samples = create_samples(n_samples, raw_data, standardised)
save_samples(samples, "other", standardised)

In [7]:
def run(samples, standardised):
    """Compute and save the results for every sample, one CSV file per sample.

    Parameters
    ----------
    samples
        Mapping indexed by experiment number (1, 2 and 3); each entry is
        indexed by sample position. The number of samples is taken from
        ``samples[1]``.
    standardised
        Flag forwarded to ``get_results``. It also selects the output
        sub-directory: "standardised" when truthy, "original" otherwise.

    Side effects
    ------------
    Prints a progress line per sample and writes
    ``results/GNFUV/<data_type>/sample_<k>.csv`` (k starts at 1), creating
    the output directory if it does not exist yet.
    """
    # Imported locally so the function does not depend on another cell's imports.
    import os

    data_type = "standardised" if standardised else "original"
    output_dir = f"results/GNFUV/{data_type}"
    # to_csv does not create missing parent directories, so create them first.
    os.makedirs(output_dir, exist_ok=True)

    for sample_id in range(len(samples[1])):
        print(f"Sample {sample_id+1}", end=": ")
        # Pick this sample's entry out of each of the three experiments.
        data = {experiment: samples[experiment][sample_id] for experiment in range(1, 4)}

        results = get_results(data, standardised)
        results.to_csv(f"{output_dir}/sample_{sample_id+1}.csv", index=False)
        print()

In [None]:
# Process every standardised sample; the CSV files go to results/GNFUV/standardised/.
run(std_samples, standardised = True)

Sample 1: Experiment 1 [('pi2', 'pi3'), ('pi3', 'pi4')] Experiment 2 [('pi2', 'pi3'), ('pi3', 'pi5')] Experiment 3 [('pi2', 'pi4')] 
Sample 2: Experiment 1 [('pi2', 'pi3'), ('pi3', 'pi4')] Experiment 2 [('pi2', 'pi3'), ('pi3', 'pi5')] 

In [None]:
# Process every non-standardised sample; the CSV files go to results/GNFUV/original/.
run(samples, standardised = False)