In [1]:
import os
import pandas as pd
from data_copying_tests import C_T
from sklearn.cluster import KMeans


In [None]:
# All relative paths used below are resolved from this directory ('~' is
# expanded to the current user's home directory first).
project_root = os.path.expanduser('~/git/mf-copula/')
os.chdir(project_root)

In [3]:
# Input directories (relative to the current working directory), one per
# data source.
human_dir, hybrid_dir, symbolic_dir = (
    './HNTT/preprocessed/human',
    './HNTT/preprocessed/hybrid',
    './HNTT/preprocessed/symbolic',
)

In [4]:
def _load_csv_frames(directory):
    """Read every ``*.csv`` file directly inside ``directory`` into a DataFrame.

    File names are sorted before loading: ``os.listdir`` returns entries in
    arbitrary order, and these lists are later concatenated in list order, so
    an unsorted listing can make downstream results differ between runs or
    machines.

    Parameters
    ----------
    directory : str
        Directory to scan (not recursive).

    Returns
    -------
    list of pandas.DataFrame
        One frame per ``.csv`` file, ordered by file name.
    """
    return [
        pd.read_csv(os.path.join(directory, file_name))
        for file_name in sorted(os.listdir(directory))
        if file_name.endswith('.csv')
    ]


human_paths = _load_csv_frames(human_dir)
hybrid_paths = _load_csv_frames(hybrid_dir)
symbolic_paths = _load_csv_frames(symbolic_dir)

In [5]:
# Load every generated CSV (everything except 'original.csv') from each run
# sub-directory of the experiment output folder.
experiments_dir = './synthetic/output/ntt-human-mf-ban10-chol2-10'
mf_ban10_chol2_10_paths = [
    pd.read_csv(os.path.join(experiments_dir, synthetic_dir, csv), index_col=0)
    # os.listdir order is arbitrary; sort so the frame order is reproducible.
    for synthetic_dir in sorted(os.listdir(experiments_dir))
    # Skip stray plain files: calling os.listdir on them would raise.
    if os.path.isdir(os.path.join(experiments_dir, synthetic_dir))
    # Only keep run directories that contain more than one entry.
    and len(os.listdir(os.path.join(experiments_dir, synthetic_dir))) > 1
    for csv in sorted(os.listdir(os.path.join(experiments_dir, synthetic_dir)))
    if csv != 'original.csv'
]

In [6]:
# Load every generated CSV (everything except 'original.csv') from each run
# sub-directory of the experiment output folder.
# NOTE: this rebinds `experiments_dir`, which an earlier cell also assigns.
experiments_dir = './synthetic/output/ntt-human-mf-ban10-chol2-150'
mf_ban10_chol2_150_paths = [
    pd.read_csv(os.path.join(experiments_dir, synthetic_dir, csv), index_col=0)
    # os.listdir order is arbitrary; sort so the frame order is reproducible.
    for synthetic_dir in sorted(os.listdir(experiments_dir))
    # Skip stray plain files: calling os.listdir on them would raise.
    if os.path.isdir(os.path.join(experiments_dir, synthetic_dir))
    # Only keep run directories that contain more than one entry.
    and len(os.listdir(os.path.join(experiments_dir, synthetic_dir))) > 1
    for csv in sorted(os.listdir(os.path.join(experiments_dir, synthetic_dir)))
    if csv != 'original.csv'
]

In [7]:
# Load the '.txt' mask files that sit next to the preprocessed CSVs.
# The directory *listing* must be iterated: looping over the directory string
# itself yields single characters, none of which ends with '.txt', so the
# lists would always be empty.
# NOTE(review): pd.read_csv treats the first line as a header by default;
# confirm that matches the layout of the mask files.
human_masks = [
    pd.read_csv(os.path.join(human_dir, mask))
    for mask in sorted(os.listdir(human_dir))
    if mask.endswith('.txt')
]
hybrid_masks = [
    pd.read_csv(os.path.join(hybrid_dir, mask))
    for mask in sorted(os.listdir(hybrid_dir))
    if mask.endswith('.txt')
]
symbolic_masks = [
    pd.read_csv(os.path.join(symbolic_dir, mask))
    for mask in sorted(os.listdir(symbolic_dir))
    if mask.endswith('.txt')
]

In [8]:
def reshape(df_list, dim=15):
    """Stack the frames row-wise and regroup their values into rows of ``dim``.

    The frames are concatenated in list order with a fresh integer index.
    Trailing rows that do not complete a group of ``dim`` rows are dropped,
    and the remaining values are returned as a 2-D NumPy array with ``dim``
    columns.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to concatenate along the row axis.
    dim : int, default 15
        Number of columns of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(-1, dim)``.
    """
    stacked = pd.concat(df_list, axis=0, ignore_index=True)
    n_rows = stacked.shape[0]
    # Largest row count that is a whole multiple of `dim`.
    usable_rows = n_rows - n_rows % dim
    values = stacked.to_numpy()[:usable_rows]
    return values.reshape(-1, dim)

In [9]:
def C_T_fit(T, Qm, Pn, tau=0, n_clusters=5, random_state=None):
    """Fit KMeans on ``T``, label all three samples, and evaluate ``C_T``.

    Parameters
    ----------
    T, Qm, Pn : array-like
        Samples handed to ``KMeans`` and ``C_T``. ``T`` is the sample the
        clustering is fitted on. (Presumably training, generated and held-out
        test samples respectively — confirm against ``data_copying_tests``.)
    tau : float, default 0
        Forwarded unchanged as the last argument of ``C_T``.
    n_clusters : int, default 5
        Number of KMeans clusters.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``KMeans``. KMeans initialisation is random, so pass
        an integer to make the cluster labels, and therefore the returned
        value, reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    The value returned by ``C_T`` for the labelled samples.
    """
    KM = KMeans(n_clusters=n_clusters, random_state=random_state).fit(T)

    # Assign every sample to the clusters learned from T.
    T_labels = KM.predict(T)
    Pn_labels = KM.predict(Pn)
    Qm_labels = KM.predict(Qm)

    # C_T takes the samples in a different order than this wrapper does.
    return C_T(Pn, Pn_labels, Qm, Qm_labels, T, T_labels, tau)

In [10]:
# Number of columns used for every reshaped array in this cell.
dim = 15

human_reshaped, hybrid_reshaped, symbolic_reshaped = (
    reshape(frames, dim=dim)
    for frames in (human_paths, hybrid_paths, symbolic_paths)
)

### Human - Hybrid

In [11]:
# Clusters are fitted on the human data; the hybrid data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=human_reshaped,
    Qm=hybrid_reshaped,
    Pn=human_reshaped,
    tau=0.001,
    n_clusters=len(human_paths),
)

17.406137930899995

In [12]:
# Clusters are fitted on the hybrid data; the human data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=hybrid_reshaped,
    Qm=human_reshaped,
    Pn=hybrid_reshaped,
    tau=0.001,
    n_clusters=len(hybrid_paths),
)

16.319875329728834

### Human - Symbolic

In [13]:
# Clusters are fitted on the human data; the symbolic data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=human_reshaped,
    Qm=symbolic_reshaped,
    Pn=human_reshaped,
    tau=0.001,
    n_clusters=len(human_paths),
)

14.95854642959297

In [14]:
# Clusters are fitted on the symbolic data; the human data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=symbolic_reshaped,
    Qm=human_reshaped,
    Pn=symbolic_reshaped,
    tau=0.001,
    n_clusters=len(symbolic_paths),
)

16.026263986031097

### Hybrid - Symbolic

In [15]:
# Clusters are fitted on the hybrid data; the symbolic data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=hybrid_reshaped,
    Qm=symbolic_reshaped,
    Pn=hybrid_reshaped,
    tau=0.001,
    n_clusters=len(hybrid_paths),
)

14.009729982166633

In [16]:
# Clusters are fitted on the symbolic data; the hybrid data is passed as Qm.
# NOTE(review): Pn is the same array as T here — confirm that C_T is meant to
# be evaluated without a separate held-out sample.
C_T_fit(
    T=symbolic_reshaped,
    Qm=hybrid_reshaped,
    Pn=symbolic_reshaped,
    tau=0.001,
    n_clusters=len(symbolic_paths),
)

15.289552473302415

### Human - Synthetic

In [17]:
# Reshape both synthetic runs with the same `dim` as the real data.
mf_ban10_chol2_10_reshaped, mf_ban10_chol2_150_reshaped = (
    reshape(frames, dim=dim)
    for frames in (mf_ban10_chol2_10_paths, mf_ban10_chol2_150_paths)
)

In [18]:
# Clusters are fitted on the human data; the chol2-10 synthetic data is Qm
# and the hybrid data is Pn.
# NOTE(review): the `// 8` divisor for n_clusters is not explained in this
# notebook — confirm the intended value.
C_T_fit(
    T=human_reshaped,
    Qm=mf_ban10_chol2_10_reshaped,
    Pn=hybrid_reshaped,
    tau=0,
    n_clusters=len(human_paths) // 8,
)

-10.00079246206611

In [19]:
# Clusters are fitted on the human data; the chol2-10 synthetic data is Qm
# and the symbolic data is Pn.
# NOTE(review): the `// 8` divisor for n_clusters is not explained in this
# notebook — confirm the intended value.
C_T_fit(
    T=human_reshaped,
    Qm=mf_ban10_chol2_10_reshaped,
    Pn=symbolic_reshaped,
    tau=0,
    n_clusters=len(human_paths) // 8,
)

-6.006523840146959

In [20]:
# Clusters are fitted on the human data; the chol2-150 synthetic data is Qm
# and the hybrid data is Pn.
# NOTE(review): the `// 8` divisor for n_clusters is not explained in this
# notebook — confirm the intended value.
C_T_fit(
    T=human_reshaped,
    Qm=mf_ban10_chol2_150_reshaped,
    Pn=hybrid_reshaped,
    tau=0,
    n_clusters=len(human_paths) // 8,
)

9.82990729524092

In [21]:
# Clusters are fitted on the human data; the chol2-150 synthetic data is Qm
# and the symbolic data is Pn.
# NOTE(review): the `// 8` divisor for n_clusters is not explained in this
# notebook — confirm the intended value.
C_T_fit(
    T=human_reshaped,
    Qm=mf_ban10_chol2_150_reshaped,
    Pn=symbolic_reshaped,
    tau=0,
    n_clusters=len(human_paths) // 8,
)

13.621126082126887