In [2]:
import sys
sys.path.insert(0, '../../src')
from utils import preprocession as prep
from utils.metrics import compute_metric, add_to_metrics, columns

import os
import pickle
import pandas as pd
import numpy as np

from openTSNE import TSNE

from tqdm.notebook import tqdm
tqdm.pandas()

## TSNE Embeddings

In [11]:
def embedTSNE(folder_name, data_name, random_state=None):
    """Fit one t-SNE embedding per training split and project test splits onto it.

    For every key ``p`` of the training reactions this function:

    1. imputes the training frame and fits a 2-D openTSNE embedding on it,
    2. pickles the fitted embedding to ``TSNE_{p}.pkl``,
    3. writes the projected training points to ``train_embedding_{p}.csv``,
    4. projects test split(s) with the same embedding and writes them to
       ``test_embedding_{p}_{q}.csv``.

    All files are written below ``../../embeddings/TSNE/{folder_name}/{data_name}``
    (relative to the current working directory); the directory is created if
    it does not exist.

    Parameters
    ----------
    folder_name : str
        Dataset folder, forwarded to ``prep.load_data`` and used in the
        output path.
    data_name : str
        Dataset variant, forwarded to ``prep.load_data`` and used in the
        output path.
    random_state : int or None, optional
        Forwarded to ``openTSNE.TSNE``. The default ``None`` keeps the
        previous (unseeded) behaviour; pass an integer to make the fitted
        embeddings reproducible across runs.

    Returns
    -------
    None
        Results are only written to disk.

    Notes
    -----
    Assumes ``prep.load_data`` returns ``(train, test)`` where each is a
    2-tuple whose second item maps split keys to DataFrames -- TODO confirm
    against ``utils.preprocession``.
    """
    train, test = prep.load_data(folder_name, data_name)
    _, train_reactions = train
    _, test_reactions = test

    method_path = f"../../embeddings/TSNE/{folder_name}/{data_name}"
    os.makedirs(method_path, exist_ok=True)

    for p in tqdm(train_reactions.keys()):
        train_data = prep.impute_dataframe(train_reactions[p])
        embedding = TSNE(n_components=2, random_state=random_state).fit(train_data.values)
        with open(f'{method_path}/TSNE_{p}.pkl', 'wb') as file:
            pickle.dump(embedding, file)

        # NOTE(review): the training points are re-projected through
        # transform() rather than read from the fitted embedding itself --
        # kept as in the original; confirm this is intended.
        X = pd.DataFrame(embedding.transform(train_data.values),
                         index=train_reactions[p].index, columns=['x', 'y'])
        X.to_csv(f'{method_path}/train_embedding_{p}.csv')

        # Test data is imputed with the *training* column means; the means do
        # not change inside the inner loop, so compute them once.
        train_mean = train_data.mean()

        # Split 0 is projected against every test split, any other split only
        # against its own (presumably 0 is the reference split -- TODO confirm).
        test_list = test_reactions.keys() if p == 0 else [p]
        for q in test_list:
            test_data = prep.impute_dataframe(test_reactions[q], mean=train_mean)

            # The rows come from test split q, so the index must come from q
            # as well (indexing with split p mislabels or fails when q != p).
            Y = pd.DataFrame(embedding.transform(test_data.values),
                             index=test_reactions[q].index, columns=['x', 'y'])
            Y.to_csv(f'{method_path}/test_embedding_{p}_{q}.csv')

## Datasets

### Smartvote Original

In [13]:
# Fit and store the t-SNE embeddings for the 'Original' variant of Smartvote.
embedTSNE(folder_name='Smartvote', data_name='Original')

  0%|          | 0/10 [00:00<?, ?it/s]

### Smartvote Binary

In [12]:
# Fit and store the t-SNE embeddings for the 'Binary' variant of Smartvote.
embedTSNE(folder_name='Smartvote', data_name='Binary')

  0%|          | 0/10 [00:00<?, ?it/s]

### Synthetic Multiclass

In [7]:
# Fit and store the t-SNE embeddings for the 'Original' variant of Synthetic_60_50.
embedTSNE(folder_name='Synthetic_60_50', data_name='Original')

  0%|          | 0/10 [00:00<?, ?it/s]

### Synthetic Binary

In [6]:
# Fit and store the t-SNE embeddings for the 'Binary' variant of Synthetic_60_50.
embedTSNE(folder_name='Synthetic_60_50', data_name='Binary')

  0%|          | 0/10 [00:00<?, ?it/s]