In [1]:
import json

import pandas as pd
import numpy as np

import networkx as nx
from graph_encoders import *

from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

In [2]:
# Read the train and test feature tables for the crop-mapping data set.
X_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/crop_mapping/nodewise_PCA_train.csv',
        '../data/crop_mapping/nodewise_PCA_test.csv',
    )
)

# Read the node definitions. Later cells use the keys of `nodes` as node
# labels and each value as a column selector into X_train
# (X_train[nodes[label]]).
with open('../data/crop_mapping/reduced_nodes.json', mode='r') as nodes_file:
    nodes = json.load(nodes_file)

In [3]:
# Node labels are the keys of the `nodes` mapping, kept in the mapping's order.
node_labels = list(nodes)

In [4]:
# Number of cross-validation folds.
k = 5

# Shuffled K-fold splitter; the fixed integer seed keeps the fold assignment
# the same on every call to split().
kfold = KFold(n_splits=k, shuffle=True, random_state=12345)

In [5]:
# Determine representation maps by fitting linear maps between each pair of nodes.
#
# For every ordered pair (head, tail) of distinct nodes, a no-intercept
# least-squares map from the head's columns to the tail's columns is fitted on
# the training rows of each K-fold split, and the per-fold coefficient matrices
# are averaged. all_rep_maps[head][tail] ends up with shape
# (n_tail_columns, n_head_columns).

# kfold is seeded with an integer, so it yields the same folds on every call:
# materialize the training-row indices once instead of re-splitting for every
# pair. The held-out indices are not needed because nothing is evaluated here.
fold_train_indices = [train_index for train_index, _ in kfold.split(X_train)]

all_rep_maps = {node: {} for node in node_labels}
for head in node_labels:
    X_head = X_train[nodes[head]]
    for tail in node_labels:
        if tail == head:
            continue
        X_tail = X_train[nodes[tail]]

        fold_maps = []
        for train_index in fold_train_indices:
            # A single multi-output fit solves the least-squares problem for
            # all tail columns at once; coef_ is (n_targets, n_features), the
            # same layout as stacking one coefficient vector per tail column.
            model = LinearRegression(fit_intercept=False)
            model.fit(X_head.iloc[train_index], X_tail.iloc[train_index])
            fold_maps.append(model.coef_)

        # Average the coefficient matrices over the folds.
        all_rep_maps[head][tail] = np.mean(fold_maps, axis=0)

In [6]:
# Write the nested {head: {tail: coefficient matrix}} mapping to JSON.
# NumpyArrayEncoder is used as the encoder class; it presumably comes from the
# graph_encoders star import and converts NumPy arrays to JSON-compatible
# values -- confirm in graph_encoders.
with open('../data/crop_mapping/edge_maps.json', mode='w') as edge_maps_file:
    json.dump(all_rep_maps, edge_maps_file, cls=NumpyArrayEncoder)