In [1]:
import json

import pandas as pd
import numpy as np

import networkx as nx
from graph_encoders import *

from linear_algebra_utils import *
from digraph_algorithms import *
from quiver_pca import *

In [2]:
# Read the train/test tables produced upstream (file names say "node_reduced").
X_train = pd.read_csv('../data/crop_mapping/node_reduced_train.csv')
X_test = pd.read_csv('../data/crop_mapping/node_reduced_test.csv')

# Decode the stored quiver representation with the project-specific JSON decoder.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so the result
# does not depend on the platform's locale default.
with open('../data/crop_mapping/quiver_representation.json', 'r', encoding='utf-8') as file:
    Q = json.load(file, cls=NetworkXQuiverRepresentationDecoder)

# Plain JSON payload; it is later handed to orthogonalize_section_basis as Q_nodes.
with open('../data/crop_mapping/reduced_nodes.json', 'r', encoding='utf-8') as file:
    Q_nodes = json.load(file)

In [4]:
# Augment quiver with root vertex and generate space of sections.
# NOTE(review): the return value of generate_space_of_sections is discarded, and the
# following cell reads Qplus.graph['sections'] — presumably the helper stores its
# result on Qplus in place; confirm against the helper's definition.
Qplus = augment_DAG_with_root(Q)
generate_space_of_sections(Qplus)

In [5]:
# Generate orthonormal basis for the space of sections.
# The raw sections are read from the graph-level attribute dict of Qplus.
sections = Qplus.graph['sections']
# NOTE(review): Q_nodes (loaded from reduced_nodes.json) is passed along with the
# sections; how the helper uses it is not visible here — confirm in its definition.
normalized_sections = orthogonalize_section_basis(sections, Q_nodes)

In [7]:
# Check that the sections are orthogonal: flatten each section once, then display
# the table of pairwise dot products rounded to one decimal place. An orthonormal
# basis shows 1.0 on the diagonal and (signed) zeros everywhere else.
flat_sections = [section.reshape(-1) for section in normalized_sections]
[[float(np.round(left.dot(right), 1)) for left in flat_sections] for right in flat_sections]

[[1.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0],
 [-0.0, 1.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0],
 [0.0, -0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0],
 [-0.0, -0.0, 0.0, 1.0, 0.0, 0.0, -0.0, -0.0, -0.0],
 [0.0, 0.0, 0.0, 0.0, 1.0, -0.0, 0.0, -0.0, 0.0],
 [0.0, -0.0, -0.0, 0.0, -0.0, 1.0, -0.0, -0.0, -0.0],
 [-0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 1.0, 0.0, -0.0],
 [0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 1.0, -0.0],
 [-0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 1.0]]

In [8]:
# Project onto orthonormal basis of sections, in the section basis' coordinates
def _project_rows(frame, basis):
    """Apply project_onto_subspace to every row of `frame` and stack the results.

    Rows are visited positionally (0 .. len(frame) - 1); the per-row results are
    concatenated as columns and then transposed, so each input row yields one
    output row.
    """
    per_row = [project_onto_subspace(frame.iloc[pos], basis) for pos in range(len(frame))]
    return pd.concat(per_row, axis=1).T


X_train_proj = _project_rows(X_train, normalized_sections)
X_test_proj = _project_rows(X_test, normalized_sections)

In [9]:
# One label per basis section (Q1_0, Q1_1, ...), applied to both projected frames.
n_sections = len(normalized_sections)
Q_cols = ['Q1_' + str(idx) for idx in range(n_sections)]
for projected in (X_train_proj, X_test_proj):
    projected.columns = Q_cols

In [11]:
# Preview the first rows of the projected training frame (head() defaults to 5 rows).
X_train_proj.head()

Unnamed: 0,Q1_0,Q1_1,Q1_2,Q1_3,Q1_4,Q1_5,Q1_6,Q1_7,Q1_8
0,0.969148,-0.981289,0.259957,2.119393,0.15936,-0.441277,-0.167317,0.947974,0.52421
1,-1.191991,-0.800139,1.348693,2.880147,-0.514214,-0.537201,0.539185,-1.276819,0.153182
2,-1.625763,1.478058,-1.139027,-0.07562,0.4908,-0.671859,0.581867,-0.081676,-0.232386
3,2.239337,0.527406,1.256352,-0.328381,-1.223439,1.641494,-1.120455,0.766963,0.221125
4,0.978996,-0.681845,1.901411,-0.297084,-0.949661,1.015061,-0.190105,-0.48883,-0.269128


In [12]:
# Write both projected frames to CSV, overwriting any existing file and leaving
# the row index out of the output.
for frame, destination in (
    (X_train_proj, '../data/crop_mapping/quiver_invariant_train.csv'),
    (X_test_proj, '../data/crop_mapping/quiver_invariant_test.csv'),
):
    frame.to_csv(destination, mode='w', index=False)