In [18]:
import sys
sys.path.append("../") 

from src.utils.geospatial import build_distances_mx 
from src.utils.gwnet_utils import get_adjacency_matrix
import pandas as pd
import numpy as np
from itertools import combinations
from typing import List, Dict, Tuple

# Getting Specific

In [2]:
# Read the centroid-positions table from the HDF file in the 04_feature data folder.
centroids = pd.read_hdf(
    path_or_buf='../data/04_feature/power-centroids-positions-2000-2015.hdf',
)

In [3]:
# Unique district identifiers, taken from the 'nuts_id' level of the column index.
all_districts = centroids.columns.get_level_values('nuts_id').unique()

# Every unordered pair of distinct districts.
node_pairs = [*combinations(all_districts, 2)]

In [4]:
# Districts used for the small example matrices built below.
targets = [
    'DEF07',
    'DEF0C',
    'DEF05',
    'DEF0E',
    'DEF0B',
]

In [7]:
# Per-column median of the centroid table, restricted to rows labelled
# 2005-01-01 through 2015-12-31 and to the columns of all districts.
centroids_median = (
    centroids
    .loc['2005-01-01':'2015-12-31', all_districts]
    .median()
)

# Pairwise distances between the target districts; each target's median
# entry is passed to the helper as a plain tuple.
D = build_distances_mx(
    nodes_coords={
        district_id: tuple(centroids_median[district_id])
        for district_id in targets
    },
    targets=targets,
)

# Distance matrix should be square

In [8]:
# Sanity check: the distance matrix needs as many rows as columns.
row_count, col_count = D.shape[0], D.shape[1]
if row_count != col_count:
    raise RuntimeError('D is not squared')

# Building the adjacency matrix

In [11]:
def build_adjacency_mx(D: pd.DataFrame) -> pd.DataFrame:
    """Turn a distance matrix into adjacency weights with a Gaussian kernel.

    Each entry ``d`` of ``D`` is mapped to ``exp(-(d / sigma) ** 2)``, where
    ``sigma`` is the standard deviation taken over all entries of ``D``.

    Args:
        D: Distance matrix.

    Returns:
        A frame with the same labels as ``D`` holding the kernel weights.
    """
    sigma = D.values.std()
    squared_ratio = np.square(D / sigma)
    return np.exp(-squared_ratio)

def prune_adjacency_mx(A: pd.DataFrame, threshold_distance: float, distances: pd.DataFrame = None) -> pd.DataFrame:
    """Zero out adjacency weights of node pairs that are too far apart.

    Args:
        A: Adjacency matrix to prune. It is not modified.
        threshold_distance: Entries whose distance is strictly greater than
            this value are set to 0.
        distances: Distance matrix matching ``A``. When omitted, the
            notebook-level ``D`` is used, which keeps existing two-argument
            calls working. Pass it explicitly whenever ``A`` was not derived
            from that global ``D``.

    Returns:
        A deep copy of ``A`` with the too-distant entries replaced by 0.
    """
    if distances is None:
        # Backward-compatible fallback: the original implementation always
        # read the global distance matrix.
        distances = D

    A_sparse = A.copy(deep=True)
    A_sparse[distances > threshold_distance] = 0
    return A_sparse

In [19]:
def get_adj_mx(targets: List[str], nodes_coords: Dict[str, Tuple[float, ...]], threshold_distance: float) -> pd.DataFrame:
    """Build a pruned adjacency matrix for the given target nodes.

    Args:
        targets: Identifiers of the nodes to include.
        nodes_coords: Coordinates per node identifier. It must contain every
            entry of ``targets``; extra keys are ignored.
        threshold_distance: Node pairs whose distance is strictly greater than
            this value get an adjacency weight of 0.

    Returns:
        The kernel-weighted adjacency matrix with too-distant pairs zeroed.

    Raises:
        KeyError: If a target has no entry in ``nodes_coords``.
    """
    # Use the coordinates that were passed in. The previous version ignored
    # this argument and silently read the notebook-global `centroids_median`.
    D = build_distances_mx(
        targets=targets,
        nodes_coords={district_id: tuple(nodes_coords[district_id]) for district_id in targets},
    )

    A = build_adjacency_mx(D)

    # Prune against the distance matrix computed just above rather than via a
    # helper that consults the notebook-global `D`, which may describe a
    # different set of nodes.
    A_sparse = A.copy(deep=True)
    A_sparse[D > threshold_distance] = 0
    return A_sparse

In [12]:
# Define the call's inputs explicitly so the cell runs on a fresh kernel;
# neither name is assigned anywhere earlier in the notebook.
nodes_coords = {district_id: tuple(centroids_median[district_id]) for district_id in targets}
threshold_distance = 100  # NOTE(review): taken from the manual pruning cell further down - confirm the intended cut-off

A = get_adj_mx(targets, nodes_coords, threshold_distance)

A

Unnamed: 0,DEF07,DEF0C,DEF05,DEF0E,DEF0B
DEF07,1.0,0.348723,0.006654,4.9e-05,0.002148
DEF0C,0.348723,1.0,0.004867,0.000388,0.059107
DEF05,0.006654,0.004867,1.0,0.193216,0.018243
DEF0E,4.9e-05,0.000388,0.193216,1.0,0.075377
DEF0B,0.002148,0.059107,0.018243,0.075377,1.0


In [13]:
# Zero every adjacency entry whose distance in D exceeds 100; A itself is left untouched.
A_sparse = prune_adjacency_mx(A, threshold_distance=100)

A_sparse

Unnamed: 0,DEF07,DEF0C,DEF05,DEF0E,DEF0B
DEF07,1.0,0.348723,0.006654,4.9e-05,0.002148
DEF0C,0.348723,1.0,0.004867,0.000388,0.059107
DEF05,0.006654,0.004867,1.0,0.193216,0.018243
DEF0E,4.9e-05,0.000388,0.193216,1.0,0.075377
DEF0B,0.002148,0.059107,0.018243,0.075377,1.0


In [14]:
# Number of entries of A that are not equal to zero.
(A != 0).sum().sum()

25

In [138]:
# Number of entries of A_sparse that are not equal to zero.
(A_sparse != 0).sum().sum()

7645

In [140]:
# Strictly lower triangle of A_sparse (diagonal excluded). `k=-1` drops the
# diagonal directly; subtracting an identity matrix only gives the same result
# when every diagonal entry is exactly 1.
A_tril = pd.DataFrame(
    data=np.tril(A_sparse, k=-1),
    columns=A_sparse.columns,
    index=A_sparse.index,
)

A_tril

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEF08,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEA43,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEF07,0.966466,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEA5B,0.000000,0.0,0.757773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DE221,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.719866,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DE239,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.687294,0.0,0.0,0.000000,0.0,0.0,0.0
DE21L,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.963364,0.0,0.0,0.000000,0.0,0.0,0.717727,0.0,0.0,0.0


In [2]:
import pickle

In [None]:

# Persist the pruned adjacency matrix. The context manager closes the file
# handle, so the pickle is fully flushed to disk before the cell finishes.
with open('../data/05_model_input/adj_mx.pkl', 'wb') as adj_file:
    pickle.dump(A_sparse, adj_file)

In [None]:
# Reload the stored adjacency matrix; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code - only load files you produced yourself.
with open('../data/05_model_input/adj_mx.pkl', 'rb') as adj_file:
    A_sparse = pickle.load(adj_file)

A_sparse

In [None]:
targets = ['DEF07', 'DEF0C', 'DEF05', 'DEF0E', 'DEF0B']

# Select rows with the index mask and columns with the column mask. The two
# masks were swapped before, which only worked because the row and column
# labels of A_sparse are identical. Row/column order follows A_sparse, not `targets`.
A_sparse_short = A_sparse.loc[A_sparse.index.isin(targets), A_sparse.columns.isin(targets)]

# The context manager closes the file handle once the sub-matrix is written.
with open('../data/05_model_input/adj_mx_short.pkl', 'wb') as adj_file:
    pickle.dump(A_sparse_short, adj_file)

In [None]:
# Display the adjacency sub-matrix restricted to the target districts.
A_sparse_short

In [5]:
# Reload the stored adjacency matrix; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code - only load files you produced yourself.
with open('../data/05_model_input/adj_mx.pkl', 'rb') as adj_file:
    A_sparse = pickle.load(adj_file)

A_sparse

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,1.000000,0.0,0.000000,0.966466,0.000000,0.000000,0.911064,0.0,0.0,0.833130,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF08,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.756653,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA43,0.000000,0.0,1.000000,0.000000,0.757773,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF07,0.966466,0.0,0.000000,1.000000,0.000000,0.000000,0.818730,0.0,0.0,0.845349,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA5B,0.000000,0.0,0.757773,0.000000,1.000000,0.646228,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,1.0,0.000000,0.0,0.000000,0.0
DE221,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.719866,0.0,0.0,0.000000,0.0,0.0,1.000000,0.0,0.717727,0.0
DE239,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.687294,0.0,0.0,0.000000,1.0,0.000000,0.0
DE21L,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.963364,0.0,0.0,0.000000,0.0,0.0,0.717727,0.0,1.000000,0.0


In [8]:
targets = ['DEF07', 'DEF0C', 'DEF05', 'DEF0E', 'DEF0B']

# Select rows with the index mask and columns with the column mask. The two
# masks were swapped before, which only worked because the row and column
# labels of A_sparse are identical. Row/column order follows A_sparse, not `targets`.
A_sparse_short = A_sparse.loc[A_sparse.index.isin(targets), A_sparse.columns.isin(targets)]

# The context manager closes the file handle once the sub-matrix is written.
with open('../data/05_model_input/adj_mx_short.pkl', 'wb') as adj_file:
    pickle.dump(A_sparse_short, adj_file)

In [9]:
# Display the adjacency sub-matrix restricted to the target districts.
A_sparse_short

Unnamed: 0,DEF0C,DEF07,DEF0B,DEF05,DEF0E
DEF0C,1.0,0.966466,0.911064,0.83313,0.764974
DEF07,0.966466,1.0,0.81873,0.845349,0.715571
DEF0B,0.911064,0.81873,1.0,0.871016,0.915358
DEF05,0.83313,0.845349,0.871016,1.0,0.942527
DEF0E,0.764974,0.715571,0.915358,0.942527,1.0
