In [68]:
import os
# NOTE(review): hard-coded absolute path to one user's checkout. The relative
# data path passed to load_pickle further down is resolved against this working
# directory, and presumably the project-local imports below are too (confirm).
# Consider making the project root configurable.
os.chdir('/home/clemens/armed_conflict_avalanche/')

from workspace.utils import *
from voronoi_globe.interface import load_voronoi
from arcolanche.pipeline import *
from arcolanche.transfer_entropy_func import iter_polygon_pair
from arcolanche.self_loop_entropy_func import iter_valid_polygons

from utils_CB import get_coarsegrained, get_ids_from_centroid
from math import factorial

from scipy.sparse import lil_matrix, csr_matrix

In [113]:
# --- Configuration -----------------------------------------------------------
conflict_type = "battles"
mesoscale = (32, 453, 3)  # (dt, dx, gridix)
coarse = (32, 80, 3)      # second scale tuple; not used in this cell
dt, dx, gridix = mesoscale

# --- Data loading ------------------------------------------------------------
# NOTE(review): the return value is discarded. Presumably load_pickle injects the
# pickled variables into the namespace -- confirm; otherwise this line has no effect.
load_pickle(f"avalanches/{conflict_type}/gridix_{gridix}/te/conflict_ev_{dt}_{dx}.p")
count_ts = get_coarsegrained(conflict_type, scale=mesoscale, binary=True)
polygons = load_voronoi(dx, gridix)

# --- Subset of cells around one centroid --------------------------------------
size = 2
degree = 2
centroid = 7311  # counts: 218
# Pass the variable rather than repeating the literal, so that changing
# `centroid` above actually changes the selected subset.
cell_ids = get_ids_from_centroid(polygons, size=size, centroid=centroid)
print("Cells in subset:", len(cell_ids))


def binomial(n, k):
    """Return the number of distinct k-element subsets of an n-element set.

    The result is computed with exact integer arithmetic. The quotient of the
    factorials is always a whole number, so floor division loses nothing,
    whereas true division would return a float and round results above 2**53.

    Parameters
    ----------
    n : int
        Size of the set.
    k : int
        Size of the subsets.

    Returns
    -------
    int
        n! / (k! * (n - k)!).

    Raises
    ------
    ValueError
        Propagated from ``factorial`` when n, k or n - k is negative.
    """
    return factorial(n) // (factorial(k) * factorial(n - k))

# Ordered pairs of cells: each unordered pair from binomial(n, 2) is counted in both directions.
print(f"Possible tuples in subset of size {size}:", binomial(len(cell_ids), 2)*2)


Cells in subset: 18
Possible tuples in subset of size 2: 306.0


In [100]:
def self_links(time_series, number_of_shuffles):
    """Run ``iter_valid_polygons`` over every column of ``time_series``.

    Parameters
    ----------
    time_series : pandas.DataFrame
        Frame whose column labels identify the polygons; the labels are cast
        to ``int`` before being handed on.
    number_of_shuffles : int
        Forwarded unchanged to ``iter_valid_polygons``.

    Returns
    -------
    Whatever ``iter_valid_polygons`` returns.
    """
    # Every column of the frame is treated as a polygon to evaluate.
    polygon_ids = time_series.columns.astype(int).to_list()
    return iter_valid_polygons(polygon_ids, number_of_shuffles, time_series)

Unnamed: 0,6888,6793,8171,9261,7311,7408,7377,7410,7951,6932,7637,7094,6646,8058,7994,8122,7228,7511
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
259,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
260,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
261,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [115]:
# Scratch version of the tuple construction done in `links` (nested get_tuples),
# run on the subset of cells selected around the centroid.
neighbor_info_dataframe = polygons.loc[cell_ids]
time_series = count_ts[cell_ids]

n = len(neighbor_info_dataframe)

adjacency_matrix = lil_matrix((n, n), dtype=int) #list of lists format

# map every cell id (index label) to its row/column position in the matrix
cell_id_to_position = {cell_id: pos for pos, cell_id in enumerate(neighbor_info_dataframe.index)}

#iterate over (cell id, neighbor list) pairs of the 'neighbors' column, fill adjacency matrix
for cell_id, neighbors in neighbor_info_dataframe['neighbors'].items():
    #current cell_id has to be in time_series (not in ts if zero activity)
    if cell_id in time_series.columns:
        for neighbor in neighbors:
            #neighbour has to be in polygons and time series
            if neighbor in cell_id_to_position and neighbor in time_series.columns: 
                #at position of cell_id and neighbor, set value to 1 -> iteratively fill adjacency matrix
                adjacency_matrix[cell_id_to_position[cell_id], cell_id_to_position[neighbor]] = 1

# Convert the adjacency matrix to CSR format for efficient matmul (also done implicitly)
adjacency_matrix = adjacency_matrix.tocsr()
# running power A^d; starts as a copy so the in-place setdiag below leaves A itself untouched
adjacency_matrix_power = adjacency_matrix.copy()


#fill connections dict first. keys: (cell1, cell2) values = degree of connection
connections = {}

for d in range(1, degree+1):
    if d > 1:
        adjacency_matrix_power = adjacency_matrix_power @ adjacency_matrix

    #remove self loops
    # NOTE(review): setdiag on a CSR matrix is the likely source of the warning
    # printed below this cell -- confirm.
    adjacency_matrix_power.setdiag(0)
    #get indices of non-zero values
    # NOTE(review): rows/cols are overwritten on every iteration and never stored,
    # so `connections` is still empty when this cell finishes.
    rows, cols = adjacency_matrix_power.nonzero()

  self._set_arrayXarray(i, j, x)


In [31]:
def links(time_series, neighbor_info_dataframe, number_of_shuffles, degree):
    """Build (cell, cell, degree) tuples from the neighbor graph and pass them to ``iter_polygon_pair``.

    Parameters
    ----------
    time_series : pandas.DataFrame
        Frame whose column labels are cell ids. Cells missing from the columns
        are left out of the graph.
    neighbor_info_dataframe : pandas.DataFrame
        Frame indexed by cell id with a ``'neighbors'`` column holding, for each
        cell, an iterable of neighboring cell ids.
    number_of_shuffles : int
        Forwarded unchanged to ``iter_polygon_pair``.
    degree : int
        Largest neighborhood order to include (1 = direct neighbors only).

    Returns
    -------
    Whatever ``iter_polygon_pair`` returns for the generated tuples.
    """

    def get_tuples():
        """Return ``[(cell1, cell2, d), ...]`` where d is the first power of the
        adjacency matrix (1..degree) at which cell2 is reached from cell1."""
        cell_index = neighbor_info_dataframe.index
        n = len(cell_index)
        cell_id_to_position = {cell_id: pos for pos, cell_id in enumerate(cell_index)}
        active_cells = time_series.columns

        # LIL (list of lists) format is the cheap one for element-wise filling.
        adjacency_matrix = lil_matrix((n, n), dtype=int)

        for cell_id, neighbors in neighbor_info_dataframe['neighbors'].items():
            # The cell itself must be in time_series (absent if zero activity).
            if cell_id not in active_cells:
                continue
            source_pos = cell_id_to_position[cell_id]
            for neighbor in neighbors:
                # The neighbor must be in both the polygon frame and the time series.
                if neighbor in cell_id_to_position and neighbor in active_cells:
                    adjacency_matrix[source_pos, cell_id_to_position[neighbor]] = 1

        # CSR format for efficient matrix products.
        adjacency_matrix = adjacency_matrix.tocsr()
        adjacency_matrix_power = adjacency_matrix

        # keys: (row position, col position); value: first degree of connection
        connections = {}

        for d in range(1, degree + 1):
            if d > 1:
                adjacency_matrix_power = adjacency_matrix_power @ adjacency_matrix

            rows, cols = adjacency_matrix_power.nonzero()

            # Self loops are dropped here instead of calling setdiag(0) on the CSR
            # matrix: setdiag changes the sparsity structure in place, which is
            # slow and makes SciPy emit a SparseEfficiencyWarning. The recorded
            # first degree of each pair is unaffected, because a shortest path
            # never revisits its starting cell. Only the order of the tuples
            # within one degree may differ.
            off_diagonal = rows != cols
            for row, col in zip(rows[off_diagonal], cols[off_diagonal]):
                # setdefault keeps the smallest d at which the pair was seen.
                connections.setdefault((row, col), d)

        # translate matrix positions back to cell ids
        return [(cell_index[row], cell_index[col], d) for (row, col), d in connections.items()]

    pair_poly_te = iter_polygon_pair(get_tuples(),
                                     number_of_shuffles,
                                     time_series)
    return pair_poly_te


In [34]:
def calculate_significant_edges(self_edges, pair_edges, summary = False, max_degree = None, threshold = 0.95):
    """Collect the edges whose value exceeds its shuffled values often enough.

    An edge is kept when the fraction of shuffled values strictly below its
    own value is at least ``threshold``.

    Parameters
    ----------
    self_edges : dict
        Maps ``poly -> (te, te_shuffle)``. All self edges count as degree 0.
    pair_edges : dict
        Maps ``(cell1, cell2, d) -> (te, te_shuffle)`` where ``d`` is the degree
        of the connection. Pairs with ``d`` outside ``1..max_degree`` are ignored.
    summary : bool, optional
        If True, also return a per-degree summary DataFrame.
    max_degree : int, optional
        Largest degree to tabulate. When omitted, the module-level variable
        ``degree`` is used, which is what this function always did before the
        parameter existed.
    threshold : float, optional
        Minimum fraction of shuffles the edge value has to exceed (default 0.95).

    Returns
    -------
    list or (pandas.DataFrame, list)
        ``significant_edges`` is a list with one entry per degree
        (0..max_degree), each a list of ``[(cell1, cell2), te]``. With
        ``summary=True`` the tuple ``(df, significant_edges)`` is returned,
        where ``df`` has the columns "Degree", "Total edges",
        "Significant edges" and "Ratio".
    """
    if max_degree is None:
        # Backward-compatible fallback to the notebook-level global.
        max_degree = degree

    def is_significant(te, te_shuffle):
        # Fraction of shuffles strictly below the observed value.
        return np.mean(te > np.asarray(te_shuffle)) >= threshold

    significant_edges = [[] for _ in range(max_degree + 1)]
    # Integer counts, so the summary shows 296 rather than 296.0.
    counts = np.zeros(max_degree + 1, dtype=int)

    # self edges (degree 0)
    counts[0] = len(self_edges)
    for poly, (te, te_shuffle) in self_edges.items():
        if is_significant(te, te_shuffle):
            significant_edges[0].append([(poly, poly), te])

    # pair edges (degree 1..max_degree)
    for pair, (te, te_shuffle) in pair_edges.items():
        d = pair[2]
        if not 1 <= d <= max_degree:
            continue
        counts[d] += 1
        if is_significant(te, te_shuffle):
            significant_edges[d].append([(pair[0], pair[1]), te])

    if not summary:
        return significant_edges

    # ratio dataframe; a degree without any edges gets NaN as its ratio
    df = pd.DataFrame(
        {
            "Degree": np.arange(max_degree + 1),
            "Total edges": counts,
            "Significant edges": [len(edges) for edges in significant_edges],
        }
    )
    df["Ratio"] = (df["Significant edges"] / df["Total edges"]).round(3)

    return df, significant_edges

In [32]:
# Pair edges up to second-degree neighbors and self edges on the full grid,
# both with 100 shuffles.
pair_edges = links(
    time_series=count_ts,
    neighbor_info_dataframe=polygons,
    number_of_shuffles=100,
    degree=2,
)
self_edges = self_links(time_series=count_ts, number_of_shuffles=100)

  self._set_arrayXarray(i, j, x)


In [38]:
# `degree` is read as a module-level global inside calculate_significant_edges
# (it sizes the per-degree lists), so it has to be set before the call.
degree = 2
df, significant_edges = calculate_significant_edges(self_edges, pair_edges, summary = True)
df

Unnamed: 0,Degree,Total edges,Significant edges,Ratio
0,0,296.0,164,0.554
1,1,1454.0,429,0.295
2,2,2650.0,593,0.224
