In [1]:
# Inspect the project-local `tools` module: print every name it exposes.
import tools
print(dir(tools))

['LineString', 'Moran', 'Polygon', 'Voronoi', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'box', 'build_g_borders', 'build_g_region', 'build_g_rook', 'coo_array', 'create_corrupted_graphs', 'csr_matrix', 'diags', 'extract_subset', 'generate_hex_lattice', 'generate_pent_lattice', 'generate_square_lattice', 'gpd', 'graph', 'identity', 'math', 'np', 'pd', 'plot_graph', 'plot_lattice', 'plt', 'polygonize', 'reindex', 'remove_random_edges', 'simulate_autocorrelated_data', 'sns', 'spmatrix', 'spsolve', 'triu', 'unary_union']


In [2]:
#from scipy.sparse import identity, csr_matrix
from scipy.sparse.linalg import spsolve
import geopandas as gpd
import numpy as np
#from libpysal.weights import Queen, Rook
import matplotlib.pyplot as plt
from esda.moran import Moran
#from shapely.geometry import Polygon
#import copy
import seaborn as sns
from libpysal import graph,weights
#from splot.libpysal import plot_spatial_weights
#from scipy.sparse import spmatrix, triu, diags, coo_array
#from tools import generate_square_lattice,remove_random_edges,simulate_autocorrelated_data
import itertools
from tqdm import tqdm
import pandas as pd
from joblib import Parallel, delayed

#from spreg import ML_Error

In [3]:
# Experiment grid shared by the cells below.
# shapes / sizes are interpolated into the data and graph file paths.
shapes = ['square','pent','hex']
sizes = [25,100,400]
# Names of the sub-directories that hold each family of corrupted graphs.
corruption_methods = ['random','border','periphery','center']
# Plain integer counts, iterated as range(...): data columns per rho value
# (`..._run_<n>`) and corrupted graphs per setting (`g_<p>_<c_run>.parquet`).
n_runs = 10
c_runs = 10
# NOTE(review): np.arange with a float step accumulates rounding error, so the
# values are only approximately multiples of 0.1 (the entry nearest zero may be a
# tiny negative number, which `:.1f` renders as "-0.0"). Downstream code formats
# rho with `:.1f` to build column names — confirm those names match the data files.
rhos = np.arange(-0.9, 1.0, 0.1)
# 19 evenly spaced percentages: 5, 10, ..., 95 (floats; cast with int() for file names).
perc_missing = np.linspace(5,95, 19)  


In [11]:
# Accumulator for the results. It is created before any loop runs so that every
# iteration appends to the same list.
all_results = list()

# Size of the full parameter grid (every shape/size/method/run/rho/percentage
# combination). NOTE(review): not consumed by the loop further down.
total_iters = (
    len(shapes) * len(sizes) * len(corruption_methods)
    * n_runs * c_runs
    * len(rhos) * len(perc_missing)
)

# Lazy iterator over the per-(shape, size) part of the grid. n_runs and c_runs are
# integer counts, hence the range() wrappers. NOTE(review): the loop further down
# rebuilds the same product inline, so this single-use iterator is never consumed.
combinations = itertools.product(
    corruption_methods, range(n_runs), range(c_runs), rhos, perc_missing
)

def _process(shape, size, method, n_run, c_run, rho, p, data=None):
    """Compute Moran's I for one data column on one corrupted graph.

    Parameters
    ----------
    shape, size
        Lattice identifiers; they select the graph directory and are copied
        into the result.
    method
        Corruption method; names the sub-directory holding the graph file.
    n_run
        Index of the data column (``rho_<rho>_run_<n_run>``).
    c_run
        Index of the corrupted graph (``g_<p>_<c_run>.parquet``).
    rho
        Rendered with one decimal to build the column name and stored
        unrounded in the result (presumably the autocorrelation parameter of
        the data — confirm against the data generator).
    p
        Missing percentage; truncated with ``int()`` for the file name and
        stored unchanged in the result.
    data
        Optional table containing the data columns. When omitted, the
        module-level ``gdf`` is used, so existing seven-argument calls keep
        working.

    Returns
    -------
    dict
        The grid coordinates plus ``moran_i`` (``Moran.I``) and ``p_value``
        (``Moran.p_sim``).
    """
    # An explicitly passed table wins; the global is only a fallback for
    # callers written before the `data` parameter existed.
    frame = gdf if data is None else data

    wm = graph.read_parquet(
        f"graphs/{shape}/size_{size}/{method}/g_{int(p)}_{c_run}.parquet"
    ).to_W()
    wm.transform = 'r'  # libpysal's row-standardised weights

    # NOTE(review): a tiny negative rho (float noise around zero) renders as
    # "-0.0" here — confirm the column names in the data files follow suit.
    col_prefix = f"rho_{rho:.1f}"
    # assumes the rows of `frame` are ordered like the graph's ids — TODO confirm
    y = frame[f"{col_prefix}_run_{n_run}"].values
    mi_cor = Moran(y, wm)

    return {
        "shape": shape,
        "size": size,
        "corruption_method": method,
        "rho": rho,
        "p_missing": p,
        "corruption_run": c_run,
        "data_run": n_run,
        "moran_i": mi_cor.I,
        "p_value": mi_cor.p_sim,
    }


# Sequential outer loop: the data table is read once per (shape, size) pair and the
# Moran evaluations for that pair are fanned out to all cores.
for shape, size in itertools.product(shapes, sizes):
    gdf = gpd.read_parquet(f'data/gdf_{shape}_{size}.parquet')

    # The table is handed to the worker explicitly instead of being picked up
    # from module state. Each iteration appends ONE list (the Parallel result),
    # so all_results ends up as a list of lists.
    # NOTE(review): the graph path depends only on (method, p, c_run), yet a task
    # is created per (n_run, rho) as well, so each graph file is re-read many times.
    all_results.append(
        Parallel(n_jobs=-1)(
            delayed(_process)(shape, size, method, n_run, c_run, rho, p, data=gdf)
            for method, n_run, c_run, rho, p in itertools.product(
                corruption_methods,
                range(n_runs),
                range(c_runs),
                rhos,
                perc_missing,
            )
        )
    )
    
        
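# 4. OUTSIDE THE LOOP: all_results is a list of lists (one list of result dicts per
# shape/size pair), so flatten it before building the DataFrame of ~1.3 million rows:
# final_df = pd.DataFrame(list(itertools.chain.from_iterable(all_results)))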

In [6]:
# Worker executed in parallel, once per (rho, data run) combination.
def _process_truth(shape, size, rho, n_run, gdf, wm):
    """Compute Moran's I for one data column under the supplied weights.

    Parameters
    ----------
    shape, size
        Lattice identifiers; only copied into the result.
    rho
        Rendered with one decimal to build the column name and stored
        unrounded in the result.
    n_run
        Index of the data column (``rho_<rho>_run_<n_run>``).
    gdf
        Table containing the data columns.
    wm
        Spatial weights passed straight to ``Moran``.

    Returns
    -------
    dict
        ``shape``, ``size``, ``rho``, ``data_run``, ``moran_i`` (``Moran.I``)
        and ``p_value`` (``Moran.p_sim``).
    """
    column_name = f"rho_{rho:.1f}_run_{n_run}"
    values = gdf[column_name].values
    stat = Moran(values, wm)

    record = dict(
        shape=shape,
        size=size,
        rho=rho,
        data_run=n_run,
        moran_i=stat.I,
        p_value=stat.p_sim,
    )
    return record

# Receives one list of result dicts per (shape, size) pair.
ground_truth = list()

# The outer loop stays sequential so the data table and the reference graph are
# read once per (shape, size) pair; only the Moran evaluations run in parallel.
for shape, size in itertools.product(shapes, sizes):
    gdf = gpd.read_parquet(f'data/gdf_{shape}_{size}.parquet')

    wm = graph.read_parquet(f'graphs/{shape}/size_{size}/g_true.parquet').to_W()
    wm.transform = 'r'

    # One delayed call per (rho, data run); built lazily and consumed by Parallel.
    truth_jobs = (
        delayed(_process_truth)(shape, size, rho, n_run, gdf, wm)
        for rho, n_run in itertools.product(rhos, range(n_runs))
    )
    ground_truth.append(Parallel(n_jobs=-1)(truth_jobs))

In [12]:
# ground_truth is nested (one list of row dicts per appended batch): unnest it in
# order, then build the table from the flat list in a single call.
flat_truth = [row for batch in ground_truth for row in batch]
truth_df = pd.DataFrame(flat_truth)

In [8]:
# Persist the ground-truth table next to the notebook (relative path).
truth_df.to_parquet('true_moran.parquet')

In [17]:
# Leftover inspection: this only shows the repr of the itertools.product object;
# its items are neither displayed nor consumed.
combinations

<itertools.product at 0x219e5813280>

In [13]:
# all_results is nested (one list of row dicts per appended batch): unnest it in
# order first.
flat_results = [record for chunk in all_results for record in chunk]

# Build the DataFrame from the flat list in a single call.
df = pd.DataFrame(flat_results)

In [14]:
# Display the combined results (the rendered output is abbreviated to the first and last rows).
df

Unnamed: 0,shape,size,corruption_method,rho,p_missing,corruption_run,data_run,moran_i,p_value
0,square,25,random,-0.9,5.0,0,0,-0.664540,0.001
1,square,25,random,-0.9,10.0,0,0,-0.677601,0.001
2,square,25,random,-0.9,15.0,0,0,-0.666051,0.001
3,square,25,random,-0.9,20.0,0,0,-0.694786,0.001
4,square,25,random,-0.9,25.0,0,0,-0.577562,0.001
...,...,...,...,...,...,...,...,...,...
1299595,hex,400,center,0.9,75.0,9,9,0.673896,0.001
1299596,hex,400,center,0.9,80.0,9,9,0.664584,0.001
1299597,hex,400,center,0.9,85.0,9,9,0.682769,0.001
1299598,hex,400,center,0.9,90.0,9,9,0.737686,0.001


In [15]:
# Persist the combined results; index=False leaves the DataFrame index out of the file.
df.to_parquet('morans_i_results.parquet', index=False)

In [11]:
# Reload the saved results from disk into a separate name.
results = pd.read_parquet('morans_i_results.parquet')

In [12]:
# Display the reloaded results.
# NOTE(review): the moran_i / p_value figures shown for this frame differ from the
# ones shown for the in-memory `df` on the same keys, so the file was presumably
# written by a different run — confirm which run the saved file belongs to.
results

Unnamed: 0,shape,size,corruption_method,rho,p_missing,corruption_run,data_run,moran_i,p_value
0,square,25,random,-0.9,5.0,0,0,-0.369604,0.018
1,square,25,random,-0.9,10.0,0,0,-0.230982,0.112
2,square,25,random,-0.9,15.0,0,0,-0.316287,0.048
3,square,25,random,-0.9,20.0,0,0,-0.141609,0.297
4,square,25,random,-0.9,25.0,0,0,-0.284885,0.101
...,...,...,...,...,...,...,...,...,...
1299595,hex,400,center,0.9,75.0,9,9,0.226211,0.001
1299596,hex,400,center,0.9,80.0,9,9,0.226556,0.001
1299597,hex,400,center,0.9,85.0,9,9,0.208840,0.001
1299598,hex,400,center,0.9,90.0,9,9,0.231492,0.001
