# Statistical data

### GDP data (DeStatis)

In [8]:
# Load regional GDP data

import pandas as pd

# The year column is addressed with the integer label 2023 in the selection
# below, so the dtype mapping has to use the same integer key. A string key
# '2023' does not match that column label, which means the float64 cast would
# not be applied to it.
df_vgr = pd.read_excel(
    '../data/processed/vgrdl.xlsx',
    sheet_name='sample',
    dtype={'mun_name_short': str, 2023: 'float64'}
)

# Keep only the municipality name (join key) and the 2023 figure, and give the
# year column a descriptive string name.
df_vgr = df_vgr[['mun_name_short', 2023]].rename(columns={2023: 'gdp_2023'})

df_vgr

Unnamed: 0,mun_name_short,gdp_2023
0,Herbrechtingen,2984.793
1,München,106075.126
2,Bogen,1808.474
3,Lichtenberg,1846.651
4,Kupferberg,1675.631
5,Nürnberg,27518.072
6,Langenzenn,2073.02
7,Rothenfels,2410.204
8,Augsburg,11317.154
9,Burgau,3765.137


### Measurement data per municipality

In [9]:
# Load df_merged (measurements per municipality)

import pandas as pd

# Read mun_key as text directly from the file. If the column is first parsed as
# a number and only cast to str afterwards, a leading zero in the key is
# already lost by the time of the cast.
# NOTE(review): the keys displayed so far have 11 digits and start with 8/9 —
# confirm against the official key format whether a leading zero is expected,
# and check that any later join on mun_key uses the same representation.
df_merged = pd.read_csv(
    '../data/processed/df_merged_aws-fra.csv',
    dtype={'mun_key': 'str'}
)

# Explicit column types. Columns not listed here (e.g. prb_id) keep the dtype
# inferred by read_csv.
# NOTE: probe_id and mun_pop_cen22 use plain int64, so the cast raises if either
# column contains missing values; the other integer columns use nullable Int64.
df_merged = df_merged.astype({  # nullable integers (Int64) for graceful handling of NaN
    'mun_key': 'str',
    'distance': 'float64',
    'probe_id': 'int64',
    'msm_id': 'Int64',
    'type': 'str',
    'step': 'str',
    'proto': 'str',
    'dst_addr': 'str',
    'dst_name': 'str',
    'af': 'Int64',
    'size': 'Int64',
    'avg_rtt': 'float64',
    'min_rtt': 'float64',
    'med_rtt': 'float64',
    'max_rtt': 'float64',
    'sent': 'Int64',
    'rcvd': 'Int64',
    'lost': 'Int64',
    'ttl': 'Int64',
    'sample_type': 'str',
    'mun_name_short': 'str',
    'mun_pop_cen22': 'int64',
    'state': 'str',
    'mun_lat': 'float64',
    'mun_lon': 'float64'
})

# Quick sanity check of what was loaded: column names, shape, first rows.
print(f"Cols: {df_merged.columns.tolist()}\n")
print(f"Shape: {df_merged.shape}\n")
df_merged.head()

Cols: ['mun_key', 'distance', 'probe_id', 'msm_id', 'type', 'step', 'proto', 'prb_id', 'dst_addr', 'dst_name', 'af', 'size', 'avg_rtt', 'min_rtt', 'med_rtt', 'max_rtt', 'sent', 'rcvd', 'lost', 'ttl', 'sample_type', 'mun_name_short', 'mun_pop_cen22', 'state', 'mun_lat', 'mun_lon']

Shape: (46, 26)



Unnamed: 0,mun_key,distance,probe_id,msm_id,type,step,proto,prb_id,dst_addr,dst_name,...,sent,rcvd,lost,ttl,sample_type,mun_name_short,mun_pop_cen22,state,mun_lat,mun_lon
0,81350020020,22.270402,1506,155973633,ping,,ICMP,1506.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,5,0,54.0,median,Herbrechtingen,13238,Baden-Württemberg,48.621403,10.176871
1,81350020020,25.714412,53353,155973633,ping,,ICMP,53353.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,5,0,55.0,median,Herbrechtingen,13238,Baden-Württemberg,48.621403,10.176871
2,91620000000,0.369909,1010220,155973633,ping,,ICMP,1010220.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,0,5,,top,München,1505005,Bayern,48.135125,11.581981
3,91620000000,0.369909,1010221,155973633,ping,,ICMP,1010221.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,5,0,52.0,top,München,1505005,Bayern,48.135125,11.581981
4,91620000000,0.376944,1000792,155973633,ping,,ICMP,1000792.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,5,0,56.0,top,München,1505005,Bayern,48.135125,11.581981


### Join GDP data to measurements

In [10]:
# Match gdp data to measurements by municipality name.
#
# - Dropping a gdp_2023 column left over from a previous run keeps this cell
#   re-runnable; otherwise a second run would produce gdp_2023_x / gdp_2023_y
#   instead of a single gdp_2023 column.
# - validate='many_to_one' makes pandas raise a MergeError when a municipality
#   name occurs more than once in df_vgr, instead of silently duplicating
#   measurement rows.
# - how='left' keeps every measurement row; municipalities without a GDP entry
#   get NaN in gdp_2023.
df_merged = df_merged.drop(columns='gdp_2023', errors='ignore').merge(
    df_vgr[['mun_name_short', 'gdp_2023']],
    on='mun_name_short',
    how='left',
    validate='many_to_one'
)

df_merged.head()

Unnamed: 0,mun_key,distance,probe_id,msm_id,type,step,proto,prb_id,dst_addr,dst_name,...,rcvd,lost,ttl,sample_type,mun_name_short,mun_pop_cen22,state,mun_lat,mun_lon,gdp_2023
0,81350020020,22.270402,1506,155973633,ping,,ICMP,1506.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,0,54.0,median,Herbrechtingen,13238,Baden-Württemberg,48.621403,10.176871,2984.793
1,81350020020,25.714412,53353,155973633,ping,,ICMP,53353.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,0,55.0,median,Herbrechtingen,13238,Baden-Württemberg,48.621403,10.176871,2984.793
2,91620000000,0.369909,1010220,155973633,ping,,ICMP,1010220.0,63.179.105.208,aws-fra.ncc.dock.ee,...,0,5,,top,München,1505005,Bayern,48.135125,11.581981,106075.126
3,91620000000,0.369909,1010221,155973633,ping,,ICMP,1010221.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,0,52.0,top,München,1505005,Bayern,48.135125,11.581981,106075.126
4,91620000000,0.376944,1000792,155973633,ping,,ICMP,1000792.0,63.179.105.208,aws-fra.ncc.dock.ee,...,5,0,56.0,top,München,1505005,Bayern,48.135125,11.581981,106075.126


In [11]:
# Persist the GDP-enriched measurement table as CSV, without the row index

df_merged.to_csv(
    path_or_buf='../data/processed/df_merged_aws-fra_stat.csv',
    index=False,
)

In [12]:
# Show the column names of the merged frame, followed by a blank line
column_names = df_merged.columns.tolist()
print(f"Cols: {column_names}\n")

Cols: ['mun_key', 'distance', 'probe_id', 'msm_id', 'type', 'step', 'proto', 'prb_id', 'dst_addr', 'dst_name', 'af', 'size', 'avg_rtt', 'min_rtt', 'med_rtt', 'max_rtt', 'sent', 'rcvd', 'lost', 'ttl', 'sample_type', 'mun_name_short', 'mun_pop_cen22', 'state', 'mun_lat', 'mun_lon', 'gdp_2023']



# SFA

In [None]:
from spreg import OLS, ML_Lag, ML_Error
import numpy as np

# Spatial regression of GDP on distance: a plain OLS fit with spatial
# diagnostics first, followed by a maximum-likelihood spatial lag model.
#
# NOTE(review): `gdf` and `w` are not created in any cell visible above —
# presumably a GeoDataFrame of municipalities and a spatial weights object
# built from it; they must be defined before this cell can run.
# NOTE(review): the merged frame above has a 'distance' column but no
# 'distance_to_frankfurt' — confirm which column is intended.

# Dependent variable and regressors as 2-D arrays (one column each)
y = gdf[['gdp_2023']].values
X = gdf[['distance_to_frankfurt']].values  # add more covariates as needed

# Start with OLS, then test for spatial effects
ols = OLS(y, X, w=w, name_y='gdp', name_x=['dist_ffm'], spat_diag=True)
print(ols.summary)

# If Lagrange Multiplier tests suggest lag model:
# (same variable labels as the OLS fit so the two summaries are comparable)
lag_model = ML_Lag(y, X, w, name_y='gdp', name_x=['dist_ffm'])
print(lag_model.summary)