In [35]:
import sys
import geopandas as gpd
import pandas as pd
import os

In [36]:
# Switch to the project root so that the relative '0-raw-data/...' paths used by the
# following cells resolve. The root can be overridden with the MLUP_PROJECT_DIR
# environment variable; when it is unset the original local checkout path is used,
# so existing behaviour is unchanged.
os.chdir(os.environ.get('MLUP_PROJECT_DIR',
                        '/home/nmd/Projects/mlup/tests/db-set-refactoring/'))

In [37]:
# Load the NUTS boundaries shapefile into a GeoDataFrame.
# Despite the variable name `nuts12`, later cells select LEVL_CODE 1, 2 and 3 rows from it.
nuts12 = gpd.read_file('0-raw-data/lau/NUTS_RG_01M_2016_3035.shp')

In [38]:
# Mapping between the NUTS country code and the EUBUCCO country name, together with
# the NUTS level (1 or 2) that corresponds to the EUBUCCO input datasets of each country.
df = pd.DataFrame(
    [('AT', 'austria', 2),
     ('BE', 'belgium', 1),
     ('BG', 'bulgaria', 1),
     ('CH', 'switzerland', 1),
     ('CY', 'cyprus', 1),
     ('CZ', 'czechia', 1),
     ('DE', 'germany', 1),
     ('DK', 'denmark', 1),
     ('EE', 'estonia', 1),
     ('EL', 'greece', 1),
     ('ES', 'spain', 2),
     ('FI', 'finland', 1),
     ('FR', 'france', 1),
     ('HR', 'croatia', 1),
     ('HU', 'hungary', 1),
     ('IE', 'ireland', 1),
     ('IT', 'italy', 2),
     ('LT', 'lithuania', 1),
     ('LU', 'luxembourg', 1),
     ('LV', 'latvia', 1),
     ('MT', 'malta', 1),
     ('NL', 'netherlands', 1),
     ('NO', 'norway', 1),
     ('PL', 'poland', 1),
     ('PT', 'portugal', 1),
     ('RO', 'romania', 1),
     ('SE', 'sweden', 1),
     ('SI', 'slovenia', 1),
     ('SK', 'slovakia', 1),
     ('UK', 'united-kingdom', 1)],
    columns=['CNTR_CODE', 'country', 'nuts1/2'])

# Validate the mapping up front. Raising an exception (instead of calling sys.exit,
# which raises SystemExit inside a notebook kernel) names the offending countries.
bad_level_countries = df.loc[~df['nuts1/2'].isin([1, 2]), 'CNTR_CODE'].tolist()
if bad_level_countries:
    raise ValueError(f'Error with NUTS level. Offending country codes: {bad_level_countries}')

# Attach every NUTS row to its country in a single merge, then keep only the rows whose
# level equals the level requested for that country. This replaces the per-country
# merge + pd.concat loop, which re-copied the accumulated frame on every iteration.
nuts = pd.merge(df, nuts12, on='CNTR_CODE')
nuts = nuts.loc[nuts['LEVL_CODE'] == nuts['nuts1/2'],
                ['country', 'NUTS_ID', 'NAME_LATN', 'CNTR_CODE']]

# Name the columns after their role in the later NUTS 3 joins.
nuts = nuts.rename(columns={'NAME_LATN': 'region', 'NUTS_ID': 'NUTS_ID_region'})

In [39]:
# Preview the region lookup table (country, region ID, region name, country code).
nuts.head()

Unnamed: 0,country,NUTS_ID_region,region,CNTR_CODE
4,austria,AT32,Salzburg,AT
5,austria,AT11,Burgenland,AT
6,austria,AT33,Tirol,AT
7,austria,AT34,Vorarlberg,AT
8,austria,AT22,Steiermark,AT


In [40]:
# Keep only the NUTS level-3 rows, geometries included; the country code column is
# removed here.
nuts3 = nuts12.loc[nuts12['LEVL_CODE'] == 3].drop(columns=['CNTR_CODE'])

In [41]:
# Preview the NUTS level-3 rows selected above.
nuts3.head()

Unnamed: 0,LEVL_CODE,NUTS_ID,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,geometry
103,3,HR043,Krapinsko-zagorska županija,Krapinsko-zagorska županija,4,3,3,"POLYGON ((4774720.630 2587061.468, 4774957.107..."
406,3,AT314,Steyr-Kirchdorf,Steyr-Kirchdorf,3,3,3,"POLYGON ((4648229.856 2789186.120, 4648604.204..."
407,3,AT315,Traunviertel,Traunviertel,3,3,3,"POLYGON ((4592531.261 2786657.952, 4593751.148..."
409,3,DE600,Hamburg,Hamburg,4,1,1,"MULTIPOLYGON (((4314918.091 3393914.497, 43157..."
410,3,DE711,"Darmstadt, Kreisfreie Stadt","Darmstadt, Kreisfreie Stadt",4,1,3,"POLYGON ((4229546.152 2983211.570, 4230168.376..."


In [42]:
# Merge NUTS 3 rows with the relevant NUTS 1/2 region, using the NUTS_ID_region
# column from the nuts dataframe.
#
# A NUTS 3 row is assigned to a region when the region ID is a *prefix* of the NUTS 3 ID.
# A plain substring test (`region_id in nuts3_id`) also matches in the middle of an ID --
# for instance 'EE0' is contained in 'DEE01' -- which attaches a NUTS 3 row to a region of
# another country and duplicates it. Looking up each prefix in a set also avoids comparing
# every region with every NUTS 3 ID.
region_ids = set(nuts.NUTS_ID_region)
region_of_nuts3 = pd.DataFrame(
    [(nuts3_id[:end], nuts3_id)
     for nuts3_id in nuts3.NUTS_ID.to_list()
     for end in range(1, len(nuts3_id) + 1)
     if nuts3_id[:end] in region_ids],
    columns=['NUTS_ID_region', 'NUTS_ID_3'])

# NUTS_ID is dropped because the same value is kept under the name NUTS_ID_3.
nuts3 = pd.merge(nuts3, region_of_nuts3,
                 left_on='NUTS_ID', right_on='NUTS_ID_3').drop(columns=['NUTS_ID'])

In [43]:
# Preview NUTS 3 rows with their parent region ID (NUTS_ID_region) attached.
nuts3.head()

Unnamed: 0,LEVL_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,geometry,NUTS_ID_region,NUTS_ID_3
0,3,Krapinsko-zagorska županija,Krapinsko-zagorska županija,4,3,3,"POLYGON ((4774720.630 2587061.468, 4774957.107...",HR0,HR043
1,3,Steyr-Kirchdorf,Steyr-Kirchdorf,3,3,3,"POLYGON ((4648229.856 2789186.120, 4648604.204...",AT31,AT314
2,3,Traunviertel,Traunviertel,3,3,3,"POLYGON ((4592531.261 2786657.952, 4593751.148...",AT31,AT315
3,3,Hamburg,Hamburg,4,1,1,"MULTIPOLYGON (((4314918.091 3393914.497, 43157...",DE6,DE600
4,3,"Darmstadt, Kreisfreie Stadt","Darmstadt, Kreisfreie Stadt",4,1,3,"POLYGON ((4229546.152 2983211.570, 4230168.376...",DE7,DE711


In [44]:
# Attach country, region name and country code from the nuts dataframe to every
# NUTS 3 row; the NUTS 3 name column is then exposed as 'nuts3'.
nuts3 = (
    pd.merge(nuts, nuts3, on='NUTS_ID_region')
    .drop(columns=['NUTS_NAME', 'LEVL_CODE'])
    .rename(columns={'NAME_LATN': 'nuts3'})
)

In [45]:
# Preview NUTS 3 rows enriched with country and region information.
nuts3.head()

Unnamed: 0,country,NUTS_ID_region,region,CNTR_CODE,nuts3,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,geometry,NUTS_ID_3
0,austria,AT32,Salzburg,AT,Lungau,3,3,3,"POLYGON ((4610337.451 2691408.596, 4613598.628...",AT321
1,austria,AT32,Salzburg,AT,Pinzgau-Pongau,3,3,3,"POLYGON ((4523482.837 2733534.582, 4524947.733...",AT322
2,austria,AT32,Salzburg,AT,Salzburg und Umgebung,3,2,3,"POLYGON ((4548368.720 2773926.868, 4548145.492...",AT323
3,austria,AT11,Burgenland,AT,Mittelburgenland,4,3,3,"POLYGON ((4808007.995 2746805.778, 4808866.453...",AT111
4,austria,AT11,Burgenland,AT,Nordburgenland,4,3,3,"POLYGON ((4848412.387 2801146.931, 4848731.031...",AT112


In [46]:
# Map each LAU to its NUTS 3 region with a spatial join (no way was found to do this
# on an ID column): every LAU polygon is reduced to its centroid and matched to the
# nearest NUTS 3 polygon.
# Both shapefiles are (re-)read here from the same paths used elsewhere in the notebook.
nuts12 = gpd.read_file('0-raw-data/lau/NUTS_RG_01M_2016_3035.shp')
lau = gpd.read_file('0-raw-data/lau/LAU_RG_01M_2019_3035.shp')

# CNTR_CODE is dropped on the NUTS side so that the LAU's own CNTR_CODE column keeps
# its plain name in the join result (it is selected by that name afterwards).
nuts3G = nuts12[nuts12.LEVL_CODE==3].drop(columns=['CNTR_CODE'])
lau['geometry'] = lau['geometry'].centroid
join = gpd.sjoin_nearest(lau,nuts3G)

# The join must yield exactly one row per LAU. The result is still printed as before,
# but a mismatch now stops the notebook instead of passing silently into the merges below.
one_row_per_lau = len(join)==len(lau)
print(one_row_per_lau)
assert one_row_per_lau, (
    f'Nearest-NUTS3 join returned {len(join)} rows for {len(lau)} LAUs')

True


In [47]:
# LAU_IDs are not unique: the same ID may occur in several countries. Prefixing the
# country code makes the join key country-specific.
# `.assign` builds a new frame, so no column is set on a slice of the join result.
join = join[['LAU_ID','NUTS_ID','CNTR_CODE']].assign(
    LAU_ID=lambda frame: frame['CNTR_CODE'] + frame['LAU_ID'])

# Reload the LAU boundary geometry, which was turned into centroids for matching,
# and build the same country-prefixed key on it.
g = gpd.read_file('0-raw-data/lau/LAU_RG_01M_2019_3035.shp')[['CNTR_CODE','LAU_ID','geometry']]
g = g.assign(LAU_ID=lambda frame: frame['CNTR_CODE'] + frame['LAU_ID'])

# Match each LAU with its NUTS 3 ID; the country code columns duplicated by the merge
# (suffixed _x / _y) are no longer needed because the code is part of LAU_ID.
# NOTE(review): this assumes the prefixed LAU_ID is unique in both frames -- duplicates
# would multiply rows in the merge; confirm against the data.
nuts_lau = pd.merge(g,join,on='LAU_ID').drop(columns=['CNTR_CODE_x','CNTR_CODE_y'])

In [48]:
# Preview the LAU -> NUTS 3 assignment (country-prefixed LAU_ID, geometry, NUTS_ID).
nuts_lau.head()

Unnamed: 0,LAU_ID,geometry,NUTS_ID
0,CZ579271,"POLYGON ((4719987.985 3061286.922, 4720957.651...",CZ052
1,CZ579289,"POLYGON ((4586234.389 2954780.479, 4585216.487...",CZ032
2,CZ579297,"POLYGON ((4723406.984 3064343.207, 4723584.258...",CZ052
3,CZ579301,"POLYGON ((4734870.406 3048816.063, 4736070.209...",CZ052
4,CZ579319,"POLYGON ((4730828.439 3064624.032, 4731219.014...",CZ052


In [49]:
# Persist the LAU geometries with their NUTS 3 ID, before the extra NUTS attributes are merged in.
nuts_lau.to_file('0-raw-data/lau/lau_nuts.gpkg')

In [50]:
# Enrich every LAU with the full NUTS information (country, region, NUTS 3 name and
# type columns). The NUTS 3 geometry is left out so the LAU geometry stays the only one.
nuts3_attributes = nuts3.drop(columns=['geometry'])
nuts_lau = pd.merge(
    nuts_lau,
    nuts3_attributes,
    left_on='NUTS_ID',
    right_on='NUTS_ID_3',
)
# NUTS_ID duplicates NUTS_ID_3 after the merge.
nuts_lau = nuts_lau.drop(columns=['NUTS_ID'])

In [51]:
# Preview the LAU table with country, region and NUTS 3 attributes attached.
nuts_lau.head()

Unnamed: 0,LAU_ID,geometry,country,NUTS_ID_region,region,CNTR_CODE,nuts3,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,NUTS_ID_3
0,CZ579271,"POLYGON ((4719987.985 3061286.922, 4720957.651...",czechia,CZ0,ČESKÁ REPUBLIKA,CZ,Královéhradecký kraj,4,2,3,CZ052
1,CZ579297,"POLYGON ((4723406.984 3064343.207, 4723584.258...",czechia,CZ0,ČESKÁ REPUBLIKA,CZ,Královéhradecký kraj,4,2,3,CZ052
2,CZ579301,"POLYGON ((4734870.406 3048816.063, 4736070.209...",czechia,CZ0,ČESKÁ REPUBLIKA,CZ,Královéhradecký kraj,4,2,3,CZ052
3,CZ579319,"POLYGON ((4730828.439 3064624.032, 4731219.014...",czechia,CZ0,ČESKÁ REPUBLIKA,CZ,Královéhradecký kraj,4,2,3,CZ052
4,CZ579327,"POLYGON ((4736854.652 3053390.756, 4736722.023...",czechia,CZ0,ČESKÁ REPUBLIKA,CZ,Královéhradecký kraj,4,2,3,CZ052


In [52]:
# Export the attribute table only (geometry column removed, row index not written) as CSV.
nuts_lau.drop(columns=['geometry']).to_csv('0-raw-data/lau/lau_nuts_extra.csv',index=False)