# Import packages

In [5]:
import os
import itertools

import numpy as np
import pandas as pd
import pyproj
import geopandas as gpd
import shapely
import fiona

from tqdm import tqdm
import networkx as nx

from sqr.core.shape import make_gdf_square_data, find_neighbor_shapes
from sqr.core.shape import label2coord, polygon_from_north_east
from sqr.pre_assign import pre_partition_area, assign_cells_partition, merge_insufficient

%matplotlib inline 

# Preprocess data
Load kvadratnet with population data into HDF store: 'data/parsed/kvadrat_data.hdf'

In [None]:
# Convert each population sheet of the DST workbook into its own key of an
# HDF store, so later cells can load the data without re-parsing the Excel file.
pop_file = 'DST/Antal personer pr. celle.xlsx'

# NOTE(review): later cells read 'data/parsed/personer_celler.hdf', while this
# cell writes to 'DST/personer_celler.hdf' -- confirm whether the file is moved
# by hand or whether one of the two paths is stale.
# The workbook is opened once and each sheet is parsed from the same handle.
# The sheet name is passed positionally because the keyword was renamed from
# `sheetname` to `sheet_name` between pandas releases.
with pd.ExcelFile(pop_file) as workbook:
    for dst_sheetname in ['_10km','_1km','_100m']:
        print(dst_sheetname)
        in_df = workbook.parse(dst_sheetname)
        in_df.to_hdf('DST/personer_celler.hdf', key=dst_sheetname)

Store the 100 m × 100 m kvadratnet cells in HDF format and append coordinate data

In [3]:
# Build the 100 m cell table: read the cell labels from the two grid
# shapefiles, derive projected and geographic coordinates for every cell, and
# write both the labels and the coordinate table to the HDF store.

# Keep only the first column of each shapefile (accessed as `KN100mDK` below).
# NOTE(review): the mainland grid is assumed to live next to the Bornholm grid
# under 'data/raw/' -- confirm the location of DKN_100m_euref89.shp.
dk = gpd.read_file('data/raw/DKN_100m_euref89.shp').iloc[:,:1]
dk_bornholm = gpd.read_file('data/raw/DKN_bholm_100m_euref89.shp').iloc[:,:1]

# The store path is the first argument of `to_hdf` (`path_or_buf`); there is
# no `data` keyword, so it is passed positionally.
pd.concat([dk, dk_bornholm],
          ignore_index=True)\
  .KN100mDK\
  .to_hdf('data/parsed/kvadrat_data.hdf',
          key='cells100')

KN100mDK = pd.read_hdf('data/parsed/kvadrat_data.hdf', key='cells100')

# Each label is split on '_'; the first piece is dropped and the remaining two
# pieces are stored as the columns 'n' and 'e'.
coords = pd.DataFrame(np.array(KN100mDK.str.split('_').tolist())[:,1:],
                      columns = ['n','e'])

# Scale the label numbers by 100 and add 50 (presumably the cell centre in
# metres, per the `_cent` suffix -- confirm against the grid definition).
coords['e_cent'] = (coords.e.astype(int)*100+50).astype(np.int32)
coords['n_cent'] = (coords.n.astype(int)*100+50).astype(np.int32)

# Re-project the centre points from EPSG:25832 to EPSG:4326.
# NOTE(review): `pyproj.transform` is deprecated in newer pyproj releases in
# favour of `pyproj.Transformer`; kept here to match the installed API.
p1 = pyproj.Proj(fiona.crs.from_epsg(25832))
p2 = pyproj.Proj(fiona.crs.from_epsg(4326))

gps_coords = pyproj.transform(p1, p2, coords.e_cent.values, coords.n_cent.values)
gps_coords = pd.DataFrame(np.array(gps_coords).T, columns = ['lon_cent','lat_cent'])

# Labels, grid coordinates and geographic coordinates side by side.
all_coords = pd.concat([KN100mDK,coords,gps_coords],axis=1)

all_coords.to_hdf('data/parsed/kvadrat_data.hdf', key='cells100_data')

# Assign squares to municipality shapes
Load data

In [6]:
# Read the stored partition labels from JSON into a DataFrame.
partition_label = pd.read_json(
    path_or_buf='data/output_final/partition.json',
)

In [31]:
# One list per row of `partition_label`, holding that row's non-null values.
p_labs = [
    labels.dropna().tolist()
    for _, labels in partition_label.iterrows()
]

In [39]:
# For the last 2000 entries of `p_labs`: sum the selected rows of `pers_lab`
# per column, take the smallest column total, and count how many exceed 100.
# NOTE(review): `pers_lab` is only created in a cell further down, so this
# cell fails on a fresh top-to-bottom run.
sum(
    pers_lab.loc[labels].sum(0).min() > 100
    for labels in p_labs[-2000:]
)

1828

In [5]:
# Yearly columns of `pers`, indexed by the 'ddkncelle100m' column.
# NOTE(review): `pers` and `year_cols` come from a cell further down.
pers_by_label = pers.set_index('ddkncelle100m')
pers_lab = pers_by_label[year_cols]

In [2]:
# Load the '_100m' population table and derive per-row summary columns.
# The first row is skipped (reason not visible here -- TODO confirm).
# `.copy()` detaches the frame from the `.iloc` slice so the column
# assignments below do not raise SettingWithCopyWarning.
pers = pd.read_hdf('data/parsed/personer_celler.hdf', key='_100m').iloc[1:].copy()
year_cols = list(map(str,range(1986,2016)))  # '1986' ... '2015'

pers_years = pers[year_cols]
pers['minimum'] = pers_years.min(axis=1)          # smallest yearly value per row
pers['mean'] = pers_years.fillna(0).mean(axis=1)  # missing years count as 0

# new construction year
# Rows with at least one missing yearly value.
pers['zero'] = pers_years.isnull().max(axis=1)
zeros = pers_years[pers.zero].isnull()
zeros_t = zeros.T  # rows: years, columns: rows of `pers`
# True in a year whose value is present while the previous year's was missing.
inhabit = (zeros_t.shift(1).fillna(False) & (~zeros_t))
# Keep entries with exactly one such transition and a value in the last year.
inhabit_single = (inhabit.sum(axis=0)==1) & (~zeros.iloc[:,-1])
inhabit_year = (
    zeros_t
    .loc[:,inhabit_single]
    .idxmin()                # first year that is not missing (False < True)
    .rename('inhabit_year')
    .astype(int)
)
pers = pers.join(inhabit_year)


In [None]:
# Load the municipality shapes, the '_100m' population table (with derived
# summary columns) and the cell coordinate table, then build `all_gdf`.
kommuner = gpd.read_file('data/shape/KOMMUNE.shp')

# The first row is skipped (reason not visible here -- TODO confirm).
# `.copy()` detaches the frame from the `.iloc` slice so the column
# assignments below do not raise SettingWithCopyWarning.
pers = pd.read_hdf('data/parsed/personer_celler.hdf', key='_100m').iloc[1:].copy()
year_cols = list(map(str,range(1986,2016)))  # '1986' ... '2015'

pers_years = pers[year_cols]
pers['minimum'] = pers_years.min(axis=1)          # smallest yearly value per row
pers['mean'] = pers_years.fillna(0).mean(axis=1)  # missing years count as 0

# new construction year
# Rows with at least one missing yearly value.
pers['zero'] = pers_years.isnull().max(axis=1)
zeros = pers_years[pers.zero].isnull()
zeros_t = zeros.T  # rows: years, columns: rows of `pers`
# True in a year whose value is present while the previous year's was missing.
inhabit = (zeros_t.shift(1).fillna(False) & (~zeros_t))
# Keep entries with exactly one such transition and a value in the last year.
inhabit_single = (inhabit.sum(axis=0)==1) & (~zeros.iloc[:,-1])
inhabit_year = (
    zeros_t
    .loc[:,inhabit_single]
    .idxmin()                # first year that is not missing (False < True)
    .rename('inhabit_year')
    .astype(int)
)
pers = pers.join(inhabit_year)

# Rename the coordinate columns and cast the grid indices to int in a single
# chain, so re-running the cell never mutates a previously loaded frame.
all_squares = (
    pd.read_hdf('data/parsed/kvadrat_data.hdf', key='cells100_data')
    .rename(columns = {'lon_cent':'lon','lat_cent':'lat'})
    .astype({'e': int, 'n': int})
)

all_gdf = make_gdf_square_data(all_squares)

Assign each cell to a municipality and store one table per municipality

In [None]:
# Assign every square in `all_gdf` to a shape in `kommuner`, then write one
# table per assignment value to 'data/parsed/sqr_mun.hdf' (key 'sqidx<idx>'),
# enriched with the population columns from `pers`.
assignments = assign_cells_partition(kommuner, all_gdf)

# Map each assignment value to the list of square indices that received it.
assignment_dict = (
    assignments
    .groupby('assignment')
    .apply(lambda g: g.index.tolist())
    .to_dict()
)

# The derived year column on `pers` is named 'inhabit_year'; no 'shift_year'
# column is created anywhere in this notebook, so selecting it would raise
# KeyError.
extra_cols = ['minimum','mean','inhabit_year','ddkncelle100m']

# The population columns are the same for every municipality; select once.
pers_extra = pers[extra_cols+year_cols]

for idx, assignment_idxs in assignment_dict.items():
    out_df = (
        all_gdf
        .loc[assignment_idxs]
        .drop('geometry', axis=1)
        .reset_index()
        .rename(columns={'index':'square_idx'})
    )

    # Left merge keeps every square, including those without population rows.
    out_df = out_df.merge(pers_extra,
                          right_on='ddkncelle100m',
                          left_on='KN100mDK',
                          how='left')\
                .drop('ddkncelle100m',axis=1)

    # Wrapped in pd.DataFrame before writing (presumably to store a plain
    # DataFrame and not a geometry-less GeoDataFrame -- TODO confirm).
    pd.DataFrame(out_df).to_hdf('data/parsed/sqr_mun.hdf', key='sqidx%i'% idx)
    
# Check that no cell is shared by any pair of municipality indices listed in
# `mun_neighbor`; offending pairs are collected in `errors`.
# NOTE(review): `mun_neighbor` and `cell_assignment` are not defined in the
# visible part of this notebook -- confirm where they come from.
errors = []

for i1, i2 in mun_neighbor:
    # Cells of each municipality: those 'within' plus those 'touching'.
    cells1, cells2 = (
        cell_assignment[i]['within'] + list(cell_assignment[i]['touching'])
        for i in (i1, i2)
    )

    shared = np.intersect1d(cells1, cells2)
    if shared.size > 0:
        errors.append((i1, i2))
        
    

# Partition municipality shapes into chunks

In [2]:
# Reload the municipality shapes and attach per-municipality totals computed
# from the tables stored in 'data/parsed/sqr_mun.hdf'.
kommuner = gpd.read_file('data/shape/KOMMUNE.shp')

mun_pop_min, mun_pop_avg, mun_cell_count = {}, {}, {}

for idx in kommuner.index:
    hdf_key = 'sqidx%i'% idx
    mun_data = pd.read_hdf('data/parsed/sqr_mun.hdf', key=hdf_key)

    mun_pop_min[idx] = mun_data['minimum'].sum()  # sum of the per-row minima
    mun_pop_avg[idx] = mun_data['mean'].sum()     # sum of the per-row means
    mun_cell_count[idx] = len(mun_data)           # number of rows in the table

# The dict keys are the index labels of `kommuner`, so the Series align by label.
kommuner['minimum_total'] = pd.Series(mun_pop_min)
kommuner['mean_total'] = pd.Series(mun_pop_avg)
kommuner['cell_count'] = pd.Series(mun_cell_count)

# Flag municipalities whose summed minimum exceeds 100.
kommuner['to_assign'] = kommuner['minimum_total'] > 100

In [6]:
# Sub-partition every flagged municipality with more than 25000 cells and
# write one table per sub-partition to 'data/parsed/sqr_mun_sub.hdf'
# (key 'sqidx<idx>_<sub_idx>').
select = kommuner[(kommuner.to_assign) & (kommuner.cell_count>25000)]

progress = tqdm(select.index.tolist())
for idx in progress:
    # Show the current municipality on the progress bar; printing inside the
    # loop would interleave with the bar output.
    progress.set_description('municipality %i' % idx)

    # `idx` is an index label taken from `select.index`, so look it up by
    # label (`.loc`), not by position (`.iloc`).
    origin_geom = kommuner.loc[idx].geometry
    mun_df = pd.read_hdf('data/parsed/sqr_mun.hdf', 'sqidx%i' % idx)
    mun_gdf = make_gdf_square_data(mun_df)
    pre_part = pre_partition_area(mun_df, origin_geom)

    # NOTE(review): the merged result is never used -- the assignment below
    # receives `pre_part`, not `pre_part_suff`. Confirm whether
    # `merge_insufficient` works in place or whether `pre_part_suff` should be
    # passed to `assign_cells_partition`.
    pre_part_suff = merge_insufficient(pre_part)

    assignment = assign_cells_partition(pre_part, mun_gdf)

    mun_df = (
        mun_df
        .join(assignment)
        .drop('geometry', axis=1)
    )

    for sub_idx, sub_df in mun_df.groupby('assignment'):
        out_key = 'sqidx%i_%i' % (idx, sub_idx)
        sub_df.to_hdf('data/parsed/sqr_mun_sub.hdf', key = out_key)

  0%|          | 0/61 [00:00<?, ?it/s]

6


  2%|▏         | 1/61 [01:09<1:09:41, 69.69s/it]

7


  3%|▎         | 2/61 [03:59<1:38:07, 99.79s/it]

16


  5%|▍         | 3/61 [07:04<2:01:08, 125.32s/it]

24


  7%|▋         | 4/61 [07:51<1:36:45, 101.86s/it]

32


  8%|▊         | 5/61 [09:47<1:38:50, 105.90s/it]

33


 10%|▉         | 6/61 [14:20<2:23:16, 156.30s/it]

34


 11%|█▏        | 7/61 [16:14<2:09:01, 143.37s/it]

48


 13%|█▎        | 8/61 [17:55<1:55:29, 130.74s/it]

49


 15%|█▍        | 9/61 [20:24<1:58:00, 136.16s/it]

51


 16%|█▋        | 10/61 [21:52<1:43:38, 121.92s/it]

52


 18%|█▊        | 11/61 [23:16<1:31:59, 110.38s/it]

54


 20%|█▉        | 12/61 [25:23<1:34:12, 115.37s/it]

67


 21%|██▏       | 13/61 [27:07<1:29:37, 112.04s/it]

69


 23%|██▎       | 14/61 [29:11<1:30:27, 115.48s/it]

78


 25%|██▍       | 15/61 [30:17<1:17:10, 100.67s/it]

79


 26%|██▌       | 16/61 [31:48<1:13:22, 97.83s/it] 

85


 28%|██▊       | 17/61 [33:26<1:11:50, 97.97s/it]

87


 30%|██▉       | 18/61 [34:42<1:05:30, 91.41s/it]

89


 31%|███       | 19/61 [36:55<1:12:38, 103.78s/it]

91


 33%|███▎      | 20/61 [39:29<1:21:14, 118.88s/it]

93


 34%|███▍      | 21/61 [41:04<1:14:29, 111.73s/it]

97


 36%|███▌      | 22/61 [42:38<1:09:12, 106.48s/it]

100


 38%|███▊      | 23/61 [45:00<1:14:10, 117.11s/it]

136


 39%|███▉      | 24/61 [48:10<1:25:43, 139.02s/it]

139


 41%|████      | 25/61 [50:36<1:24:32, 140.89s/it]

140


 43%|████▎     | 26/61 [52:52<1:21:17, 139.37s/it]

143


 44%|████▍     | 27/61 [54:31<1:12:14, 127.48s/it]

149


 46%|████▌     | 28/61 [57:01<1:13:46, 134.15s/it]

152


 48%|████▊     | 29/61 [1:00:29<1:23:22, 156.33s/it]

154


 49%|████▉     | 30/61 [1:04:16<1:31:38, 177.36s/it]

163


 51%|█████     | 31/61 [1:05:31<1:13:21, 146.72s/it]

168


 52%|█████▏    | 32/61 [1:06:35<58:57, 121.99s/it]  

170


 54%|█████▍    | 33/61 [1:08:53<59:07, 126.71s/it]

176


 56%|█████▌    | 34/61 [1:09:54<48:11, 107.11s/it]

177


 57%|█████▋    | 35/61 [1:11:03<41:27, 95.69s/it] 

179


 59%|█████▉    | 36/61 [1:12:43<40:19, 96.80s/it]

184


 61%|██████    | 37/61 [1:14:53<42:46, 106.94s/it]

188


 62%|██████▏   | 38/61 [1:15:58<36:08, 94.30s/it] 

196


 64%|██████▍   | 39/61 [1:17:42<35:39, 97.24s/it]

198


 66%|██████▌   | 40/61 [1:18:29<28:47, 82.25s/it]

204


 67%|██████▋   | 41/61 [1:19:47<26:54, 80.73s/it]

206


 69%|██████▉   | 42/61 [1:21:50<29:38, 93.61s/it]

212


 70%|███████   | 43/61 [1:22:51<25:05, 83.65s/it]

215


 72%|███████▏  | 44/61 [1:24:12<23:31, 83.05s/it]

216


 74%|███████▍  | 45/61 [1:25:48<23:08, 86.80s/it]

235


 75%|███████▌  | 46/61 [1:26:52<19:58, 79.92s/it]

236


 77%|███████▋  | 47/61 [1:28:17<19:03, 81.66s/it]

238


 79%|███████▊  | 48/61 [1:31:47<25:59, 119.98s/it]

245


 80%|████████  | 49/61 [1:33:42<23:43, 118.60s/it]

266


 82%|████████▏ | 50/61 [1:35:37<21:33, 117.56s/it]

269


 84%|████████▎ | 51/61 [1:38:12<21:25, 128.56s/it]

281


 85%|████████▌ | 52/61 [1:39:40<17:30, 116.68s/it]

284


 87%|████████▋ | 53/61 [1:40:46<13:30, 101.35s/it]

293


 89%|████████▊ | 54/61 [1:42:53<12:42, 108.93s/it]

294


 90%|█████████ | 55/61 [1:44:09<09:54, 99.13s/it] 

295


 92%|█████████▏| 56/61 [1:47:00<10:04, 120.82s/it]

297


 93%|█████████▎| 57/61 [1:49:32<08:40, 130.10s/it]

301


 95%|█████████▌| 58/61 [1:51:28<06:17, 125.82s/it]

304


 97%|█████████▋| 59/61 [1:54:29<04:44, 142.41s/it]

307


 98%|█████████▊| 60/61 [1:56:26<02:14, 134.87s/it]

308


100%|██████████| 61/61 [1:58:28<00:00, 131.01s/it]
