# Картографирование потенциала поля расселения

**Date**: 19.04.2023

**Course**: Methods of Spatial Analysis. Advanced Level. // HSE, Moscow, spring 2023

In [1]:
# Restart the kernel
import IPython
IPython.Application.instance().kernel.do_shutdown(True)
print('Ядро перезапущено, можно продолжать работать!')

Ядро перезапущено, можно продолжать работать!


: 

## 1. Import libraries

In [2]:
import pandas as pd
import geopandas as gpd
import folium
import math

## 2. Read and filter data

In [40]:
data = pd.read_csv('cities.csv', sep=';') 
selectedData = data.loc[(data['region'] == 'Краснодарский край') & (data['population'] > 5000)] 
# selectedData = data.loc[(data['region'] == 'Краснодарский край')] 

## 3. Explore the data

In [41]:
selectedData.head()

Unnamed: 0,id,region,municipality,settlement,type,population,children,latitude_dms,longitude_dms,latitude_dd,longitude_dd,oktmo,dadata,rosstat
167,1331,Краснодарский край,Красноармейский район,Старонижестеблиевская,ст-ца,10230,2253,45.22.46,038.26.22,45.379444,38.439444,3623425000.0,0,1
212,1695,Краснодарский край,Ленинградский район,Крыловская,ст-ца,5955,1159,46.06.09,039.17.37,46.1025,39.293611,3632404000.0,0,1
297,2285,Краснодарский край,Город Краснодар,Пригородный,п,6453,988,45.04.26,039.11.37,45.073889,39.193611,3701000000.0,1,0
346,2628,Краснодарский край,Туапсинский район,Джубга,пгт,5621,1196,44.19.21,038.42.16,44.3225,38.704444,3655154000.0,1,0
349,2648,Краснодарский край,Северский район,Афипский,пгт,22155,5255,44.54.12,038.50.28,44.903333,38.841111,3643152000.0,1,0


## 4. Create spatial data frame (and reproject)

In [42]:
spatialData = gpd.GeoDataFrame(selectedData, geometry=gpd.points_from_xy(selectedData.longitude_dd, selectedData.latitude_dd), crs='EPSG:4326').to_crs(crs='EPSG:32637')

## 5. Explore the data

In [43]:
spatialData.head()

Unnamed: 0,id,region,municipality,settlement,type,population,children,latitude_dms,longitude_dms,latitude_dd,longitude_dd,oktmo,dadata,rosstat,geometry
167,1331,Краснодарский край,Красноармейский район,Старонижестеблиевская,ст-ца,10230,2253,45.22.46,038.26.22,45.379444,38.439444,3623425000.0,0,1,POINT (456112.233 5025256.087)
212,1695,Краснодарский край,Ленинградский район,Крыловская,ст-ца,5955,1159,46.06.09,039.17.37,46.1025,39.293611,3632404000.0,0,1,POINT (522692.967 5105477.946)
297,2285,Краснодарский край,Город Краснодар,Пригородный,п,6453,988,45.04.26,039.11.37,45.073889,39.193611,3701000000.0,1,0,POINT (515239.892 4991176.803)
346,2628,Краснодарский край,Туапсинский район,Джубга,пгт,5621,1196,44.19.21,038.42.16,44.3225,38.704444,3655154000.0,1,0,POINT (476432.823 4907735.691)
349,2648,Краснодарский край,Северский район,Афипский,пгт,22155,5255,44.54.12,038.50.28,44.903333,38.841111,3643152000.0,1,0,POINT (487456.086 4972224.328)


In [44]:
spatialData.explore()

## 6. Data processing

In [45]:
spatialData_new = spatialData.set_index('id')

#### 6.1 Distance matrix

In [46]:
distanceMatrix = spatialData_new.geometry.apply(lambda g: spatialData_new.distance(g))
distanceMatrix

id,1331,1695,2285,2628,2648,3156,3483,4243,4341,4562,...,150541,150990,151000,151211,152320,152541,152588,154061,154794,155920
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1331,0.000000,104252.293468,68245.714665,119264.286249,61601.985134,55576.237017,174883.747113,68666.091501,37880.444867,92409.523531,...,77563.999761,86810.831341,25166.787477,156258.312892,99975.196954,266491.510541,90834.747404,98222.768625,159725.902784,83482.042288
1695,104252.293468,0.000000,114543.875967,203081.265411,137833.829057,114504.299703,129823.169802,35715.497280,127740.999100,195876.630826,...,87023.832097,46285.504107,127988.189866,117430.429588,198853.687392,257817.204378,195076.299375,86820.849849,56476.336734,184115.566213
2285,68245.714665,114543.875967,0.000000,92023.952438,33632.367987,14001.166287,123095.770297,86357.128514,104524.350858,132325.296239,...,134812.589020,124585.454768,84124.178316,104395.545375,150630.853187,198812.214044,123535.884139,51451.241410,168104.697383,133169.110533
2628,119264.286249,203081.265411,92023.952438,0.000000,65423.976262,88812.199714,199168.928489,170652.220168,136358.774282,107997.923251,...,196623.800030,201880.787461,113633.165491,182515.059692,135611.538930,224179.200210,93560.739054,140471.237549,258403.614363,122877.115364
2648,61601.985134,137833.829057,33632.367987,65423.976262,0.000000,24979.132831,155925.327863,105249.244416,90623.758386,102287.120073,...,137923.015049,137820.881630,67300.856137,137403.344632,123334.641496,219530.929763,92148.146776,84962.519097,193509.341713,106272.871213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152541,266491.510541,257817.204378,198812.214044,224179.200210,219530.929763,212424.389499,134355.710759,253620.864999,303322.521556,318486.648078,...,320785.343487,294173.814573,282585.185935,141637.132627,342029.250590,0.000000,306066.245861,180426.276094,282311.041758,325358.979320
152588,90834.747404,195076.299375,123535.884139,93560.739054,92148.146776,110853.362983,246488.870538,159496.526830,77873.858147,14552.299067,...,148782.117576,172648.716392,67814.025141,227686.211786,42483.623444,306066.245861,0.000000,171739.841589,250252.585779,33265.766990
154061,98222.768625,86820.849849,51451.241410,140471.237549,84962.519097,61711.085677,76987.912228,73794.218884,135782.689218,178910.641255,...,140504.282233,115180.807631,120837.697799,58189.553475,193749.983697,180426.276094,171739.841589,0.000000,131658.719433,176344.099108
154794,159725.902784,56476.336734,168104.697383,258403.614363,193509.341713,169593.134862,148050.078206,91893.800395,179504.759462,250141.380922,...,125093.275941,84958.269942,182628.208330,142071.115446,250732.054180,282311.041758,250252.585779,131658.719433,0.000000,236985.287376


#### 6.2 Join population to distance matrix

In [47]:
merged = pd.merge(distanceMatrix, spatialData_new[['population']], left_index=True, right_index=True)

In [48]:
merged.head()

Unnamed: 0_level_0,1331,1695,2285,2628,2648,3156,3483,4243,4341,4562,...,150990,151000,151211,152320,152541,152588,154061,154794,155920,population
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1331,0.0,104252.293468,68245.714665,119264.286249,61601.985134,55576.237017,174883.747113,68666.091501,37880.444867,92409.523531,...,86810.831341,25166.787477,156258.312892,99975.196954,266491.510541,90834.747404,98222.768625,159725.902784,83482.042288,10230
1695,104252.293468,0.0,114543.875967,203081.265411,137833.829057,114504.299703,129823.169802,35715.49728,127740.9991,195876.630826,...,46285.504107,127988.189866,117430.429588,198853.687392,257817.204378,195076.299375,86820.849849,56476.336734,184115.566213,5955
2285,68245.714665,114543.875967,0.0,92023.952438,33632.367987,14001.166287,123095.770297,86357.128514,104524.350858,132325.296239,...,124585.454768,84124.178316,104395.545375,150630.853187,198812.214044,123535.884139,51451.24141,168104.697383,133169.110533,6453
2628,119264.286249,203081.265411,92023.952438,0.0,65423.976262,88812.199714,199168.928489,170652.220168,136358.774282,107997.923251,...,201880.787461,113633.165491,182515.059692,135611.53893,224179.20021,93560.739054,140471.237549,258403.614363,122877.115364,5621
2648,61601.985134,137833.829057,33632.367987,65423.976262,0.0,24979.132831,155925.327863,105249.244416,90623.758386,102287.120073,...,137820.88163,67300.856137,137403.344632,123334.641496,219530.929763,92148.146776,84962.519097,193509.341713,106272.871213,22155


#### 6.3 Function to calculate population density potential (for each row)

In [49]:
def calculate_potential(row):
    array = merged.columns
    sum = 0
    for val in array:
        distance = row[val]
        if distance == 0:
            continue
        pop = merged.loc[merged[val] == row[val], 'population'].iloc[0]
        sum += pop/distance
    
    return math.log2(sum*row['population'])

#### 6.4 Create a subset to test algorithm

In [50]:
subset = merged.iloc[:10]

#### 6.5 Testing algorithm 

In [51]:
subset.loc['potential'] = subset.apply(calculate_potential, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset.loc['potential'] = subset.apply(calculate_potential, axis=1)


#### 6.6 Check the result

In [52]:
subset.head()

Unnamed: 0_level_0,1331,1695,2285,2628,2648,3156,3483,4243,4341,4562,...,150990,151000,151211,152320,152541,152588,154061,154794,155920,population
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1331,0.0,104252.293468,68245.714665,119264.286249,61601.985134,55576.237017,174883.747113,68666.091501,37880.444867,92409.523531,...,86810.831341,25166.787477,156258.312892,99975.196954,266491.510541,90834.747404,98222.768625,159725.902784,83482.042288,10230.0
1695,104252.293468,0.0,114543.875967,203081.265411,137833.829057,114504.299703,129823.169802,35715.49728,127740.9991,195876.630826,...,46285.504107,127988.189866,117430.429588,198853.687392,257817.204378,195076.299375,86820.849849,56476.336734,184115.566213,5955.0
2285,68245.714665,114543.875967,0.0,92023.952438,33632.367987,14001.166287,123095.770297,86357.128514,104524.350858,132325.296239,...,124585.454768,84124.178316,104395.545375,150630.853187,198812.214044,123535.884139,51451.24141,168104.697383,133169.110533,6453.0
2628,119264.286249,203081.265411,92023.952438,0.0,65423.976262,88812.199714,199168.928489,170652.220168,136358.774282,107997.923251,...,201880.787461,113633.165491,182515.059692,135611.53893,224179.20021,93560.739054,140471.237549,258403.614363,122877.115364,5621.0
2648,61601.985134,137833.829057,33632.367987,65423.976262,0.0,24979.132831,155925.327863,105249.244416,90623.758386,102287.120073,...,137820.88163,67300.856137,137403.344632,123334.641496,219530.929763,92148.146776,84962.519097,193509.341713,106272.871213,22155.0


#### 6.7 Apply algorithm to a main dataset

In [53]:
merged['potential'] = merged.apply(calculate_potential, axis=1)

In [54]:
merged.head()

Unnamed: 0_level_0,1331,1695,2285,2628,2648,3156,3483,4243,4341,4562,...,151000,151211,152320,152541,152588,154061,154794,155920,population,potential
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1331,0.0,104252.293468,68245.714665,119264.286249,61601.985134,55576.237017,174883.747113,68666.091501,37880.444867,92409.523531,...,25166.787477,156258.312892,99975.196954,266491.510541,90834.747404,98222.768625,159725.902784,83482.042288,10230,17.761026
1695,104252.293468,0.0,114543.875967,203081.265411,137833.829057,114504.299703,129823.169802,35715.49728,127740.9991,195876.630826,...,127988.189866,117430.429588,198853.687392,257817.204378,195076.299375,86820.849849,56476.336734,184115.566213,5955,15.951761
2285,68245.714665,114543.875967,0.0,92023.952438,33632.367987,14001.166287,123095.770297,86357.128514,104524.350858,132325.296239,...,84124.178316,104395.545375,150630.853187,198812.214044,123535.884139,51451.24141,168104.697383,133169.110533,6453,16.910898
2628,119264.286249,203081.265411,92023.952438,0.0,65423.976262,88812.199714,199168.928489,170652.220168,136358.774282,107997.923251,...,113633.165491,182515.059692,135611.53893,224179.20021,93560.739054,140471.237549,258403.614363,122877.115364,5621,15.669806
2648,61601.985134,137833.829057,33632.367987,65423.976262,0.0,24979.132831,155925.327863,105249.244416,90623.758386,102287.120073,...,67300.856137,137403.344632,123334.641496,219530.929763,92148.146776,84962.519097,193509.341713,106272.871213,22155,20.148873


## 7 Join the result to initial data 

In [55]:
spatialPotential = spatialData_new.merge(merged['potential'], left_index=True, right_index=True)

## 8 Save dataset to a file

In [56]:
spatialPotential.to_file('pop_density_potential.gpkg')