# **Chapter 2: GIS**

## **Part 1: Setting up the environment**

In [1]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
from shapely import wkt
from shapely.geometry import MultiPolygon, Polygon
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display
import ipywidgets as widgets
import sys
import os
from scripts.plot_map import plot_interactive_map

Read the dataset

In [2]:
# Load the survey responses into `df`; every score column below is derived from it.
# NOTE(review): "Classified" presumably means the original survey is confidential and
# this file is a randomised stand-in — confirm.
df = pd.read_csv('data/survey_random.csv') #Classified

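Download the geographical data (the two cells below are commented out; uncomment and run them once if the `geoBoundaries-KAZ-ADM1-all` folder is not present yet)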

In [3]:
# ! wget https://media.githubusercontent.com/media/wmgeolab/geoBoundaries/9469f09592ced973a3448cf66b6100b741b64c0d/releaseData/gbOpen/KAZ/ADM1/geoBoundaries-KAZ-ADM1-all.zip

In [4]:
# ! unzip geoBoundaries-KAZ-ADM1-all.zip -d ./geoBoundaries-KAZ-ADM1-all && rm -rf geoBoundaries-KAZ-ADM1-all.zip

Read geographical data

In [5]:
# Path is relative to the notebook's working directory and points into the folder
# that the (commented-out) unzip command above extracts to.
geojson_path = "geoBoundaries-KAZ-ADM1-all/geoBoundaries-KAZ-ADM1_simplified.geojson"
# Region boundaries as a GeoDataFrame; its `shapeName` column is the merge key used later.
gdf = gpd.read_file(geojson_path)

In [6]:
# Append a hand-built Shymkent boundary to `gdf`.
# NOTE(review): presumably the downloaded ADM1 file has no Shymkent polygon — confirm.
sh_name = "Shymkent"
sh_iso  = "KZ-SHY"
sh_id   = "9891525B68436750823948"
sh_grp  = "KAZ"
sh_typ  = "ADM1"
sh_wkt  = """POLYGON ((69.42977905273443 42.29850387573242, 69.440658569336 42.256446838378906,
69.5219955444336 42.27233505249035, 69.53115844726562 42.296615600585994, 69.57532501220714 42.28984832763672,
69.60591888427734 42.26734542846691, 69.64803314208996 42.285514831543026, 69.72393798828136 42.264495849609375,
69.71796417236334 42.33912658691412, 69.67385864257812 42.345897674560604, 69.67630004882812 42.362247467041016,
69.7154312133789 42.35989379882818, 69.66631317138683 42.408203125000114, 69.71104431152344 42.405490875244084,
69.69738006591803 42.42692184448242, 69.64393615722662 42.44665908813488, 69.61659240722656 42.41533660888672,
69.62030792236334 42.43984985351568, 69.58672332763678 42.44188690185547, 69.57182312011719 42.41804504394531,
69.52764129638683 42.42483901977545, 69.50846862792969 42.37234115600586, 69.46366119384766 42.37501907348633,
69.42977905273443 42.29850387573242))"""

geom = wkt.loads(sh_wkt)
# Keep the geometry type uniform when every existing shape is a MultiPolygon.
if gdf.geom_type.unique().tolist() == ['MultiPolygon'] and isinstance(geom, Polygon):
    geom = MultiPolygon([geom])

shymkent_attrs = {
    "shapeName": sh_name,
    "shapeISO":  sh_iso,
    "shapeGroup": sh_grp,
    "shapeType":  sh_typ,
    "shapeID":    sh_id,
    "geometry":   geom
}
# One value per existing column: known attributes are filled in, everything else stays None.
row = {col: shymkent_attrs.get(col) for col in gdf.columns}

new_gdf = gpd.GeoDataFrame([row], crs=gdf.crs)

# Only append when Shymkent is not there yet. Without this guard, re-running the cell
# stacks a duplicate row onto `gdf`, and every later `gdf.merge(...)` repeats the region.
shymkent_present = "shapeName" in gdf.columns and gdf["shapeName"].eq(sh_name).any()
if not shymkent_present:
    gdf = pd.concat([gdf, new_gdf], ignore_index=True)

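Create the dictionary that translates the Russian region names used in the survey (`Область`) into the English names matched against `shapeName` in the boundary data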

In [7]:
# Survey labels for the three cities that are listed separately from the oblasts.
city_names = {
    "г.Нур-Султан": "Astana",
    "г.Шымкент": "Shymkent",
    "г.Алматы": "Almaty",
}

# Survey labels for the oblasts.
# NOTE(review): "South Kazakhstan Region" only appears on the map if the boundary
# file has a shape with exactly that shapeName — confirm.
oblast_names = {
    "Алматинская": "Almaty Region",
    "Жамбылская": "Jambyl Region",
    "Западно-Казахстанская": "West Kazakhstan Region",
    "Туркестанская": "Turkistan Region",
    "Южно-Казахстанская": "South Kazakhstan Region",
    "Северо-Казахстанская": "North Kazakhstan Region",
    "Костанайская": "Kostanay Region",
    "Мангистауская": "Mangystau Region",
    "Актюбинская": "Aktobe Region",
    "Акмолинская": "Akmola Region",
    "Атырауская": "Atyrau Region",
    "Восточно-Казахстанская": "East Kazakhstan Region",
    "Павлодарская": "Pavlodar Region",
    "Кызылординская": "Kyzylorda Region",
    "Карагандинская": "Karaganda Region",
}

# Russian survey name -> English name; cities first, then oblasts.
name_map = {**city_names, **oblast_names}

In [8]:
# Life-expectancy table reshaped from one column per year (wide) into
# one row per (region, year), with `year` stored as an integer.
le_long = (
    pd.read_csv('data/LE_2017_2021.csv')
    .melt(
        id_vars=['Region'],
        value_vars=['2017', '2018', '2019', '2020', '2021'],
        var_name='year',
        value_name='life_expectancy',
    )
    .rename(columns={'Region': 'region_en'})
    .astype({'year': int})
)

## **Part 2: Ecology map**

Map the eco survey

In [9]:
# Survey column holding the ecology answer.
eco_question = 'q8. Оцените, пожалуйста, экологическую ситуацию в Вашем населенном пункте'

# Ordinal coding of the answers; any answer not listed here becomes NaN in the score.
eco_mapping = {'Плохая': 0, 'Удовлетворительная': 1, 'Хорошая': 2}

df['eco_score'] = df[eco_question].map(eco_mapping)

In [10]:
# Mean ecology score per survey region, lowest first, with the English name attached.
df_eco = (
    df[['eco_score', 'Область']]
    .groupby('Область')
    .mean()
    .reset_index()
    .sort_values(by='eco_score')
    .assign(region_en=lambda scores: scores['Область'].map(name_map))
)

In [11]:
# Left join on the English region name: every boundary row of `gdf` is kept, and
# shapes without a matching `region_en` end up with NaN in the score columns.
merged_eco = gdf.merge(df_eco, left_on="shapeName", right_on="region_en", how="left")

In [12]:
# First-of-month timestamp assembled from the survey's year and month columns.
df['date'] = pd.to_datetime({'year': df['Год'], 'month': df['Месяц'], 'day': 1})

# Mean ecology score per (region, month), ordered by region and then by date.
ts = df.groupby(['Область', 'date'], as_index=False)['eco_score'].mean()
ts = ts.sort_values(['Область', 'date'])
ts['region_en'] = ts['Область'].map(name_map)

# One frame per English region name; dropna=False keeps a NaN-keyed group
# for survey regions that have no entry in name_map.
ts_eco = {
    region: frame[['date', 'eco_score']].reset_index(drop=True)
    for region, frame in ts.groupby('region_en', dropna=False)
}

Plot the map!

In [13]:
# Switch IPython to terse tracebacks, then draw the interactive ecology map from the
# merged boundaries, the per-region time series and the life-expectancy table.
# y_range equals the min/max of eco_mapping (0-2); presumably the y-axis limits of the
# time-series panel — confirm in scripts/plot_map.py.
%xmode Minimal
plot_interactive_map(merged_eco,
                     ts_eco,
                     le_long,
                     "eco_score",
                     "Kazakhstan: Ecology Score by Region (2017-2021)",
                     y_range=[0.0, 2.0])

Exception reporting mode: Minimal


HBox(children=(FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'customdata': array([['Pav…

## **Part 3: Health map**

Map the health survey

In [14]:
# Survey column holding the self-assessed health answer.
health_question = 'q10a. В целом как бы Вы оценили свое здоровье в настоящее время?'

# Ordinal coding of the answers; any answer not listed here becomes NaN in the score.
health_mapping = {
    'Ужасное': 0,
    'Плохое': 1,
    'Удовлетворительное': 2,
    'Хорошее': 3,
    'Прекрасное': 4,
}

df['health_score'] = df[health_question].map(health_mapping)

In [15]:
# Mean health score per survey region, lowest first, with the English name attached.
df_health = (
    df[['health_score', 'Область']]
    .groupby('Область')
    .mean()
    .reset_index()
    .sort_values(by='health_score')
    .assign(region_en=lambda scores: scores['Область'].map(name_map))
)

In [16]:
# Left join on the English region name: every boundary row of `gdf` is kept, and
# shapes without a matching `region_en` end up with NaN in the score columns.
merged_health = gdf.merge(df_health, left_on="shapeName", right_on="region_en", how="left")

In [17]:
# First-of-month timestamp assembled from the survey's year and month columns.
df['date'] = pd.to_datetime({'year': df['Год'], 'month': df['Месяц'], 'day': 1})

# Mean health score per (region, month), ordered by region and then by date.
ts = df.groupby(['Область', 'date'], as_index=False)['health_score'].mean()
ts = ts.sort_values(['Область', 'date'])
ts['region_en'] = ts['Область'].map(name_map)

# One frame per English region name; dropna=False keeps a NaN-keyed group
# for survey regions that have no entry in name_map.
ts_health = {
    region: frame[['date', 'health_score']].reset_index(drop=True)
    for region, frame in ts.groupby('region_en', dropna=False)
}

In [18]:
# Switch IPython to terse tracebacks, then draw the interactive health map from the
# merged boundaries, the per-region time series and the life-expectancy table.
# y_range equals the min/max of health_mapping (0-4); presumably the y-axis limits of the
# time-series panel — confirm in scripts/plot_map.py.
%xmode Minimal
plot_interactive_map(merged_health,
                     ts_health,
                     le_long,
                     "health_score",
                     "Kazakhstan: Health Score by Region (2017-2021)",
                     y_range=[0.0, 4.0])

Exception reporting mode: Minimal


HBox(children=(FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'customdata': array([['Pav…

## **Part 4: Government medicine map**

In [19]:
# Survey column holding the rating of state medical institutions.
gov_med_question = 'q9.1. Оцените, пожалуйста, качество медицинских услуг в государственных медицинских учреждениях (поликлиники, больницы) в Казахстане'

# Ordinal coding of the answers (scale starts at 1); unlisted answers become NaN.
gov_med_mapping = {'Плохое': 1, 'Удовлетворительное': 2, 'Хорошее': 3}

df['gov_med_score'] = df[gov_med_question].map(gov_med_mapping)

In [20]:
# Mean government-medicine score per survey region, lowest first, with the English name attached.
df_gov_med = (
    df[['gov_med_score', 'Область']]
    .groupby('Область')
    .mean()
    .reset_index()
    .sort_values(by='gov_med_score')
    .assign(region_en=lambda scores: scores['Область'].map(name_map))
)

In [21]:
# Left join on the English region name: every boundary row of `gdf` is kept, and
# shapes without a matching `region_en` end up with NaN in the score columns.
merged_gov_med = gdf.merge(df_gov_med, left_on="shapeName", right_on="region_en", how="left")

In [22]:
# First-of-month timestamp assembled from the survey's year and month columns.
df['date'] = pd.to_datetime({'year': df['Год'], 'month': df['Месяц'], 'day': 1})

# Mean government-medicine score per (region, month), ordered by region and then by date.
ts = df.groupby(['Область', 'date'], as_index=False)['gov_med_score'].mean()
ts = ts.sort_values(['Область', 'date'])
ts['region_en'] = ts['Область'].map(name_map)

# One frame per English region name; dropna=False keeps a NaN-keyed group
# for survey regions that have no entry in name_map.
ts_gov_med = {
    region: frame[['date', 'gov_med_score']].reset_index(drop=True)
    for region, frame in ts.groupby('region_en', dropna=False)
}

In [23]:
# Switch IPython to terse tracebacks, then draw the interactive government-medicine map
# from the merged boundaries, the per-region time series and the life-expectancy table.
# y_range equals the min/max of gov_med_mapping (1-3); presumably the y-axis limits of the
# time-series panel — confirm in scripts/plot_map.py.
%xmode Minimal
plot_interactive_map(merged_gov_med,
                     ts_gov_med,
                     le_long,
                     "gov_med_score",
                     "Kazakhstan: Government Medicine Score by Region (2017-2021)",
                     y_range=[1.0, 3.0])

Exception reporting mode: Minimal


HBox(children=(FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'customdata': array([['Pav…

## **Part 5: Private medicine map**

In [24]:
# Survey column holding the rating of private clinics (the double space is part of the column name).
priv_med_question = 'q9.2. Оцените, пожалуйста, качество медицинских услуг в  частных клиниках в Казахстане'

# Ordinal coding of the answers (scale starts at 1); unlisted answers become NaN.
priv_med_mapping = {'Плохое': 1, 'Удовлетворительное': 2, 'Хорошее': 3}

df['priv_med_score'] = df[priv_med_question].map(priv_med_mapping)

In [25]:
# Mean private-medicine score per survey region, lowest first, with the English name attached.
df_priv_med = (
    df[['priv_med_score', 'Область']]
    .groupby('Область')
    .mean()
    .reset_index()
    .sort_values(by='priv_med_score')
    .assign(region_en=lambda scores: scores['Область'].map(name_map))
)

In [26]:
# Left join on the English region name: every boundary row of `gdf` is kept, and
# shapes without a matching `region_en` end up with NaN in the score columns.
merged_priv_med = gdf.merge(df_priv_med, left_on="shapeName", right_on="region_en", how="left")

In [27]:
# First-of-month timestamp assembled from the survey's year and month columns.
df['date'] = pd.to_datetime({'year': df['Год'], 'month': df['Месяц'], 'day': 1})

# Mean private-medicine score per (region, month), ordered by region and then by date.
ts = df.groupby(['Область', 'date'], as_index=False)['priv_med_score'].mean()
ts = ts.sort_values(['Область', 'date'])
ts['region_en'] = ts['Область'].map(name_map)

# One frame per English region name; dropna=False keeps a NaN-keyed group
# for survey regions that have no entry in name_map.
ts_priv_med = {
    region: frame[['date', 'priv_med_score']].reset_index(drop=True)
    for region, frame in ts.groupby('region_en', dropna=False)
}

In [28]:
# Switch IPython to terse tracebacks, then draw the interactive private-medicine map
# from the merged boundaries, the per-region time series and the life-expectancy table.
# y_range equals the min/max of priv_med_mapping (1-3); presumably the y-axis limits of the
# time-series panel — confirm in scripts/plot_map.py.
%xmode Minimal
plot_interactive_map(merged_priv_med,
                     ts_priv_med,
                     le_long,
                     "priv_med_score",
                     "Kazakhstan: Private Medicine Score by Region (2017-2021)",
                     y_range=[1.0, 3.0])

Exception reporting mode: Minimal


HBox(children=(FigureWidget({
    'data': [{'coloraxis': 'coloraxis',
              'customdata': array([['Pav…

In [29]:
# Run the project's export script through the shell; in the recorded run it printed
# the location of the saved docs/interactive_maps_all.html file.
! python scripts/export2html.py

Saved: /Users/ilyapopov/Desktop/PHH-25/docs/interactive_maps_all.html
