In [1]:
import codecs
import io
import itertools
import json
import math
import os
import re
import warnings
from urllib.request import urlopen

from bs4 import BeautifulSoup
from IPython.display import display, Image, SVG
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import openpyxl
import pandas as pd
import requests
import seaborn as sns
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder
import wikipedia

### Population Statistics from Wikipedia

In [2]:
# Scrape one "wikitable" from the Wikipedia article on Ukrainian demographics
# and keep it as the DataFrame `demo`.
url = "https://en.wikipedia.org/wiki/Demographics_of_Ukraine"
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(response.content, "html.parser")
table = soup.find_all('table', {'class': "wikitable"})
# NOTE(review): index 2 depends on the article's current layout; the displayed
# tail suggests it is the yearly vital-statistics table -- re-check if the page changes.
# read_html returns a list of frames; literal HTML must be wrapped in StringIO
# because passing a raw HTML string is deprecated in recent pandas.
demo = pd.read_html(io.StringIO(str(table[2])))[0]
del url, response, table, soup  # keep scratch names out of the notebook namespace
demo.tail()

Unnamed: 0.1,Unnamed: 0,Average population,Live births,Deaths,Natural change,"Crude birth rate (per 1,000)","Crude death rate (per 1,000)","Natural change (per 1,000)",Fertility rates,Urban fertility,Rural fertility,"Abortions, reported"
72,2017,42584542.0,363987,574123.0,-210136.0,9.4,14.5,-5.1,1.37,1.28,1.52,94665.0
73,2018,42386403.0,335874,587665.0,-251791.0,8.7,14.8,-6.1,1.3,1.22,1.43,46552.0
74,2019,42153201.0,308817,581114.0,-272297.0,8.1,14.7,-6.6,1.23,1.16,1.34,74606.0
75,2020,41902416.0,293457,616835.0,-323378.0,7.8,15.9,-8.1,1.22,1.13,1.36,
76,2021,41167336.0,271984,714263.0,-442279.0,6.7,17.3,-10.6,,,,


### Political Violence Events dataset

In [3]:
# Political-violence events and fatalities by month/year for Ukraine, read from
# the "Data" sheet of a workbook hosted on data.humdata.org.
pve_workbook = 'https://data.humdata.org/dataset/7b36830b-c033-4a06-b812-9940baec603b/resource/e122ca1c-9463-4e3a-8731-8a85fab2a15e/download/ukraine_political_violence_events_and_fatalities_by_month-year.xlsx'
PVE = pd.read_excel(pve_workbook, sheet_name="Data")
del pve_workbook  # scratch name only; nothing downstream needs the address
# Preview the ten most recent rows.
PVE.tail(10)

Unnamed: 0,Country,Year,Month,Events,Fatalities
41,Ukraine,2021,June,467,24
42,Ukraine,2021,July,629,7
43,Ukraine,2021,August,554,12
44,Ukraine,2021,September,613,9
45,Ukraine,2021,October,707,4
46,Ukraine,2021,November,937,12
47,Ukraine,2021,December,545,4
48,Ukraine,2022,January,426,7
49,Ukraine,2022,February,1327,393
50,Ukraine,2022,March,740,1246


### COVID-19 Vaccine Doses in HRP (Humanitarian Response Plan) Countries

In [4]:
# Vaccine-dose sheet, read from a published Google Sheets CSV export.
vaccine_doses_csv = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTVzu79PPTfaA2syevOQfyRRjy63dJWitqu0fFbXIQCzoUn9K9TiMWMRvFGg1RBsnLmgYugzSEiAye2/pub?gid=992438980&single=true&output=csv'
VacD = pd.read_csv(vaccine_doses_csv)
del vaccine_doses_csv  # scratch name only
# Display just the row(s) whose Country is Ukraine; VacD itself stays unfiltered.
VacD[VacD['Country'].eq('Ukraine')]

Unnamed: 0,Country,ISO3,SFP/AMC,Population (UNDESA),COVAX Forecast Total,COVAX AstraZeneca/SII,COVAX AstraZeneca/SKBio,COVAX Pfizer/BioNTech,COVAX Delivered,Other Delivered,Total Delivered,Population Covered (Two Dose),(DEPRECATAED) Other Delivered Source Country,(DEPRECATAED) Other Delivered Source URLs
27,Ukraine,UKR,AMC,43733759,1893000,0,1776000,117000,8414990,38193842,46608832,53.29%,Ukraine (procured) - India (AZ),https://www.france24.com/en/live-news/20210223...


### COVID-19 Vaccine Deliveries in HRP Countries

In [5]:
# Vaccine-delivery sheet, read from the same published Google Sheets document
# as the doses sheet (different gid).
deliveries_csv = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTVzu79PPTfaA2syevOQfyRRjy63dJWitqu0fFbXIQCzoUn9K9TiMWMRvFGg1RBsnLmgYugzSEiAye2/pub?gid=1635331605&single=true&output=csv'
Del = pd.read_csv(deliveries_csv)
del deliveries_csv  # scratch name only
# Display the last ten Ukraine rows; Del itself stays unfiltered.
Del[Del['Country'].eq('Ukraine')].tail(10)

Unnamed: 0,Country,ISO3,Pipeline,Vaccine,Funder,Doses,Source Url,NewPipeline,Date,Unnamed: 9
619,Ukraine,UKR,Other,,Procured,12907512,https://www.unicef.org/supply/covid-19-vaccine...,Procured,December,
653,Ukraine,UKR,COVAX,Pfizer/BioNTech,USA,333450,https://www.unicef.org/supply/covid-19-vaccine...,COVAX/USA,January,
654,Ukraine,UKR,Other,,Procured,178740,https://www.unicef.org/supply/covid-19-vaccine...,Procured,January,
666,Ukraine,UKR,Other,AstraZeneca,Slovakia,60000,https://twitter.com/VladoBilcik/status/1430123...,Slovakia,August,
667,Ukraine,UKR,Other,AstraZeneca,Greece,100000,https://112.international/ukraine-top-news/gre...,Greece,August,
668,Ukraine,UKR,Other,,Procured,348587,https://www.unicef.org/supply/covid-19-vaccine...,Procured,January,
677,Ukraine,UKR,Other,,Procured,354902,https://www.unicef.org/supply/covid-19-vaccine...,Procured,Feb 2022,
693,Ukraine,UKR,Other,,Procured,944065,https://www.unicef.org/supply/covid-19-vaccine...,Procured,Feb 2022,
708,Ukraine,UKR,Other,,Procured,428362,https://www.unicef.org/supply/covid-19-vaccine...,Procured,Feb 2022,
722,Ukraine,UKR,Other,,Procured,204525,https://www.unicef.org/supply/covid-19-vaccine...,Procured,Feb 2022,


### Health Indicators for Ukraine

In [6]:
# Load the Ukraine health-indicators CSV from data.humdata.org and trim it to
# the columns worth displaying. Built as one pipeline from the raw download so
# re-running the cell always gives the same frame.
url = 'https://data.humdata.org/dataset/960fa5c9-7f62-422d-af59-34d69b1bc2ad/resource/da2eb95d-53ff-4b9e-a700-2b4d5a157e1c/download/health_indicators_ukr.csv'
HIU = (
    pd.read_csv(url, low_memory=False)
    # Keep only columns with at least 87 non-null values. `how` and `thresh`
    # are mutually exclusive (pandas >= 2.0 raises TypeError if both are
    # given); older versions ignored `how` when `thresh` was set, so passing
    # `thresh` alone keeps the original result.
    .dropna(axis=1, thresh=87)
    # Drop every column whose name contains "CODE" or "URL".
    .pipe(lambda frame: frame.drop(columns=frame.filter(regex='CODE|URL').columns))
    .drop(columns=['STARTYEAR', 'ENDYEAR', 'WORLDBANKINCOMEGROUP (DISPLAY)',
                   'CHILDCAUSE (DISPLAY)', 'COUNTRY (DISPLAY)', 'Comments'])
    # NOTE(review): row label 0 is presumably the HXL hashtag row that sits
    # under the header in HDX exports -- confirm against the raw file.
    .drop(index=0)
)
del url  # keep scratch names out of the notebook namespace
HIU.head()

Unnamed: 0,GHO (DISPLAY),PUBLISHSTATE (DISPLAY),YEAR (DISPLAY),REGION (DISPLAY),AGEGROUP (DISPLAY),SEX (DISPLAY),GHECAUSES (DISPLAY),Display Value,Numeric,Low,High
1,"Age-standardized DALYs (per 100,000)",Published,2012,Europe,,,Communicable & other Group I,4935,4934.62207,,
2,"Age-standardized DALYs (per 100,000)",Published,2012,Europe,,,Noncommunicable diseases,26107,26106.69922,,
3,"Age-standardized DALYs (per 100,000)",Published,2012,Europe,,,Injuries,4079,4079.29712,,
4,Adult mortality rate (probability of dying bet...,Published,2004,Europe,,Male,,387,387.1917,,
5,Adult mortality rate (probability of dying bet...,Published,2004,Europe,,Female,,145,145.3291,,


### FTS Annual Requirements and Funding Data for Ukraine

In [7]:
# FTS requirements-and-funding table for Ukraine, loaded from data.humdata.org
# and reduced to named plans only.
fts_csv = 'https://data.humdata.org/dataset/3ade4119-fa7c-476b-94a9-f001c6c8e7ba/resource/5c5eba7c-8665-4c20-b307-24873fc66473/download/fts_requirements_funding_ukr.csv'
FTSAR = (
    pd.read_csv(fts_csv)
    .drop(columns=['countryCode', 'id', 'code', 'typeId'])
    # NOTE(review): row label 0 is presumably the HXL hashtag row -- confirm.
    .drop(index=0)
    # Turn the 'Not specified' placeholder into a real missing value ...
    .assign(name=lambda frame: frame['name'].replace('Not specified', np.nan))
    # ... then keep only rows that still have a plan name.
    .loc[lambda frame: frame['name'].notna()]
)
del fts_csv  # scratch name only
FTSAR.head(10)

Unnamed: 0,name,typeName,startDate,endDate,year,requirements,funding,percentFunded
3,Ukraine Humanitarian Response Plan 2022,Humanitarian response plan,2022-01-01,2022-12-31,2022,190349122,,
4,Ukraine Flash Appeal 2022,Flash appeal,2022-03-01,2022-05-31,2022,1139900000,224100952.0,20.0
6,Ukraine Humanitarian Response Plan 2021,Humanitarian response plan,2021-01-01,2021-12-31,2021,167982922,103290692.0,61.0
8,Ukraine 2020,Humanitarian response plan,2020-01-01,2020-12-31,2020,204653234,122153695.0,60.0
10,Ukraine Humanitarian Response Plan (HRP) 2019,Humanitarian response plan,2019-01-01,2019-12-31,2019,163640038,86604543.0,53.0
12,Ukraine Humanitarian Response Plan 2018,Humanitarian response plan,2018-01-01,2018-12-31,2018,186909122,70773032.0,38.0
14,Ukraine Humanitarian Response Plan 2017,Humanitarian response plan,2017-01-01,2017-12-31,2017,203608611,74606618.0,37.0
16,Ukraine 2016,Humanitarian response plan,2016-01-01,2015-12-31,2016,297907511,105289272.0,35.0
18,Ukraine 2015,Humanitarian response plan,2015-01-01,2015-12-31,2015,316000000,173400221.0,55.0
20,Ukraine PRP 2014,Flash appeal,2014-08-15,2014-12-31,2014,33201996,32665901.0,98.0


### Tuberculosis Indicators for Ukraine

In [8]:
# Tuberculosis indicators for Ukraine, loaded from data.humdata.org and
# narrowed to indicator name, year, and the numeric estimate with its bounds.
tb_csv = 'https://data.humdata.org/dataset/960fa5c9-7f62-422d-af59-34d69b1bc2ad/resource/43694242-0da7-4d32-b4b3-2c3a85b1ac84/download/tuberculosis_indicators_ukr.csv'
TB = pd.read_csv(tb_csv, low_memory=False)
del tb_csv  # scratch name only

# Remove every column whose name matches the CODE pattern, then the URL pattern.
for pattern in ('(CODE)', '(URL)'):
    TB = TB.drop(columns=TB.filter(regex=pattern).columns)
del pattern

TB = (
    TB
    # Discard columns that are empty in every row (done before row 0 is
    # removed, same as the original step order).
    .dropna(axis=1, how='all')
    .drop(columns=['STARTYEAR', 'ENDYEAR', 'COUNTRY (DISPLAY)', 'WORLDBANKINCOMEGROUP (DISPLAY)',
                   'Display Value', 'PUBLISHSTATE (DISPLAY)', 'REGION (DISPLAY)'])
    # NOTE(review): row label 0 is presumably the HXL hashtag row -- confirm.
    .drop(index=0)
)
# Displayed sorted by year; the sorted copy is not stored, so TB keeps file order.
TB.sort_values(by='YEAR (DISPLAY)')

Unnamed: 0,GHO (DISPLAY),YEAR (DISPLAY),Numeric,Low,High
3,Tuberculosis - new and relapse cases,1990,16465.00000,,
78,Number of prevalent tuberculosis cases,1990,36000.00000,18000.0,61000.0
4,Tuberculosis - new and relapse cases,1991,16713.00000,,
79,Number of prevalent tuberculosis cases,1991,43000.00000,22000.0,70000.0
5,Tuberculosis - new and relapse cases,1992,18140.00000,,
...,...,...,...,...,...
225,Confirmed cases of RR-/MDR-TB,2020,4257.00000,,
487,New or unknown treatment history cases: Pulmon...,2020,3771.00000,,
503,New cases: extrapulmonary,2020,1343.00000,,
550,"Relapse cases: Pulmonary, bacteriologically co...",2020,2527.00000,,


### Summary Data on Attacks on Aid Operations, Education, Health and Protection

In [9]:
# Incident-level health-attack data for Ukraine (first sheet of the workbook
# hosted on data.humdata.org), trimmed to populated, non-redundant columns.
# Built as one pipeline from the raw download so the cell is safe to re-run.
url = 'https://data.humdata.org/dataset/f21002e5-4e00-4c95-bff1-ecd698bf9ff4/resource/cc3414fa-d560-44c5-82fa-3f689b6d1a9a/download/2022-ukraine-health-data.xlsx'
AAO = (
    pd.read_excel(url, sheet_name=0)
    # Keep only columns with at least 3 non-null values. `how` and `thresh`
    # are mutually exclusive (pandas >= 2.0 raises TypeError if both are
    # given); older versions ignored `how` when `thresh` was set, so passing
    # `thresh` alone keeps the original result.
    .dropna(axis=1, thresh=3)
    .drop(columns=['Country', 'Country.1'])
    # NOTE(review): row label 0 is presumably the HXL hashtag row -- confirm.
    .drop(index=0)
)
del url  # keep scratch names out of the notebook namespace
AAO.head(10)

Unnamed: 0,Incident date,Perpetrator,Weapons use,Total health worker killed,Total health worker injured,Total number of attacks on facilities which reported damage,Health transportation damaged
1,2022-02,RussianStateMilitary,Explosives,,6.0,1.0,
2,2022-02,RussianStateMilitary,Explosives,,,,
3,2022-02,RussianStateMilitary,Explosives,,,1.0,
4,2022-02,RussianStateMilitary,Explosives,,,1.0,
5,2022-02,RussianStateMilitary,Explosives,,,,
6,2022-02,RussianStateMilitary,Explosives,,,,
7,2022-02,RussianStateMilitary,Explosives,,,1.0,
8,2022-02,RussianStateMilitary,Explosives,,,1.0,
9,2022-02,RussianStateMilitary,Explosives,1.0,1.0,,
10,2022-02,RussianStateMilitary,Explosives,,,,


### Ukraine Food Prices

In [14]:
# WFP food-price observations for Ukraine, loaded from data.humdata.org.
# Built as one pipeline from the raw download so the cell is safe to re-run.
url = 'https://data.humdata.org/dataset/9b95de1b-d4e9-4c81-b2bb-db35bd9620e8/resource/1730560f-8e9f-4999-bec8-72118ac0ee5f/download/wfp_food_prices_ukr.csv'
# Columns that must be numeric for later use; cast in one vectorised step
# instead of calling float() element by element.
cols = ['longitude', 'latitude', 'price', 'usdprice']
UFP = (
    pd.read_csv(url, low_memory=False)
    # NOTE(review): row label 0 is presumably the HXL hashtag row -- confirm.
    # It has to go before the cast, as the original also dropped it first.
    .drop(index=0)
    .drop(columns=['priceflag', 'pricetype', 'admin1', 'admin2'])
    .astype(dict.fromkeys(cols, float))
)
del url  # keep scratch names out of the notebook namespace
UFP.tail(10)

Unnamed: 0,date,market,latitude,longitude,category,commodity,unit,currency,price,usdprice
68279,2022-01-15,Zhytomyr,50.25465,28.658667,non-food,Vasodilating agents (imported),10 tablets,UAH,53.35,1.8177
68280,2022-01-15,Zhytomyr,50.25465,28.658667,non-food,Vasodilating agents (local),10 tablets,UAH,11.66,0.3973
68281,2022-01-15,Zhytomyr,50.25465,28.658667,oil and fats,Butter,200 G,UAH,52.17,1.7775
68282,2022-01-15,Zhytomyr,50.25465,28.658667,oil and fats,Fat (salo),KG,UAH,104.74,3.5685
68283,2022-01-15,Zhytomyr,50.25465,28.658667,oil and fats,Oil (sunflower),L,UAH,61.05,2.08
68284,2022-01-15,Zhytomyr,50.25465,28.658667,vegetables and fruits,Apples,KG,UAH,12.78,0.4354
68285,2022-01-15,Zhytomyr,50.25465,28.658667,vegetables and fruits,Beetroots,KG,UAH,15.58,0.5308
68286,2022-01-15,Zhytomyr,50.25465,28.658667,vegetables and fruits,Cabbage,KG,UAH,17.01,0.5795
68287,2022-01-15,Zhytomyr,50.25465,28.658667,vegetables and fruits,Carrots,KG,UAH,13.56,0.462
68288,2022-01-15,Zhytomyr,50.25465,28.658667,vegetables and fruits,Onions,KG,UAH,13.93,0.4746


### Internally Displaced Persons Estimates

In [27]:
# Internally-displaced-persons estimates (first sheet of the UNHCR protection
# cluster workbook hosted on data.humdata.org).
idp_workbook = 'https://data.humdata.org/dataset/0d36e8ad-d2e8-4646-babd-61a41f99159a/resource/af76a684-de9a-4e14-8880-e4373c3763b8/download/idp_estimation_08_03_2022-unhcr-protection-cluster.xlsx'
IDP = pd.read_excel(idp_workbook, sheet_name=0)
del idp_workbook  # scratch name only
IDP = (
    IDP
    # The sheet has no usable header row, so pandas auto-names the columns.
    .rename(columns={"Unnamed: 0": "Row Labels", "Unnamed: 1": "Sum of IDP estimation"})
    # NOTE(review): these row labels are presumably title/group-heading rows of
    # a pivot-table export -- verify against the workbook if the file changes.
    .drop(index=[0, 1, 2, 9, 17])
)
IDP.head(10)

Unnamed: 0,Row Labels,Sum of IDP estimation
3,Chernivetska,155502
4,Lvivska,153014
5,Odeska,146175
6,Vinnytska,64589
7,Volynska,168255
8,Zakarpatska,479366
10,Chernivetska,64711
11,Ivano-Frankivska,88758
12,Khmelnytska,91933
13,Lvivska,233870


In [2]:
# TODO(review): placeholder cell with no code -- add the intended analysis step or delete the cell