# Kosovo Population Animation

## Preprocessing

In [1]:
import pandas as pd

# Municipality table; the coordinate merge further down reads its X, Y and nam columns.
df_mun = pd.read_csv("../data/kosovo-municipalities.csv")

# Population table: semicolon-separated, decoded as ISO-8859-1.
df_pop = pd.read_csv(
    "../data/2012-2018_population.csv",
    sep=";",
    encoding="ISO-8859-1",
)

In [2]:
# Name the columns (municipality + one 31-December snapshot per year, 2012-2018),
# then reshape from wide to long: one row per (komuna, snapshot date).
snapshot_labels = [f"31-12-{year}" for year in range(2012, 2019)]
df_pop.columns = ["komuna", *snapshot_labels]
df_pop = pd.melt(df_pop, id_vars=["komuna"], var_name="year", value_name="population")

In [3]:
# Keep only the trailing four characters of each date label ("31-12-2012" -> "2012").
df_pop["year"] = df_pop["year"].str[-4:]

In [10]:
# Replace the Albanian letters "ë" and "ç" with plain "e" and "c" wherever they
# occur in the frame, so names can later be compared against the
# municipality names taken from file names.
df_pop.replace({"ë": "e", "ç": "c"}, regex=True, inplace=True)

### Municipalities data

In [11]:
from pathlib import Path

# Build one long table (komuna, year, population) covering every year 1948-2018
# for each municipality. Each CSV in municipalities-data/ supplies the census
# counts, df_pop supplies the 2012-2018 figures, and the years in between are
# filled by linear interpolation.
CENSUS_YEARS = ["1948", "1953", "1961", "1971", "1981", "1991", "2011"]
ALL_YEARS = [str(year) for year in range(1948, 2019)]
OUTPUT_COLUMNS = ["komuna", "year", "population"]

# Per-municipality frames are collected here and concatenated once at the end.
# Concatenating inside the loop re-copied the accumulated frame on every pass and
# mixed two column orders, which appears to be what triggered pandas' "sort"
# warning and made the resulting column order depend on the pandas version.
municipality_frames = []

# Path.glob yields files in no guaranteed order; sort so the row order is reproducible.
pathlist = sorted(Path("../data/municipalities-data/").glob('*.csv'))
for path in pathlist:
    # The municipality name is whatever follows the first "_" in the file name.
    # Slice the stem rather than the full path so that an underscore in a parent
    # directory cannot shift the result.
    komuna = path.stem[path.stem.find("_") + 1:]

    df = pd.read_csv(path, header=1, encoding="ISO-8859-1", sep=";")
    df.columns = ["komuna"] + CENSUS_YEARS
    df = df.melt(id_vars=["komuna"], var_name="year", value_name="population")
    # Append this municipality's 2012-2018 rows; both frames carry year as a string.
    df = pd.concat([df, df_pop.loc[df_pop["komuna"] == komuna]], ignore_index=True)

    # Spread the known values over the full 1948-2018 range; years without data become NaN.
    df_temp = (
        df.set_index("year")[["population"]]
        .reindex(index=ALL_YEARS)
        .rename_axis("year")
        .reset_index()
    )
    df_temp["komuna"] = komuna
    df_temp["year"] = pd.to_numeric(df_temp["year"])
    # Linear interpolation between the known data points fills the missing years.
    df_temp["population"] = pd.to_numeric(df_temp["population"]).interpolate()
    municipality_frames.append(df_temp[OUTPUT_COLUMNS])

if municipality_frames:
    df_all = pd.concat(municipality_frames, ignore_index=True)
else:
    # No input files found: keep the empty, correctly-labelled frame the loop started from.
    df_all = pd.DataFrame(columns=OUTPUT_COLUMNS)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [12]:
# Lookup table of municipality ids and names. The file's own header row is
# consumed (header=0) and replaced by the column names given here.
df_map = pd.read_csv(
    "id_map.csv",
    sep=",",
    encoding="ISO-8859-1",
    header=0,
    names=["id", "komuna"],
)

In [13]:
# Duplicate the name column; the copy is what gets its diacritics stripped next.
df_map = df_map.assign(komuna_raw=df_map["komuna"].values)

In [14]:
# Replace "ë" and "ç" with plain "e" and "c" in the raw-name column.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df_map["komuna_raw"]: an in-place method on a
# selected column operates on a temporary object and does not update df_map
# when pandas Copy-on-Write is active.
df_map["komuna_raw"] = df_map["komuna_raw"].replace({"ë": "e", "ç": "c"}, regex=True)

In [15]:
# Dictionary from the diacritic-free name to the original spelling in id_map.csv.
komuna_map = df_map.set_index("komuna_raw")["komuna"].to_dict()

In [16]:
# Dictionary from the original municipality spelling to its id.
id_map = df_map.set_index("komuna")["id"].to_dict()

In [17]:
# Expand "Novoberd" to "Novoberde".
# - Assigned back rather than replaced in place on df_all["komuna"], because an
#   in-place method on a selected column does not update df_all under pandas
#   Copy-on-Write.
# - The (?!e) lookahead skips values that already end in "e", so re-running the
#   cell cannot produce "Novoberdee".
df_all["komuna"] = df_all["komuna"].replace(r"Novoberd(?!e)", "Novoberde", regex=True)

In [18]:
# Expand "Shterpc" to "Shterpce".
# - Assigned back rather than replaced in place on df_all["komuna"], because an
#   in-place method on a selected column does not update df_all under pandas
#   Copy-on-Write.
# - The (?!e) lookahead skips values that already end in "e", so re-running the
#   cell cannot produce "Shterpcee".
df_all["komuna"] = df_all["komuna"].replace(r"Shterpc(?!e)", "Shterpce", regex=True)

### Add coordinates

In [19]:
# Attach the X/Y coordinates once the population table is complete.
# A left join keeps every population row, matching komuna against the "nam" column.
coordinates = df_mun[["X", "Y", "nam"]]
df = df_all.merge(coordinates, how="left", left_on="komuna", right_on="nam")

In [20]:
# The join key from the coordinates table duplicates komuna; remove it.
df.drop(columns=["nam"], inplace=True)

In [21]:
# Translate the diacritic-free names back to their original spelling;
# names missing from komuna_map pass through unchanged.
names_with_diacritics = df["komuna"].replace(komuna_map)
df["komuna_me_e"] = names_with_diacritics

In [22]:
# Restore the cedilla in "Deçan". The result is assigned back to the column:
# calling replace(..., inplace=True) on df["komuna_me_e"] acts on a temporary
# object and leaves df untouched when pandas Copy-on-Write is active.
df["komuna_me_e"] = df["komuna_me_e"].replace("Decan", "Deçan", regex=True)

In [23]:
# Substitute "Drenas" for "Gllogoc" wherever it appears in the frame.
df = df.replace(to_replace="Gllogoc", value="Drenas", regex=True)

In [24]:
# Look up each municipality's id by its spelled-out name.
# NOTE(review): names absent from id_map are left as-is, so an unmatched row
# ends up with its name in the id column — worth checking after this cell.
municipality_ids = df["komuna_me_e"].replace(id_map)
df["id"] = municipality_ids

In [25]:
# Write the finished table to disk without the row index.
output_csv = "../data/population.csv"
df.to_csv(output_csv, index=False)

## Animation

In [10]:
import imageio
from pathlib import Path

# Assemble the PNG frames in ./plots/ into a single animated GIF.
# Path.glob returns files in no guaranteed order, which would shuffle the
# animation; sort by file name so the frames play in a stable sequence.
# NOTE(review): assumes the frame file names sort chronologically — confirm.
pathlist = sorted(Path("./plots/").glob('*.png'))
images = [imageio.imread(frame_path) for frame_path in pathlist]

imageio.mimsave('gifs/kosovo-population-v3.gif', images, duration=0.25)

## Shapefile

In [103]:
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

In [104]:
# Seaborn theme first, then the default figure size, so the size is set after
# the theme has been applied.
sns.set(
    style="whitegrid",
    palette="pastel",
    color_codes=True,
)
plt.rc("figure", figsize=(10, 6))

In [105]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [106]:
import shapefile as shp  # Requires the pyshp package
import matplotlib.pyplot as plt

# Open the Kosovo shapefile with pyshp.
shapefile_path = "../data/kosovo-shapefile/XK_EA_2018.shp"
sf = shp.Reader(shapefile_path)

## Plotting

In [None]:
# A lot easier with gganimate in R