In [44]:
import pandas as pd
import numpy as np

df_ent = pd.read_csv("../data/raw/enterprises03.csv", encoding = "ISO-8859-1", sep=";")
df_pop = pd.read_csv("../data/raw/population-15-and-older.csv", encoding = "ISO-8859-1", sep=";")
df_mun = pd.read_csv("../data/raw/kosovo-municipalities.csv")

In [45]:
# Merge with coordinates after you have everything
df = pd.merge(df_ent, df_pop, how="left", left_on = "komuna", right_on = "Komuna")
df.replace(":", np.nan, inplace=True)

In [46]:
# Load df with komunas and their respective id's
df_map = pd.read_csv("../data/raw/id_map.csv", encoding = "ISO-8859-1", sep=",", names=["id", "komuna"], header=0)
id_map = pd.Series(df_map["id"].values, index=df_map["komuna"]).to_dict()
# Manual replaces to get an id
df.replace("F.Kosovë", "Fushë Kosovë", inplace=True, regex=True)
df.replace("Skenderaj", "Skënderaj", inplace=True, regex=True)
df.replace("Shtërpce", "Shtërpcë", inplace=True, regex=True)
df.replace("Zveqan", "Zveçan", inplace=True, regex=True)
df.replace("Juniku", "Junik", inplace=True, regex=True)
df.replace("Hani Elezit", "Hani i Elezit", inplace=True, regex=True)
df.replace("Graçanica", "Graçanicë", inplace=True, regex=True)
df.replace("Mitr. Veriore", "Mitrovicë Veriore", inplace=True, regex=True)
# get id
df["id"] = df["komuna"].replace(to_replace=id_map)

In [47]:
# get komuna without special characters to join with geo data
df["komuna_raw"] = df["komuna"].values
# remove special characters
df["komuna_raw"].replace("ë", "e", inplace=True, regex=True)
df["komuna_raw"].replace("ç", "c", inplace=True, regex=True)

In [48]:
# joined
df = pd.merge(df, df_mun[["X", "Y", "nam"]], how="left", left_on = "komuna_raw", right_on = "nam")

In [52]:
df.replace("Gllogoc", "Drenas", inplace=True, regex=True)  # Replaced later because of join
df.replace(np.nan, 0, inplace=True, regex=True)

In [58]:
# Generate new column
df["Gjithsej"] = pd.to_numeric(df["Gjithsej"])
df["Total"] = pd.to_numeric(df["Total"])
df["business_density"] = df["Gjithsej"] / (df["Total"]/1000)
df["business_density"].replace(np.inf, 0, inplace=True, regex=True)

In [68]:
# Find the highest economy section
df_sections = df.melt(
    id_vars=[
        "periudha",
        "viti",
        "komuna",
        "Gjithsej",
        "Gjinia",
        "Komuna",
        "Total",
        "id",
        "komuna_raw",
        "X",
        "Y",
        "nam",
        "business_density",
    ],
    var_name="section",
    value_name="section_amount",
)

In [78]:
df_sections["section_amount"].replace("-", 0, inplace=True, regex=True)
df_sections["section_amount"] = pd.to_numeric(df_sections["section_amount"])

In [96]:
def max_section(municipality):
    temp = df_sections.loc[df_sections["komuna"]==municipality]
    return temp["section"].iloc[temp['section_amount'].values.argmax()][2:]

In [97]:
df["section"] = df_sections["komuna"].apply(max_section)

In [99]:
df[
    [
        "komuna",
        "Gjithsej",
        "Total",
        "X",
        "Y",
        "id",
        "business_density",
        "section",
    ]
].to_csv("../data/interim/joined.csv", index=False)

In [115]:
dff = df_sections.loc[df_sections["komuna"]=="Prishtinë"]
dff["section"] = dff["section"].str[2:].values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [117]:
dff[["section", "section_amount"]].to_csv("../data/interim/prishtina_sections.csv", index=False)

In [123]:
# the top sections growing by number. Doesn't show much beside Prishtina
df_sections[df_sections["komuna"]!="Gjithsej"].sort_values("section_amount", ascending=False).head(10)

Unnamed: 0,periudha,viti,komuna,Gjithsej,Gjinia,Komuna,Total,id,komuna_raw,X,Y,nam,business_density,section,section_amount
252,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,"G Tregti me shumicë dhe pakicë, riparim i mjet...",170
486,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,"M Aktivitete profesionale, shkencore dhe teknike",139
369,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,J Informacion dhe komunikim,79
330,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,I Akomodim dhe aktivitete të shërbimeve me us...,67
253,TM1,2019,Prizren,175,Gjithsej,Prizren,128582,18,Prizren,20.694142,42.211432,Prizren,1.360999,"G Tregti me shumicë dhe pakicë, riparim i mjet...",52
96,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,C Prodhim,50
525,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,N Aktivitete administrative dhe mbështetëse,46
213,TM1,2019,Prishtinë,759,Gjithsej,Prishtinë,147494,20,Prishtine,21.270819,42.699626,Prishtine,5.145972,F Ndërtimtari,44
258,TM1,2019,Ferizaj,183,Gjithsej,Ferizaj,77044,26,Ferizaj,21.143619,42.376065,Ferizaj,2.375266,"G Tregti me shumicë dhe pakicë, riparim i mjet...",42
237,TM1,2019,Gjilan,124,Gjithsej,Gjilan,66714,2,Gjilan,21.585033,42.403435,Gjilan,1.85868,"G Tregti me shumicë dhe pakicë, riparim i mjet...",42
