# Madrid districts dynamic network of migrations (2004-2017)

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the two input tables from the local ./data directory.
# NOTE(review): going by the file names, the first holds the complete
# origin/destination migration counts and the second per-district degree
# figures -- confirm against the actual files.
migrations_edges = pd.read_csv("./data/migrations_complete_madrid.csv")
migrations_nodes = pd.read_csv("./data/migrations_degree_madrid.csv")

In [3]:
# Keep only flows between two different, known districts:
#   1. drop rows whose origin equals their destination (internal moves),
#   2. drop rows where either endpoint is the literal "external",
#   3. drop rows with a missing value in any column (unknowns).
migrations_edges = (
    migrations_edges
    .loc[lambda flows: flows["origin"] != flows["destination"]]
    .loc[lambda flows: (flows["origin"] != "external") & (flows["destination"] != "external")]
    .dropna()
)

In [4]:
# Pair every (year, origin, destination) row with the row describing the
# opposite direction in the same year, i.e. (year, destination, origin).
# The shared key "year" stays a single column; the other overlapping columns
# get the "_x" (this direction) and "_y" (reverse direction) suffixes.
# NOTE(review): this is an inner join, so a flow with no row for the reverse
# direction in that year is dropped -- confirm the input always has both.
net_migrations_edges = pd.merge(
    migrations_edges,
    migrations_edges,
    how="inner",
    left_on=["year", "origin", "destination"],
    right_on=["year", "destination", "origin"],
    suffixes=("_x", "_y"),
)

In [5]:
# Net flow for each ordered pair: count in this direction minus the count in
# the reverse direction. Only strictly positive balances are kept, so each
# pair of districts contributes at most one edge per year and exact ties
# are dropped.
net_migrations_edges = (
    net_migrations_edges
    .assign(net_migrations=lambda flows: flows["count_x"] - flows["count_y"])
    .loc[
        lambda flows: flows["net_migrations"] > 0,
        ["origin_x", "destination_x", "year", "net_migrations"],
    ]
    .rename(columns={"origin_x": "origin", "destination_x": "destination"})
)

In [6]:
# District name -> class label ("<level>_<centre|suburbs>").
# Key order matters: node ids are later assigned by position in this dict.
node_modularity_class = {
    "Arganzuela": "middle_centre",
    "Barajas": "high_suburbs",
    "Carabanchel": "low_suburbs",
    "Centro": "middle_centre",
    "Chamartín": "high_centre",
    "Chamberí": "high_centre",
    "Ciudad Lineal": "high_suburbs",
    "Fuencarral - El Pardo": "high_suburbs",
    "Hortaleza": "high_suburbs",
    "Latina": "middle_centre",
    "Moncloa - Aravaca": "high_suburbs",
    "Moratalaz": "middle_suburbs",
    "Puente de Vallecas": "low_suburbs",
    "Retiro": "high_centre",
    "Salamanca": "high_centre",
    "San Blas - Canillejas": "middle_suburbs",
    "Tetuán": "middle_centre",
    "Usera": "low_suburbs",
    "Vicálvaro": "low_suburbs",
    "Villa de Vallecas": "low_suburbs",
    "Villaverde": "low_suburbs",
}

### Build nodes dataset

In [7]:
# Give every district a consecutive integer id (0, 1, 2, ...) following the
# key order of node_modularity_class.
node_ids = dict(zip(node_modularity_class, np.arange(len(node_modularity_class))))

# One row per district with columns: label (district name), id, Class.
nodes_data = (
    pd.DataFrame.from_dict(node_ids, orient="index")
    .reset_index()
    .rename(columns={"index": "label", 0: "id"})
    .assign(Class=lambda nodes: nodes["label"].map(node_modularity_class))
)
nodes_data.to_csv("./data/nodes_data.csv", index=False, encoding="utf-8")

### Build edges dataset

In [8]:
# Add the edge-table columns:
#   source / target : integer node ids looked up from the district names
#                     (a name missing from node_ids raises KeyError),
#   type            : the constant "directed",
#   start / end     : the row's year and the year after it,
#   weights         : net_migrations rescaled with MinMaxScaler, fitted over
#                     all rows at once (not per year).
mmscaler = MinMaxScaler()
net_migrations_edges = net_migrations_edges.assign(
    source=net_migrations_edges["origin"].apply(node_ids.__getitem__),
    target=net_migrations_edges["destination"].apply(node_ids.__getitem__),
    type="directed",
    start=net_migrations_edges["year"],
    end=net_migrations_edges["year"] + 1,
    weights=mmscaler.fit_transform(net_migrations_edges[["net_migrations"]]),
)

In [9]:
# Write the edge table (only the columns listed below) to CSV.
# The result of to_csv is deliberately not assigned: when given a path it
# returns None, and binding that to net_migrations_edges would destroy the
# frame and make this cell fail on a second run.
net_migrations_edges.loc[
    :, ["source", "target", "type", "start", "end", "weights"]
].to_csv("./data/edges_data.csv", index=False, encoding="utf-8")