In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import json
import os
from scipy.spatial import KDTree
# Load the raw Metro and Metrobus tables from their CSV exports.
metro = pd.read_csv("lineas-y-estaciones-del-metro.csv")
metrobus = pd.read_csv("estaciones-metrobus.csv")

# The "Geo Shape" column stores each geometry as JSON text; decode every cell into a dict.
metro["Geo Shape"] = metro["Geo Shape"].map(json.loads)
metrobus["Geo Shape"] = metrobus["Geo Shape"].map(json.loads)

# The helper functions read the station/line name from a lowercase "nombre" column.
metro = metro.rename(columns={"Nombre": "nombre"})

In [2]:
def separateLines(df):
    """Split a table of geometries into line rows and all remaining rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "Geo Shape" column whose cells are dicts with a "type" key.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        First the rows whose geometry type is "MultiLineString", then every
        other row. Both frames go through ``reset_index()``, so they are
        renumbered from 0 and keep the previous index as an "index" column.
    """
    # Build the mask from the column values rather than label lookups so the
    # function works for any index, and force dtype=bool so an empty frame
    # still yields a mask that can be negated with ``~``.
    is_line = np.array(
        [shape["type"] == "MultiLineString" for shape in df["Geo Shape"]],
        dtype=bool,
    )
    return df[is_line].reset_index(), df[~is_line].reset_index()

In [3]:
def unique(sequence):
    """Return the items of ``sequence`` with duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    already_seen = set()
    result = []
    for item in sequence:
        if item in already_seen:
            continue
        already_seen.add(item)
        result.append(item)
    return result

In [4]:
def getCoordinates(edge,df):
    """Look up the coordinates of both endpoints of an edge.

    Parameters
    ----------
    edge : sequence
        ``edge[0]`` and ``edge[1]`` are values of the "nombre" column.
    df : pandas.DataFrame
        Table with "nombre" and "Geo Shape" columns; each "Geo Shape" cell is
        a dict with a "coordinates" entry.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        Coordinates of the first row matching each name. An IndexError is
        raised when a name has no matching row.
    """
    def _coords_of(name):
        # Only the first matching row is used when a name appears more than once.
        shapes = df.loc[df["nombre"] == name, "Geo Shape"]
        return np.array(shapes.iloc[0]["coordinates"])

    start = _coords_of(edge[0])
    end = _coords_of(edge[1])
    return start, end
def earthDist(coord1,coord2):
    """Great-circle (haversine) distance in metres between two points.

    Parameters
    ----------
    coord1, coord2 : array-like
        Coordinates in degrees ordered ``(longitude, latitude)``; index 1 is
        the latitude, as in the original cosine terms. Components beyond the
        first two are ignored.

    Returns
    -------
    numpy.float64
        Distance along a sphere of radius 6 371 000 m.
    """
    earth_radius_m = 6371000
    lon1, lat1 = np.radians(np.asarray(coord1, dtype=float)[:2])
    lon2, lat2 = np.radians(np.asarray(coord2, dtype=float)[:2])
    # Haversine: the first term uses the latitude difference, the second the
    # longitude difference scaled by the cosines of both latitudes.
    a = (
        np.sin((lat1 - lat2) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon1 - lon2) / 2) ** 2
    )
    # Guard against rounding pushing ``a`` marginally outside [0, 1].
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return c * earth_radius_m

In [5]:
def getLines(df):
    """Turn line geometries into station-to-station edges with distances.

    The table is split with ``separateLines`` into line rows and station rows.
    Every vertex of a line is snapped to its nearest station, repeated
    stations are dropped with ``unique`` (first occurrence wins), and
    consecutive stations are paired into edges.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with "nombre" and "Geo Shape" columns holding both line and
        station rows.

    Returns
    -------
    tuple of (list, list)
        ``(edges, distances)``: edges are ``(name_a, name_b)`` tuples and
        distances are the matching ``earthDist`` results rounded with
        ``round``. Both lists are index-aligned and cover every line in order.

    Notes
    -----
    The name of each line is printed as it is processed.
    """
    lineas, estaciones = separateLines(df)
    station_points = np.array([shape["coordinates"][0:2] for shape in estaciones["Geo Shape"]])
    tree = KDTree(station_points)

    full_stat, full_dists = [], []
    for _, linea in lineas.iterrows():
        # Only the first element of the "coordinates" list is used.
        # NOTE(review): if a geometry holds several line strings, the rest are ignored - confirm.
        vertices = linea["Geo Shape"]["coordinates"][0]
        print(linea["nombre"])

        # p=np.inf selects the maximum-norm distance on the raw coordinate values.
        nearest = [
            estaciones.loc[tree.query(vertex, p=np.inf)[1], "nombre"]
            for vertex in vertices
        ]
        ordered = unique(nearest)

        pairs = list(zip(ordered[:-1], ordered[1:]))
        lengths = [round(earthDist(*getCoordinates(pair, df))) for pair in pairs]

        full_dists.extend(lengths)
        full_stat.extend(pairs)
    return full_stat, full_dists

In [6]:
# Extract the edge list and the matching rounded distances for each system.
# getLines prints the name of every line it processes; that is the output of this cell.
edges_metrobus,dists_metrobus = getLines(metrobus)
edges_metro,dists_metro = getLines(metro)

MB07-A (Indios Verdes - Campo Marte)
MB05-A (Remedios - San Lazaro)
MB07-C (Glorieta Cuitlahuac - Campo Marte)
MB02-C (Colonia del Valle - Del Moral)
MB07-A (Campo Marte - Indios Verdes)
MB01-B (Indios Verdes - Caminero)
MB03-B (Tenayuca - Balderas)
MB03-C (Tenayuca - Buenavista)
MB03-D (La Raza - Tenayuca)
MB07-B (Hospital Infantil - Campo Marte)
MB06-B (Villa de Aragon - IPN)
MB04-S (San Lazaro - Buenavista)
MB04-S (Buenavista - San Lazaro)
MB06-C (Deportivo 18 Marzo - El Rosario)
MB07-C (Campo Marte - Glorieta Cuitlahuac)
MB04-A (Aeropuerto)
MB01-C (Galvez - Indios Verdes)
MB01-A (Insurgentes - Indios Verdes)
MB03-B (Balderas - Tenayuca)
MB03-A (Etiopia - Tenayuca)
MB02-D (Tepalcates - Colonia del Valle)
MB06-B (IPN - Villa de Aragon)
MB02-B (Etiopia - Tepalcates)
MB07-D (Indios Verdes - El Angel)
MB03-C (Buenavista - Tenayuca)
MB01-A (Indios Verdes - Insurgentes)
MB02-A (Tepalcates - Tacubaya)
MB06-A (El Rosario - Villa de Aragon)
MB01-B (Caminero - Indios Verdes)
MB06-C (El Rosari

In [7]:
def processString(string):
    """Normalize a name: lowercase, unaccented vowels, underscores for spaces.

    The text is lowercased first, then "á", "é", "í", "ó", "ú" and "ü" are
    replaced by their plain vowels and every space becomes "_". All other
    characters are left untouched.
    """
    # Single-character substitutions, applied after lowercasing.
    table = str.maketrans("áéíóúü ", "aeiouu_")
    return string.lower().translate(table)

In [8]:
# Normalize both endpoints of every edge: keep the text before the first "_"
# of each name, then pass it through processString.
# NOTE: this cell overwrites its own inputs and processString turns spaces into
# "_", so running it a second time would cut multi-word names at the first "_".
edges_metro = [
    (processString(src.split("_")[0]), processString(dst.split("_")[0]))
    for src, dst in edges_metro
]
# Reversed copy of every Metro edge.
edges_metro_inv = [(dst, src) for src, dst in edges_metro]
edges_metrobus = [
    (processString(src.split("_")[0]), processString(dst.split("_")[0]))
    for src, dst in edges_metrobus
]

In [9]:
# Metro edges are included in both directions, so their distances are repeated
# to keep `dists` index-aligned with `edges`; Metrobus edges are appended as is.
edges = [*edges_metro, *edges_metro_inv, *edges_metrobus]
dists = [*dists_metro, *dists_metro, *dists_metrobus]

In [10]:
# Transpose the list of (source, dest) pairs into one tuple of sources and one of destinations.
unzipped = list(zip(*edges))
# One row per edge, with the distance taken from the aligned `dists` list.
df = pd.DataFrame(
    {
        "source": unzipped[0],
        "dest": unzipped[1],
        "dist": dists,
    }
)

In [11]:
# Sanity check: one row per entry in `edges`, and the three columns source, dest and dist.
df.shape

(1469, 3)

In [12]:
# Build a directed graph: each row becomes an edge source -> dest carrying its "dist" attribute.
# NOTE(review): a DiGraph holds a single edge per (source, dest) pair, so rows repeating a pair
# presumably collapse into one edge - confirm this is intended.
G = nx.from_pandas_edgelist(df,source="source",target="dest",edge_attr=["dist"],create_using=nx.DiGraph) 

In [13]:
# Export the graph in GEXF format.
# NOTE(review): the extension is spelled ".gefx" while the format is GEXF (".gexf");
# likely a typo, left unchanged here in case something else reads this file name - confirm.
nx.write_gexf(G,"metro-metrobus.gefx",version="1.2draft")

In [14]:
#nx.write_graphml_xml(G,"metro-metrobus.graphml")