In [14]:
%load_ext blackcellmagic

In [3]:
import geopandas as gpd
import pandas as pd
import os
import re
from shapely.geometry import Point
from fiona.crs import from_epsg
import logging

# Run-wide settings: base directory, the feed snapshot folder beneath it,
# and the rail systems whose stops are processed.
path_name = os.getcwd()
folder = "June2019"
rails = ["LIRR", "metro_north", "nyc_subway"]

# File logging: the logger itself accepts INFO and above, but the file
# handler only writes ERROR and above to error_log.log.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
handler = logging.FileHandler("error_log.log")
handler.setFormatter(formatter)
handler.setLevel(logging.ERROR)

logger = logging.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

def make_stop_shapes(df, x="stop_lon", y="stop_lat"):
    """Turn a table of stop coordinates into a projected point GeoDataFrame.

    Args:
        df: DataFrame with one row per stop.
        x: Name of the column holding the x coordinate (longitude).
        y: Name of the column holding the y coordinate (latitude).

    Returns:
        A GeoDataFrame with one Point per row, declared as EPSG:4269 (NAD83)
        and then reprojected to EPSG:2263 (NY State Plane).

    Raises:
        ValueError: If either coordinate column contains a null value.
    """
    # Raising a bare string is a TypeError in Python 3; raise a real
    # exception so callers see the intended message.
    if df[x].isna().any() or df[y].isna().any():
        raise ValueError("DataFrame contains Null coordinates")

    points = [Point(xy) for xy in zip(df[x], df[y])]
    gdf = gpd.GeoDataFrame(df, geometry=points)
    gdf.crs = from_epsg(4269)  # declare the source CRS as NAD83
    gdf = gdf.to_crs(epsg=2263)  # reproject to NY State Plane
    return gdf


# Read the station list that indicates which trains stop at which stations.
# Only three columns are loaded, and they are renamed to snake_case so the
# stop id column is called "stop_id".
trains_at_stops = pd.read_csv(
    "http://web.mta.info/developers/data/nyct/subway/Stations.csv",
    usecols=["GTFS Stop ID", "Daytime Routes", "Structure"],
).rename(
    columns={
        "GTFS Stop ID": "stop_id",
        "Daytime Routes": "trains",
        "Structure": "structure",
    }
)

def _drop_directional_stops(stops):
    """Keep only rows whose stop_id equals some id with trailing N/S stripped.

    The comparison is done on the string form of the ids. Comparing an
    integer id column against the stripped *strings* matches nothing and
    would silently discard every stop of a feed with numeric ids.
    """
    ids = stops["stop_id"].astype(str)
    base_ids = ids.str.rstrip("N").str.rstrip("S").unique()
    # .copy() so the coordinate correction applied by the caller writes to an
    # independent frame rather than a slice of the CSV data.
    return stops.loc[ids.isin(base_ids)].copy()


def make_rail_stops(path_name, folder, rail):
    """Write a shapefile of de-duplicated stops for one rail system.

    Reads ``<path_name>/<folder>/<rail>/stops.txt``, drops the N/S-suffixed
    stop ids, collapses stops that share coordinates, converts the result to
    points with ``make_stop_shapes``, keeps the stops that intersect a feature
    of ``<path_name>/counties_bndry.geojson`` and writes them to
    ``<path_name>/<folder>/shapes/stops_<rail>_<folder lowercased>.shp``.

    Args:
        path_name: Base directory holding the snapshot folder and the
            counties GeoJSON.
        folder: Name of the snapshot folder under ``path_name``.
        rail: Name of the rail system sub-folder. ``"nyc_subway"`` is
            additionally merged with the module-level ``trains_at_stops``.

    Returns:
        None. The result is written to disk and a confirmation is printed.

    Raises:
        Exception: Any error raised while reading, joining or writing is
            logged with its traceback through the module logger and re-raised.
    """
    try:
        rail_dir = os.path.join(path_name, folder, rail)
        stops = pd.read_csv(
            os.path.join(rail_dir, "stops.txt"),
            usecols=["stop_id", "stop_name", "stop_lat", "stop_lon"],
        )
        stops = _drop_directional_stops(stops)

        # correct coordinates of the station with id='H01'
        is_h01 = stops["stop_id"] == "H01"
        stops.loc[is_h01, "stop_lat"] = 40.672086
        stops.loc[is_h01, "stop_lon"] = -73.835914

        coords = ["stop_lat", "stop_lon"]

        if rail == "nyc_subway":
            # Ids of the stops that are about to be removed as coordinate
            # duplicates; they are re-attached as stop_id2 to the stop that
            # is kept at the same location.
            removed_ids = stops.loc[
                stops.duplicated(subset=coords), coords + ["stop_id"]
            ].rename(columns={"stop_id": "stop_id2"})

            # NOTE(review): how="outer" also keeps station-list rows with no
            # match in stops.txt; such rows carry null coordinates and are
            # rejected by the null check in make_stop_shapes.
            stops = (
                stops.merge(trains_at_stops, on="stop_id", how="outer")
                .drop_duplicates(coords, keep="first")
                .merge(removed_ids, on=coords, how="left")
            )
        else:
            # NOTE(review): for "metro_north" the previous revision built an
            # unused selection named metro_north_bus_stops (ids 501-999 except
            # 622, plus ids 14 and 16). It was never applied to the output, so
            # it has been removed; confirm whether those stops should be
            # excluded here.
            stops = stops.drop_duplicates(coords, keep="first")

        stops_geo = make_stop_shapes(stops)
        counties = gpd.read_file(
            os.path.join(path_name, "counties_bndry.geojson"), driver="GeoJSON"
        ).to_crs(epsg=2263)

        # sjoin defaults to an "intersects" test; relying on the default avoids
        # the op/predicate keyword rename between geopandas versions.
        stops_geo = gpd.sjoin(stops_geo, counties, how="inner").drop(
            columns="index_right"
        )

        out_dir = os.path.join(path_name, folder, "shapes")
        os.makedirs(out_dir, exist_ok=True)  # to_file does not create folders
        stops_geo.to_file(
            os.path.join(out_dir, f"stops_{rail}_{folder.lower()}.shp")
        )
        print(f"Created stop shapefiles for {rail}")

    except Exception:
        logger.exception("Unexpected exception occurred")
        raise