In [178]:
import csv
import geojson
import folium
import pytz
import os

from datetime import datetime
from collections import defaultdict
from geojson import dumps as geojson_dumps

data_path = '/mnt/c/Users/cephei/Desktop'

# SQL para extraer datos de la base de datos

In [155]:
# ingresar a la shell de postgres
#python manage.py dbshell

# obtener datos de antenas y guardarlos en un csv
#\copy (select timestamp, prediction, trip_id, "phoneId" from metro_tripmeasure as a inner join metro_trip as b on a.trip_id=b.id order by b.id, a.id) To '/tmp/metrotrips.csv' WITH CSV DELIMITER ',' HEADER;

# obtener datos de nearbybuses_log y guardarlos en un csv
#\copy (select * from metro_station) To '/tmp/metrostations.csv' WITH CSV DELIMITER ',' HEADER;

# Cargar nombre de estaciones y su posición

In [156]:
station_name_path = os.path.join(data_path, 'metrostations.csv')
station_loc_path = os.path.join(data_path, 'metrolocations.csv')

metro_stations = {}
names = {}
# se cargan las estaciones de tranSapp con su identificador
with open(station_name_path, 'r', encoding='utf-8-sig') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',')
    for row in spamreader:
        metro_stations[row['id']] = {
            'name': row['name']
        }
        modified_name = row['name'].split('-')[1][1:].lower()
        if modified_name in names:
            names[modified_name].append(row['id'])
        else:
            names[modified_name] = [row['id']]

# se carga la posición de las estaciones presente en el GTFS. Se matchea 
# con el id de tranSapp a través del nombre de la estación
with open(station_loc_path, 'r', encoding='utf-8-sig') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',')
    for row in spamreader:
        station_id = row['stop_name'].lower()
        loc = [float(row['stop_lat']), float(row['stop_lon'])]
        for transapp_station_id in names[station_id]:
            metro_stations[transapp_station_id]['loc'] = loc

for k in metro_stations:
    if 'loc' not in metro_stations[k].keys():
        print(metro_stations[k]['name'], metro_stations[k].keys())

# Generar geojson de estaciones de metro

In [207]:
from geojson import Point, Feature, FeatureCollection

stop_shape_path = os.path.join(data_path, 'stop_shapes.geojson')

stop_features = []
for station_id in metro_stations:
    station = metro_stations[station_id]
    lat, lon = station['loc']
    geometry = Point((lon, lat))
    stop_feature = Feature(geometry=geometry, properties={'name': station['name']})
    stop_features.append(stop_feature)

stop_feature_collection = FeatureCollection(stop_features)

with open(stop_shape_path, 'w') as shape_file:
    shape_file.write(geojson_dumps(stop_feature_collection))

stop_feature_collection.errors()

[]

# Procesar eventos y puntaje

In [157]:
metro_trips_path = os.path.join(data_path, 'score_history_trampa.csv')

lower_threshold = datetime(2017, 5, 1, tzinfo=pytz.UTC)

trips = defaultdict(lambda : dict({'stations': []}))
with open(metro_trips_path, 'r') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',')
    previous_prediction = None
    for row in spamreader:
        trip_id = row['trip_id']
        timestamp = datetime.strptime(row['timestamp'] + '00', '%Y-%m-%d %H:%M:%S%z')
            
        if timestamp < lower_threshold:
            continue
        
        prediction = row['prediction']
        if '|' in prediction or prediction == '' or prediction == previous_prediction:
            continue
        previous_prediction = prediction
        
        station_info = metro_stations[prediction]
        trips[trip_id]['phone_id'] = row['phoneId']
        trips[trip_id]['stations'].append([station_info['name'], timestamp, station_info['loc'][0], station_info['loc'][1]])

stages = []
for trip_id in trips:
    stations = trips[trip_id]['stations']
    phone_id = trips[trip_id]['phone_id']
    for index in range(0, len(stations) - 1):
        station_start = stations[index]
        station_end = stations[index + 1]
        stages.append(station_start + station_end + [phone_id, trip_id, '{0}-{1}'.format(trip_id, phone_id)])

print('Número de viajes: {0}'.format(len(trips.keys())))
print('Número de etapas: {0}'.format(len(stages)))
"""
# contar n° personas por etapa
stages2 = {}
for stage in stages:
    stage_id = '{0}-{1}'.format(stage[1], stage[5])
    if stage_id not in stages2:
        stages2[stage_id] = {
            'station_start': stage[1],
            'station_end': stage[5],
            'lat_start': stage[3],
            'lon_start': stage[4],
            'lat_end': stage[7],
            'lon_end': stage[8],
            'trip_number': 1
        }
    else:
        stages2[stage_id]['trip_number'] += 1

stages = []
for stage in stages2:
    i = stages2[stage]
    stages.append([i['trip_number'], i['station_start'], '', i['lat_start'], i['lon_start'], 
                   i['station_end'], '', i['lat_end'], i['lon_end'], ''])
"""

Número de viajes: 42
Número de etapas: 431


"\n# contar n° personas por etapa\nstages2 = {}\nfor stage in stages:\n    stage_id = '{0}-{1}'.format(stage[1], stage[5])\n    if stage_id not in stages2:\n        stages2[stage_id] = {\n            'station_start': stage[1],\n            'station_end': stage[5],\n            'lat_start': stage[3],\n            'lon_start': stage[4],\n            'lat_end': stage[7],\n            'lon_end': stage[8],\n            'trip_number': 1\n        }\n    else:\n        stages2[stage_id]['trip_number'] += 1\n\nstages = []\nfor stage in stages2:\n    i = stages2[stage]\n    stages.append([i['trip_number'], i['station_start'], '', i['lat_start'], i['lon_start'], \n                   i['station_end'], '', i['lat_end'], i['lon_end'], ''])\n"

# construir archivo para kepler

In [158]:
output_path = os.path.join(data_path, 'output.csv')

header = ['station_name_start', 'timestamp_start', 'lat_start', 'lon_start', 
          'station_name_end', 'timestamp_end', 'lat_end', 'lon_end', 'phone_id', 'trip_id', 'trip_id-phone_id']

stop_error = set()
with open(output_path, 'w') as csvfile:
    spamwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    spamwriter.writerow(header)
    for stage in stages:
        spamwriter.writerow(stage)

# Procesar shapes de GTFS para obtener líneas de metro

In [159]:
import time

from geojson import LineString, Feature, FeatureCollection
from IPython import display

gtfs_shapes_path = os.path.join(data_path, '..', 'GTFS', 'shapes.csv')

shape_ids = ['L1-R', 'L1-I', 
             'L2-R', 'L2-I', 
             'L5-R', 'L5-I', 
             'L6-R', 'L6-I', 
             'L4-R', 'L4-I', 
             'L4A-R', 'L4A-I']
color = ['#C91B3E', '#C91B3E', 
         '#F2B64A', '#F2B64A',
         '#21915B', '#21915B',
         '#953E83', '#953E83',
         '#22306E', '#22306E',
         '#2776AB', '#2776AB']

shapes_dict = defaultdict(list)

with open(gtfs_shapes_path, 'r') as csvfile:
    spamreader = csv.DictReader(csvfile, delimiter=',')
    for row in spamreader:
        line_id = row['shape_id'][:-4]
        
        if line_id in ids:
            lon_lat = (float(row['shape_pt_lon']), float(row['shape_pt_lat']))
            shapes_dict[line_id].append(lon_lat)

features = []
for shape_id in shapes_dict:
    shape_points = shapes_dict[shape_id]
    geometry = LineString(shape_points)
    feature_color = color[shape_ids.index(shape_id)]
    feature = Feature(geometry=geometry, properties={'color': feature_color})
    
    if False:
        # ====================================
        beauchef_location = [-33.455328, -70.6682527]
        zoom = 11
        m = folium.Map(location=beauchef_location, zoom_start=zoom)
        folium.GeoJson(geometry, name='geojson').add_to(m)

        display.clear_output(wait=True)
        display.display(m)
        time.sleep(1.0)
        # ========================================
    features.append(feature)

collection = FeatureCollection(features)
collection.errors()

[]

# Guardar shape en un archivo

In [160]:
from geojson import dumps as geojson_dumps

metbus_shape_path = os.path.join(data_path, 'line_shapes.geojson')

with open(metbus_shape_path, 'w') as shape_file:
    shape_file.write(geojson_dumps(collection))

# Revisar shape generado

In [177]:
import geojson

beauchef_location = [-33.455328, -70.6682527]
zoom = 11
m = folium.Map(location=beauchef_location, zoom_start=zoom)

folium.GeoJson(str(collection), name='geojson').add_to(m)

m