In this notebook
- get raw data from CityBikes
- make a table of trips between stations
- use co-ordinates to get best routes between stations
- connect to a database

In [None]:
!pip install -q geojson geoalchemy2 geopandas requests shapely pandas sqlalchemy psycopg2-binary

In [1]:
import csv 
import json
import os
import requests
import sys

import geojson
from geoalchemy2 import Geometry, WKTElement
from geojson import Feature, Point, FeatureCollection
import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
from sqlalchemy import *
from shapely.geometry import Point
import psycopg2

In [2]:
# Download the GBFS station_information feed.
# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON/KeyError in the next cell.
r = requests.get(
    'https://tor.publicbikesystem.net/ube/gbfs/v1/en/station_information',
    timeout=30,
)
r.raise_for_status()

In [None]:
# Keep only the id, name and coordinates of each station from the feed payload.
station_records = json.loads(r.content)['data']['stations']
bikeshare_stations = (
    pd.DataFrame(station_records)
    .loc[:, ['station_id', 'name', 'lat', 'lon']]
    .astype({'station_id': 'float64'})
)


In [None]:
bikeshare_stations.head()

In [None]:
# Creating SQLAlchemy's engine to use.
# The connection URL is taken from the DATABASE_URL environment variable so
# real credentials never have to be typed into the notebook; the placeholder
# URL is only a fallback when the variable is not set.
engine = create_engine(
    os.environ.get(
        'DATABASE_URL',
        'postgresql://username:password@localhost:5432/atedeschi',
    )
)

In [None]:
engine

In [None]:
## Alternative: load the stations from a CSV file instead of the API result
##df = pd.read_csv("bikeshare_stations.csv", delimiter=",")

# `df` is just another name for the station table built from the API response.
df = bikeshare_stations

In [None]:
df.head()

In [None]:
## geometry
# Build one point per station (x = lon, y = lat) and carry the remaining
# columns along. The CRS is given as the authority string 'EPSG:4326'; the
# older {'init': 'epsg:4326'} dict form is deprecated by pyproj and emits a
# FutureWarning.
gdf = GeoDataFrame(
    df.drop(['lon', 'lat'], axis=1),
    crs='EPSG:4326',
    geometry=[Point(xy) for xy in zip(df.lon, df.lat)])

def create_wkt_element(geom):
    """Return a WKTElement (SRID 4326) built from the WKT text of `geom`.

    `geom` must expose a `.wkt` attribute.
    """
    wkt_text = geom.wkt
    return WKTElement(wkt_text, srid=4326)

In [None]:
gdf.head()

In [None]:
# Replace every geometry with its WKTElement (SRID 4326) form via create_wkt_element.
# NOTE(review): the column is overwritten in place, so re-running this cell
# feeds WKTElement objects back into create_wkt_element — confirm that is safe.
gdf['geometry'] = gdf['geometry'].apply(create_wkt_element)



In [None]:
 #   if_exists = replace: If table exists, drop it, recreate it, and insert data.
 #   if_exists = fail: If table exists, raise a ValueError (this is the default).
 #   if_exists = append: If table exists, insert data. Create if does not exist.

In [None]:

# Write the stations to the "stations" table, replacing any existing table and
# declaring the geometry column as a POINT with SRID 4326.
gdf.to_sql(
    "stations",
    engine,
    if_exists='replace',
    index=False,
    dtype={'geometry': Geometry('POINT', 4326)},
)

SRID 4326 = WGS84 

World Geodetic System. 

![WGS84](images/WGS84.png)

Global Positioning System uses the World Geodetic System (WGS84) as its reference coordinate system.

PostGIS lets you store your data in a single coordinate system such as WGS84 (SRID 4326). When you need something like area, distance, or length, you use a function to compute that column from your data in a projected coordinate system, which gives you a local interpretation of your data in the units that you want.

So for example, I could store students and schools in PostGIS both in WGS84/SRID:4326. When I want to calculate the distance between students and the schools they attend, I call a distance function on my geometry column, but also wrap a ST_Transform function around the geometry column first to 'project' the data into State Plane CO Central (SRID: 2877). This gives me a column for the distance of each student to their closest school in feet because SRID:2877 is a projected coordinate system that stores data in Feet.

## Fun fact ! ##

Well-known text (WKT) is a text markup language for representing vector geometry objects on a map. 


In [None]:
# One ridership CSV per quarter. The timestamp layout differs between files:
#   Q1, Q2: day/month/year, hours and minutes
#   Q3:     month/day/year, hours and minutes
#   Q4:     month/day/year, hours, minutes and seconds
q1, q2, q3, q4 = map(
    pd.read_csv,
    ("ridership1.csv", "ridership2.csv", "ridership3.csv", "ridership4.csv"),
)

# Stack the first two quarters; the following cells reduce this to a station table.
sids = pd.concat([q1, q2])



In [None]:

# Remove every column listed here; what remains is used as the station lookup.
non_lookup_columns = [
    'from_station_id', 'from_station_name', 'trip_duration_seconds',
    'trip_start_time', 'trip_stop_time', 'user_type', 'trip_id',
]
sids = sids.drop(columns=non_lookup_columns)
                  


In [None]:
# Collapse repeated rows, keeping the last occurrence of each.
sids = sids.drop_duplicates(keep = 'last')

In [None]:
# Rename the columns positionally.
# NOTE(review): assumes exactly two columns remain, in (id, name) order — confirm.
sids.columns = ['id', 'station_name']


In [None]:
# Strip the station id columns from the Q1/Q2 trips.
# Reassigning with errors='ignore' (instead of inplace=True) keeps this cell
# re-runnable: the in-place version raised KeyError on a second run because
# the columns were already gone.
station_id_columns = ['from_station_id', 'to_station_id']
q1 = q1.drop(columns=station_id_columns, errors='ignore')
q2 = q2.drop(columns=station_id_columns, errors='ignore')


In [None]:
# Q1 + Q2 trips, without any row that has a missing value.
a = pd.concat([q1, q2]).dropna(how='any')



In [None]:
# Parse the Q1/Q2 timestamps (day/month/year hours:minutes).
# pd.to_datetime replaces datetime.strptime: the import cell never imports
# `datetime`, so the strptime calls raised NameError. It is also vectorised
# and leaves already-parsed columns untouched when the cell is re-run.
for time_column in ('trip_start_time', 'trip_stop_time'):
    a[time_column] = pd.to_datetime(a[time_column], format="%d/%m/%Y %H:%M")


In [None]:
# Q3 trips (month/day/year hours:minutes).
# Incomplete rows are dropped BEFORE parsing, matching the other quarters, and
# the work is done on a copy so the raw `q3` frame is not modified.
# pd.to_datetime replaces datetime.strptime: the import cell never imports
# `datetime`, so the strptime calls raised NameError.
b = q3.dropna(how='any').copy()
for time_column in ('trip_start_time', 'trip_stop_time'):
    b[time_column] = pd.to_datetime(b[time_column], format="%m/%d/%Y %H:%M")


In [None]:
q4.dtypes

In [None]:
# Q4 trips (month/day/two-digit year hours:minutes:seconds).
# .copy() makes the filtered frame independent, so the column assignments
# below cannot trigger SettingWithCopyWarning.
q4 = q4.dropna(how='any').copy()

# pd.to_datetime replaces datetime.strptime: the import cell never imports
# `datetime`, so the strptime calls raised NameError.
for time_column in ('trip_start_time', 'trip_stop_time'):
    q4[time_column] = pd.to_datetime(q4[time_column], format="%m/%d/%y %H:%M:%S")

c = q4


In [None]:

# Stack the three cleaned trip frames (a, b, c) into one table.
ct = pd.concat([a,b,c])


In [None]:
# Peek at the station coordinates stored in PostGIS.
# A connection is opened here (and closed again by the `with` block) because
# `con` is not defined at this point in a top-to-bottom run.
# The result goes into its own name: the merges that follow need `sids` to
# keep its 'id' / 'station_name' columns, which this query does not return.
with engine.connect() as con:
    station_coords = pd.read_sql_query("SELECT station_id, ST_Y(geometry) as lat, \
                         ST_X(geometry) as lon FROM stations ", con)


In [None]:
sids.head()

In [None]:
# Look up the origin station by name and keep its id as from_station_id.
ct = (
    pd.merge(ct, sids, left_on='from_station_name', right_on='station_name')
    .drop(columns=['station_name'])
    .rename(columns={'id': 'from_station_id'})
)



In [None]:
# Look up the destination station by name and keep its id as to_station_id.
ct = (
    pd.merge(ct, sids, left_on='to_station_name', right_on='station_name')
    .drop(columns=['station_name'])
    .rename(columns={'id': 'to_station_id'})
)




In [None]:
ct.isna().sum()

In [None]:
# Load the trips table from disk, replacing the in-memory `ct`.
# NOTE(review): no visible cell writes clean_trips.csv — presumably it was
# saved from `ct` at some point; confirm, or add the export before this read.
ct = pd.read_csv('clean_trips.csv')

In [None]:

ct = pd.read_csv('clean_trips.csv')
# Rows per (from_station_id, to_station_id) pair — displayed only, not stored.
ct.groupby(['from_station_id', 'to_station_id']).size().reset_index(name='count')



In [None]:
# Read every station's id and coordinates back from PostGIS.
# The connection is opened here because `con` is not defined yet in a
# top-to-bottom run; the `with` block also closes it once the query is done.
with engine.connect() as con:
    sids = pd.read_sql_query("SELECT station_id, ST_Y(geometry) as lat, \
                         ST_X(geometry) as lon FROM stations ", con)

# Cast station_id to int32 before it is used as a merge key.
sids['station_id'] = sids['station_id'].astype('int32')

sids.to_csv("sids.csv")


In [None]:
# Trips per (from_station_id, to_station_id) pair, with the tally in 'count'.
ct = (
    pd.read_csv('clean_trips.csv')
    .groupby(['from_station_id', 'to_station_id'])
    .size()
    .reset_index(name='count')
)


In [None]:
# Attach the origin station's coordinates to each station pair.
pairs = ct.merge(sids, left_on='from_station_id', right_on='station_id')

In [None]:
pairs.head()

In [None]:
# Label the coordinates just merged in as the origin station's.
pairs = pairs.rename(columns = {'lat': 'from_station_lat', 'lon': 'from_station_lon'})


In [None]:
# Attach the destination station's coordinates to each station pair.
pairs = pairs.merge(sids, left_on='to_station_id', right_on='station_id')

In [None]:
# Label the coordinates just merged in as the destination station's.
pairs = pairs.rename(columns = {'lat': 'to_station_lat', 'lon': 'to_station_lon'})

In [None]:
# Discard the two suffixed station_id key columns.
pairs = pairs.drop(columns=['station_id_x', 'station_id_y'])

In [None]:
# Save the station pairs to pairs.csv (the index is written as the first column).
pairs.to_csv("pairs.csv")

In [None]:
pairs.head()

In [None]:
 source = ','.join(str(pairs[column][0]) for column in ('from_station_lon', 'from_station_lat'))

In [None]:
source

In [None]:

# Request a route from the OSRM server for every station pair and keep the
# GeoJSON geometry of the first route returned.
features = []

item = 0

# Query parameters are the same for every request.
payload = {"steps": "true", "geometries": "geojson"}

for index, pair in pairs.iterrows():

    # The URL carries the coordinates as "lon,lat;lon,lat".
    source_coordinates = str(pair['from_station_lon']) + ',' + str(pair['from_station_lat']) + ';'
    dest_coordinates = str(pair['to_station_lon']) + ',' + str(pair['to_station_lat'])

    item += 1

    url = 'http://router.project-osrm.org/route/v1/driving/' + source_coordinates + dest_coordinates

    # The timeout keeps one stalled request from hanging the whole loop, and
    # raise_for_status() reports HTTP failures with the offending URL.
    response = requests.get(url, params=payload, timeout=30)
    response.raise_for_status()

    data = response.json()
    if not data.get('routes'):
        # Fail with the pair and server response instead of a bare KeyError/IndexError.
        raise RuntimeError(
            'No route returned for pair {} ({}): {}'.format(index, url, data)
        )

    # Progress counter.
    print(item)
    features.append(data['routes'][0]['geometry'])

    

Please iterate for all trips


In [None]:
# `feature_collection_` was never defined (NameError); show how many route
# geometries were collected instead.
len(features)


In [None]:
from shapely.geometry import Point, Polygon, MultiPolygon, LineString

# Wrap each route's coordinate list in a LineString and then in a geojson
# Feature with empty properties.
new_features = [
    Feature(geometry=LineString(route['coordinates']), properties={})
    for route in features
]




In [None]:
len(new_features)

In [None]:

# Bundle all route features and save them as paths.geojson.
feature_collection = FeatureCollection(new_features)

with open('paths.geojson', 'w') as geojson_file:
    serialized = geojson.dumps(feature_collection)
    geojson_file.write(serialized)

In [None]:
# Per neighbourhood (area_name): join every path that intersects it, clip the
# path to the neighbourhood with ST_Intersection on geography casts, and sum
# the ST_Length of the clipped pieces.
# NOTE(review): the `neighborhoods` and `paths` tables are not created in the
# visible cells — presumably loaded into the database separately; confirm.
query = "SELECT  \
n.area_name, \
SUM(ST_Length( \
    ST_Intersection(p.wkb_geometry::geography, \
                n.wkb_geometry::geography))) as length \
\
FROM neighborhoods n \
INNER JOIN paths p ON ST_Intersects(n.wkb_geometry, p.wkb_geometry) \
GROUP BY 1 "

# Open a connection on the current engine and run the query into a DataFrame.
# NOTE(review): this connection is never explicitly closed in the visible cells.
con = engine.connect()
output = pd.read_sql_query(query, con)


### PostGIS exercises

In [None]:

# Connection URL used for the exercises below. Kept as a comment because a
# bare URL is not valid Python and made this cell fail with a SyntaxError:
# postgres://ubuntu:nyc@localhost/nyc

In [None]:
# SQLAlchemy 1.4+ only accepts the 'postgresql://' scheme; 'postgres://' fails
# to load a dialect. The trailing space has been removed as well, since it
# would otherwise become part of the database name.
engine = create_engine('postgresql://ubuntu:nyc@localhost/nyc')


In [None]:
# Lists every table in the 'public' schema (defined here, not executed).
query  =  "SELECT * FROM pg_catalog.pg_tables where schemaname = 'public'"

In [None]:

# This is a shell command, not Python, so it is kept as a comment: run as a
# code cell it raised a SyntaxError. Run it from a terminal, or prefix it with `!`.
# TODO(review): the command is incomplete — no source datasource follows the
# PG: connection string — and the password should not be stored inline.
# ogr2ogr -f PostgreSQL PG:host='database-1.cpu2z0a5bugq.us-east-2.rds.amazonaws.com' port='5432' dbname='postgres' password ='postgres' user='postgres' 

In [None]:
# Point the engine at the remote database. The URL can be supplied through the
# RDS_DATABASE_URL environment variable so credentials need not live in the
# notebook; the literal below is only the fallback.
# NOTE(review): the fallback still embeds a password — remove it (and rotate
# the password) once the environment variable is in use.
engine = create_engine(
    os.environ.get(
        'RDS_DATABASE_URL',
        'postgresql://postgres:postgres@database-1.cpu2z0a5bugq.us-east-2.rds.amazonaws.com:5432/postgres',
    )
)


In [None]:
# Open a connection on whichever engine was created most recently.
con = engine.connect()

In [None]:
# List every table in the 'public' schema of the connected database.
query = "SELECT * FROM pg_catalog.pg_tables where schemaname = 'public'"

pd.read_sql_query(query, con)