# Purpose
Download weather data    
Download aggregate hourly data for stations (incoming /outgoing)

In [1]:
import pandas as pd
import google.auth


In [2]:
_, project_id = google.auth.default()

# Trips Data
Return hourly incoming and outgoing data

In [11]:
trips_query = """
WITH
  departing AS (
      SELECT
        DATE(starttime) date,
        EXTRACT(HOUR
        FROM
          starttime) hour,
        start_station_id station_id,
        COUNT(*) AS departing_bikes
      FROM
        `np-training.ableto.citibike`
      GROUP BY
        date,
        hour,
        station_id
    ),
  
  incoming AS(
      SELECT
        DATE(stoptime) date,
        EXTRACT(HOUR
        FROM
          stoptime) hour,
        end_station_id station_id,
        COUNT(*) AS incoming_bikes
      FROM
        `np-training.ableto.citibike`
      GROUP BY
        date,
        hour,
        station_id 
    )
SELECT
  COALESCE(d.date,
    i.date) AS date,
  COALESCE(d.hour,
    i.hour) AS hour,
  COALESCE(d.station_id,
    i.station_id) AS station_id,
  IFNULL(d.departing_bikes,
    0) AS departing_bikes,
  IFNULL(i.incoming_bikes,
    0) AS incoming_bikes
FROM
  departing d
FULL OUTER JOIN
  incoming i
ON
  d.date = i.date
  AND d.hour = i.hour
  AND d.station_id = i.station_id
  
"""

In [12]:
trips_df = pd.read_gbq(trips_query, project_id=project_id,reauth=True,dialect='standard')

Requesting query... ok.
Job ID: job_rSLeaSuKlGKG91ey8WwQDPFTQjJY
Query running...
Query done.
Cache hit.

Retrieving results...
  Got page: 2; 2% done. Elapsed 11.51 s.
  Got page: 3; 3% done. Elapsed 16.13 s.
  Got page: 4; 4% done. Elapsed 20.59 s.
  Got page: 5; 5% done. Elapsed 26.18 s.
  Got page: 6; 6% done. Elapsed 30.97 s.
  Got page: 7; 7% done. Elapsed 36.71 s.
  Got page: 8; 8% done. Elapsed 41.31 s.
  Got page: 9; 8% done. Elapsed 47.64 s.
  Got page: 10; 9% done. Elapsed 52.4 s.
  Got page: 11; 10% done. Elapsed 57.21 s.
  Got page: 12; 11% done. Elapsed 62.34 s.
  Got page: 13; 12% done. Elapsed 67.46 s.
  Got page: 14; 13% done. Elapsed 74.0 s.
  Got page: 15; 14% done. Elapsed 79.8 s.
  Got page: 16; 15% done. Elapsed 84.74 s.
  Got page: 17; 16% done. Elapsed 89.93 s.
  Got page: 18; 17% done. Elapsed 95.45 s.
  Got page: 19; 18% done. Elapsed 101.89 s.
  Got page: 20; 19% done. Elapsed 108.81 s.
  Got page: 21; 20% done. Elapsed 113.55 s.
  Got page: 22; 21% done. Ela

In [13]:
trips_df.to_csv('trips.csv.gz',index=False, compression='gzip')

In [None]:
!ls -lah

# Station Info
Contains current capacity and info

In [4]:
station_info_query = """

SELECT
  *
  FROM `bigquery-public-data.new_york.citibike_stations`
"""

In [5]:
stations_df = pd.read_gbq(station_info_query, project_id=project_id,dialect='standard',verbose=False )

In [6]:
stations_df.to_csv('stations.csv.gz',index=False, compression='gzip')

# Weather Data

In [7]:
# NOAA Station
# According to  http://forecast.weather.gov/stations.php?foo=0 , zip code is 10018
#http://forecast.weather.gov/MapClick.php?CityName=New+York&state=NY&site=OKX&lat=40.7198&lon=-73.993
# New York City, Central Park (KNYC)
# 725030 = La Guardia

In [5]:
weather_query = """

SELECT
  stn, wban, DATE(CAST (year as INT64) , Cast(mo as INT64) , CAST(da as INT64 )) as date, temp, count_temp, prcp
      , fog as is_foggy
      , rain_drizzle as is_raining
      , snow_ice_pellets as is_snowing
      
FROM
  `bigquery-public-data.noaa_gsod.gsod20*`
WHERE
  stn = '725030' 
AND _TABLE_SUFFIX BETWEEN '15'
  AND '17'



"""

In [6]:
weather_df = pd.read_gbq(weather_query, project_id=project_id,dialect='standard',verbose=False )

In [7]:
weather_df.to_csv('weather.csv.gz',compression='gzip',index=False)