# **Exporting data from BigQuery to Google Cloud Storage**

In this notebook, we export BigQuery data to GCS so that we can reuse our Keras model that was developed on CSV data.

In [None]:
import os

from google.cloud import bigquery

In [None]:
# Replace the two placeholders below with your own bucket and project IDs.
BUCKET = "<BUCKET>"
PROJECT = "<PROJECT>"

# Cloud Storage folder used as the output location for the exported data.
OUTDIR = "gs://" + BUCKET + "/taxifare/data"

# Mirror the settings into the process environment in a single call.
os.environ.update({"BUCKET": BUCKET, "OUTDIR": OUTDIR, "PROJECT": PROJECT})

## **Create BigQuery tables**

In [None]:
# Initialise a BigQuery client bound to the configured project
bq = bigquery.Client(project=PROJECT)

# Describe the target dataset. The reference is built explicitly because
# `Client.dataset()` is deprecated in recent google-cloud-bigquery releases;
# `bq.project` is the same project the deprecated helper would have used.
dataset = bigquery.Dataset(bigquery.DatasetReference(bq.project, "taxifare"))

# Create the dataset if it is missing. `exists_ok=True` ignores only the
# "already exists" conflict, so genuine failures (bad credentials, missing
# permissions, wrong project ID) raise instead of being misreported as
# "Dataset already exists" by a catch-all `except:`.
bq.create_dataset(dataset, exists_ok=True)
print("Dataset taxifare is ready")

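Let's create a training table with roughly 1 million examples by keeping about 1 in every 1,000 trips (a hash-based sample on `pickup_datetime`).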

Note that the order of columns is exactly what it was in our CSV files.

In [None]:
%%bigquery

-- Build the training table from a hash-based sample of the NYC yellow-taxi
-- trips, keeping the column order used by the query's SELECT list.
CREATE OR REPLACE TABLE taxifare.feateng_training_data AS

SELECT
    (tolls_amount + fare_amount) AS fare_amount,
    pickup_datetime,
    pickup_longitude AS pickuplon,
    pickup_latitude AS pickuplat,
    dropoff_longitude AS dropofflon,
    dropoff_latitude AS dropofflat,
    passenger_count*1.0 AS passengers,
    -- NOTE(review): "unusued" looks like a typo for "unused"; left unchanged
    -- because the literal is written into the table as data.
    "unusued" AS key
FROM
    `nyc-tlc.yellow.trips`
WHERE
    -- Repeatable sample: keep rows whose pickup_datetime hash lands in
    -- bucket 1 of 1000.
    ABS(MOD(FARM_FINGERPRINT(CAST(pickup_datetime AS STRING)), 1000)) = 1
AND
    trip_distance > 0
AND
    -- In WHERE this is the source column, not the SELECT alias that adds tolls.
    fare_amount >= 2.5
AND
    pickup_longitude > -78
AND
    -- Fixed: the upper bound was 70 (missing minus sign), which left the
    -- pickup longitude effectively unbounded; now matches the dropoff bound.
    pickup_longitude < -70
AND
    dropoff_longitude > -78
AND
    dropoff_longitude < -70
AND
    pickup_latitude > 37
AND
    pickup_latitude < 45
AND
    dropoff_latitude > 37
AND
    dropoff_latitude < 45
AND
    passenger_count > 0