# BigQuery Public Datasets Tests and Samples

### Notebook to demonstrate ability to run queries on public datasets.

This notebook assumes that all steps in the GoogleConnectandTests notebook have already been completed.

## Start by setting the credentials environment variable and importing the bigquery package

In [1]:
# Record the location of the service-account key file in the process
# environment. The setting lives only in the running kernel, so this cell
# has to be executed again after every kernel restart.
import os

CREDENTIALS_ENV_VAR = "GOOGLE_APPLICATION_CREDENTIALS"
# Replace the placeholder below with the real path to your JSON key file.
os.environ[CREDENTIALS_ENV_VAR] = "/<path-to-your-key>/<keyname>.json"

In [2]:
from google.cloud import bigquery  #enable BigQuery - rerun at kernel restart

## Test to see if we can connect to a public dataset

In [5]:
# Fully qualified id (project.dataset) of the public dataset to inspect.
# Swap in any other dataset hosted in the bigquery-public-data project.
dataset_id = 'bigquery-public-data.covid19_aha'

# One client is created here and reused by the query cells further down.
client = bigquery.Client()

# Ask BigQuery for the tables that belong to the dataset.
dataset_tables = client.list_tables(dataset_id)

header_template = "Tables contained in '{}':"
table_path_template = "{}.{}.{}"

print(header_template.format(dataset_id))
for table_item in dataset_tables:
    # Print each table as project.dataset.table.
    print(table_path_template.format(table_item.project, table_item.dataset_id, table_item.table_id))

Tables contained in 'bigquery-public-data.covid19_aha':
bigquery-public-data.covid19_aha.hospital_beds
bigquery-public-data.covid19_aha.staffing


## Get Column Names

In [28]:
# List the column names of the hospital_beds table by querying the dataset's
# INFORMATION_SCHEMA.COLUMNS view (the same schema can also be viewed in the
# GCP Console).
# Only column_name is selected because it is the only field printed below,
# and ORDER BY ordinal_position returns the columns in table-definition order
# rather than in an unspecified order.
query_job = client.query(
    """
    SELECT column_name
    FROM `bigquery-public-data.covid19_aha.INFORMATION_SCHEMA.COLUMNS`
    WHERE table_name = 'hospital_beds'
    ORDER BY ordinal_position
    """
)
results = query_job.result()  # Waits for job to complete.

for row in results:
    print("{} ".format(row.column_name))

county_fips_code 
county_name 
state_name 
cbsa_code 
total_hospital_beds 
acute_long_term_care_beds 
alcohol_drug_abuse_dependency_inpatient_care_beds 
burn_care_beds 
cardiac_intensive_care_beds 
gen_medical_surgical_adult_beds 
gen_medical_surgical_pediatric_beds 
intermediate_nursing_care_beds 
medical_surgical_intensive_care_beds 
neonatal_intensive_care_beds 
neonatal_intermediate_care_beds 
obstetric_care_beds 
other_care_beds 
other_intensive_care_beds 
other_long_term_care_beds 
other_special_care_beds 
pediatric_intensive_care_beds 
physical_rehabilitation_care_beds 
psychiatric_care_beds 
skilled_nursing_care_beds 
num_airborne_infection_isolation_rooms 


## Run a query on the public dataset and print the results

In [27]:
# Fetch ten rows of county-level bed counts from the hospital_beds table.
# The query has no ORDER BY, so which ten rows are returned is not fixed.
sample_sql = """
   SELECT county_name, state_name, total_hospital_beds FROM `bigquery-public-data.covid19_aha.hospital_beds` LIMIT 10"""

query_job = client.query(sample_sql)
results = query_job.result()  # Blocks until the query job has finished.

# One output line per row: county : state : total beds
line_template = "{} : {} : {} "
for bed_row in results:
    print(line_template.format(bed_row.county_name, bed_row.state_name, bed_row.total_hospital_beds))

Windsor County : Vermont : 70 
Apache County : Arizona : 143 
Iberville Parish : Louisiana : 8 
Adair County : Oklahoma : 34 
Graves County : Kentucky : 227 
Platte County : Nebraska : 51 
Lincoln Parish : Louisiana : 177 
Lawrence County : Tennessee : 99 
Geneva County : Alabama : 147 
Hamilton County : Illinois : 25 


# We have now successfully connected to and queried a public dataset using BigQuery.

In [13]:
# Deliberate stop: this assertion always fails and raises AssertionError.
# NOTE(review): presumably placed here so that "Run All" halts before the
# git add/commit/push cell below is executed -- confirm intent before changing.
assert False, "DO NOT REMOVE THIS LINE"

AssertionError: DO NOT REMOVE THIS LINE

In [3]:
%%bash
# Stage the notebook file explicitly (this also covers the case where it is not tracked yet).
git add GoogleBigQueryTests-generic.ipynb
# --all additionally stages every modified or deleted tracked file before committing.
git commit --all --message "Added connect to, describe, and query a public dataset"
# Publish the new commit to the configured remote.
git push

[main 8039cfa] FirstStabConnectingExternalJupyterToGCP
 1 file changed, 1 insertion(+), 1 deletion(-)


To github.com:jasondeden/GCP-Jupyter.git
   51e2154..8039cfa  main -> main
