In [1]:
from google.cloud import bigquery

In [3]:
# Create the BigQuery client with its default settings (no arguments passed)
client = bigquery.Client()

# Construct a reference to the public "openaq" dataset.
# DatasetReference is used instead of the deprecated client.dataset() helper;
# the resulting object still exposes .table() for building table references.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "openaq")

# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)

In [6]:
# Materialise the table listing so it can be both counted and iterated
tables = [*client.list_tables(dataset)]

print(f"Number of tables: {len(tables)}")
print("Table Names: ")
for table in tables:
    print(table.table_id)

Number of tables: 1
Table Names: 
global_air_quality


In [8]:
# Construct a reference to the "global_air_quality" table
gaq_table_ref = dataset_ref.table("global_air_quality")

# API request - fetch the table
gaq_table = client.get_table(gaq_table_ref)

In [9]:
# Show the table's schema: one SchemaField per column
gaq_table.schema

[SchemaField('location', 'STRING', 'NULLABLE', 'Location where data was measured', ()),
 SchemaField('city', 'STRING', 'NULLABLE', 'City containing location', ()),
 SchemaField('country', 'STRING', 'NULLABLE', 'Country containing measurement in 2 letter ISO code', ()),
 SchemaField('pollutant', 'STRING', 'NULLABLE', 'Name of the Pollutant being measured. Allowed values: PM25, PM10, SO2, NO2, O3, CO, BC', ()),
 SchemaField('value', 'FLOAT', 'NULLABLE', 'Latest measured value for the pollutant', ()),
 SchemaField('timestamp', 'TIMESTAMP', 'NULLABLE', 'The datetime at which the pollutant was measured, in ISO 8601 format', ()),
 SchemaField('unit', 'STRING', 'NULLABLE', 'The unit the value was measured in coded by UCUM Code', ()),
 SchemaField('source_name', 'STRING', 'NULLABLE', 'Name of the source of the data', ()),
 SchemaField('latitude', 'FLOAT', 'NULLABLE', 'Latitude in decimal degrees. Precision >3 decimal points.', ()),
 SchemaField('longitude', 'FLOAT', 'NULLABLE', 'Longitude in d

In [13]:
# Read the table's rows into a pandas DataFrame and preview the first five.
# NOTE(review): no max_results is passed to list_rows, so this presumably
# downloads the whole table; pass max_results=5 if only a preview is needed
# (confirm no later cell relies on the full frame).
gaq_df = client.list_rows(gaq_table).to_dataframe()
gaq_df.head()

Unnamed: 0,location,city,country,pollutant,value,timestamp,unit,source_name,latitude,longitude,averaged_over_in_hours
0,"BTM Layout, Bengaluru - KSPCB",Bengaluru,IN,co,910.0,2018-02-22 03:00:00+00:00,µg/m³,CPCB,12.912811,77.60922,0.25
1,"BTM Layout, Bengaluru - KSPCB",Bengaluru,IN,no2,131.87,2018-02-22 03:00:00+00:00,µg/m³,CPCB,12.912811,77.60922,0.25
2,"BTM Layout, Bengaluru - KSPCB",Bengaluru,IN,o3,15.57,2018-02-22 03:00:00+00:00,µg/m³,CPCB,12.912811,77.60922,0.25
3,"BTM Layout, Bengaluru - KSPCB",Bengaluru,IN,pm25,45.62,2018-02-22 03:00:00+00:00,µg/m³,CPCB,12.912811,77.60922,0.25
4,"BTM Layout, Bengaluru - KSPCB",Bengaluru,IN,so2,4.49,2018-02-22 03:00:00+00:00,µg/m³,CPCB,12.912811,77.60922,0.25


In [20]:
# SQL text: the city column of every row whose country code is 'US'
query = """
        SELECT city
        FROM `bigquery-public-data.openaq.global_air_quality`
        WHERE country = 'US'
        """

In [21]:
# Submit the query, then load its result set into a pandas DataFrame
query_job = client.query(query)
cities = query_job.to_dataframe()

In [25]:
# Number of rows per city name
cities["city"].value_counts()

Phoenix-Mesa-Scottsdale             88
Houston                             78
Los Angeles-Long Beach-Santa Ana    68
Riverside-San Bernardino-Ontario    58
San Francisco-Oakland-Fremont       58
                                    ..
BELTRAMI                             1
Moscow                               1
BOULDER                              1
TWIN FALLS                           1
GUNNISON                             1
Name: city, Length: 769, dtype: int64

In [30]:
# SQL text: city and country columns for every row (no WHERE filter)
cityCountryquery = """
        SELECT city, country
        FROM `bigquery-public-data.openaq.global_air_quality`
        """

In [31]:
# Submit the city/country query and load its result into a DataFrame.
# Note: this rebinds query_job, so the job object from the earlier US-only
# query is no longer reachable under that name.
query_job = client.query(cityCountryquery)
citiesCountry = query_job.to_dataframe()

In [32]:
# Preview the first rows of the city/country frame
citiesCountry.head()

Unnamed: 0,city,country
0,Bengaluru,IN
1,Bengaluru,IN
2,Bengaluru,IN
3,Bengaluru,IN
4,Bengaluru,IN


In [33]:
# Number of rows per country code
citiesCountry["country"].value_counts()

CN    4962
US    3678
ES    2120
FR    1592
IN    1452
      ... 
MM       1
TJ       1
TM       1
DZ       1
BM       1
Name: country, Length: 87, dtype: int64

In [35]:
# (rows, columns) of the city/country frame
citiesCountry.shape

(23941, 2)