In [1]:
import requests as r
import pandas as pd
import datetime as dt
import io
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'

In [0]:
from google.colab import files, drive

## Setting up folder

Log in to Google Drive and set `basepath` to the folder where all input and output files will be stored.

In [12]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Folder prefix for the CSV files this notebook reads and writes.
# Paths are built by plain string concatenation, so the trailing slash is required.
basepath = '/content/drive/My Drive/PurpleAir/'

In [0]:
# Load the sensor table; later cells read its ID, Label, city and ThingSpeak channel/key columns.
sensors_us = pd.read_csv(basepath+'sensors_us.csv')

Source for the ThingSpeak field names used below: https://github.com/bomeara/purpleairpy/blob/master/api.md

In [0]:
# Readable names for the eight ThingSpeak field columns (field1 ... field8).
field_names = {
    'field1': 'PM1.0_ATM',
    'field2': 'PM2.5_ATM',
    'field3': 'PM10.0_ATM',
    'field4': 'Uptime_min',
    'field5': 'RSSI (WiFi Signal Strength)',
    'field6': 'Temperature_F',
    'field7': 'Humidity_%',
    'field8': 'PM2.5',
}

## Select sensors

Select the sensors to download data for, based on city, state, specific label names or zipcode. Uncomment the selection criterion you want to use and comment out the others.

In [0]:
# Active selection criterion: only `city` is set; the alternatives below are commented out.
city = 'Austin'
#state = 'TX'
#label_names = ['Columbia South', 'Columbia South B', 'Columbia University SEAS', 'Columbia University SEAS B']
#zipcode = 77840

Modify the parameters below to select the date range and the averaging interval.

In [0]:
# Query window and averaging interval sent to ThingSpeak.
# Both timestamps use the same pre-encoded form ('%20' stands for the space between
# date and time) so they can be placed in the request URL as-is. Their first 10
# characters (YYYY-MM-DD) are also used when building the output filename.
start_date = '2019-10-01%2000:00:00' # Beginning of Oct 2019
# Take "now" in UTC: ThingSpeak interprets start/end as UTC unless a timezone is
# passed, so the machine's local clock would shift the end of the window.
end_date = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d%%20%H:%M:%S') # time now (UTC)
averaging = '60' # minutes: 60 = hourly average, 1440 = daily average

In [0]:
# Columns of the sensor table that are kept when previewing a selection.
sensor_meta = [
    'ID',
    'Label',
    'Lat',
    'Lon',
    'ParentID',
    'THINGSPEAK_PRIMARY_ID',
    'THINGSPEAK_PRIMARY_ID_READ_KEY',
    'THINGSPEAK_SECONDARY_ID',
    'THINGSPEAK_SECONDARY_ID_READ_KEY',
    'humidity',
    'pressure',
    'temp_f',
    'street_address',
    'city',
    'state',
    'zipcode',
]

Uncomment the cell below that matches the selection criterion chosen above (state, city or zipcode) and leave the others commented out.

In [0]:
#sensors_state = sensors_us.loc[sensors_us['state'] == state, sensor_meta].reset_index(drop=True)
#sensors_state.head()

In [0]:
# Preview the metadata columns of the sensors located in the selected city.
# The comparison ignores case so that typing `city` in a different case than the
# sensor table uses does not silently produce an empty preview.
city_mask = sensors_us['city'].str.lower() == city.lower()
sensors_city = sensors_us.loc[city_mask, sensor_meta].reset_index(drop=True)
sensors_city.head()

In [0]:
#sensors_zip = sensors_us.loc[(sensors_us['zipcode'] < zipcode+200) & (sensors_us['zipcode'] > zipcode-200), sensor_meta].reset_index(drop=True)
#sensors_zip.head()

In [0]:
# IDs of every sensor in the selected city, re-indexed from 0.
# Compare case-insensitively instead of against city.title(): title() rewrites
# names such as 'McAllen' to 'Mcallen', which would then match no rows at all.
ID_names = sensors_us[sensors_us['city'].str.lower() == city.lower()].ID.reset_index(drop=True)

In [0]:
#ID_names = sensors_us[sensors_us['state'] == state.title()].ID.reset_index(drop=True)

In [0]:
def get_hist_data(sensor_ID):
    """Download the ThingSpeak history of one sensor as a DataFrame.

    The sensor is looked up in the module-level ``sensors_us`` table and its
    primary ThingSpeak channel is requested as CSV. The query window and
    averaging interval come from the module-level ``start_date``, ``end_date``
    and ``averaging`` settings. The field columns that are kept are renamed so
    that each carries the sensor ID.

    Parameters
    ----------
    sensor_ID
        Value matched against ``sensors_us.ID``.

    Returns
    -------
    pandas.DataFrame or None
        For a sensor whose label (spaces replaced by underscores) ends in
        ``_B``: columns ``created_at`` and ``PM2.5_<ID>``. For any other
        sensor: ``created_at``, ``Temperature_F_<ID>``, ``Humidity_%_<ID>``
        and ``PM2.5_<ID>``. ``None`` if the server answers with an error
        status; the sensor ID and the reason are printed in that case.

    Raises
    ------
    IndexError
        If ``sensor_ID`` is not present in ``sensors_us``.
    requests.exceptions.RequestException
        If the request cannot be completed, including when it times out.
    """
    sensor = sensors_us[sensors_us.ID == sensor_ID]
    sensor_name = sensor.Label.values[0].replace(' ', '_')

    channel_id = sensor['THINGSPEAK_PRIMARY_ID'].values[0]
    api_key = sensor['THINGSPEAK_PRIMARY_ID_READ_KEY'].values[0]

    ts_url = f'https://api.thingspeak.com/channels/{channel_id}/feeds.csv?&api_key={api_key}&start={start_date}&end={end_date}&average={averaging}'
    # Without a timeout a single stalled connection would block the notebook forever.
    ts_data = r.get(ts_url, timeout=30)

    if not ts_data.ok:
        # Name the sensor so a failure can be traced when many sensors are downloaded.
        print(f'Sensor {sensor_ID}: {ts_data.reason}')
        return None

    sensor_data = pd.read_csv(io.StringIO(ts_data.text))
    if sensor_name.endswith('_B'):
        # Only the PM2.5 field is kept for "B" labelled sensors.
        column_names = {'field8': f'PM2.5_{sensor_ID}'}
    else:
        column_names = {
            'field6': f'Temperature_F_{sensor_ID}',
            'field7': f'Humidity_%_{sensor_ID}',
            'field8': f'PM2.5_{sensor_ID}',
        }
    # dicts keep insertion order, so the kept columns come out in field order.
    req_cols = ['created_at', *column_names.values()]
    return sensor_data.rename(columns=column_names)[req_cols]

In [0]:
def get_multiple_sensors(ID_names):
    """Download several sensors and merge them into one wide DataFrame.

    Each ID is fetched with ``get_hist_data`` and the per-sensor frames are
    outer-merged on ``created_at``, so every timestamp reported by any sensor
    is kept. Sensors whose download failed (``get_hist_data`` returned
    ``None``) are skipped instead of aborting the whole run.

    Parameters
    ----------
    ID_names
        Iterable of sensor IDs (for example a list or a pandas Series). The
        values are iterated directly, so a Series does not need a 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        The merged per-sensor columns plus ``Temperature_F_avg`` and
        ``Humidity_avg``, the row-wise means over all columns whose names
        match ``Temperature.*`` and ``Humidity.*`` respectively.

    Raises
    ------
    ValueError
        If ``ID_names`` is empty or no sensor could be downloaded.
    """
    sensors_df = None
    for sensor_ID in ID_names:
        sensor_df = get_hist_data(sensor_ID)
        if sensor_df is None:
            # Download failed; get_hist_data has already reported the reason.
            continue
        if sensors_df is None:
            sensors_df = sensor_df
        else:
            sensors_df = sensors_df.merge(sensor_df, how = 'outer', on = 'created_at', sort = True)

    if sensors_df is None:
        raise ValueError('No sensor data could be downloaded for the given sensor IDs')

    sensors_df['Temperature_F_avg'] = sensors_df.filter(regex = 'Temperature.*').mean(axis = 1)
    sensors_df['Humidity_avg'] = sensors_df.filter(regex = 'Humidity.*').mean(axis = 1)
    return sensors_df

In [0]:
# Display the selected sensor IDs before starting the download.
ID_names

In [0]:
# Download and merge the history of every selected sensor (one HTTP request per sensor ID).
sensors_df = get_multiple_sensors(ID_names)

In [0]:
# Preview the first rows of the merged frame.
sensors_df.head()

In [0]:
# (rows, columns) of the merged frame.
sensors_df.shape

In [0]:
# Form of the selection used in the output filename: spaces in the city name become hyphens.
city_f = city.replace(' ','-')
#state_f = state.replace(' ','-')
#zipcode_f = str(zipcode)

In [0]:
# Output filename: <city>-<start>-to-<end>-<averaging>.csv; [:10] keeps only the first 10 characters of each timestamp string.
sensor_data_filename = f'{city_f}-{start_date[:10]}-to-{end_date[:10]}-{averaging}.csv'

In [0]:
# Write the merged frame to the data folder, without the row index.
sensors_df.to_csv(basepath+sensor_data_filename, index = False)

In [0]:
# Plot the column at position 11 of the merged frame against the row index.
# NOTE(review): which measurement sits at position 11 depends on how many sensors were
# merged and in what order; selecting the column by name would be more robust.
sensors_df.iloc[:,11].plot(figsize = [10,8])