In [1]:
import os 
import requests
from bs4 import BeautifulSoup
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Comment out API keys to avoid running cells by accident

# Read the PurpleAir section of the credentials file. The encoding is given
# explicitly because JSON files are conventionally UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("../../../src/api_credentials.json", encoding="utf-8") as f:
    purple_air_creds = json.load(f)["purple_air_credentials"]

# api read key created at https://develop.purpleair.com/keys
api_read_key = purple_air_creds["purple_air_read_key"]

# api write key created at https://develop.purpleair.com/keys
api_write_key = purple_air_creds["purple_air_write_key"]

In [7]:
# open variables json and assign to p_air_vars variable
# (explicit UTF-8 so the read does not depend on the platform's locale encoding)
with open('./EDA/purple-air/purple-air-variables.json', encoding="utf-8") as f:
    p_air_vars = json.load(f)

# pull in the list of test sensors as sensor_test and display it
sensor_test = p_air_vars['sensor_test']
sensor_test

[53, 77, 81, 443, 820]

In [8]:
# create function to pull all data from a sensor
def pull_sensor_data(sensor_id, p_air_READ_API_key, timeout=30):
    """
    Retrieve the full record for a single sensor from the PurpleAir API.

    Args:
    - sensor_id (int): ID of the sensor to look up; it is placed in the request URL.
    - p_air_READ_API_key (str): Your PurpleAir API key for reading data.
    - timeout (float or tuple): Seconds to wait for the server before giving up,
      passed straight through to `requests.get`. Defaults to 30.

    Returns:
    - dict or None: The value stored under 'sensor' in the JSON response, or
      None when the response contains no 'sensor' entry.

    Raises:
    - requests.exceptions.Timeout: If the server does not answer within `timeout`.
    """
    url = f"https://api.purpleair.com/v1/sensors/{sensor_id}"
    headers = {
        "X-API-Key": p_air_READ_API_key
        }

    # Without a timeout a stalled connection would block the kernel indefinitely
    sensor_response = requests.get(url, headers=headers, timeout=timeout)
    sensor_response_json = sensor_response.json()

    # .get() keeps the "no 'sensor' entry -> None" behaviour instead of raising KeyError
    sensor_data = sensor_response_json.get('sensor')
    return sensor_data

In [9]:
# Pull the full record for each test sensor (one API request per sensor)
list_of_test_sensors = [
    pull_sensor_data(sensor, api_read_key)
    for sensor in sensor_test
]

In [10]:
# One row per pulled sensor record; display the frame as the cell output
sample_sensor_df = pd.DataFrame(data=list_of_test_sensors)
sample_sensor_df

Unnamed: 0,sensor_index,last_modified,date_created,last_seen,private,is_owner,name,icon,location_type,model,...,5.0_um_count_b,10.0_um_count_b,pm1.0_cf_1_b,pm1.0_atm_b,pm2.5_atm_b,pm2.5_cf_1_b,pm10.0_atm_b,pm10.0_cf_1_b,stats_b,altitude
0,53,1520025982,1454548891,1708014938,0,0,Lakeshore,0,0,UNKNOWN,...,,,,,,,,,,
1,77,1575074907,1456896339,1708014855,0,0,Sunnyside,0,0,PA-I,...,,,,,,,,,,
2,81,1465680292,1465657200,1708014946,0,0,Sherwood Hills 2,0,0,UNKNOWN,...,,,,,,,,,,
3,443,1559966508,1478491864,1708014870,0,0,Weber-Morgan Health Department P1,0,0,PA-II,...,0.0,0.0,8.06,8.06,8.45,8.45,8.85,8.85,"{'pm2.5': 8.4, 'pm2.5_10minute': 8.9, 'pm2.5_3...",
4,820,1575003401,1483643179,1708014887,0,0,Granite Basement,0,1,PA-II,...,0.0,0.0,0.13,0.13,0.18,0.18,0.28,0.28,"{'pm2.5': 0.2, 'pm2.5_10minute': 0.3, 'pm2.5_3...",5131.0


In [21]:
def pull_sensor_data_v2(sensor_ids, p_air_READ_API_key, fields, timeout=30):
    """
    Retrieve sensor data from the PurpleAir API for multiple sensors.

    One GET request is sent per sensor ID, all over a single reused HTTP session.

    Args:
    - sensor_ids (list of int): List of sensor IDs to retrieve data for.
    - p_air_READ_API_key (str): Your PurpleAir API key for reading data.
    - fields (list of str or str): The fields to retrieve for each sensor. A list is
      joined with commas; a single string is sent unchanged.
    - timeout (float or tuple): Seconds to wait for the server on each request,
      passed straight through to `requests`. Defaults to 30.

    Returns:
    - list of dict: One entry per sensor ID, in the same order as `sensor_ids`. An entry
      is None when that sensor's response contains no 'sensor' entry.

    Raises:
    - requests.exceptions.Timeout: If a request is not answered within `timeout`.
    """

    # Join the list of fields into a comma-separated string. A bare string is used
    # as-is, because joining it would insert a comma between every character.
    if isinstance(fields, str):
        fields_str = fields
    else:
        fields_str = ",".join(fields)

    # Query parameters and headers are the same for every sensor, so build them once
    params = {
        "fields": fields_str
    }
    headers = {
        "X-API-Key": p_air_READ_API_key
    }

    # Initiate empty list to hold data for all sensor_ids identified
    list_of_sensor_data = []

    # A single session lets the connection to the API host be reused across requests
    with requests.Session() as session:
        for sensor_id in sensor_ids:
            # Construct the URL for the API request
            url = f"https://api.purpleair.com/v1/sensors/{sensor_id}"

            # Send the GET request; the timeout keeps one stalled request from
            # blocking the whole pull indefinitely
            sensor_response = session.get(
                url, params=params, headers=headers, timeout=timeout
            )

            # Parse the JSON response and extract the sensor data from it
            sensor_data = sensor_response.json().get('sensor')

            list_of_sensor_data.append(sensor_data)

    # return the full list of sensor data for all sensor_ids
    return list_of_sensor_data


In [25]:
# Test cell: small trial pull with two fields (left commented out so it is not run by accident)

#field = ['location_type','latitude']

#list_of_test_sensors_v2 = pull_sensor_data_v2(sensor_ids = sensor_test, p_air_READ_API_key = api_read_key, fields=field)

#sample_sensor_df_v2 = pd.DataFrame(list_of_test_sensors_v2)
#sample_sensor_df_v2

In [27]:
# Fields to request for every sensor in the full pull
final_field_list = [
    # placement of the sensor
    "location_type",
    "latitude",
    "longitude",
    # visibility-related fields
    "scattering_coefficient",
    "deciview",
    "visual_range",
    # remaining environmental fields
    "pressure",
    "temperature",
    "altitude",
    "humidity",
]

# Sensor IDs for the full pull, read from the variables json loaded earlier
slc_sensor_list = p_air_vars["slc_sensor_list"]

In [None]:
# call all sensors with the wanted fields
#purpleair_slc_sensors = pull_sensor_data_v2(sensor_ids = slc_sensor_list,
#                                            p_air_READ_API_key = api_read_key,
#                                            fields = final_field_list)

# turn it into a dataframe
#purpleair_slc_sensors_df = pd.DataFrame(purpleair_slc_sensors)
#purpleair_slc_sensors_df

# export to csv
#purpleair_slc_sensors_df.to_csv("datapull.csv")