# UNMC Water, Climate and Health Group Air Quality Data

In [178]:
#import necessary packages
import pandas as pd

In [179]:
#load data

air_data = pd.read_csv('AirQuality_Daily_StudentVersion.csv')
## Turn data into a dataframe

air_data = pd.DataFrame(air_data)
## View data

air_data.head(5)

Unnamed: 0,date,monitor_index,humidity,pressure,temperature,voc,analog_input,pm2.5_alt,pm1.0_atm,pm2.5_atm,pm10.0_atm,sensor.latitude,sensor.longitude,sensor.altitude,sensor.name
0,02/23/24,195089,14.377667,912.884333,62.266667,51.998667,0.051333,0.1,0.0,0.0025,0.039667,40.050922,-101.53357,3005,Swnphd-Benklemen
1,02/23/24,195365,12.2236,926.403,71.1934,64.9208,0.0,0.18,0.0048,0.02,0.176,40.20033,-100.639885,2576,Swnphd-mccook
2,02/23/24,195541,20.09575,905.67075,61.00825,68.307,0.02,0.1625,0.004125,0.014812,0.063937,41.128284,-101.72022,3220,Swnphd-ogallala
3,02/24/24,195089,25.368,911.708833,51.462458,91.17675,0.052667,0.4375,0.099542,0.170667,0.355208,40.050922,-101.53357,3005,Swnphd-Benklemen
4,02/24/24,195365,23.703083,925.282125,56.818208,107.863708,0.0,0.475,0.099208,0.231687,0.548583,40.20033,-100.639885,2576,Swnphd-mccook


## Part 1: High Concentration Analysis

Client Request: What are the 5 locations in Nebraska with the highest mean and median concentrations of VOC, PM 2.5, and PM 10.0?

In [182]:
# Group by sensor location and aggregate mean and median for pollutants
sensor_stats = air_data.groupby('sensor.name').agg({
    'voc': ['mean', 'median'],
    'pm2.5_atm': ['mean', 'median'],
    'pm10.0_atm': ['mean', 'median']
})

# Display the full statistical summary table
sensor_stats

pivot_stats = pd.pivot_table(
    air_data,
    index = 'sensor.name',
    values = ['voc','pm2.5_atm','pm10.0_atm'],
    aggfunc = ['mean','median']
)

pivot_stats.head(5)

Unnamed: 0_level_0,mean,mean,mean,median,median,median
Unnamed: 0_level_1,pm10.0_atm,pm2.5_atm,voc,pm10.0_atm,pm2.5_atm,voc
sensor.name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
#16 - Richardson County Courthouse,701.632446,700.127342,86.75865,13.305615,11.977344,84.4705
#17 - Otoe County,11.671141,9.755345,251.427296,9.398875,7.687563,234.953542
#18 - Southeast District Health Department- Tecumseh,614.227248,613.175352,196.150771,11.433729,10.322875,138.582583
Ainsworth Public School #9,12.390708,10.697058,219.200274,9.449781,8.38974,217.8975
Broken Bow,929.678512,928.710593,158.285807,43.179094,36.05024,138.559646


# Part 2: Maximum Pollution

Client Request: On what days did the maximum values occur and whre did these maximums occur?

Maximum Pollution using VOC data

In [186]:
# Sort the entire dataset by VOC to find the maximum event
max_voc_event = air_data[['date', 'sensor.name', 'voc']].sort_values(by='voc', ascending=False).head(1)

max_voc_event


Unnamed: 0,date,sensor.name,voc
2391,06/24/24,Swnphd-ogallala,1209.931571


Maximum Pollution using PM 2.5 Data

In [188]:
# Sort the entire dataset by PM 2.5 to find the maximum event
max_pm25_event = air_data[['date', 'sensor.name', 'pm2.5_atm']].sort_values(by='pm2.5_atm', ascending=False).head(1)

max_pm25_event

Unnamed: 0,date,sensor.name,pm2.5_atm
7561,02/18/25,#16 - Richardson County Courthouse,3782.823313


Maximum Pollution using PM 10.0 Data

In [190]:
# Sort the entire dataset by PM 10.0 to find the maximum event
max_pm10_event = air_data[['date', 'sensor.name', 'pm10.0_atm']].sort_values(by='pm10.0_atm', ascending=False).head(1)

max_pm10_event

Unnamed: 0,date,sensor.name,pm10.0_atm
7561,02/18/25,#16 - Richardson County Courthouse,3784.682542


# Part 3: Temperature and Humidity Environmental Factors

Client Request: Does humidity and temperature have an effect on air quality?

In [224]:
# Define function for humidity categories
def assign_humidity(humidity):
    if humidity < 50:
        return 'Low Humidity'
    elif 50 <= humidity <= 80:
        return 'High Humidity'
    else:
        return 'Very High Humidity'

# Define function for temperature categories
def assign_temperature(temperature):
    if temperature < 32:
        return 'Below Freezing'
    elif 32 <= temperature <= 50:
        return 'Cool'
    elif 51 <= temperature <= 70:
        return 'Warm'
    else: 
        return 'Hot'

# Apply the functions to create new columns
air_data['humidity_level'] = air_data['humidity'].apply(assign_humidity)
air_data['temperature_level'] = air_data['temperature'].apply(assign_temperature)

air_data.head(2100)

Unnamed: 0,date,monitor_index,humidity,pressure,temperature,voc,analog_input,pm2.5_alt,pm1.0_atm,pm2.5_atm,pm10.0_atm,sensor.latitude,sensor.longitude,sensor.altitude,sensor.name,humidity_level,temperature_level
0,02/23/24,195089,14.377667,912.884333,62.266667,51.998667,0.051333,0.100000,0.000000,0.002500,0.039667,40.050922,-101.533570,3005,Swnphd-Benklemen,Low Humidity,Warm
1,02/23/24,195365,12.223600,926.403000,71.193400,64.920800,0.000000,0.180000,0.004800,0.020000,0.176000,40.200330,-100.639885,2576,Swnphd-mccook,Low Humidity,Hot
2,02/23/24,195541,20.095750,905.670750,61.008250,68.307000,0.020000,0.162500,0.004125,0.014812,0.063937,41.128284,-101.720220,3220,Swnphd-ogallala,Low Humidity,Warm
3,02/24/24,195089,25.368000,911.708833,51.462458,91.176750,0.052667,0.437500,0.099542,0.170667,0.355208,40.050922,-101.533570,3005,Swnphd-Benklemen,Low Humidity,Warm
4,02/24/24,195365,23.703083,925.282125,56.818208,107.863708,0.000000,0.475000,0.099208,0.231687,0.548583,40.200330,-100.639885,2576,Swnphd-mccook,Low Humidity,Warm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,06/13/24,194969,24.341833,899.296583,89.839917,116.082917,0.019333,2.112500,2.409063,2.978979,3.729479,40.512820,-101.629420,3269,SWNPHD-Imerial,Low Humidity,Hot
2096,06/13/24,195089,24.024917,907.628625,92.144667,183.032333,0.050042,2.362500,2.625521,3.273229,4.430979,40.050922,-101.533570,3005,Swnphd-Benklemen,Low Humidity,Hot
2097,06/13/24,195091,91.000000,954.960000,65.000000,0.000000,0.010000,7.733333,9.892271,12.949479,15.019083,41.995426,-96.927210,1385,ELVPHD Wisner HD 5,Very High Humidity,Warm
2098,06/13/24,195103,42.583471,954.409294,85.711765,142.744823,0.020000,7.717647,10.940324,13.405029,14.635029,42.425266,-97.100790,1537,Laurel High School,Low Humidity,Hot


# Part 4: AQI Health Risks

Client Request: Have there been any Air Quality Index (AQI) health risks

PM2.5 Risk is when it is above 35.4 micrograms per cubic meter
PM 10.0 Risk is when it is above 154 micrograms per cubic meter

Health Risks dates for PM2.5

In [198]:
# Filter for PM 2.5 Health Risks
risk_pm25 = air_data[air_data['pm2.5_atm'] > 35.4]

# Display the risk events sorted by date (showing specific columns for clarity)
risk_pm25[['date', 'sensor.name', 'pm2.5_atm']].sort_values(by='date')

Unnamed: 0,date,sensor.name,pm2.5_atm
6565,01/01/25,Broken Bow,2107.029354
6556,01/01/25,#16 - Richardson County Courthouse,2114.623250
6585,01/02/25,Broken Bow,1968.824813
6576,01/02/25,#16 - Richardson County Courthouse,2135.601729
6596,01/03/25,#16 - Richardson County Courthouse,2357.012438
...,...,...,...
6496,12/29/24,#16 - Richardson County Courthouse,1433.841104
6525,12/30/24,Broken Bow,1722.934604
6516,12/30/24,#16 - Richardson County Courthouse,1946.299250
6545,12/31/24,Broken Bow,2102.347375


Health Risks dates for PM10.0

In [200]:
# Filter for PM 10 Health Risks (> 154)
risk_pm10 = air_data[air_data['pm10.0_atm'] > 154]

# Display the risk events sorted by date
risk_pm10[['date', 'sensor.name', 'pm10.0_atm']].sort_values('date')

Unnamed: 0,date,sensor.name,pm10.0_atm
6556,01/01/25,#16 - Richardson County Courthouse,2114.860313
6565,01/01/25,Broken Bow,2107.215604
6576,01/02/25,#16 - Richardson County Courthouse,2136.544792
6585,01/02/25,Broken Bow,1969.514229
6596,01/03/25,#16 - Richardson County Courthouse,2359.675646
...,...,...,...
6505,12/29/24,Broken Bow,1720.299958
6516,12/30/24,#16 - Richardson County Courthouse,1948.080313
6525,12/30/24,Broken Bow,1723.087500
6536,12/31/24,#16 - Richardson County Courthouse,2205.778262
