In [1]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import neighbors, datasets, preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.svm import SVC


In [2]:
import os

# Only prompt for an upload when the dataset is not already in the working
# directory. This keeps "Restart & Run All" from blocking on the upload widget
# and lets the notebook run outside Colab once the CSV is in place.
if os.path.exists('air_quality.csv'):
    uploaded = {}
else:
    from google.colab import files

    uploaded = files.upload()

Saving air_quality.csv to air_quality.csv


In [3]:
# Read the uploaded air-quality CSV from the working directory into a DataFrame
dataset_path = 'air_quality.csv'
df = pd.read_csv(dataset_path)

In [4]:
# Predictor columns and the label column used to train the classifiers
feature_columns = [
    'AQI', 'PM10', 'PM2_5', 'NO2', 'SO2', 'O3',
    'Temperature', 'Humidity', 'WindSpeed',
]
target_column = 'HealthImpactClass'

X = df.loc[:, feature_columns]
y = df.loc[:, target_column]

In [5]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the split, and
# therefore every score computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)



In [6]:
# Random Forest Model
# `metrics` is imported at cell level (not removed) because other cells may
# reference it after this one has run.
from sklearn import metrics

# Seeded so the fitted forest and its reported accuracy are reproducible.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Score the model on the held-out split
y_pred = clf.predict(X_test)

print()
print("ACCURACY OF THE MODEL:", metrics.accuracy_score(y_test, y_pred))





ACCURACY OF THE MODEL: 0.9062768701633706


In [7]:
# SVM Model
# NOTE(review): the features are passed to the linear SVM unscaled; consider
# standardising them first (e.g. StandardScaler) — confirm whether that is wanted.
svm_clf = SVC(kernel='linear')
svm_clf.fit(X_train, y_train)
svm_y_pred = svm_clf.predict(X_test)

# Use the `accuracy_score` imported at the top of the notebook so this cell
# does not depend on an import made inside the Random Forest cell.
print("ACCURACY OF THE SVM MODEL:", accuracy_score(y_test, svm_y_pred))
print(classification_report(y_test, svm_y_pred))

In [8]:
# Coordinates used to query the APIs for each Massachusetts county.
# Insertion order is preserved and determines the row order of the fetched data.
_ma_county_coords = [
    ("Barnstable", 41.7001, -70.2995),
    ("Berkshire", 42.3118, -73.1822),
    ("Bristol", 41.7938, -71.1449),
    ("Dukes", 41.4253, -70.6693),
    ("Essex", 42.6309, -70.7824),
    ("Franklin", 42.5306, -72.6151),
    ("Hampden", 42.1172, -72.6620),
    ("Hampshire", 42.3471, -72.6624),
    ("Middlesex", 42.4672, -71.2874),
    ("Nantucket", 41.2835, -70.0995),
    ("Norfolk", 42.1767, -71.1449),
    ("Plymouth", 41.9948, -70.6929),
    ("Suffolk", 42.3601, -71.0589),
    ("Worcester", 42.3648, -71.8969),
]

counties = {
    name: {"lat": lat, "lon": lon}
    for name, lat, lon in _ma_county_coords
}


In [9]:
import getpass
import os

import requests

# Never store the OpenWeather key in the notebook. Read it from the
# OPENWEATHER_API_KEY environment variable, falling back to a hidden prompt.
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and revoked.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or getpass.getpass("OpenWeather API key: ")

def fetch_air_quality(lat, lon):
    """Request air-pollution data for a coordinate from the OpenWeather API.

    Args:
        lat: Latitude passed to the API as the ``lat`` query parameter.
        lon: Longitude passed to the API as the ``lon`` query parameter.

    Returns:
        The decoded JSON body of the response. No HTTP status check is
        performed, so an error payload is returned as-is and callers must
        handle missing keys.
    """
    response = requests.get(
        # HTTPS so the API key in the query string is not sent in cleartext.
        "https://api.openweathermap.org/data/2.5/air_pollution",
        # Let requests build and encode the query string.
        params={"lat": lat, "lon": lon, "appid": API_KEY},
        # Fail instead of hanging forever if the service does not answer.
        timeout=10,
    )
    return response.json()

def fetch_weather(lat, lon):
    """Request weather data (metric units) for a coordinate from the OpenWeather API.

    Args:
        lat: Latitude passed to the API as the ``lat`` query parameter.
        lon: Longitude passed to the API as the ``lon`` query parameter.

    Returns:
        The decoded JSON body of the response. No HTTP status check is
        performed, so an error payload is returned as-is and callers must
        handle missing keys.
    """
    response = requests.get(
        # HTTPS so the API key in the query string is not sent in cleartext.
        "https://api.openweathermap.org/data/2.5/weather",
        # Let requests build and encode the query string.
        params={"lat": lat, "lon": lon, "appid": API_KEY, "units": "metric"},
        # Fail instead of hanging forever if the service does not answer.
        timeout=10,
    )
    return response.json()




In [10]:

def scale_aqi_to_us(aqi):
    """
    Convert an OpenWeather AQI level (1–5) to a value on the U.S. AQI scale.

    Each level is mapped to the lower bound of its band:
    1 -> 0.0, 2 -> 51.0, 3 -> 101.0, 4 -> 151.0, 5 -> 201.0.
    Any other input returns None.

    NOTE(review): the result is the band floor, not an interpolation within
    the band — confirm this matches how AQI is scaled in the training data.
    """
    band_floors = (
        (1, 0.0),    # Good (0–50)
        (2, 51.0),   # Fair (51–100)
        (3, 101.0),  # Moderate (101–150)
        (4, 151.0),  # Poor (151–200)
        (5, 201.0),  # Very Poor (201–300)
    )
    # The first matching level wins; unknown levels fall through to None.
    return next((floor for level, floor in band_floors if aqi == level), None)




In [11]:
import pandas as pd
#AQI, PM10, PM2.5, NO2, SO2, O3, Temp, Humid, WindSpeed

def process_data(counties, air_quality_fetcher=None, weather_fetcher=None):
    """Build one feature row per county from air-quality and weather responses.

    Args:
        counties: Mapping of county name -> {"lat": ..., "lon": ...}.
        air_quality_fetcher: Optional callable ``(lat, lon) -> dict`` used to
            obtain the air-quality payload. Defaults to ``fetch_air_quality``.
        weather_fetcher: Optional callable ``(lat, lon) -> dict`` used to
            obtain the weather payload. Defaults to ``fetch_weather``.

    Returns:
        pandas.DataFrame with the columns county, AQI, PM10, PM2_5, NO2, SO2,
        O3, Temperature, Humidity and WindSpeed: one row per county whose
        responses contained every required field. Counties with incomplete or
        malformed responses are reported on stdout and skipped.
    """
    # Resolve the defaults at call time so existing callers are unaffected.
    if air_quality_fetcher is None:
        air_quality_fetcher = fetch_air_quality
    if weather_fetcher is None:
        weather_fetcher = fetch_weather

    data = []
    for county, coords in counties.items():
        air_quality = air_quality_fetcher(coords['lat'], coords['lon'])
        weather = weather_fetcher(coords['lat'], coords['lon'])

        # Extract relevant metrics
        try:
            reading = air_quality["list"][0]
            components = reading["components"]

            # The row dict is fully built before append, so a missing field
            # never leaves a partial row behind.
            data.append({
                "county": county,
                "AQI": reading["main"]["aqi"],
                "PM10": components["pm10"],
                "PM2_5": components["pm2_5"],
                "NO2": components["no2"],
                "SO2": components["so2"],
                "O3": components["o3"],
                "Temperature": weather["main"]["temp"],
                "Humidity": weather["main"]["humidity"],
                "WindSpeed": weather["wind"]["speed"]
            })
        except (KeyError, IndexError, TypeError):
            # KeyError: field missing (e.g. an error payload);
            # IndexError: empty "list"; TypeError: a null/non-container field.
            print(f"Error processing data for {county}")

    return pd.DataFrame(data)

# Collect data
county_data = process_data(counties)
if county_data.empty:
    # Fail with a clear message instead of an opaque KeyError on 'AQI' below.
    raise RuntimeError(
        "No county data could be retrieved; check the API key and network access."
    )

# Update the AQI column
county_data['AQI'] = county_data['AQI'].apply(scale_aqi_to_us)

# Predict from exactly the columns (and column order) the model was trained on,
# so extra columns on county_data cannot leak into the feature matrix.
county_data['HealthImpactClass'] = clf.predict(county_data[list(X.columns)])
print(county_data)


        county   AQI  PM10  PM2_5   NO2   SO2     O3  Temperature  Humidity  \
0   Barnstable  51.0  5.55   4.96  1.00  0.24  92.98        13.63        97   
1    Berkshire  51.0  1.31   1.27  4.76  0.16  73.67        11.31        95   
2      Bristol  51.0  4.59   4.18  1.61  0.24  90.12        15.78        95   
3        Dukes  51.0  6.03   5.11  0.66  0.25  92.98        14.59        90   
4        Essex  51.0  2.30   2.17  7.80  1.07  80.11        13.97        98   
5     Franklin  51.0  1.59   1.47  8.14  0.22  62.23         7.02        97   
6      Hampden  51.0  1.53   1.38  7.11  0.43  74.39         4.32        98   
7    Hampshire  51.0  1.40   1.26  7.28  0.31  68.67         5.70        96   
8    Middlesex  51.0  2.06   1.72  4.84  0.30  77.25        15.28        85   
9    Nantucket  51.0  8.79   6.47  0.56  0.35  94.41        12.30        98   
10     Norfolk  51.0  3.48   3.16  4.03  0.34  85.83        15.18        97   
11    Plymouth  51.0  3.71   3.49  1.69  0.53  88.69

In [12]:
!pip install plotly-geo


Collecting plotly-geo
  Downloading plotly_geo-1.0.0-py3-none-any.whl.metadata (834 bytes)
Downloading plotly_geo-1.0.0-py3-none-any.whl (23.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 41.4 MB/s eta 0:00:00
Installing collected packages: plotly-geo
Successfully installed plotly-geo-1.0.0


In [14]:
from itertools import count
import plotly.graph_objects as go

# FIPS code for each Massachusetts county, keyed by county name
fips_codes = dict(
    Barnstable="25001",
    Berkshire="25003",
    Bristol="25005",
    Dukes="25007",
    Essex="25009",
    Franklin="25011",
    Hampden="25013",
    Hampshire="25015",
    Middlesex="25017",
    Nantucket="25019",
    Norfolk="25021",
    Plymouth="25023",
    Suffolk="25025",
    Worcester="25027",
)

# Attach the matching FIPS code to every row via its county name
county_data['FIPS'] = county_data['county'].map(fips_codes)

# Parallel lists consumed by the choropleth trace
fips = county_data['FIPS'].to_list()
values = county_data['HealthImpactClass'].to_list()

# Classification descriptions
# NOTE(review): the label meanings are taken as given here — confirm them
# against the dataset's documentation.
class_descriptions = {
    0: "Very High",
    1: "High",
    2: "Moderate",
    3: "Low",
    4: "Very Low"
}

# Hover text: county name plus the readable label of its predicted class.
# Unknown class values fall back to the raw value.
hover_text = [
    f"{county}: {class_descriptions.get(label, label)}"
    for county, label in zip(county_data['county'], values)
]

# Class ids in ascending order; they drive the colour range and colorbar ticks.
class_ids = sorted(class_descriptions)

# Create the choropleth map
fig = go.Figure(go.Choropleth(
    geojson="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
    locations=fips,  # FIPS codes
    z=values,  # Data to map
    # Pin the colour range to the full class range so a given class always gets
    # the same colour, regardless of which classes were actually predicted.
    zmin=class_ids[0],
    zmax=class_ids[-1],
    colorscale=[[0, "#d7191c"], [0.25, "#fdae61"], [0.5, "#ffffbf"], [0.75, "#abdda4"], [1, "#2b83ba"]],
    marker_line_width=0.5,
    text=hover_text,
    hovertemplate="%{text}<extra></extra>",
    colorbar=dict(
        title="Health Impact",
        tickvals=class_ids,  # One tick per class
        ticktext=[class_descriptions[class_id] for class_id in class_ids],  # Readable labels
    )

))

# Geo settings that zoom the U.S. base map onto Massachusetts
massachusetts_geo = dict(
    scope="usa",
    projection_type="albers usa",
    # Center on Massachusetts
    # TODO (original note): change center to be on Worcester County
    center=dict(lat=42.4072, lon=-71.3824),
    fitbounds="locations",
)

fig.update_layout(
    title_text="Air Health Class by County in Massachusetts",
    geo=massachusetts_geo,
)

fig.show()
