In [None]:
import datetime
import os

import pandas as pd
import requests

def get_air_pollution(lat, lon, start, end, api_key, timeout=30):
    """Fetch historical air-pollution records from the OpenWeatherMap API.

    Args:
        lat: Latitude of the location, sent as the ``lat`` query parameter.
        lon: Longitude of the location, sent as the ``lon`` query parameter.
        start: Start of the requested period, sent as ``start``.
        end: End of the requested period, sent as ``end``.
        api_key: API key, sent as the ``appid`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` otherwise (a message describing the failure is printed).
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    url = "https://api.openweathermap.org/data/2.5/air_pollution/history"
    params = {
        "lat": lat,
        "lon": lon,
        "start": start,
        "end": end,
        "appid": api_key,
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts follow the same "print and return
        # None" contract as HTTP errors below.
        print(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code}: we couldnt get the data")
        return None

    return response.json()

# Location to query (latitude / longitude in decimal degrees).
lat = 40.730610
lon = -73.935242

# Requested period. The datetimes are timezone-aware (UTC) so the resulting
# Unix timestamps do not depend on the timezone of the machine running the
# notebook; a naive datetime would be interpreted in local time.
# NOTE(review): the period ends on Dec 30, not Dec 31 -- confirm this is intended.
start_date = datetime.datetime(2022, 1, 1, tzinfo=datetime.timezone.utc)
end_date = datetime.datetime(2022, 12, 30, tzinfo=datetime.timezone.utc)

start_timestamp = int(start_date.timestamp())
end_timestamp = int(end_date.timestamp())

# API key: read from the environment instead of committing it to the notebook.
# Any key that was previously hardcoded in this file should be revoked.
api_key = os.environ.get("OPENWEATHER_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENWEATHER_API_KEY environment variable before running this notebook."
    )

# Fetch the data; get_air_pollution returns None when the request fails.
air_pollution_data = get_air_pollution(lat, lon, start_timestamp, end_timestamp, api_key)

if air_pollution_data:
    # Print a short summary rather than the full JSON payload.
    print(f"Fetched {len(air_pollution_data.get('list', []))} air pollution records")


In [None]:
# Load the subway records, combine the separate "Date" and "Time" text columns
# into a single "DateTime" timestamp column, then drop the original columns
# that are no longer needed (optional clean-up).
subway_df = (
    pd.read_csv("./subway_data.csv")
    .assign(DateTime=lambda raw: pd.to_datetime(raw["Date"] + " " + raw["Time"]))
    .drop(columns=["Date", "Time", "Description", "Division", "Line Name"])
)

print(subway_df.head())

In [None]:
from datetime import datetime, timezone

# Fail early with a clear message if the API request did not return data;
# indexing None would otherwise raise an opaque TypeError.
if not air_pollution_data or "list" not in air_pollution_data:
    raise ValueError("No air pollution data available; check the API request above.")

# One record per observation: the raw Unix timestamp ("dt") and the air
# quality index ("main" -> "aqi").
data_list = [
    {"DateTime": record["dt"], "AQI": record["main"]["aqi"]}
    for record in air_pollution_data["list"]
]

# Explicit columns keep the frame well-formed even when the list is empty.
air_pollution_df = pd.DataFrame(data_list, columns=["DateTime", "AQI"])

# Convert epoch seconds directly to (naive, UTC-based) timestamps instead of
# formatting each one as a string and parsing it back.
air_pollution_df["DateTime"] = pd.to_datetime(air_pollution_df["DateTime"], unit="s")

# Aggregate the observations into daily means.
# NOTE(review): days here are UTC calendar days; the subway data is presumably
# in local time -- confirm the two are meant to be compared day by day.
air_pollution_df["Date"] = air_pollution_df["DateTime"].dt.date
AQI_daily_avg = (
    air_pollution_df.groupby("Date")["AQI"]
    .mean()
    .rename("AQI_Avg")
    .rename_axis("DateTime")
    .reset_index()
)

print(AQI_daily_avg.head())

In [None]:
# Group by calendar day and compute the mean of "Entries" (only Entries is
# averaged here). The grouping key is derived on the fly so that subway_df,
# which was built in an earlier cell, is not modified by this one.
subway_daily_avg = (
    subway_df
    .groupby(subway_df["DateTime"].dt.date)["Entries"]
    .mean()
    # Rename for clarity: the day becomes "DateTime", the mean "Avg_Entries".
    .rename("Avg_Entries")
    .rename_axis("DateTime")
    .reset_index()
)

print(subway_daily_avg.head())

In [None]:
# Both daily tables carry plain date objects in "DateTime"; convert them to
# datetime64 so they share a dtype and can be joined and plotted.
subway_daily_avg['DateTime'] = pd.to_datetime(subway_daily_avg['DateTime'])
AQI_daily_avg['DateTime'] = pd.to_datetime(AQI_daily_avg['DateTime'])

# Exact left join on the day. merge_asof (backward, no tolerance) would
# silently attach the most recent earlier AQI value to any subway day that has
# no AQI reading of its own; with an exact join such days get NaN instead.
combined_df = pd.merge(
    subway_daily_avg.sort_values('DateTime'),
    AQI_daily_avg,
    on='DateTime',
    how='left',
)

# Keep only the relevant columns; copy so later edits do not touch combined_df.
final_df = combined_df[['DateTime', 'Avg_Entries', 'AQI_Avg']].copy()

print(final_df.head())

In [None]:
import matplotlib.pyplot as plt

# Line chart of the daily average subway entries over time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(final_df['DateTime'], final_df['Avg_Entries'], color='tab:blue', label='Subway entries')

ax.set_xlabel('Date')
ax.set_ylabel('Subway entries', color='tab:blue')
ax.set_title('Daily subway entries')
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.legend()
plt.show()


In [None]:
# Line chart of the daily average air quality index over time.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(final_df['DateTime'], final_df['AQI_Avg'], color='tab:green', label='Air quality')

ax.set_xlabel('Date')
ax.set_ylabel('Air quality', color='tab:green')
ax.set_title('Air quality daily')
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.legend()
plt.show()

In [None]:
# Dual-axis chart: daily subway entries (left axis) against daily air quality
# (right axis) over the whole period in final_df.
fig, ax1 = plt.subplots(figsize=(10, 6))

# Left axis -> average entries
ax1.set_xlabel('Date')
ax1.set_ylabel('Entry to subway', color='tab:blue')
ax1.plot(final_df['DateTime'], final_df['Avg_Entries'], color='tab:blue', label='Avg_Entries')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Right axis -> average air quality index
ax2 = ax1.twinx()
ax2.set_ylabel('Air quality', color='tab:green')
ax2.plot(final_df['DateTime'], final_df['AQI_Avg'], color='tab:green', label='AQI_Avg')
ax2.tick_params(axis='y', labelcolor='tab:green')

# Title and legend. The legend merges the handles of both axes so the two
# labelled lines are actually identified on the figure.
ax1.set_title('Entry to subway and Air quality daily')
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper left')

# Rotate the date labels on ax1: after twinx() the current axes is ax2, whose
# x-axis is hidden, so plt.xticks(rotation=45) would not affect the visible
# labels.
ax1.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Date window to inspect; both bounds are inclusive.
start = "2022-05-01"
finish = "2022-08-31"

# Keep only the rows of final_df whose DateTime falls inside the window.
summer_df = final_df.loc[final_df["DateTime"].between(start, finish)]

# Show the first rows of the result.
print(summer_df.head())

In [None]:
# Dual-axis chart: daily subway entries (left axis) against daily air quality
# (right axis), restricted to the date window held in summer_df.
fig, ax1 = plt.subplots(figsize=(10, 6))

# Left axis -> average entries
ax1.set_xlabel('Date')
ax1.set_ylabel('Entry to subway', color='tab:blue')
ax1.plot(summer_df['DateTime'], summer_df['Avg_Entries'], color='tab:blue', label='Avg_Entries')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Right axis -> average air quality index
ax2 = ax1.twinx()
# Pin the top of the right axis at 5 and leave the bottom unchanged.
# NOTE(review): presumably 5 is the maximum of the AQI scale -- confirm.
ax2.set_ylim(None, 5)
ax2.set_ylabel('Air quality', color='tab:green')
ax2.plot(summer_df['DateTime'], summer_df['AQI_Avg'], color='tab:green', label='AQI_Avg')
ax2.tick_params(axis='y', labelcolor='tab:green')

# Title and legend. The legend merges the handles of both axes so the two
# labelled lines are actually identified on the figure.
ax1.set_title('Entry to subway and Air quality daily')
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper left')

# Rotate the date labels on ax1: after twinx() the current axes is ax2, whose
# x-axis is hidden, so plt.xticks(rotation=45) would not affect the visible
# labels.
ax1.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Display the merged daily table (pandas truncates long frames when printing).
print(final_df)