## Importing Libraries

In [2]:
import os
import json
import boto3
import requests
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine
from bs4 import BeautifulSoup
import pytz
from datetime import datetime, date, timedelta
import pickle
from sklearn.linear_model import LinearRegression

### Weather Data

In [3]:
# Fetch the realtime weather reading for Chandigarh from the Tomorrow.io API.
# The API key is read from the environment; indexing os.environ (instead of
# .get) fails immediately with a KeyError when the key is missing, rather than
# sending the literal string "None" as the key.
url = "https://api.tomorrow.io/v4/weather/realtime"
params = {"location": "chandigarh", "apikey": os.environ["weather_api_key"]}
headers = {"accept": "application/json"}

# requests has no default timeout, so set one to avoid hanging the kernel.
response = requests.get(url, params=params, headers=headers, timeout=30)
# Surface HTTP errors (bad key, rate limit, ...) instead of printing an error body.
response.raise_for_status()

print(response.text)

{"data":{"time":"2024-04-02T05:30:00Z","values":{"cloudBase":1.91,"cloudCeiling":null,"cloudCover":39,"dewPoint":10.69,"freezingRainIntensity":0,"humidity":38,"precipitationProbability":0,"pressureSurfaceLevel":972.8,"rainIntensity":0,"sleetIntensity":0,"snowIntensity":0,"temperature":25.88,"temperatureApparent":25.88,"uvHealthConcern":3,"uvIndex":8,"visibility":16,"weatherCode":1101,"windDirection":307.13,"windGust":10,"windSpeed":5.13}},"location":{"lat":30.729843139648438,"lon":76.78414916992188,"name":"Chandigarh, Chandigarh District, Chandigarh, India","type":"administrative"}}


### AWS Connection

In [2]:
# Read the MySQL connection settings from AWS Secrets Manager.
# AWS credentials come from environment variables; the decoded secret is kept
# in `value`, which post_data() reads for endpoint/user/password/database.
region_name = "ap-southeast-2"
secret_name = "rdsMYSQL"
session = boto3.session.Session(
    region_name=region_name,
    aws_access_key_id=os.environ.get("aws_access_key_id"),
    aws_secret_access_key=os.environ.get("aws_secret_access_key"),
)
sm_client = session.client(service_name="secretsmanager")

try:
    get_secret_value_response = sm_client.get_secret_value(SecretId=secret_name)
    value = json.loads(get_secret_value_response["SecretString"])
except Exception as e:
    print("Failed to Read Data:", e)
    # Re-raise: without `value` every later database cell would fail with an
    # unrelated NameError, hiding the real cause.
    raise

### MySQL Connection

In [3]:
def post_data(time_received, aqi_us_count, aqi_in_count, pm25, pm10, so2, co, o3, no2):
    """Insert one AQI reading into dashboard.aqi_measures unless it already exists.

    A row counts as existing when dashboard.aqi_measures already holds a row
    with the same ``time_received``; in that case nothing is written and
    "Data already Exists" is printed.

    Connection settings are read from the module-level ``value`` dict
    (keys: endpoint, user, password, database).

    Args:
        time_received: Timestamp of the reading; used as the duplicate key.
        aqi_us_count: Value stored in the aqi_us_count column.
        aqi_in_count: Value stored in the aqi_in_count column.
        pm25, pm10, so2, co, o3, no2: Values stored in the columns of the same name.

    Returns:
        True, whether a row was inserted or the reading already existed.

    Raises:
        RuntimeError: If the connection reports that it is not connected.
    """
    mysql_config = {
        "host": value["endpoint"],
        "user": value["user"],
        "password": value["password"],
        "database": value["database"]
    }
    mysql_connection = mysql.connector.connect(**mysql_config)

    # try/finally guarantees the cursor and connection are released even when
    # a query fails part-way through.
    try:
        if not mysql_connection.is_connected():
            # Previously this path fell through to an unbound `count`.
            raise RuntimeError("MySQL connection is not available")

        mysql_cursor = mysql_connection.cursor()
        try:
            # Checking if Data exists in MySQL
            already_exist_query = """
        SELECT COUNT(*) FROM dashboard.aqi_measures 
        WHERE time_received = %s
        """
            mysql_cursor.execute(already_exist_query, (time_received,))
            count = mysql_cursor.fetchone()[0]

            # Insert Data into MySQL
            if count == 0:
                insert_query = """
        INSERT INTO dashboard.aqi_measures 
        (time_received, aqi_us_count, aqi_in_count, pm25, pm10, so2, co, o3, no2) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
                mysql_cursor.execute(
                    insert_query,
                    (time_received, aqi_us_count, aqi_in_count, pm25, pm10, so2, co, o3, no2),
                )
                mysql_connection.commit()
            else:
                print("Data already Exists")
        finally:
            mysql_cursor.close()
    finally:
        mysql_connection.close()

    return True

### Collecting Data

In [4]:
# Download the aqi.in dashboard page for Chandigarh; the HTML is parsed in the next cell.
url = "https://www.aqi.in/au/dashboard/india/chandigarh"
# requests has no default timeout, so set one to avoid hanging the kernel.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error rather than trying to scrape an error page.
response.raise_for_status()
html_content = response.text

In [5]:
soup = BeautifulSoup(html_content, "html.parser")

# Timestamp: keep the text after the first colon and parse it as a datetime.
time_node = soup.find(class_="card-location-time")
if time_node is None:
    raise ValueError("Could not find the 'card-location-time' element in the page")
time_received = datetime.strptime(
    time_node.text.split(":", 1)[-1].strip(), "%d %b %Y, %I:%M%p"
)

# AQI counts: the last matching element of each class holds the value used.
aqi_us_nodes = soup.find_all(class_="AQI_toggle aqiUsa")
aqi_in_nodes = soup.find_all(class_="AQI_toggle aqiInd")
if not aqi_us_nodes or not aqi_in_nodes:
    raise ValueError("Could not find the AQI (US/IN) elements in the page")
aqi_us_count = int(aqi_us_nodes[-1].text)
aqi_in_count = int(aqi_in_nodes[-1].text)

# Pollutants are unpacked positionally, so the page must expose exactly six values.
# The loop variable is deliberately not called `value`, which names the secrets dict.
pollutant_nodes = soup.find_all(class_="Pollutants_sensor_text")
if len(pollutant_nodes) != 6:
    raise ValueError(
        f"Expected 6 pollutant readings (pm25, pm10, so2, co, o3, no2), found {len(pollutant_nodes)}"
    )
pm25, pm10, so2, co, o3, no2 = [int(node.text) for node in pollutant_nodes]

post_data(time_received, aqi_us_count, aqi_in_count, pm25, pm10, so2, co, o3, no2)

Data already Exists


True

### ML Data

In [6]:
def download_pdf(url, save_path, timeout=30):
    """Download the resource at ``url`` and write its bytes to ``save_path``.

    Args:
        url: Address of the file to download.
        save_path: Destination path; the file is opened in binary write mode.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            file is not written in that case, so an error page is never saved
            under a ``.pdf`` name.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(save_path, "wb") as pdf_file:
        pdf_file.write(response.content)

In [7]:
# One YYYYMMDD stamp for every calendar day from start_date to end_date, inclusive.
start_date = date(2023, 1, 1)
end_date = date(2023, 12, 31)
span_days = (end_date - start_date).days + 1
dates = [
    (start_date + timedelta(days=offset)).strftime("%Y%m%d")
    for offset in range(span_days)
]

# Build the bulletin URL and local target path for each day.
# The actual download call is left disabled.
for d in dates:
    url = f"https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_{d}.pdf"
    save_path = r"C:\Users\jaske\Downloads\data\{}.pdf".format(d)
    #download_pdf(url, save_path)

In [8]:
# Load the hourly city dataset and keep only complete Chandigarh rows.
city_hour = pd.read_csv(r"C:\Users\jaske\Downloads\archive\city_hour_transformed.csv")

# A single .loc selection plus a non-inplace dropna avoids mutating a chained
# slice (a source of SettingWithCopyWarning); the explicit copy makes `df` an
# independent frame that later cells can safely add columns to.
df = (
    city_hour
    .loc[city_hour["City"] == "Chandigarh", ["City", "Datetime", "AQI"]]
    .dropna()
    .copy()
)
df.to_csv(r"C:\Users\jaske\Downloads\chandigarh.csv", index=False)

### Predicting Value

In [9]:
# Parse the timestamps once, then store both the parsed column and its hour-of-day.
parsed_times = pd.to_datetime(df["Datetime"])
df["Datetime"] = parsed_times
df["Hour"] = parsed_times.dt.hour

# Fit a linear regression of AQI on the hour of day.
hour_feature = df[["Hour"]]
aqi_target = df["AQI"]
model = LinearRegression()
model.fit(hour_feature, aqi_target)

# Persist the fitted model to disk.
with open(r"C:\Users\jaske\Downloads\model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [10]:
# Hour of day (Asia/Kolkata) one hour from now, fed to the fitted model.
kolkata_tz = pytz.timezone("Asia/Kolkata")
one_hour_ahead = datetime.now().astimezone(kolkata_tz) + timedelta(hours=1)
upcoming_hour = one_hour_ahead.hour

next_hour_frame = pd.DataFrame({"Hour": [upcoming_hour]})
model.predict(next_hour_frame)[0]

98.4512902399668