# **Forest Fire Prediction**

### Part 3: Gathering data from the weather API

In [None]:
# Note: when running in a clean environment, install any missing modules first
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.options.mode.chained_assignment = None

In [None]:
# Endpoint that get_weather() queries for historical daily weather.
API_URL = 'https://archive-api.open-meteo.com/v1/archive'

# CSV file names used by the cells below: the input table, the table after the
# weather columns are added, and the table after weather outliers are removed.
FINAL_AFTER_PREP_CSV = 'fire_history_prep.csv'
ADD_WEATHER_CSV = 'fire_history_add_weather.csv'
WEATHER_OUTLIERS_CSV = 'fire_history_weather_outliers_removed.csv'

FINAL_CSV = 'fire_history_final.csv'

# (API daily variable, DataFrame column) pairs. PARAMETERS and WEATHER_COLS are
# derived from this single table so the two lists always stay in the same order.
_WEATHER_FIELDS = (
    ('temperature_2m_max', 'MaxTemperature'),
    ('temperature_2m_min', 'MinTemperature'),
    ('windspeed_10m_max', 'WindSpeed'),
    ('winddirection_10m_dominant', 'WindDirection'),
    ('shortwave_radiation_sum', 'Radiation'),
    ('precipitation_sum', 'Precipitation'),
)
PARAMETERS = [api_name for api_name, _ in _WEATHER_FIELDS]
WEATHER_COLS = [column for _, column in _WEATHER_FIELDS]

In [None]:
def get_weather(latitude, longitude, date, timeout=30):
    """Fetch one day of daily weather values from the archive API.

    Args:
        latitude: Latitude of the location, passed through to the API.
        longitude: Longitude of the location, passed through to the API.
        date: Day to query; sent as both ``start_date`` and ``end_date``.
            NOTE(review): presumably an ISO ``YYYY-MM-DD`` string -- confirm
            against the values stored in the input CSV.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``daily`` entry of the JSON response, or ``None`` when the request
        fails, the status code is not 200, the body is not valid JSON, or the
        response has no ``daily`` entry.
    """
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': date,
        'end_date': date,
        'daily': PARAMETERS,
        'timezone': 'GMT',
    }

    # Without a timeout a single stalled connection would block forever;
    # network-level failures are reported the same way as a bad status code.
    try:
        response = requests.get(API_URL, params=params, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json().get('daily')
    except ValueError:
        # Body was not valid JSON.
        return None

In [None]:
def add_weather_data(df):
    """Return a copy of *df* with one weather column per entry of WEATHER_COLS.

    For every row, ``get_weather`` is called with the row's
    ``InitialLatitude``, ``InitialLongitude`` and ``FireDiscoveryDateTime``
    (one request per row), and the first value of each requested daily
    variable is stored in the matching weather column.

    Rows whose lookup returned nothing, or whose response lacks a variable,
    get NaN in the affected columns instead of raising.

    Args:
        df: DataFrame containing the three columns named above.

    Returns:
        A new DataFrame; *df* itself is not modified.
    """
    df_updated = df.copy()

    lats = df_updated['InitialLatitude'].tolist()
    longs = df_updated['InitialLongitude'].tolist()
    dates = df_updated['FireDiscoveryDateTime'].tolist()

    records = []
    for lat, lon, date in zip(lats, longs, dates):
        daily = get_weather(lat, lon, date)
        if not daily:
            # Failed lookup: keep the row, leave its weather values missing.
            records.append([np.nan] * len(PARAMETERS))
            continue
        # Each variable is a list with one entry per requested day; only a
        # single day is requested, so take element 0.
        records.append([(daily.get(name) or [np.nan])[0] for name in PARAMETERS])

    # Build the weather table on df's own index so values are matched to rows
    # by position in the request order, whatever the index labels are.
    weather = pd.DataFrame(records, index=df_updated.index,
                           columns=WEATHER_COLS, dtype=float)
    for col in WEATHER_COLS:
        df_updated[col] = weather[col]

    return df_updated

In [None]:
def remove_outliers(df, outliers):
    """Drop the rows of *df* flagged in *outliers* and renumber the index.

    Args:
        df: DataFrame to filter. It is modified in place.
        outliers: Sequence of booleans, one per row of *df* in row order;
            ``True`` marks a row to remove.

    Returns:
        The same DataFrame object, without the flagged rows and with a fresh
        0..n-1 index.

    Raises:
        ValueError: If *outliers* does not have exactly one flag per row.
    """
    mask = np.asarray(outliers, dtype=bool)
    if mask.shape != (len(df),):
        raise ValueError(
            f'expected {len(df)} outlier flags, got shape {mask.shape}')

    # Flags are positional, so renumber first: after this, row position and
    # index label coincide and the flagged rows can be dropped in one call.
    df.reset_index(drop=True, inplace=True)
    df.drop(index=np.flatnonzero(mask), inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [None]:
def weather_outliers(df):
    """Return a copy of *df* without rows that are IQR outliers in any weather column.

    The columns in WEATHER_COLS are processed one after another. For each
    column, rows whose value lies below ``Q1 - 1.5 * IQR`` or above
    ``Q3 + 1.5 * IQR`` are removed, and the next column's quartiles are
    computed on the rows that remain.

    Args:
        df: DataFrame containing every column named in WEATHER_COLS.

    Returns:
        A new DataFrame with a 0..n-1 index; *df* itself is not modified.
    """
    # Work on a renumbered copy so the positional flags handed to
    # remove_outliers line up with the rows whatever index the caller used.
    df_cleaned = df.copy().reset_index(drop=True)

    for col in WEATHER_COLS:
        data = df_cleaned[col]
        q1 = data.quantile(0.25)
        q3 = data.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - (1.5 * iqr)
        upper_bound = q3 + (1.5 * iqr)

        # Both comparisons are False for NaN, so rows with a missing value
        # in this column are kept.
        outliers = ((data < lower_bound) | (data > upper_bound)).tolist()

        df_cleaned = remove_outliers(df_cleaned, outliers)

    return df_cleaned

In [None]:
def boxplot_outliers(df, df2):
    """Show, for every weather column, a box plot of *df* next to one of *df2*.

    One subplot is drawn per entry of WEATHER_COLS; inside each subplot the
    box for *df* is labelled 'Before' and the box for *df2* 'After'.
    """
    # Y-axis captions, in the same order as WEATHER_COLS.
    axis_captions = (
        'Maximum daily air temperature at 2m above ground in °C',
        'Minimum daily air temperature at 2m above ground in °C',
        'Maximum wind speed on a day in km/h',
        'Dominant wind direction °',
        'The sum of solar radiation on a given day in MJ/m²',
        'Sum of daily precipitation in mm',
    )

    # A single row of six panels, one per weather column.
    _, panels = plt.subplots(1, 6, figsize=(15, 7))

    for position, column in enumerate(WEATHER_COLS):
        panel = panels[position]
        before_and_after = [df[column], df2[column]]
        panel.boxplot(before_and_after, labels=['Before', 'After'])
        panel.set_title(column + ' Boxplot')
        panel.set_ylabel(axis_captions[position])
        panel.grid(True)

    plt.suptitle('Weather outliers:')
    plt.tight_layout()
    plt.show()

In [None]:
# Load the CSV named by FINAL_AFTER_PREP_CSV; the bare `df` shows it as the cell output.
df = pd.read_csv(FINAL_AFTER_PREP_CSV)
df

In [None]:
# Add the weather columns (one API request per row) and save the result to
# ADD_WEATHER_CSV without the index column.
df = add_weather_data(df)
df.to_csv(ADD_WEATHER_CSV, index=False)
df

In [None]:
# Remove weather outliers into a separate frame (df is kept for comparison)
# and save the cleaned table to WEATHER_OUTLIERS_CSV.
df2 = weather_outliers(df)
df2.to_csv(WEATHER_OUTLIERS_CSV, index=False)
df2

In [None]:
# Compare each weather column before (df) and after (df2) outlier removal.
boxplot_outliers(df, df2)