In [8]:
import json
import os

import numpy as np
import pandas as pd
import requests

In [9]:
# Load the wildfire records and parse the record-date column into Timestamps.
df = pd.read_csv('data/fire_data.csv')
date_ser = pd.to_datetime(df['Initial Source SIT209 Record Date'])

# Earliest and latest record dates; the bare tuple on the last line is the cell's displayed output.
start_date = date_ser.min()
end_date = date_ser.max()
start_date, end_date

  date_ser = pd.to_datetime(df['Initial Source SIT209 Record Date'])


(Timestamp('2022-12-31 00:00:00'), Timestamp('2024-04-17 00:00:00'))

In [10]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy integers, floats and arrays.

    NumPy integer scalars are converted to ``int``, NumPy floating scalars to
    ``float`` and arrays to (nested) lists. Anything else is passed to the
    base ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        for scalar_type, builtin in ((np.integer, int), (np.floating, float)):
            if isinstance(obj, scalar_type):
                return builtin(obj)
        # Not a NumPy value: defer to the base class implementation.
        return super().default(obj)

## We recommend that you skip running the cell below, as it makes calls to the OpenWeather API. The outputs of this cell are saved in the .json files in the data directory, so you may proceed from the following cell.

In [None]:
# One (latitude, longitude, unix time) triple per fire record; each record date is
# localized to UTC before being converted to whole epoch seconds.
helper_list = [
    (latitude, longitude, int(stamp.tz_localize('UTC').timestamp()))
    for latitude, longitude, stamp in zip(df['Latitude'], df['Longitude'], date_ser)
]

# API key for the weather requests. It is read from the OPENWEATHER_API_KEY environment
# variable when that is set, so the key does not have to be pasted into the notebook;
# otherwise the original placeholder is used.
key = os.environ.get('OPENWEATHER_API_KEY', 'INSERT API KEY')
def coord_to_data(coords, timeout=30):
    """Request the weather record for one location and time.

    Parameters
    ----------
    coords : sequence
        ``(lat, lon, time)``: latitude, longitude and the unix timestamp that
        is sent as the ``dt`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, which would stall a long fetch loop.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout is exceeded.
    """
    lat, lon, unix_time = coords[0], coords[1], coords[2]
    url = 'https://api.openweathermap.org/data/3.0/onecall/timemachine'
    # Let requests build and escape the query string instead of formatting it by hand.
    query = {'lat': lat, 'lon': lon, 'dt': unix_time, 'appid': key}
    return requests.get(url, params=query, timeout=timeout).json()

# Fetch the weather record for every fire row and checkpoint the results to JSON.
json_name = 'data/weather_api_data.json'

data_list = []
n_rows = df.shape[0]
for i in range(n_rows):
    print(f'Iteration {i}')
    object_id = df['OBJECTID'].iloc[i]
    try:
        weather = coord_to_data(helper_list[i])
    except Exception as E:
        # Best effort: a failed request is logged and stored as None so the loop keeps going.
        # (The separate `except ConnectionError` clause was removed: it named the builtin
        # exception and did exactly the same thing as this handler.)
        print(E)
        weather = None
    data_list.append((object_id, weather))
    # Checkpoint AFTER appending, every 20 rows and on the final row. Saving before the
    # append meant the final checkpoint never contained the last row.
    if (i % 20 == 0) or (i == n_rows - 1):
        print('Saving Results to JSON...')
        with open(json_name, "w") as data_file:
            json.dump(data_list, data_file, cls=NpEncoder)

# The last row is now part of the saved checkpoint, so no second API call is needed to
# recover it. The name is kept (as an empty list) because a later cell concatenates
# `extra_tuple` onto the records loaded from disk.
extra_tuple = []

In [22]:
# In practice the raw API data ended up in 3 different .json files, so all three are read
# here and combined. Each entry is an (ID, API response) pair as written by the API cell.
# Rows that are not in those files (originally the last row of the fire dataframe) are
# carried in `extra_tuple`, which only exists if the API cell was run in this kernel.

# `with` closes the three files even if one of them fails to parse.
with open('data/weather_api_data.json') as f_1, \
        open('data/weather_api_data_2.json') as f_2, \
        open('data/weather_api_data_3.json') as f_3:
    k_1, k_2, k_3 = json.load(f_1), json.load(f_2), json.load(f_3)

try:
    extra_rows = extra_tuple
except NameError:
    # The API cell was skipped, so there is nothing extra to append; say so rather than crash.
    print('extra_tuple is not defined (API cell not run); continuing without the separately fetched row.')
    extra_rows = []
k = k_1 + k_2 + k_3 + extra_rows

# One small frame per record (ID in column 0 next to the response fields), concatenated
# once at the end instead of growing `running_df` inside the loop.
record_frames = [
    pd.concat([pd.DataFrame([record_id]), pd.DataFrame.from_dict(item)], axis=1)
    for record_id, item in k
]
running_df = pd.concat(record_frames, join='outer') if record_frames else pd.DataFrame()

In [23]:
# Expand the nested dict data into columns of their own, one level at a time.
# Note: this cell replaces `running_df`, so it can only be run once per load.

# Step 1: each entry of 'data' becomes a row of columns.
data_columns = running_df['data'].apply(pd.Series)
running_df = pd.concat([running_df.drop(columns=['data']), data_columns], axis=1)

# Step 2: 'weather' holds a sequence; its first element is expanded the same way.
first_weather = running_df['weather'].apply(lambda entries: entries[0])
weather_columns = first_weather.apply(pd.Series)
running_df = pd.concat([running_df.drop(columns=['weather']), weather_columns], axis=1)

In [24]:
# Columns identified straight away as unsuitable for the statistical analysis,
# either because they are irrelevant or because of missing values.
drop_list = [
    'lat', 'lon', 'timezone', 'timezone_offset',
    'dt', 'sunrise', 'sunset',
    'icon', 'snow', 'id', 'wind_gust',
]
running_df = running_df.drop(drop_list, axis='columns')

In [25]:
# Give readable names to the columns left over from expanding the dict data.
weather_renamer = {
    0: 'ID',
    'main': 'weather',
    'description': 'weather_description',
}
weather_df = running_df.rename(weather_renamer, axis='columns')

In [26]:
# New, easier-to-use names for the fire data columns, paired with the existing
# columns by position.
new_names = [
    'ID', 'lat', 'lon', 'start_date', 'incident name', 'incident no.',
    'fully_contained', 'imt_type', 'geographic_area', 'new', 'imsr_post_date',
    'irwin_id', 'irwin_discovery_time', 'most_recent_date', 'occurrence', 'size',
    'nwcg_identifier', 'x', 'y',
]
fire_renamer = dict(zip(df.columns, new_names))

# Columns of the original fire data that are unsuitable for the analysis.
fire_drop_list = [
    'incident no.', 'imt_type', 'imsr_post_date', 'irwin_id', 'irwin_discovery_time',
    'occurrence', 'most_recent_date', 'nwcg_identifier', 'x', 'y',
]

# Rename first, then drop, so the drop list can use the new names.
fire_df = df.rename(columns=fire_renamer).drop(columns=fire_drop_list)

In [27]:
def _rain_value(entry):
    """Return float entries unchanged; for any other entry return its '1h' item."""
    if isinstance(entry, float):
        return entry
    return entry['1h']

# Attach the weather data to every wildfire row (left join on 'ID').
full_df_raw = fire_df.merge(weather_df, how='left', on='ID')
# The 'rain' column arrives in an unsuitable format, so it is flattened to plain values.
full_df_raw['rain'] = full_df_raw['rain'].apply(_rain_value)
# Save the merged result.
full_df_raw.to_csv('data/raw_data.csv')