In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [3]:
def _load_fire_area(region, data_dir, date_index):
    """Return a one-column frame (named ``region``) of estimated fire area per day."""
    wildfires = pd.read_csv('{}/Historical_wildfires.csv'.format(data_dir))
    wildfires['Date'] = pd.to_datetime(wildfires['Date'])
    fire_area = wildfires.pivot(index='Date', columns='Region', values='Estimated_fire_area')
    if region not in fire_area.columns:
        raise KeyError('region {!r} not found in Historical_wildfires.csv'.format(region))
    # Dates of ``date_index`` that are absent from the CSV become NaN rows, so
    # the result always covers the whole requested calendar.
    fire_area = fire_area.reindex(fire_area.index.union(date_index))
    return fire_area[[region]]


def _load_weather_means(region, data_dir):
    """Return one ``mean_<Parameter>_<region>`` column per weather parameter."""
    weather = pd.read_csv('{}/HistoricalWeather.csv'.format(data_dir))
    # Positional rename: the CSV must have exactly these eight columns, in this order.
    weather.columns = ['Date', 'Region', 'Parameter', 'count', 'min', 'max', 'mean', 'variance']
    weather['Date'] = pd.to_datetime(weather['Date'])
    weather = weather.loc[weather['Region'].eq(region)]
    means = weather.pivot(index='Date', columns='Parameter', values='mean')
    means.columns = ['mean_{}_{}'.format(parameter, region) for parameter in means.columns]
    return means


def _load_vegetation_mean(region, data_dir, end):
    """Return a daily ``Vegetation_index_mean_<region>`` column running through ``end``."""
    vegetation = pd.read_csv('{}/VegetationIndex.csv'.format(data_dir))
    vegetation['Date'] = pd.to_datetime(vegetation['Date'])
    vegetation = vegetation.loc[vegetation['Region'].eq(region)]
    if vegetation.empty:
        raise KeyError('region {!r} not found in VegetationIndex.csv'.format(region))

    index_mean = vegetation.pivot(index='Date', columns='Region', values='Vegetation_index_mean')
    # The source rows are sparser than daily: upsample to one row per day and
    # fill the days in between by linear interpolation.
    daily = index_mean.resample('1D').mean().interpolate()

    # Carry the last observation forward to ``end``. The extension starts from
    # the data's own last date rather than a hard-coded day, so it leaves no gap
    # and creates no duplicate dates whatever the last date in the CSV is.
    last_day = max(daily.index.max(), pd.Timestamp(end))
    daily = daily.reindex(pd.date_range(start=daily.index.min(), end=last_day)).ffill()
    daily.columns = ['Vegetation_index_mean_{}'.format(region)]
    return daily


def region_dataframe(region, data_dir='Data'):
    """
    Build the combined daily dataframe for a single region.

    Reads ``Historical_wildfires.csv``, ``HistoricalWeather.csv`` and
    ``VegetationIndex.csv`` from ``data_dir`` and joins them on date.

    Parameters
    ----------
    region : str
        Value of the ``Region`` column to select (e.g. ``'NSW'``).
    data_dir : str, default 'Data'
        Directory that contains the three input CSV files.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, covering at least 2005-01-01 .. 2020-10-31, with columns
        ``<region>``                        estimated fire area (NaN when no row exists),
        ``mean_<Parameter>_<region>``       one per weather parameter found in the file,
        ``Vegetation_index_mean_<region>``  interpolated to daily, then forward-filled.

    Raises
    ------
    KeyError
        If ``region`` does not occur in the wildfire or vegetation file.
    """
    date_index = pd.date_range(start='2005-01-01', end='2020-10-31')

    fire_area = _load_fire_area(region, data_dir, date_index)
    weather_means = _load_weather_means(region, data_dir)
    vegetation_mean = _load_vegetation_mean(region, data_dir, date_index[-1])

    combined = pd.concat([fire_area, weather_means, vegetation_mean], axis=1)
    # Keep rows in chronological order regardless of how concat unions the indexes.
    return combined.sort_index()


In [4]:
# Region codes, spelled as they appear in the 'Region' column of the input CSVs.
regions = [
    'NSW',
    'NT',
    'QL',
    'SA',
    'TA',
    'VI',
    'WA',
]

In [8]:
# Write one '<region>_iso.csv' file per region into the current working
# directory, keeping both the date index and the header row.
for region_code in regions:
    output_path = '{}_iso.csv'.format(region_code)
    region_frame = region_dataframe(region_code)
    region_frame.to_csv(output_path, index = True, header= True)