In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from fbprophet import Prophet

In [3]:
weather = pd.read_csv('Jan_16/HistoricalWeather.csv')
weather.columns = ['Date', 'Region', 'Parameter', 'count', 'min', 'max', 'mean', 'variance']
weather['Date'] = pd.to_datetime(weather['Date'])

In [4]:
weather

Unnamed: 0,Date,Region,Parameter,count,min,max,mean,variance
0,2005-01-01,NSW,Precipitation,8.002343e+05,0.000000,1.836935,0.044274,0.028362
1,2005-01-01,NSW,RelativeHumidity,8.002343e+05,13.877194,80.522964,36.355567,253.559937
2,2005-01-01,NSW,SoilWaterContent,8.002343e+05,0.002245,0.414305,0.170931,0.007758
3,2005-01-01,NSW,SolarRadiation,8.002343e+05,14.515009,32.169781,26.749389,6.078587
4,2005-01-01,NSW,Temperature,8.002343e+05,14.485785,35.878704,27.341182,18.562212
...,...,...,...,...,...,...,...,...
245716,2021-01-09,WA,RelativeHumidity,2.528546e+06,11.286411,84.094559,25.539865,219.361327
245717,2021-01-09,WA,SoilWaterContent,2.528546e+06,0.000000,0.374027,0.052128,0.001778
245718,2021-01-09,WA,SolarRadiation,2.528546e+06,18.421680,32.355438,29.266381,9.209243
245719,2021-01-09,WA,Temperature,2.528546e+06,20.764585,36.807751,31.943233,6.610068


In [9]:
def region_dataframe(region):
    """
    returns dataframe with the following related to specific region:
    estimated_fire_area
    mean_Precipitation
    mean_RelativeHumidity
    mean_SoilWaterContent
    mean_SolarRadiation
    mean_Temperature
    mean_Windspeed
    vegetation_index_mean
    """
    
    wildfires = pd.read_csv('Jan_16/Historical_Wildfires_BASE4.csv')
    wildfires['Date'] = pd.to_datetime(wildfires['Date'])
    wildfires = pd.pivot(wildfires, index = 'Date', columns = 'Region', values = ['Estimated_fire_area', 'Count'])
    columns = []
    for i,x in wildfires.columns:
        columns.append('{}_{}'.format(i,x))
    wildfires.columns = columns
    
    
    # Instantiating another dataframe with a daterange and merge to find the missing date.
    dummy = pd.DataFrame(index = pd.date_range(start = '2005-01-01', end = '2021-01-09'))


    # 2020-03-06 have missing values for all 7 states.
    wildfires = pd.merge(wildfires, dummy, how = 'outer', left_index= True, right_index=True)
    wildfires.fillna(0, inplace=True)
    print(wildfires.shape)

    df = wildfires[['Estimated_fire_area_{}'.format(region), 'Count_{}'.format(region)]]
    
    
    weather = pd.read_csv('Jan_16/HistoricalWeather.csv')
    weather.columns = ['Date', 'Region', 'Parameter', 'count', 'min', 'max', 'mean', 'variance']
    weather['Date'] = pd.to_datetime(weather['Date'])
    weather = weather.loc[weather['Region'].eq(region)]
    weather_pivot = pd.pivot(weather, index=['Date'], columns = ['Parameter', 'Region'], values = 'mean')

    columns = []
    for i,x in weather_pivot.columns:
        columns.append('mean_{}_{}'.format(i,x))
    weather_pivot.columns = columns
    weather_pivot.fillna(method = 'ffill', inplace=True)
    
    
    
    
    # Here we have monthly data that needs to be changed to daily.
    vegetation = pd.read_csv('Jan_16/VegetationIndex.csv')
    vegetation['Date'] = pd.to_datetime(vegetation['Date'])
    vegetation = vegetation.loc[vegetation['Region'].eq(region)]

    vegetation_pivot = pd.pivot(vegetation, index='Date', columns = 'Region').resample('1D').mean()
    vegetation_pivot = vegetation_pivot[['Vegetation_index_mean']]
    vegetation_pivot = vegetation_pivot[:'2020-12-01'].interpolate()
    vegetation_pivot = vegetation_pivot.unstack().reset_index()
    vegetation_pivot.columns = ['param', 'region', 'ds', 'y']
    vegetation_pivot = vegetation_pivot[['ds', 'y']]
    
    
    m = Prophet(
    changepoint_prior_scale= 30,
    holidays_prior_scale = 20,
    seasonality_prior_scale = 35,
    n_changepoints = 100,
    seasonality_mode = 'additive',
    daily_seasonality = False,
    weekly_seasonality=False,
    yearly_seasonality = False
        ).add_seasonality(name = 'monthly', period = 30.5, fourier_order=12
        ).add_seasonality(name = 'weekly', period = 7, fourier_order = 20
        ).add_seasonality(name = 'yearly', period = 365.25, fourier_order = 20
        ).add_seasonality(name = 'quarterly', period = 365.25/4, fourier_order = 5, prior_scale=15)
    
    print('fitting vegetation for {}'.format(region))
    m.fit(vegetation_pivot)
    future = m.make_future_dataframe(periods=52)
    pred = m.predict(future)
    vegetation_pivot = pred[['ds', 'yhat']]
    vegetation_pivot.rename(columns = {'ds':'Date', 'yhat':'Vegetation_index_mean_{}'.format(region)}, inplace= True)
    vegetation_pivot.set_index('Date', inplace=True)
    
#     for i in vegetation_pivot.columns:
#         vegetation_pivot[i] = vegetation_pivot[i].interpolate()
#     dummy = pd.DataFrame(index = pd.date_range(start = '2020-12-02', end = '2021-01-08'))
#     vegetation_pivot = pd.concat([vegetation_pivot, dummy], axis = 0)
#     for i in vegetation_pivot.columns:
#         vegetation_pivot[i] = vegetation_pivot[i].fillna(method='ffill')

        
        
#     columns = []
#     for i,x in vegetation_pivot.columns:
#         columns.append('{}_{}'.format(i,x))
#     vegetation_pivot.columns = columns

    df = pd.concat([df, weather_pivot, vegetation_pivot], axis = 1)

    df.reset_index(inplace=True)
    df.rename(columns ={'index':'Date'}, inplace=True)
    
    return df


In [14]:
regions = ['NSW', 'NT', 'QL', 'SA', 'TA', 'VI', 'WA']

In [15]:
for i in regions:
    region_dataframe(i).to_csv('{}_iso.csv'.format(i), index = False, header= True)

(5859, 14)
fitting vegetation for NSW




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for NT




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for QL




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for SA




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for TA




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for VI




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(5859, 14)
fitting vegetation for WA




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

