In [1]:
#Load relevant libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
import geopandas as gpd
import datetime as dt
from scipy.spatial.distance import cdist
from sklearn.impute import KNNImputer
from shapely.geometry import Point, Polygon
warnings.filterwarnings('ignore')

In [2]:
#Load data for cities, fires 
cities = pd.read_csv('precipitation_temp_city/city_info.csv',index_col=0)
fires = pd.read_csv('Monitoring_Trends_in_Burn_Severity_Fire_Occurrence_Locations__Feature_Layer_.csv')

In [3]:
# Restrict cities to California area for purposes of project 
California = cities.loc[cities['Lon'] < -117]
California = California.loc[California['Lon'] > -124]
California = California.loc[California['Lat'] < 42.02]
California = California.loc[California['Lat'] > 32.64]
California.Name.unique()

array(['Winnemucca', 'Reno', 'Escondido', 'Ukiah', 'Sacramento',
       'Riverside', 'SanDiego', 'Redding', 'LosAngeles', 'Fresno',
       'Bakersfield'], dtype=object)

In [4]:
California =California.drop_duplicates(subset='Name', keep='last')
California.head()

Unnamed: 0,Name,ID,Lat,Lon,Stn.Name,Stn.stDate,Stn.edDate
184,Winnemucca,USW00024128,40.9017,-117.8081,WINNEMUCCA MUNICIPAL ARPT,1949-09-15,2017-12-31
186,Reno,USW00023185,39.4839,-119.7711,RENO TAHOE INTERNATIONAL AP,1937-03-01,2017-12-31
403,Escondido,USC00042863,33.1211,-117.09,Escondido 2,1979-04-01,2017-12-31
405,Ukiah,USW00023275,39.1258,-123.2008,UKIAH MUNI AP,2000-08-01,2017-12-31
406,Sacramento,USW00023271,38.5553,-121.4183,SACRAMENTO 5 ESE,1878-01-01,2017-12-31


In [5]:
#load relevant data for each city of choice 

bakersfield_w = pd.read_csv('precipitation_temp_city/USW00023155.csv', index_col=0).reset_index(drop=True)
fresno_w = pd.read_csv('precipitation_temp_city/USW00093193.csv', index_col=0).reset_index(drop=True)
la_w = pd.read_csv('precipitation_temp_city/USW00093134.csv', index_col=0).reset_index(drop=True)
redding_w = pd.read_csv('precipitation_temp_city/USW00024257.csv', index_col=0).reset_index(drop=True)
sd_w = pd.read_csv('precipitation_temp_city/USW00023188.csv', index_col=0).reset_index(drop=True)
riverside_w = pd.read_csv('precipitation_temp_city/USW00003171.csv', index_col=0).reset_index(drop=True)
sac_w = pd.read_csv('precipitation_temp_city/USW00023271.csv', index_col=0).reset_index(drop=True)
ukiah_w = pd.read_csv('precipitation_temp_city/USW00023275.csv', index_col=0).reset_index(drop=True)
#escondido_w = pd.read_csv('precipitation_temp_city/USC00042863.csv', index_col=0).reset_index(drop=True)
reno_w = pd.read_csv('precipitation_temp_city/USW00023185.csv', index_col=0).reset_index(drop=True)
Winnemucca_w = pd.read_csv('precipitation_temp_city/USW00024128.csv', index_col=0).reset_index(drop=True)

In [6]:
#https://www.ncdc.noaa.gov/cdo-web/confirmation


# escondido_prcp = pd.read_csv('2314803.csv')
# escondido_prcp = escondido_prcp[['DATE','PRCP']]
# escondido_prcp.head()
# escondido_prcp['DATE'] = pd.to_datetime(escondido_prcp['DATE'])
# escondido_prcp.rename(columns={'DATE':'Date'}, inplace=True)
# escondido_w['Date'] = pd.to_datetime(escondido_w['Date'])
# escondido_w = pd.merge(escondido_w, escondido_prcp, on= 'Date' , how= 'left')
# escondido_w['prcp'].fillna(escondido_w['PRCP'], inplace=True)
# escondido_w.drop(['PRCP'], axis=1, inplace=True)

In [7]:
# For each city df add Lat and Long so that we can merge all data frames later 
california_cities = [bakersfield_w, fresno_w, la_w, redding_w, sd_w, riverside_w, sac_w, ukiah_w,  reno_w, Winnemucca_w]
names = ['Bakersfield', 'Fresno','LosAngeles', 'Redding', 'SanDiego', 'Riverside','Sacramento', 'Ukiah', 'Reno', 'Winnemucca']
for city, name in zip(california_cities, names):
    city['Latitude'] = California['Lat'].loc[California['Name']== name].values[0]
    city['Longitude'] = California['Lon'].loc[California['Name']== name].values[0]

In [8]:
#convert all Date columns to datetime 

for city in california_cities:
    city['Date'] = pd.to_datetime(city['Date'])

In [9]:

def drop_dates(city_list):
    new_list = []
    for city in city_list:
        city = city[(city['Date'] > '1983-12-31')]
        city = city[(city['Date'] < '2018-01-01')] 
        city = city.reset_index(drop=True)
        new_list.append(city)
        city_list = new_list
    return city_list 



In [10]:
california_cities = drop_dates(california_cities)


In [11]:
#Look at null values in each dataset

for city, name in zip(california_cities, names):
    print(name)
    print(city.isna().sum())

Bakersfield
Date         0
tmax         0
tmin         0
prcp         0
Latitude     0
Longitude    0
dtype: int64
Fresno
Date         0
tmax         0
tmin         0
prcp         0
Latitude     0
Longitude    0
dtype: int64
LosAngeles
Date         0
tmax         1
tmin         0
prcp         0
Latitude     0
Longitude    0
dtype: int64
Redding
Date          0
tmax         73
tmin         32
prcp         32
Latitude      0
Longitude     0
dtype: int64
SanDiego
Date         0
tmax         0
tmin         0
prcp         0
Latitude     0
Longitude    0
dtype: int64
Riverside
Date           0
tmax         780
tmin         772
prcp         230
Latitude       0
Longitude      0
dtype: int64
Sacramento
Date         0
tmax         2
tmin         1
prcp         0
Latitude     0
Longitude    0
dtype: int64
Ukiah
Date           0
tmax         289
tmin         223
prcp         237
Latitude       0
Longitude      0
dtype: int64
Reno
Date         0
tmax         0
tmin         0
prcp         1
Latitud

In [12]:
imputer = KNNImputer(n_neighbors=5)
imputer.fit(ukiah_w)




TypeError: invalid type promotion

In [None]:
plt.figure(figsize=(20,10))
for city, num, name in zip(california_cities, range(1,len(california_cities)+1), names):
    plt.subplot(4,3,num)
    sns.lineplot('Date', 'prcp', data = city)
    plt.title(name)
    plt.xlabel('date')
    plt.ylabel('prcp')
    plt.show

In [None]:
plt.figure(figsize=(20,20))
for city, num, name in zip(california_cities, range(1,len(california_cities)+1), names):
    plt.subplot(6,2,num)
    sns.lineplot('Date', 'tmax', data = city)
    plt.title(name)
    plt.xlabel('date')
    plt.ylabel('tmax')
    plt.show

In [None]:
plt.figure(figsize=(20,20))
for city, num, name in zip(california_cities, range(1,len(california_cities)+1), names):
    plt.subplot(6,2,num)
    sns.lineplot('Date', 'tmin', data = city)
    plt.title(name)
    plt.xlabel('date')
    plt.ylabel('tmin')
    plt.show

In [None]:
def interpolate_null(list_of_df, column):
    for city in list_of_df:
        city[column] = city[column].interpolate()
        
    return list_of_df

In [None]:
california_cities = interpolate_null(california_cities, 'tmax')
california_cities = interpolate_null(california_cities, 'tmin')



In [None]:
plt.figure(figsize=(20,10))
for city, num, name in zip(california_cities, range(1,len(california_cities)+1), names):
    plt.subplot(4,3,num)
    sns.lineplot('Date', 'tmax', data = city)
    plt.title(name)
    plt.xlabel('date')
    plt.ylabel('tmax')
    plt.show

In [None]:
#check for null values 

for city in california_cities:
    print(city.isna().sum())

In [None]:
#remove unnecessary columns from the fire data 
fires = fires[['FIRE_ID','LATITUDE', 'LONGITUDE', 'FIRE_TYPE','IG_DATE', 'ACRES']]
#remove all fires that are not wildfires
fires = fires.loc[fires['FIRE_TYPE'] == 'Wildfire']
fires.head()

In [None]:
#convert IG_DATE to datetime with YYYY-MM-DD Format labeled 'Date'

fires['Year'] = pd.DatetimeIndex(fires['IG_DATE']).year
fires['Month'] = pd.DatetimeIndex(fires['IG_DATE']).month
fires['Day'] = pd.DatetimeIndex(fires['IG_DATE']).day
fires['Date'] = pd.to_datetime([f'{y}-{m}-{d}' for y, m,d in zip(fires.Year, fires.Month, fires.Day)])
fires.drop(['IG_DATE', 'Month', 'Year', 'Day'], axis=1, inplace=True)
fires = fires.reset_index(drop=True)

Since current weather conditions are dependent on recent weather patterns, we are adding a column for total consecutive days without rain, consecutive hot days, and total precipitation for the past 90 days. 

In [None]:
def create_last_rain_column(df): 
    last_rain = []
    count = 1
    for i in range(len(df)):
        if df['prcp'][i] > 0:
            last_rain.append(0)
            count = 1
        else:
            last_rain.append(count)
            count += 1
    df['last_rain'] = last_rain       
    return  df

In [None]:
def create_heat_wave_column(df): 
    heat_wave = []
    count = 0
    for i in range(len(df)):
        if df['tmax'][i] >= 85.0:
            count += 1
            heat_wave.append(count)
            
        else:
            heat_wave.append(0)
            count = 0
    df['heat_wave'] = heat_wave      
    return  df

In [None]:
bakersfield_w.describe()

In [None]:
for city in california_cities:
    city = create_last_rain_column(city)
    city = create_heat_wave_column(city)

Now to match the fires with the nearest city location based on their latitude and Longitude.

First, we will merge all weather data into a master data frame. Then we will take the unique values for latitude and longitude and match each fire location with the nearest city. Then we can merge the fire dataframe with the weather datafram on the latitude and longitude of the cities. 

In [None]:
weather = pd.concat(california_cities)
weather.shape

In [None]:
weather['point'] = [(x,y) for x,y in zip ( weather['Latitude'], weather['Longitude'])]
fires['loc'] = [(x,y) for x,y in zip (fires['LATITUDE'], fires['LONGITUDE'])]
city_locations = weather['point'].unique()
city_locations = [list(point) for point in city_locations ]


In [None]:
city =[]
for i in range( len (fires['loc'])):
    distance = cdist([list(fires['loc'][i])], city_locations)
    minim = distance.min()
    k,j = np.where(distance == minim)
    city.append(city_locations[j[0]])
    

In [None]:
fires['city'] = city
fires.drop('loc', axis=1, inplace=True)
fires.head()

In [None]:
weather.rename(columns={'Latitude': 'city_lat', 'Longitude': 'city_long'}, inplace=True)
weather.drop('point', axis=1, inplace=True)
weather.head()

In [None]:

fires[['city_lat', 'city_long']] = pd.DataFrame(fires.city.tolist(), index= fires.index)
fires.drop('city', axis=1, inplace=True)
fires.head()

In [None]:
group1 = fires.groupby(['city_lat' , 'city_long', 'Date'])['FIRE_ID'].count().to_frame('Total_fires')
group2 = fires.groupby(['city_lat' , 'city_long', 'Date'])['ACRES'].sum().to_frame('Total_acres')
fire = pd.merge(group1, group2, on = ['city_lat' , 'city_long', 'Date'], how ='left').reset_index()
fire.head()

In [None]:
weather['Date'] = pd.to_datetime(weather['Date'])
weather.dtypes

In [None]:
data = pd.merge(weather, fire, on = ['Date','city_lat', 'city_long'], how ='left')
data.fillna(0, inplace=True)

In [None]:
data.head()

In [None]:
data.describe()

In [None]:
plt.figure(figsize=(10,5))
plt.plot('Date', 'Total_fires', data = data)
plt.ylabel('Count Of Fires')
plt.title('Total Fires Over Time')
plt.show()

In [None]:
plt.figure(figsize=(10,5))
plt.plot('Date', 'Total_acres', data = data)
plt.ylabel('Count Of Fires')
plt.title('Total Acres Over Time')
plt.show()

In [None]:
plt.figure(figsize= (8,5))
sns.scatterplot(x = 'heat_wave', y = 'last_rain', hue = 'Total_acres', data = data.loc[data['Total_fires'] > 0])
plt.show()

In [None]:
plt.figure(figsize= (8,5))
sns.scatterplot(x = 'heat_wave', y = 'last_rain', hue = 'Total_fires', data = data.loc[data['Total_fires'] > 0])
plt.show()

In [None]:
plt.figure(figsize= (8,5))
sns.scatterplot(x = 'Total_fires', y = 'Total_acres', hue = 'last_rain', data = data.loc[data['Total_fires'] > 0])
plt.legend(loc='lower right')
plt.show()

In [None]:
plt.figure(figsize= (8,5))
sns.boxplot(x = 'Total_fires', y = 'last_rain', data = data.loc[data['Total_fires'] > 0])
plt.legend(loc='lower right')
plt.show()


In [None]:
fig = plt.figure(figsize=(15,8))
sns.heatmap(data.corr(),linewidths=.1,cmap="YlGnBu", annot=True)
plt.yticks(rotation=0)
plt.show()
