# Data Cleaning and Combining
Remove unnecessary, useless data and combine the two datasets into one dataset

Importing libraries & files

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, Polygon
from sklearn.model_selection import train_test_split



### Removing Missing Data and Formatting Dates in CALFIRES database (fire21_2.gdb)

In [2]:
calfires = gpd.read_file("fire21_2.gdb")
calfires.head()

Unnamed: 0,YEAR_,STATE,AGENCY,UNIT_ID,FIRE_NAME,INC_NUM,ALARM_DATE,CONT_DATE,CAUSE,COMMENTS,REPORT_AC,GIS_ACRES,C_METHOD,OBJECTIVE,FIRE_NUM,Shape_Length,Shape_Area,COMPLEX_NAME,COMPLEX_INCNUM,geometry
0,2020,CA,CDF,NEU,NELSON,13212,2020-06-18T00:00:00+00:00,2020-06-23T00:00:00+00:00,11.0,,110.0,109.60228,1.0,1.0,,3252.52328,443544.7,,,"MULTIPOLYGON (((-116841.251 97942.565, -116836..."
1,2020,CA,CDF,NEU,AMORUSO,11799,2020-06-01T00:00:00+00:00,2020-06-04T00:00:00+00:00,2.0,,670.0,685.585022,1.0,1.0,,9653.760308,2774464.0,,,"MULTIPOLYGON (((-117328.400 90212.407, -117321..."
2,2020,CA,CDF,NEU,ATHENS,18493,2020-08-10T00:00:00+00:00,2020-03-01T00:00:00+00:00,14.0,,26.0,27.30048,1.0,1.0,,1649.643235,110481.1,,,"MULTIPOLYGON (((-115605.059 92988.787, -115585..."
3,2020,CA,CDF,NEU,FLEMING,7619,2020-03-31T00:00:00+00:00,2020-04-01T00:00:00+00:00,9.0,,13.0,12.931545,1.0,1.0,,1577.155857,52332.11,,,"MULTIPOLYGON (((-110213.270 105975.579, -11020..."
4,2020,CA,CDF,NEU,MELANESE,8471,2020-04-14T00:00:00+00:00,2020-04-19T00:00:00+00:00,18.0,,10.3,10.315964,1.0,1.0,,1035.787625,41747.22,,,"MULTIPOLYGON (((-111793.600 164243.615, -11177..."


Removing unneeded colums and rows with missing data

In [3]:
calfires_trim = calfires.drop(labels=["STATE", "FIRE_NAME", "AGENCY", "UNIT_ID", "INC_NUM", "COMMENTS", "REPORT_AC", "C_METHOD", "OBJECTIVE", "FIRE_NUM", "COMPLEX_NAME", "COMPLEX_INCNUM", "CONT_DATE"], axis=1)
calfires_trim = calfires_trim.dropna()
calfires_trim

Unnamed: 0,YEAR_,ALARM_DATE,CAUSE,GIS_ACRES,Shape_Length,Shape_Area,geometry
0,2020,2020-06-18T00:00:00+00:00,11.0,109.602280,3252.523280,4.435447e+05,"MULTIPOLYGON (((-116841.251 97942.565, -116836..."
1,2020,2020-06-01T00:00:00+00:00,2.0,685.585022,9653.760308,2.774464e+06,"MULTIPOLYGON (((-117328.400 90212.407, -117321..."
2,2020,2020-08-10T00:00:00+00:00,14.0,27.300480,1649.643235,1.104811e+05,"MULTIPOLYGON (((-115605.059 92988.787, -115585..."
3,2020,2020-03-31T00:00:00+00:00,9.0,12.931545,1577.155857,5.233211e+04,"MULTIPOLYGON (((-110213.270 105975.579, -11020..."
4,2020,2020-04-14T00:00:00+00:00,18.0,10.315964,1035.787625,4.174722e+04,"MULTIPOLYGON (((-111793.600 164243.615, -11177..."
...,...,...,...,...,...,...,...
21679,2021,2021-07-04T00:00:00+00:00,9.0,8.430901,1304.534325,3.414443e+04,"MULTIPOLYGON (((233587.325 -414440.535, 233571..."
21681,2014,2014-07-29T00:00:00+00:00,1.0,6256.270996,27936.642775,2.531823e+07,"MULTIPOLYGON (((-237004.801 355976.823, -23690..."
21682,2012,2012-08-05T00:00:00+00:00,1.0,977.024963,8580.699931,3.953880e+06,"MULTIPOLYGON (((-269609.565 442201.663, -26961..."
21684,2008,2008-06-21T00:00:00+00:00,1.0,267.612793,15869.974532,1.082991e+06,"MULTIPOLYGON (((38690.174 -54105.145, 38708.27..."


Removing times from dates and saving them as date objects

In [4]:
def date_trim(date):
    return date[:10]

calfires_trim.ALARM_DATE = calfires_trim.ALARM_DATE.map(date_trim)

In [5]:
calfires_trim["ALARM_DATE"] = pd.to_datetime(calfires_trim["ALARM_DATE"], format="%Y-%m-%d", errors="coerce")
calfires_trim

Unnamed: 0,YEAR_,ALARM_DATE,CAUSE,GIS_ACRES,Shape_Length,Shape_Area,geometry
0,2020,2020-06-18,11.0,109.602280,3252.523280,4.435447e+05,"MULTIPOLYGON (((-116841.251 97942.565, -116836..."
1,2020,2020-06-01,2.0,685.585022,9653.760308,2.774464e+06,"MULTIPOLYGON (((-117328.400 90212.407, -117321..."
2,2020,2020-08-10,14.0,27.300480,1649.643235,1.104811e+05,"MULTIPOLYGON (((-115605.059 92988.787, -115585..."
3,2020,2020-03-31,9.0,12.931545,1577.155857,5.233211e+04,"MULTIPOLYGON (((-110213.270 105975.579, -11020..."
4,2020,2020-04-14,18.0,10.315964,1035.787625,4.174722e+04,"MULTIPOLYGON (((-111793.600 164243.615, -11177..."
...,...,...,...,...,...,...,...
21679,2021,2021-07-04,9.0,8.430901,1304.534325,3.414443e+04,"MULTIPOLYGON (((233587.325 -414440.535, 233571..."
21681,2014,2014-07-29,1.0,6256.270996,27936.642775,2.531823e+07,"MULTIPOLYGON (((-237004.801 355976.823, -23690..."
21682,2012,2012-08-05,1.0,977.024963,8580.699931,3.953880e+06,"MULTIPOLYGON (((-269609.565 442201.663, -26961..."
21684,2008,2008-06-21,1.0,267.612793,15869.974532,1.082991e+06,"MULTIPOLYGON (((38690.174 -54105.145, 38708.27..."


### Removing Missing data and Formatting dates in cal_temps.csv

In [6]:
cal_temps_csv = open("cal_temps.csv")
cal_temps = pd.read_csv(cal_temps_csv)
cal_temps.head()

Unnamed: 0.1,Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,InCal
0,274252,1849-01-01,13.116,2.586,Anaheim,United States,32.95N,117.77W,True
1,274253,1849-02-01,12.656,1.882,Anaheim,United States,32.95N,117.77W,True
2,274254,1849-03-01,13.551,2.035,Anaheim,United States,32.95N,117.77W,True
3,274255,1849-04-01,14.146,2.805,Anaheim,United States,32.95N,117.77W,True
4,274256,1849-05-01,13.543,1.882,Anaheim,United States,32.95N,117.77W,True


In [7]:
cal_temps_trim = cal_temps.drop(labels=["Unnamed: 0", "AverageTemperatureUncertainty", "Country", "InCal"], axis=1)
cal_temps_trim.dropna(inplace=True)

cal_temps_trim["dt"] = pd.to_datetime(cal_temps_trim["dt"], format="%Y-%m-%d")
cal_temps_trim.head()

Unnamed: 0,dt,AverageTemperature,City,Latitude,Longitude
0,1849-01-01,13.116,Anaheim,32.95N,117.77W
1,1849-02-01,12.656,Anaheim,32.95N,117.77W
2,1849-03-01,13.551,Anaheim,32.95N,117.77W
3,1849-04-01,14.146,Anaheim,32.95N,117.77W
4,1849-05-01,13.543,Anaheim,32.95N,117.77W


### Combining Datasets

```
date = fire.date
for i in 12:
    fire["i_months_before"] = list of temperatures from cities with that month label
```

In [8]:
# Finds all entries in cal_temps_trim associated with date
def find_temps(date: pd.Timestamp):
    mod_date = pd.Timestamp(f"{date.year}-{date.month}-01")
    date_temps = cal_temps_trim.loc[cal_temps_trim["dt"] == mod_date]
    temps_list = []
    for i in date_temps.index:
        temps_list.append(date_temps["AverageTemperature"][i])
    
    return temps_list

# checking to make sure it works for all possible years
print(len(find_temps(pd.Timestamp("2010-10-02"))))
print(len(find_temps(pd.Timestamp("1910-10-02"))))
print(len(find_temps(pd.Timestamp("1849-10-02"))))

def get_dates_list(date):
    return pd.date_range(end=date, freq="M", periods=12)
print(find_temps(get_dates_list(pd.Timestamp("2010-10-02"))[0]))

61
61
61
[18.266, 15.938, 15.630999999999998, 15.938, 16.637999999999998, 18.266, 15.938, 18.266, 18.266, 16.637999999999998, 16.637999999999998, 18.266, 15.938, 16.723, 15.938, 15.630999999999998, 18.266, 18.266, 16.637999999999998, 15.938, 18.266, 16.637999999999998, 18.266, 16.637999999999998, 18.266, 16.637999999999998, 12.15, 16.723, 16.637999999999998, 15.938, 18.266, 16.723, 18.266, 16.637999999999998, 16.637999999999998, 16.46, 16.637999999999998, 16.637999999999998, 16.723, 16.723, 15.938, 16.723, 11.365, 15.938, 15.81, 16.723, 18.266, 15.938, 15.938, 18.266, 15.938, 16.637999999999998, 15.938, 16.637999999999998, 15.938, 15.938, 16.637999999999998, 16.637999999999998, 15.938, 15.630999999999998, 16.637999999999998]


```
for i in fire:
    date_list = get_dates_list(fire.date)
    for i in date_list:
        fire.i_months_before = find_temps(i)
```

In [9]:
'''for i in range(1, 13):
    calfires_trim[f"{i}_MONTHS_BEFORE"] = None '''

# calfires_trim = calfires_trim.drop([21138])

def assign_temperatures(fire):
    date = fire["ALARM_DATE"]
    try:
        for i in range(1, 13):
            fire[f"{i}_MONTHS_BEFORE"] = find_temps(get_dates_list(date)[i - 1])
    except:
        print(fire)
    return fire

cal_fire_temps = calfires_trim.apply(lambda row: assign_temperatures(row), axis=1)
cal_fire_temps


YEAR_                                                        2019
ALARM_DATE                                                    NaT
CAUSE                                                        16.0
GIS_ACRES                                                 2.29694
Shape_Length                                           935.160088
Shape_Area                                            9295.387634
geometry        MULTIPOLYGON (((213452.07079999894 -468768.277...
Name: 21138, dtype: object


Unnamed: 0,10_MONTHS_BEFORE,11_MONTHS_BEFORE,12_MONTHS_BEFORE,1_MONTHS_BEFORE,2_MONTHS_BEFORE,3_MONTHS_BEFORE,4_MONTHS_BEFORE,5_MONTHS_BEFORE,6_MONTHS_BEFORE,7_MONTHS_BEFORE,8_MONTHS_BEFORE,9_MONTHS_BEFORE,ALARM_DATE,CAUSE,GIS_ACRES,Shape_Area,Shape_Length,YEAR_,geometry
0,[],[],[],[],[],[],[],[],[],[],[],[],2020-06-18,11.0,109.602280,4.435447e+05,3252.523280,2020,"MULTIPOLYGON (((-116841.251 97942.565, -116836..."
1,[],[],[],[],[],[],[],[],[],[],[],[],2020-06-01,2.0,685.585022,2.774464e+06,9653.760308,2020,"MULTIPOLYGON (((-117328.400 90212.407, -117321..."
2,[],[],[],[],[],[],[],[],[],[],[],[],2020-08-10,14.0,27.300480,1.104811e+05,1649.643235,2020,"MULTIPOLYGON (((-115605.059 92988.787, -115585..."
3,[],[],[],[],[],[],[],[],[],[],[],[],2020-03-31,9.0,12.931545,5.233211e+04,1577.155857,2020,"MULTIPOLYGON (((-110213.270 105975.579, -11020..."
4,[],[],[],[],[],[],[],[],[],[],[],[],2020-04-14,18.0,10.315964,4.174722e+04,1035.787625,2020,"MULTIPOLYGON (((-111793.600 164243.615, -11177..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21679,[],[],[],[],[],[],[],[],[],[],[],[],2021-07-04,9.0,8.430901,3.414443e+04,1304.534325,2021,"MULTIPOLYGON (((233587.325 -414440.535, 233571..."
21681,[],[],[],"[19.929, 20.657, 28.456, 20.657, 25.09, 19.929...","[20.076, 19.731, 25.7, 19.731, 24.232000000000...","[21.391, 20.471, 24.186, 20.471, 23.301, 21.39...",[],[],[],[],[],[],2014-07-29,1.0,6256.270996,2.531823e+07,27936.642775,2014,"MULTIPOLYGON (((-237004.801 355976.823, -23690..."
21682,"[16.644000000000002, 16.555, 19.772, 16.555, 1...","[17.34, 18.482, 23.024, 18.482, 21.066, 17.34,...","[18.984, 19.632, 26.095, 19.632, 23.851, 18.98...","[20.003, 19.389, 25.898000000000003, 19.389, 2...","[19.904, 20.54, 24.503, 20.54, 22.728, 19.904,...","[18.692, 17.294999999999998, 17.80700000000000...","[15.455, 11.478, 10.014, 11.478, 11.643, 15.45...","[13.258, 9.118, 6.391, 9.118, 8.86799999999999...","[15.113, 9.993, 8.607999999999999, 9.993, 11.5...","[13.664, 11.116, 9.392, 11.116, 10.60900000000...","[13.669, 11.503, 11.264, 11.503, 11.576, 13.66...","[15.094, 14.15, 14.645, 14.15, 14.751, 15.094,...",2012-08-05,1.0,977.024963,3.953880e+06,8580.699931,2012,"MULTIPOLYGON (((-269609.565 442201.663, -26961..."
21684,"[14.675, 12.265, 12.083, 12.265, 13.466, 14.67...","[15.686, 13.468, 14.29, 13.468, 14.928, 15.686...","[16.094, 16.625999999999998, 18.791, 16.625999...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...","[18.762, 15.439999999999998, 16.168, 15.439999...","[16.98, 13.278, 13.017, 13.278, 14.4, 16.98, 1...","[12.935, 8.318999999999999, 5.96, 8.3189999999...","[12.838, 8.026, 6.731, 8.026, 8.196, 12.838, 8...","[13.18, 10.449000000000002, 9.066, 10.44900000...",2008-06-21,1.0,267.612793,1.082991e+06,15869.974532,2008,"MULTIPOLYGON (((38690.174 -54105.145, 38708.27..."


In [10]:
cols = ["ALARM_DATE", "YEAR_", "CAUSE", "1_MONTHS_BEFORE", "2_MONTHS_BEFORE", "3_MONTHS_BEFORE", "4_MONTHS_BEFORE", "5_MONTHS_BEFORE", "6_MONTHS_BEFORE", "7_MONTHS_BEFORE", "8_MONTHS_BEFORE", "9_MONTHS_BEFORE", "10_MONTHS_BEFORE", "11_MONTHS_BEFORE", "12_MONTHS_BEFORE", "GIS_ACRES", "Shape_Area", "Shape_Length", "geometry"]
cal_fire_temps = cal_fire_temps[cols]
cal_fire_temps

Unnamed: 0,ALARM_DATE,YEAR_,CAUSE,1_MONTHS_BEFORE,2_MONTHS_BEFORE,3_MONTHS_BEFORE,4_MONTHS_BEFORE,5_MONTHS_BEFORE,6_MONTHS_BEFORE,7_MONTHS_BEFORE,8_MONTHS_BEFORE,9_MONTHS_BEFORE,10_MONTHS_BEFORE,11_MONTHS_BEFORE,12_MONTHS_BEFORE,GIS_ACRES,Shape_Area,Shape_Length,geometry
0,2020-06-18,2020,11.0,[],[],[],[],[],[],[],[],[],[],[],[],109.602280,4.435447e+05,3252.523280,"MULTIPOLYGON (((-116841.251 97942.565, -116836..."
1,2020-06-01,2020,2.0,[],[],[],[],[],[],[],[],[],[],[],[],685.585022,2.774464e+06,9653.760308,"MULTIPOLYGON (((-117328.400 90212.407, -117321..."
2,2020-08-10,2020,14.0,[],[],[],[],[],[],[],[],[],[],[],[],27.300480,1.104811e+05,1649.643235,"MULTIPOLYGON (((-115605.059 92988.787, -115585..."
3,2020-03-31,2020,9.0,[],[],[],[],[],[],[],[],[],[],[],[],12.931545,5.233211e+04,1577.155857,"MULTIPOLYGON (((-110213.270 105975.579, -11020..."
4,2020-04-14,2020,18.0,[],[],[],[],[],[],[],[],[],[],[],[],10.315964,4.174722e+04,1035.787625,"MULTIPOLYGON (((-111793.600 164243.615, -11177..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21679,2021-07-04,2021,9.0,[],[],[],[],[],[],[],[],[],[],[],[],8.430901,3.414443e+04,1304.534325,"MULTIPOLYGON (((233587.325 -414440.535, 233571..."
21681,2014-07-29,2014,1.0,"[19.929, 20.657, 28.456, 20.657, 25.09, 19.929...","[20.076, 19.731, 25.7, 19.731, 24.232000000000...","[21.391, 20.471, 24.186, 20.471, 23.301, 21.39...",[],[],[],[],[],[],[],[],[],6256.270996,2.531823e+07,27936.642775,"MULTIPOLYGON (((-237004.801 355976.823, -23690..."
21682,2012-08-05,2012,1.0,"[20.003, 19.389, 25.898000000000003, 19.389, 2...","[19.904, 20.54, 24.503, 20.54, 22.728, 19.904,...","[18.692, 17.294999999999998, 17.80700000000000...","[15.455, 11.478, 10.014, 11.478, 11.643, 15.45...","[13.258, 9.118, 6.391, 9.118, 8.86799999999999...","[15.113, 9.993, 8.607999999999999, 9.993, 11.5...","[13.664, 11.116, 9.392, 11.116, 10.60900000000...","[13.669, 11.503, 11.264, 11.503, 11.576, 13.66...","[15.094, 14.15, 14.645, 14.15, 14.751, 15.094,...","[16.644000000000002, 16.555, 19.772, 16.555, 1...","[17.34, 18.482, 23.024, 18.482, 21.066, 17.34,...","[18.984, 19.632, 26.095, 19.632, 23.851, 18.98...",977.024963,3.953880e+06,8580.699931,"MULTIPOLYGON (((-269609.565 442201.663, -26961..."
21684,2008-06-21,2008,1.0,"[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...","[18.762, 15.439999999999998, 16.168, 15.439999...","[16.98, 13.278, 13.017, 13.278, 14.4, 16.98, 1...","[12.935, 8.318999999999999, 5.96, 8.3189999999...","[12.838, 8.026, 6.731, 8.026, 8.196, 12.838, 8...","[13.18, 10.449000000000002, 9.066, 10.44900000...","[14.675, 12.265, 12.083, 12.265, 13.466, 14.67...","[15.686, 13.468, 14.29, 13.468, 14.928, 15.686...","[16.094, 16.625999999999998, 18.791, 16.625999...",267.612793,1.082991e+06,15869.974532,"MULTIPOLYGON (((38690.174 -54105.145, 38708.27..."


In [11]:
null_years = []
null_indices = []

for i in cal_fire_temps.index:
    for j in range(1, 13):
        if cal_fire_temps[f"{j}_MONTHS_BEFORE"][i] == []:
            if not cal_fire_temps["YEAR_"][i] in null_years:
                null_years.append(cal_fire_temps["YEAR_"][i])
            if not i in null_indices:
                null_indices.append(i)

print(null_years)
print(len(null_indices))

['2020', '2016', '2014', '2013', '2015', '2017', '2018', '2019', '2021']
3115


In [12]:
cal_fire_temps = cal_fire_temps.drop(null_indices)
cal_fire_temps

Unnamed: 0,ALARM_DATE,YEAR_,CAUSE,1_MONTHS_BEFORE,2_MONTHS_BEFORE,3_MONTHS_BEFORE,4_MONTHS_BEFORE,5_MONTHS_BEFORE,6_MONTHS_BEFORE,7_MONTHS_BEFORE,8_MONTHS_BEFORE,9_MONTHS_BEFORE,10_MONTHS_BEFORE,11_MONTHS_BEFORE,12_MONTHS_BEFORE,GIS_ACRES,Shape_Area,Shape_Length,geometry
489,2007-10-21,2007,14.0,"[17.88, 15.861999999999998, 15.863, 15.8619999...","[17.609, 12.479, 11.754, 12.479, 14.687, 17.60...","[14.373, 9.202, 7.236000000000001, 9.202, 10.0...","[12.751, 8.165, 5.541, 8.165, 8.391, 12.751, 8...","[13.76, 10.718, 9.472, 10.718, 10.897, 13.76, ...","[15.414, 14.002, 14.239, 14.002, 14.809, 15.41...","[14.753, 14.117, 15.045, 14.117, 14.889, 14.75...","[16.518, 16.666, 20.099, 16.666, 18.107, 16.51...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...",25.736713,1.041528e+05,1902.439051,"MULTIPOLYGON (((138036.906 -402646.363, 138086..."
490,2007-10-22,2007,14.0,"[17.88, 15.861999999999998, 15.863, 15.8619999...","[17.609, 12.479, 11.754, 12.479, 14.687, 17.60...","[14.373, 9.202, 7.236000000000001, 9.202, 10.0...","[12.751, 8.165, 5.541, 8.165, 8.391, 12.751, 8...","[13.76, 10.718, 9.472, 10.718, 10.897, 13.76, ...","[15.414, 14.002, 14.239, 14.002, 14.809, 15.41...","[14.753, 14.117, 15.045, 14.117, 14.889, 14.75...","[16.518, 16.666, 20.099, 16.666, 18.107, 16.51...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...",2824.877197,1.143187e+07,20407.965662,"MULTIPOLYGON (((130072.487 -398622.842, 130094..."
491,2007-10-20,2007,2.0,"[17.88, 15.861999999999998, 15.863, 15.8619999...","[17.609, 12.479, 11.754, 12.479, 14.687, 17.60...","[14.373, 9.202, 7.236000000000001, 9.202, 10.0...","[12.751, 8.165, 5.541, 8.165, 8.391, 12.751, 8...","[13.76, 10.718, 9.472, 10.718, 10.897, 13.76, ...","[15.414, 14.002, 14.239, 14.002, 14.809, 15.41...","[14.753, 14.117, 15.045, 14.117, 14.889, 14.75...","[16.518, 16.666, 20.099, 16.666, 18.107, 16.51...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...",58410.335938,2.363782e+08,169150.715690,"MULTIPOLYGON (((114013.974 -379231.746, 114190..."
492,2007-09-11,2007,14.0,"[20.645, 18.949, 22.739, 18.949, 22.201, 20.64...","[17.88, 15.861999999999998, 15.863, 15.8619999...","[17.609, 12.479, 11.754, 12.479, 14.687, 17.60...","[14.373, 9.202, 7.236000000000001, 9.202, 10.0...","[12.751, 8.165, 5.541, 8.165, 8.391, 12.751, 8...","[13.76, 10.718, 9.472, 10.718, 10.897, 13.76, ...","[15.414, 14.002, 14.239, 14.002, 14.809, 15.41...","[14.753, 14.117, 15.045, 14.117, 14.889, 14.75...","[16.518, 16.666, 20.099, 16.666, 18.107, 16.51...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...",172.214951,6.969292e+05,6117.777086,"MULTIPOLYGON (((176902.236 -388673.082, 176907..."
493,2007-11-24,2007,14.0,"[17.609, 12.479, 11.754, 12.479, 14.687, 17.60...","[14.373, 9.202, 7.236000000000001, 9.202, 10.0...","[12.751, 8.165, 5.541, 8.165, 8.391, 12.751, 8...","[13.76, 10.718, 9.472, 10.718, 10.897, 13.76, ...","[15.414, 14.002, 14.239, 14.002, 14.809, 15.41...","[14.753, 14.117, 15.045, 14.117, 14.889, 14.75...","[16.518, 16.666, 20.099, 16.666, 18.107, 16.51...","[17.834, 18.829, 23.84800000000001, 18.829, 21...","[20.307, 20.325, 26.785, 20.325, 25.265, 20.30...","[21.683000000000003, 20.601, 26.483, 20.601, 2...","[19.936, 18.781, 21.254, 18.781, 20.896, 19.93...","[18.762, 15.439999999999998, 16.168, 15.439999...",4707.997070,1.905259e+07,22907.182174,"MULTIPOLYGON (((115905.006 -436381.137, 115926..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21274,2009-05-09,2009,2.0,"[16.094, 16.625999999999998, 18.791, 16.625999...","[19.447000000000006, 19.473, 24.153, 19.473, 2...","[20.075, 20.335, 27.228, 20.335, 25.032, 20.07...","[21.066, 20.797, 27.174, 20.797, 25.2320000000...","[21.199, 20.014, 24.06, 20.014, 23.37, 21.199,...","[20.638, 17.351, 18.062, 17.351, 19.398, 20.63...","[18.488, 13.813, 12.864999999999998, 13.813, 1...","[13.147, 8.017000000000001, 6.107, 8.017000000...","[15.767, 10.129, 8.587, 10.129, 11.882, 15.767...","[13.298, 10.359000000000002, 8.850999999999997...","[13.963, 12.058, 11.344, 12.058, 11.996, 13.96...","[14.861, 13.786, 14.201, 13.786, 13.968, 14.86...",6.031826,2.440993e+04,753.392432,"MULTIPOLYGON (((130420.164 -429839.949, 130429..."
21275,2013-05-02,2013,14.0,"[16.644000000000002, 16.555, 19.772, 16.555, 1...","[17.34, 18.482, 23.024, 18.482, 21.066, 17.34,...","[18.984, 19.632, 26.095, 19.632, 23.851, 18.98...","[22.415, 20.531, 28.076, 20.531, 26.8920000000...","[22.852, 19.045, 25.108, 19.045, 24.745, 22.85...","[19.892, 17.294999999999998, 17.93099999999999...","[17.245, 13.367, 12.477, 13.367, 13.903, 17.24...","[13.531, 8.95, 7.395, 8.95, 8.712, 13.531, 8.9...","[13.404000000000002, 8.32, 6.501, 8.32, 8.8129...","[12.835, 10.229, 8.346, 10.229, 9.804, 12.835,...","[15.344, 13.505999999999998, 13.748, 13.505999...","[15.380999999999998, 15.996, 16.457, 15.996, 1...",23676.656250,9.581602e+07,137799.335386,"MULTIPOLYGON (((90209.824 -428920.708, 90181.6..."
21646,1933-10-03,1933,7.0,"[17.532, 16.099999999999998, 16.511, 16.099999...","[18.003, 14.157, 13.277, 14.157, 15.33, 18.003...","[12.052, 6.25, 4.373, 6.25, 7.105, 12.052, 6.2...","[11.380999999999998, 6.374, 3.937, 6.374, 6.98...","[11.494000000000002, 8.582, 6.553999999999999,...","[13.349, 12.066, 10.939, 12.066, 11.737, 13.34...","[13.178, 13.152, 13.592, 13.152, 12.899, 13.17...","[13.214, 13.663, 14.890999999999998, 13.663, 1...","[15.778, 16.759, 21.362, 16.759, 19.182, 15.77...","[18.445, 19.955, 27.705, 19.955, 24.506, 18.44...","[19.338, 19.582, 26.231, 19.582, 23.84, 19.338...","[17.413, 17.631, 20.92, 17.631, 19.573, 17.413...",47.111866,1.906550e+05,3156.528595,"MULTIPOLYGON (((157155.388 -429058.944, 157154..."
21682,2012-08-05,2012,1.0,"[20.003, 19.389, 25.898000000000003, 19.389, 2...","[19.904, 20.54, 24.503, 20.54, 22.728, 19.904,...","[18.692, 17.294999999999998, 17.80700000000000...","[15.455, 11.478, 10.014, 11.478, 11.643, 15.45...","[13.258, 9.118, 6.391, 9.118, 8.86799999999999...","[15.113, 9.993, 8.607999999999999, 9.993, 11.5...","[13.664, 11.116, 9.392, 11.116, 10.60900000000...","[13.669, 11.503, 11.264, 11.503, 11.576, 13.66...","[15.094, 14.15, 14.645, 14.15, 14.751, 15.094,...","[16.644000000000002, 16.555, 19.772, 16.555, 1...","[17.34, 18.482, 23.024, 18.482, 21.066, 17.34,...","[18.984, 19.632, 26.095, 19.632, 23.851, 18.98...",977.024963,3.953880e+06,8580.699931,"MULTIPOLYGON (((-269609.565 442201.663, -26961..."


seeing if random state ensures identical results across files and runs

In [13]:
fire_X = cal_fire_temps.loc[cal_fire_temps.index, ["ALARM_DATE", "CAUSE", "YEAR_", "1_MONTHS_BEFORE", "2_MONTHS_BEFORE", "3_MONTHS_BEFORE", "4_MONTHS_BEFORE", "5_MONTHS_BEFORE", "6_MONTHS_BEFORE", "7_MONTHS_BEFORE", "8_MONTHS_BEFORE", "9_MONTHS_BEFORE", "10_MONTHS_BEFORE", "11_MONTHS_BEFORE", "12_MONTHS_BEFORE"]]
fire_y = cal_fire_temps.loc[cal_fire_temps.index, ["geometry"]]
fire_train_1, fire_test = train_test_split(fire_X, train_size=.8, random_state=1)
print(fire_train_1.head())
fire_train_2, fire_test2 = train_test_split(fire_X, train_size=.8, random_state=1)
print(fire_train_2.head())

      ALARM_DATE  CAUSE YEAR_  \
14280 1995-09-10    4.0  1995   
17699 2012-08-13    1.0  2012   
3867  1912-02-01   14.0  1912   
18814 1979-07-17    1.0  1979   
13335 1984-06-24    7.0  1984   

                                         1_MONTHS_BEFORE  \
14280  [20.683000000000003, 19.103, 22.402, 19.103, 2...   
17699  [20.003, 19.389, 25.898000000000003, 19.389, 2...   
3867   [11.225, 8.124, 6.598999999999998, 8.124, 7.94...   
18814  [19.088, 19.834, 25.951, 19.834, 23.811, 19.08...   
13335  [16.782, 18.536, 22.206000000000003, 18.536, 1...   

                                         2_MONTHS_BEFORE  \
14280  [18.148, 16.072, 16.084999999999994, 16.072, 1...   
17699  [19.904, 20.54, 24.503, 20.54, 22.728, 19.904,...   
3867   [14.268, 12.435, 12.151, 12.435, 12.3459999999...   
18814  [19.371, 20.242, 25.447, 20.242, 23.416, 19.37...   
13335  [19.09, 19.826, 23.94, 19.826, 23.01, 19.09, 1...   

                                         3_MONTHS_BEFORE  \
14280  [13.79, 9.22