In [1]:
import pickle
import os
import pandas as pd

In [2]:
# Directory holding the pickled MODIS DataFrames. The FOREST_FIRES_MODIS_DIR
# environment variable overrides the original hard-coded local path, so the
# notebook can run on another machine without editing this cell.
modis_dir = os.environ.get(
    'FOREST_FIRES_MODIS_DIR',
    '/Users/sallamander/galvanize/forest-fires/data/pickled_data/MODIS/'
)
# Later cells open the pickles by bare filename, so the working directory matters.
os.chdir(modis_dir)

In [3]:
# Pickle data is binary, so the file must be opened in 'rb' mode: text mode
# fails on Python 3 and is unsafe on platforms that translate line endings.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('df_2001.pkl', 'rb') as f:
    df_2001 = pickle.load(f)

In [4]:
# Order rows by date, then by location. DataFrame.sort was removed from
# pandas (0.20); sort_values is its replacement. Rebinding the name instead of
# sorting in place keeps the cell safe to re-run.
df_2001 = df_2001.sort_values(['year', 'month', 'day', 'LAT', 'LONG'])

In [5]:
# Number of rows recorded at each exact (LAT, LONG) pair. size() counts every
# row in a group, so the result does not depend on any particular column
# (such as AREA) being non-null.
lat_long_df = (
    df_2001.groupby(['LAT', 'LONG'])
    .size()
    .reset_index(name='COUNT')
)

The lat_long_df now holds a count of the number of times each lat/long pair appears in our dataframe. I need to figure out what this means - is it one fire burning over several days, is the same fire mistakenly recorded in the dataframe multiple times, or something else?

In [6]:
# Record the shape before merging so it can be compared with the merged
# frame's shape afterwards (the row count should remain the same).
df_2001.shape

(29573, 17)

In [7]:
# Attach the per-location COUNT to every row by joining on the coordinate pair.
lat_long_2001_df = pd.merge(df_2001, lat_long_df, how='inner', on=['LAT', 'LONG'])

In [8]:
# Shape after the merge: the row count is expected to match df_2001, with one
# extra column (COUNT).
lat_long_2001_df.shape

(29573, 18)

In [9]:
# Preview the first five merged rows, including the new COUNT column.
lat_long_2001_df.head(5)

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,TEMP,SPIX,TPIX,GMT,SAT_SRC,FRP,CONF,JULIAN,year,month,day,COUNT
0,0.0,0.0,29173,4735,26.479,-80.63,327.8,1.6,1.2,1601,T,64.7,81,1,2001,1,1,1
1,0.0,0.0,29168,4734,26.481,-80.646,344.9,1.6,1.2,1601,T,120.6,93,1,2001,1,1,1
2,0.0,0.0,28768,11545,26.847,-80.483,313.2,1.8,1.3,1644,T,36.2,57,2,2001,1,2,1
3,0.0,0.0,28762,11544,26.85,-80.501,311.5,1.8,1.3,1644,T,31.9,49,2,2001,1,2,1
4,0.0,0.0,28749,11548,26.861,-81.121,311.4,1.6,1.3,1644,T,30.4,49,2,2001,1,2,1


In [10]:
# Keep only rows whose exact (LAT, LONG) pair has a COUNT of at least four.
repeat_filter = 'COUNT >=4'
lat_long_2001_df.query(repeat_filter)

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,TEMP,SPIX,TPIX,GMT,SAT_SRC,FRP,CONF,JULIAN,year,month,day,COUNT
2960,0.0,0.0,6061,341731,42.68,-111.594,309.0,1.0,1.0,529,T,25.5,64,90,2001,3,31,4
2961,0.0,0.0,6058,785481,42.68,-111.594,314.4,1.0,1.0,526,T,23.6,85,218,2001,8,6,4
2962,0.0,0.0,6059,1287414,42.68,-111.594,327.4,1.3,1.1,549,T,67.4,100,294,2001,10,21,4
2963,0.0,0.0,6060,1310972,42.68,-111.594,306.8,1.3,1.2,511,T,32.6,49,300,2001,10,27,4
3020,0.0,0.0,21014,345601,32.703,-108.122,322.7,1.4,1.2,514,T,55.1,100,92,2001,4,2,4
3021,0.0,0.0,21015,387153,32.703,-108.122,323.8,1.0,1.0,1813,T,9.4,67,107,2001,4,17,4
3022,0.0,0.0,21013,785459,32.703,-108.122,313.6,1.0,1.0,524,T,18.1,83,218,2001,8,6,4
3023,0.0,0.0,21012,1412149,32.703,-108.122,311.6,1.4,1.2,1748,T,21.9,50,326,2001,11,22,4
3958,0.0,0.0,10990,365476,38.695,-90.132,321.0,1.2,1.1,1721,T,15.0,40,99,2001,4,9,4
3959,0.0,0.0,10993,433695,38.695,-90.132,310.2,1.0,1.0,413,T,16.5,70,118,2001,4,28,4


From the above, it looks like it might be hard to follow fires across time, at least if we assume that the fires are 
occurring at the same latitude/longitude throughout their lifespan (which is probably an incredibly simplistic and 
incorrect assumption). Let's see what happens if we restrict fires to a one-degree window (+/- half a degree) around a point. I'll focus on the lat/long coordinates from the fires above, since each of these lat/long combos shows up 4 times in the 2001 data. 

In [11]:
# Cast both coordinate columns of the counts table to float.
# NOTE(review): the range queries further down run against lat_long_2001_df,
# which this cell does not modify - confirm which frame the cast was meant for.
for coord_col in ('LAT', 'LONG'):
    lat_long_df[coord_col] = lat_long_df[coord_col].astype(float)

In [12]:
# Spot-check the cast: inspect the type of the LAT value at index label 0.
type(lat_long_df['LAT'][0])

numpy.float64

In [13]:
# Rows within half a degree (exclusive bounds) of (42.68, -111.594), in date
# order. DataFrame.sort was removed from pandas (0.20); sort_values replaces it.
lat_long_2001_df.query(
    'LAT > 42.18 & LAT < 43.18 & LONG < -111.094 & LONG > -112.094'
).sort_values(['year', 'month', 'day'])

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,TEMP,SPIX,TPIX,GMT,SAT_SRC,FRP,CONF,JULIAN,year,month,day,COUNT
1878,0.000000E+000,0.000000E+000,6020,292092,42.684,-111.588,308.3,1.2,1.1,523,T,29.8,60,67,2001,3,8,3
2061,0.000000E+000,0.000000E+000,6085,303930,42.678,-111.597,307.1,1.0,1.0,542,T,23.1,52,72,2001,3,13,2
2960,0.000000E+000,0.000000E+000,6061,341731,42.680,-111.594,309.0,1.0,1.0,529,T,25.5,64,90,2001,3,31,4
1879,0.000000E+000,0.000000E+000,6018,347057,42.684,-111.588,313.9,1.3,1.1,1853,T,23.2,60,92,2001,4,2,3
5267,0.000000E+000,0.000000E+000,6092,410495,42.677,-111.600,306.3,1.0,1.0,535,T,20.4,44,113,2001,4,23,1
5268,0.000000E+000,0.000000E+000,6079,410494,42.679,-111.588,318.2,1.0,1.0,535,T,32.6,95,113,2001,4,23,1
5467,0.000000E+000,0.000000E+000,6385,422545,42.294,-111.332,320.3,1.6,1.2,1859,T,23.3,72,115,2001,4,25,1
5468,0.000000E+000,0.000000E+000,6376,422546,42.295,-111.338,326.0,1.6,1.2,1859,T,36.3,79,115,2001,4,25,1
5469,0.000000E+000,0.000000E+000,6353,422544,42.298,-111.351,319.4,1.6,1.2,1859,T,20.3,71,115,2001,4,25,1
5471,0.000000E+000,0.000000E+000,6112,422542,42.674,-111.587,315.5,1.5,1.2,1859,T,16.5,48,115,2001,4,25,1


In [14]:
# Rows within 0.001 degrees (exclusive bounds) of (42.68, -111.594), in date
# order. DataFrame.sort was removed from pandas (0.20); sort_values replaces it.
# NOTE(review): the output saved with this cell includes rows outside these
# bounds (e.g. LAT 42.684), so it appears to come from an earlier version of
# the query - re-run to refresh it.
lat_long_2001_df.query(
    'LAT > 42.679 & LAT < 42.681 & LONG < -111.593 & LONG > -111.595'
).sort_values(['year', 'month', 'day'])

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,TEMP,SPIX,TPIX,GMT,SAT_SRC,FRP,CONF,JULIAN,year,month,day,COUNT
1878,0.000000E+000,0.000000E+000,6020,292092,42.684,-111.588,308.3,1.2,1.1,523,T,29.8,60,67,2001,3,8,3
2061,0.000000E+000,0.000000E+000,6085,303930,42.678,-111.597,307.1,1.0,1.0,542,T,23.1,52,72,2001,3,13,2
2960,0.000000E+000,0.000000E+000,6061,341731,42.680,-111.594,309.0,1.0,1.0,529,T,25.5,64,90,2001,3,31,4
1879,0.000000E+000,0.000000E+000,6018,347057,42.684,-111.588,313.9,1.3,1.1,1853,T,23.2,60,92,2001,4,2,3
5268,0.000000E+000,0.000000E+000,6079,410494,42.679,-111.588,318.2,1.0,1.0,535,T,32.6,95,113,2001,4,23,1
5471,0.000000E+000,0.000000E+000,6112,422542,42.674,-111.587,315.5,1.5,1.2,1859,T,16.5,48,115,2001,4,25,1
5846,0.000000E+000,0.000000E+000,6103,432508,42.676,-111.592,332.7,1.1,1.1,1847,T,35.4,85,117,2001,4,27,1
6099,0.000000E+000,0.000000E+000,6108,439149,42.675,-111.589,319.6,1.0,1.0,1835,T,12.1,53,119,2001,4,29,1
6340,0.000000E+000,0.000000E+000,6011,448833,42.684,-111.599,317.9,1.8,1.3,1905,T,42.6,64,122,2001,5,2,1
6621,0.000000E+000,0.000000E+000,6080,462899,42.679,-111.587,320.9,1.0,1.0,1840,T,14.5,64,126,2001,5,6,1


So it looks like the basis of this project will be some kind of grouping algorithm. If we go out a half degree in each latitude/longitude direction, it looks like we have some groupings of fires. Let's check out 2015, though, and see what that looks like. 

In [15]:
# Same MODIS pickle directory as used for the 2001 data. The
# FOREST_FIRES_MODIS_DIR environment variable overrides the original
# hard-coded local path so the notebook can run on another machine.
modis_dir = os.environ.get(
    'FOREST_FIRES_MODIS_DIR',
    '/Users/sallamander/galvanize/forest-fires/data/pickled_data/MODIS/'
)
# The next cell opens the pickle by bare filename, so the working directory matters.
os.chdir(modis_dir)

In [16]:
# Pickle data is binary, so the file must be opened in 'rb' mode: text mode
# fails on Python 3 and is unsafe on platforms that translate line endings.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('df_2015.pkl', 'rb') as f:
    df_2015 = pickle.load(f)

In [17]:
# Number of rows recorded at each exact (LAT, LONG) pair in df_2015. size()
# counts every row in a group, so the result does not depend on any particular
# column (such as AREA) being non-null. This rebinds lat_long_df, replacing
# the 2001 counts table.
lat_long_df = (
    df_2015.groupby(['LAT', 'LONG'])
    .size()
    .reset_index(name='COUNT')
)

In [18]:
# Attach the per-location COUNT to every 2015 row by joining on the coordinate pair.
lat_long_2015_df = pd.merge(df_2015, lat_long_df, how='inner', on=['LAT', 'LONG'])

In [19]:
# Largest COUNT value, i.e. the most rows sharing one exact (LAT, LONG) pair in df_2015.
lat_long_2015_df['COUNT'].max()

12

In [20]:
# Rows at locations with a COUNT of at least ten, ordered by location and
# then date so repeats at the same coordinates sit next to each other.
# DataFrame.sort was removed from pandas (0.20); sort_values replaces it.
lat_long_2015_df.query('COUNT >=10').sort_values(
    ['LAT', 'LONG', 'year', 'month', 'day']
)

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,JULIAN,GMT,TEMP,SPIX,TPIX,SAT_SRC,CONF,FRP,year,month,day,COUNT
100061,0.0000000000000E+000,0.0000000000000E+000,100062,113496,3.341800E+001,-1.10866E+002,81,2031,323.3,1.0,1.0,A,37,11.9,2015,3,22,10
100065,0.0000000000000E+000,0.0000000000000E+000,100066,544572,3.341800E+001,-1.10866E+002,81,2031,323.3,1.0,1.0,A,37,11.9,2015,3,22,10
100056,0.0000000000000E+000,0.0000000000000E+000,100057,915021,3.341800E+001,-1.10866E+002,216,520,325.5,1.1,1.0,T,100,24.4,2015,8,4,10
100063,0.0000000000000E+000,0.0000000000000E+000,100064,482505,3.341800E+001,-1.10866E+002,216,520,325.5,1.1,1.0,T,100,24.4,2015,8,4,10
100057,0.0000000000000E+000,0.0000000000000E+000,100058,920971,3.341800E+001,-1.10866E+002,221,538,311.1,1.1,1.1,T,82,10.5,2015,8,9,10
100058,0.0000000000000E+000,0.0000000000000E+000,100059,920970,3.341800E+001,-1.10866E+002,221,534,311.1,1.1,1.1,T,74,15.0,2015,8,9,10
100062,0.0000000000000E+000,0.0000000000000E+000,100063,488455,3.341800E+001,-1.10866E+002,221,538,311.1,1.1,1.1,T,82,10.5,2015,8,9,10
100064,0.0000000000000E+000,0.0000000000000E+000,100065,488454,3.341800E+001,-1.10866E+002,221,534,311.1,1.1,1.1,T,74,15.0,2015,8,9,10
100059,0.0000000000000E+000,0.0000000000000E+000,100060,35823,3.341800E+001,-1.10866E+002,228,544,320.6,1.3,1.1,T,100,23.5,2015,8,16,10
100060,0.0000000000000E+000,0.0000000000000E+000,100061,35822,3.341800E+001,-1.10866E+002,228,540,320.5,1.3,1.1,T,100,31.4,2015,8,16,10


In [21]:
# Rows at locations with a COUNT between three and five (inclusive), ordered
# by location and then date.
# DataFrame.sort was removed from pandas (0.20); sort_values replaces it.
# NOTE(review): the saved output shows LAT/LONG in scientific notation, which
# suggests they may be stored as strings here; if so, this ordering is
# lexicographic rather than numeric - confirm the dtype.
lat_long_2015_df.query('COUNT >=3 & COUNT <= 5').sort_values(
    ['LAT', 'LONG', 'year', 'month', 'day']
)

Unnamed: 0,AREA,PERIMETER,FIRE_,FIRE_ID,LAT,LONG,JULIAN,GMT,TEMP,SPIX,TPIX,SAT_SRC,CONF,FRP,year,month,day,COUNT
140049,0.0000000000000E+000,0.0000000000000E+000,140050,4356,2.596500E+001,-9.79430E+001,226,1935,335.6,1.3,1.1,A,87,19.2,2015,8,14,3
140050,0.0000000000000E+000,0.0000000000000E+000,140051,4357,2.596500E+001,-9.79430E+001,226,1937,335.5,1.3,1.1,A,79,16.6,2015,8,14,3
140051,0.0000000000000E+000,0.0000000000000E+000,140052,4355,2.596500E+001,-9.79430E+001,226,1931,335.4,1.3,1.1,A,87,19.1,2015,8,14,3
140008,0.0000000000000E+000,0.0000000000000E+000,140009,38846,2.600000E+001,-9.83110E+001,229,449,309.4,1.0,1.0,T,72,6.9,2015,8,17,3
140009,0.0000000000000E+000,0.0000000000000E+000,140010,38845,2.600000E+001,-9.83110E+001,229,444,309.4,1.0,1.0,T,66,9.8,2015,8,17,3
140010,0.0000000000000E+000,0.0000000000000E+000,140011,38844,2.600000E+001,-9.83110E+001,229,450,309.4,1.0,1.0,T,66,9.9,2015,8,17,3
139795,0.0000000000000E+000,0.0000000000000E+000,139796,96243,2.605400E+001,-8.07780E+001,41,1803,324.8,3.3,1.7,A,78,110.2,2015,2,10,4
139797,0.0000000000000E+000,0.0000000000000E+000,139798,527319,2.605400E+001,-8.07780E+001,41,1803,324.8,3.3,1.7,A,78,110.2,2015,2,10,4
139796,0.0000000000000E+000,0.0000000000000E+000,139797,101421,2.605400E+001,-8.07780E+001,51,1836,313.4,1.0,1.0,A,38,13.5,2015,2,20,4
139798,0.0000000000000E+000,0.0000000000000E+000,139799,532497,2.605400E+001,-8.07780E+001,51,1836,313.4,1.0,1.0,A,38,13.5,2015,2,20,4
