# GFS data

In [3]:
import os
# The working directory is switched to the repository root before the project
# imports below -- presumably so that the `data` and `util` packages resolve.
# NOTE(review): this is an absolute, machine-specific path; adjust it to your
# own checkout before running.
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
os.chdir(REP_DIR)
from data import data
import numpy as np
from tabulate import tabulate
from util.daymonth import monthday2day, day2monthday

In [4]:
# Load data
# `gfs` is indexed by string keys in later cells (e.g. gfs['temp'], gfs['days']).
gfs = data.load_gfs_dict()
# Last expression of the cell: displays the available keys.
gfs.keys()

['lats',
 'temp',
 'days',
 'rain',
 'humidity',
 'vpd',
 'lons',
 'valid_bits',
 'wind']

In [46]:
# Pull the 'temp' and 'days' entries out of the loaded GFS dictionary.
temp, days = gfs['temp'], gfs['days']

# Print the dimensions of each, one per line.
for arr in (temp, days):
    print(np.shape(arr))

(30, 52, 3429)
(3429, 3)


## Number of missing days

In [7]:
days_per_year = 365  # length of a non-leap year; the leap day is added per year by the tally code
years = range(2007, 2017)  # 2007 through 2016 inclusive
num_years = len(years)  # number of years covered

def is_leap_year(year):
    """Return True if `year` is a leap year in the Gregorian calendar.

    A year is a leap year when it is divisible by 4, except century years,
    which must also be divisible by 400 (so 2000 is a leap year, 1900 and
    2100 are not).

    The result is a boolean, so it can still be added to a day count
    (True counts as 1) as the per-year tally does.
    """
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

# Tally, for every year, how many days have a record and how many do not.
missing = []
for year in years:
    n_present = sum(1 for x in days if x[0]==year)
    n_expected = days_per_year + is_leap_year(year)
    missing.append((year, n_present, n_expected - n_present))

# Totals row: both sums are evaluated before the row itself is appended.
total_present = sum(entry[1] for entry in missing)
total_missing = sum(entry[2] for entry in missing)
missing.append(('Total', total_present, total_missing))
print(tabulate(missing, headers=['Year', 'Present', 'Missing']))

  

Year      Present    Missing
------  ---------  ---------
2007          280         85
2008          366          0
2009          365          0
2010          363          2
2011          335         30
2012          345         21
2013          363          2
2014          357          8
2015          325         40
2016          330         36
Total        3429        224


## Number of missing days (in fire season)

In [8]:
# Fire season as an inclusive (first, last) pair of day-of-year values, in the
# numbering used by the util.daymonth helpers.
fire_season = (133,242)
season_start, season_end = fire_season

# Derive the banner from `fire_season` itself so the printed dates cannot
# drift from the range that is actually counted below.
print('Fire Season: %s to %s \n' % (day2monthday(season_start), day2monthday(season_end)))
days_per_season = season_end - season_start + 1

# Build the inclusive day range once instead of once per record inside the loop.
season_days = range(season_start, season_end + 1)

# Missing per year
missing = []
for year in years:
    # Count records of this year whose day-of-year falls inside the season.
    days_present = sum(
        1 for x in days
        if x[0]==year
        and monthday2day(x[1], x[2], is_leap_year(x[0])) in season_days
    )
    days_missing = days_per_season - days_present
    missing.append((year, days_present, days_missing))

# Missing total (both sums are evaluated before the 'Total' row is appended)
missing.append(('Total', sum(x[1] for x in missing), sum(x[2] for x in missing)))
print(tabulate(missing, headers=['Year', 'Present', 'Missing']))


Fire Season: (5, 14) to (8, 31) 

Year      Present    Missing
------  ---------  ---------
2007          103          7
2008          110          0
2009          110          0
2010          110          0
2011           86         24
2012           95         15
2013          108          2
2014          108          2
2015          101          9
2016           88         22
Total        1019         81


## Number of missing days per month

In [38]:
months = range(1, 13)  # month numbers 1..12

# Column indices into the per-month table: month number, days present, days missing.
MONTH_IND = 0
PRESENT_IND = 1
MISSING_IND = 2

def days_per_month(month, is_leap):
    """Return the number of days in `month`, where 1 is January and 12 is December.

    `is_leap` is evaluated for truthiness; when true, February has 29 days
    instead of 28.
    """
    february = 29 if is_leap else 28
    lengths = (31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    return lengths[month-1]

# One row per calendar month: [month number, days present, days missing],
# accumulated over all years.
missing = np.zeros((12,3), dtype=np.int32)
missing[:, MONTH_IND] = months

for year in years:
    leap = is_leap_year(year)
    for month in months:
        n_present = sum(1 for x in days if x[0]==year and x[1]==month)
        n_missing = days_per_month(month, leap) - n_present

        slot = month - 1  # months are 1-based, table rows are 0-based
        missing[slot, PRESENT_IND] += n_present
        missing[slot, MISSING_IND] += n_missing

# Column totals, taken while `missing` is still an array
present_total = missing[:, PRESENT_IND].sum()
missing_total = missing[:, MISSING_IND].sum()

# Convert to a list of rows so the mixed-type 'Total' row can be appended
missing = list(missing)
missing.append(['Total', present_total, missing_total])

print(tabulate(missing, headers=['Month', 'Present', 'Missing']))


Month      Present    Missing
-------  ---------  ---------
1              280         30
2              260         23
3              285         25
4              281         19
5              299         11
6              274         26
7              288         22
8              287         23
9              283         17
10             305          5
11             295          5
12             292         18
Total         3429        224


## List of Missing Days

In [51]:
def generate_month(year, month):
    """Return every date of the given month as a list of (year, month, day) tuples.

    Days run from 1 up to the month length reported by `days_per_month`,
    with the leap-year flag taken from `is_leap_year(year)`.
    """
    last_day = days_per_month(month, is_leap_year(year))
    return [(year, month, dom) for dom in range(1, last_day + 1)]

# Index every available (year, month, day) record once so that each calendar
# day can be looked up directly instead of rescanning `days` per month.
present_days = set(days)

missing_days = []

for year in years:
    for month in months:
        # Check every month unconditionally: comparing the record count with
        # the month length would hide a gap whenever a date appears twice.
        missing_days += [d for d in generate_month(year, month) if d not in present_days]

missing_days.sort()
print(len(missing_days))
print(missing_days)

224
[(2007, 1, 7), (2007, 1, 8), (2007, 1, 9), (2007, 1, 10), (2007, 1, 11), (2007, 1, 12), (2007, 1, 13), (2007, 1, 14), (2007, 1, 15), (2007, 1, 16), (2007, 1, 17), (2007, 1, 18), (2007, 1, 19), (2007, 1, 20), (2007, 1, 21), (2007, 1, 22), (2007, 1, 23), (2007, 1, 24), (2007, 1, 25), (2007, 1, 26), (2007, 1, 27), (2007, 1, 28), (2007, 1, 29), (2007, 1, 30), (2007, 1, 31), (2007, 2, 7), (2007, 2, 8), (2007, 2, 9), (2007, 2, 10), (2007, 2, 11), (2007, 2, 12), (2007, 2, 13), (2007, 2, 14), (2007, 2, 15), (2007, 2, 16), (2007, 2, 17), (2007, 2, 18), (2007, 2, 19), (2007, 2, 20), (2007, 2, 21), (2007, 2, 22), (2007, 2, 23), (2007, 2, 24), (2007, 2, 25), (2007, 2, 26), (2007, 2, 27), (2007, 2, 28), (2007, 3, 3), (2007, 3, 4), (2007, 3, 9), (2007, 3, 10), (2007, 3, 11), (2007, 3, 16), (2007, 3, 17), (2007, 3, 20), (2007, 4, 13), (2007, 4, 16), (2007, 4, 29), (2007, 6, 12), (2007, 6, 21), (2007, 6, 29), (2007, 7, 10), (2007, 7, 14), (2007, 7, 22), (2007, 8, 11), (2007, 9, 15), (2007, 9, 16),