# GFS data

In [None]:
import os
REP_DIR = "/home/cagraff/Documents/dev/fire_prediction/"
os.chdir(REP_DIR)
from data import data
import numpy as np
from tabulate import tabulate
from util.daymonth import monthday2day, day2monthday

In [None]:
# Load data
# load_gfs_dict() comes from the project's `data` module; the cells below read
# the 'temp' and 'days' entries of the object it returns.
gfs = data.load_gfs_dict()
# Bare expression: as the last statement of a notebook cell it shows the keys.
gfs.keys()

In [None]:
temp = gfs['temp']
days = gfs['days']

print np.shape(temp)
print np.shape(days)

## Number of missing days

In [None]:
# Length of a non-leap year; the per-year loop below adds one for leap years.
days_per_year = 365
# Years examined: 2007 through 2016 inclusive (range() excludes the stop value).
years = range(2007, 2017)
num_years = len(years)

def is_leap_year(year):
    """Return True if `year` is a leap year in the Gregorian calendar.

    A year divisible by 4 is a leap year, except for century years, which
    must also be divisible by 400 (2000 is a leap year; 1900 and 2100 are
    not). The boolean result can be added to an integer day count to account
    for the extra day.
    """
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

# Missing per year
missing = []
for year in years:
    sel = [x for x in days if x[0]==year]
    
    days_present = len(sel)
    days_missing = (days_per_year + is_leap_year(year)) - days_present
    
    
    missing.append((year, days_present, days_missing))

# Missing total
missing.append(('Total', sum([x[1] for x in missing]), sum([x[2] for x in missing])))
print tabulate(missing, headers=['Year', 'Present', 'Missing'])

  

## Number of missing days (in fire season)

In [None]:
fire_season = (133,242)
print 'Fire Season:', day2monthday(133), 'to', day2monthday(242), '\n'
days_per_season = fire_season[1] - fire_season[0] + 1

# Missing per year
missing = []
for year in years:
    sel = [x for x in days if x[0]==year and (monthday2day(x[1], x[2], is_leap_year(x[0])) in range(fire_season[0], fire_season[1]+1))]
    
    days_present = len(sel)
    days_missing = days_per_season - days_present
    
    
    missing.append((year, days_present, days_missing))

# Missing total
missing.append(('Total', sum([x[1] for x in missing]), sum([x[2] for x in missing])))
print tabulate(missing, headers=['Year', 'Present', 'Missing'])


## Number of missing days per month

In [None]:
# Calendar months, numbered 1 through 12.
months = range(1, 13)

# Column positions within each row of the per-month `missing` table.
MONTH_IND = 0
PRESENT_IND = 1
MISSING_IND = 2

def days_per_month(month, is_leap):
    """Return the number of days in `month` (1 = January ... 12 = December).

    `is_leap` selects the February length: 29 when truthy, 28 otherwise.
    `month` is used as a 1-based index and is not range-checked.
    """
    february = 29 if is_leap else 28
    lengths = (31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    return lengths[month - 1]

missing = np.zeros((12,3), dtype=np.int32)
missing[:, MONTH_IND] = months

# Missing per month
for year in years:
    for month in months:
        sel = [x for x in days if x[0]==year and x[1]==month]

        days_present = len(sel)
        days_missing = days_per_month(month, is_leap_year(year)) - days_present


        missing[month-1,PRESENT_IND] += days_present
        missing[month-1,MISSING_IND] += days_missing
        



# Missing total
present_total = np.sum(missing[:, PRESENT_IND])
missing_total = np.sum(missing[:, MISSING_IND])
missing = list(missing)
missing.append(['Total', present_total, missing_total])

print tabulate(missing, headers=['Month', 'Present', 'Missing'])


## List of Missing Days

In [None]:
def generate_month(year, month):
    """Return a list of (year, month, day) tuples for every day of the month.

    Days run from 1 up to the month's length, which takes the leap status of
    `year` into account.
    """
    last_day = days_per_month(month, is_leap_year(year))
    return [(year, month, day) for day in range(1, last_day + 1)]

missing_days = []

for year in years:
    for month in months:
        days_in_month = days_per_month(month, is_leap_year(year))
        sel = [x for x in days if x[0]==year and x[1]==month]
        if len(sel) < days_in_month:
            missing_days += set(generate_month(year, month)).difference(set(sel))
            
missing_days.sort()
print len(missing_days)
print missing_days