In [None]:
import array, csv, math, os, time
from datetime import timedelta, date, datetime

def DateRange(start_date, end_date):
    """Yield each day from start_date up to, but not including, end_date.

    The number of items is the whole-day difference between the two
    arguments; nothing is yielded when end_date is not after start_date.
    """
    span = end_date - start_date
    total_days = int(span.days)
    for offset in range(total_days):
        yield start_date + timedelta(days=offset)

def LngLatToWebMercator(lnglat, scale = 1.):
    """Project a (longitude, latitude) pair in degrees to Web Mercator.

    With the default scale the result lives in a 256 x 256 square:
    longitude -180..180 maps to x 0..256, and latitude 0 maps to y 128,
    with y decreasing towards the north.

    lnglat -- two-item sequence, longitude first, then latitude.
    scale  -- multiplier applied to both coordinates.

    Returns a two-element list [x, y].  A latitude of exactly -90 makes the
    logarithm's argument zero, which raises ValueError.
    """
    (lng, lat) = lnglat
    merc_x = (lng + 180.0) * 256.0 / 360.0
    half_angle = (lat + 90.0) * math.pi / 360.0
    merc_y = 128.0 - math.log(math.tan(half_angle)) * 128.0 / math.pi
    return [merc_x * scale, merc_y * scale]


def FormatFilename(year_str, date_str):
    """Return the relative path of one day's CSV file.

    year_str -- directory name (the cells in this notebook pass "YYYY").
    date_str -- date part of the file name (the cells pass "YYYYMMDD").
    """
    template = "%s/VNF_npp_d%s_noaa_v21.ez.csv"
    return template % (year_str, date_str)

def ProcessFile(filename):
    """Read one daily CSV and return its accepted detections as a flat list.

    A row is accepted when QF_Fit >= 34 and Temp_BB is within 500..3000
    inclusive but not exactly 1800 (presumably a fill/default value --
    confirm against the data documentation).  Each accepted row adds three
    numbers to the result: Web Mercator x, Web Mercator y, and the
    Date_Mscan timestamp (fractional seconds dropped) converted to epoch
    seconds with time.mktime, i.e. interpreted in the local time zone.

    Returns an empty list when the file does not exist.
    """
    flat = []
    if not os.path.exists(filename):
        return flat

    with open(filename) as handle:
        for record in csv.DictReader(handle):
            fit_quality = int(record['QF_Fit'])
            blackbody_temp = int(record['Temp_BB'])
            if fit_quality < 34:
                continue
            if blackbody_temp < 500 or blackbody_temp > 3000 or blackbody_temp == 1800:
                continue

            latitude = float(record['Lat_GMTCO'])
            longitude = float(record['Lon_GMTCO'])
            merc_x, merc_y = LngLatToWebMercator((longitude, latitude))

            # Keep only the part before the first "." so strptime can parse it.
            stamp = record['Date_Mscan'].split(".")[0]
            parsed = time.strptime(stamp, "%Y/%m/%d %H:%M:%S")
            flat.extend((merc_x, merc_y, time.mktime(parsed)))
    return flat

def CreateIndex(points):
    """Summarise a flat [x, y, epoch, ...] list by calendar month.

    Every third value starting at position 2 is read as an epoch timestamp
    and converted with datetime.fromtimestamp (local time).  The result maps
    "YYYYMM" keys to {'count': n, 'first': k}, where n is the number of
    points in that month and k is the number of points in all earlier months
    (in sorted key order).
    """
    tallies = {}
    for stamp in points[2::3]:
        moment = datetime.fromtimestamp(stamp)
        key = "%d%02d" % (moment.year, moment.month)
        bucket = tallies.setdefault(key, {'count': 0})
        bucket['count'] += 1

    # Running total of points in the months that sort before the current one.
    running = 0
    for key in sorted(tallies):
        tallies[key]['first'] = running
        running += tallies[key]['count']
    return tallies

In [None]:
# Unzip all the years data files
for year in range(2012,2018):
    cmd = "gunzip %s/*" % year
    !$cmd


In [None]:
# Collect every accepted detection for calendar year 2016 into one flat
# [x, y, epoch, ...] list and write it to viirs-2016.bin.
points = []
start_date = date(2016, 1, 1)
end_date = date(2017, 1, 1)  # exclusive: DateRange stops the day before
for single_date in DateRange(start_date, end_date):
    year_str = single_date.strftime("%Y")
    date_str = single_date.strftime("%Y%m%d")
    filename = FormatFilename(year_str, date_str)
    # Days whose CSV is missing contribute nothing (ProcessFile returns []).
    points += ProcessFile(filename)

# array.tofile emits raw bytes, so the file must be opened in binary mode;
# the with-block guarantees it is flushed and closed.
# NOTE(review): typecode 'f' is single precision, so the epoch values are
# rounded when stored -- confirm the consumer tolerates that.
with open("viirs-2016.bin", 'wb') as out_file:
    array.array('f', points).tofile(out_file)


In [None]:
# Write one viirs-YYYY_MM.bin file per calendar month between start_date
# (inclusive) and end_date (exclusive).
points = []
start_date = date(2012, 3, 1)
end_date = date(2017, 4, 1)
# Month currently being accumulated; derived from start_date so the two
# cannot drift apart when the range is edited.
file_name = start_date.strftime("%Y_%m")
for single_date in DateRange(start_date, end_date):
    year_str = single_date.strftime("%Y")
    date_str = single_date.strftime("%Y%m%d")
    month_key = single_date.strftime("%Y_%m")
    if file_name != month_key:
        # The month rolled over: flush the finished month and start a new one.
        # Binary mode is required by array.tofile; the with-block closes the file.
        with open("viirs-%s.bin" % file_name, 'wb') as out_file:
            array.array('f', points).tofile(out_file)
        points = []
        file_name = month_key
    filename = FormatFilename(year_str, date_str)
    # Days whose CSV is missing contribute nothing (ProcessFile returns []).
    points += ProcessFile(filename)

# Flush the last (still open) month.
with open("viirs-%s.bin" % file_name, 'wb') as out_file:
    array.array('f', points).tofile(out_file)

In [None]:
# Write every accepted detection from 2012-03-01 up to (not including)
# 2017-04-01 into a single file, viirs_2012-2017.bin.
points = []
start_date = date(2012, 3, 1)
end_date = date(2017, 4, 1)
for single_date in DateRange(start_date, end_date):
    year_str = single_date.strftime("%Y")
    date_str = single_date.strftime("%Y%m%d")
    filename = FormatFilename(year_str, date_str)
    # Days whose CSV is missing contribute nothing (ProcessFile returns []).
    points += ProcessFile(filename)

# array.tofile emits raw bytes, so open in binary mode; the with-block
# guarantees the file is flushed and closed.
with open("viirs_2012-2017.bin", 'wb') as out_file:
    array.array('f', points).tofile(out_file)


In [None]:
# Timestamps for GSS: one double-quoted YYYY-MM-DD string per day from
# start_date (inclusive) to end_date (exclusive), joined with commas and shown
# as the cell output.
start_date = date(2012, 3, 1)
end_date = date(2017, 4, 1)
dates = ['"' + day.strftime("%Y-%m-%d") + '"'
         for day in DateRange(start_date, end_date)]
",".join(dates)

In [None]:
# Write every accepted detection from 2014-08-17 up to (not including)
# 2017-10-10 into a single file.
# NOTE(review): the output name says 20170917 but end_date is 2017-10-10;
# one of the two looks stale -- confirm which range is intended.  The name is
# left unchanged here because other tooling may depend on it.
points = []
start_date = date(2014, 8, 17)
end_date = date(2017, 10, 10)
for single_date in DateRange(start_date, end_date):
    year_str = single_date.strftime("%Y")
    date_str = single_date.strftime("%Y%m%d")
    filename = FormatFilename(year_str, date_str)
    # Days whose CSV is missing contribute nothing (ProcessFile returns []).
    points += ProcessFile(filename)

# array.tofile emits raw bytes, so open in binary mode; the with-block
# guarantees the file is flushed and closed.
with open("viirs_20140817-20170917.bin", 'wb') as out_file:
    array.array('f', points).tofile(out_file)


In [None]:
# Build the per-month index for GSS: maps "YYYYMM" -> {'count', 'first'}.
# This reads whatever `points` list is currently in the kernel, i.e. the one
# left behind by the most recently executed data cell -- run the intended
# data cell immediately before this one.
index = CreateIndex(points)

In [None]:
# Display the month index dict as this cell's output.
index

In [None]:
# Timestamps for GSS: one double-quoted YYYY-MM-DD string per day from
# start_date (inclusive) to end_date (exclusive), joined with commas and shown
# as the cell output.
# NOTE(review): this starts on 2014-09-17, while the data cell that writes
# viirs_20140817-20170917.bin starts on 2014-08-17 -- confirm which is intended.
start_date = date(2014, 9, 17)
end_date = date(2017, 10, 10)
dates = ['"' + day.strftime("%Y-%m-%d") + '"'
         for day in DateRange(start_date, end_date)]
",".join(dates)