# Grab the data

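This notebook downloads the latest Global Summary of the Year (GSOY) archive from NOAA's National Centers for Environmental Information (NCEI). It then filters the archive down to just the data we need to generate the contour plot in [this other notebook](hardiness.ipynb): the `DATE`, `LATITUDE`, `LONGITUDE` and `EMNT` columns, for files whose names start with `US`, from 2014 onward.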

In [1]:
from io import BytesIO
import tarfile
from urllib.request import urlopen
import csv
import pandas as pd
import numpy as np

# Location of the most recent GSOY archive (a gzip-compressed tarball).
url = 'https://www.ncei.noaa.gov/data/gsoy/archive/gsoy-latest.tar.gz'

# Download the whole archive into memory. The response is used as a context
# manager so the underlying HTTP connection is closed as soon as the payload
# has been read, instead of lingering until garbage collection.
with urlopen(url) as response:
    b = BytesIO(response.read())


In [2]:

def extractor(fileobj=None, prefix='US'):
    """Yield a readable file object for each matching member of a tar archive.

    Parameters
    ----------
    fileobj : seekable binary file-like object, optional
        Buffer holding the (optionally compressed) tar archive. When omitted,
        the notebook-level buffer ``b`` is used.
    prefix : str, optional
        Only members whose name starts with this string are yielded.

    Yields
    ------
    file-like object
        The result of ``TarFile.extractfile`` for each matching member. It
        must be consumed while the generator is still active, because the
        archive is closed once iteration finishes.
    """
    source = b if fileobj is None else fileobj

    # tarfile starts reading at the buffer's current position. Rewind first so
    # the generator can be run again over the same buffer (e.g. when the cell
    # is re-executed) instead of failing on an exhausted stream.
    source.seek(0)

    with tarfile.open(mode='r', fileobj=source) as archive:
        for m in archive.getmembers():
            if not m.name.startswith(prefix):
                continue

            handle = archive.extractfile(m)
            # extractfile returns None for members that are neither regular
            # files nor links (e.g. directories); there is nothing to read.
            if handle is None:
                continue

            yield handle
        

# Columns the contour-plot notebook needs, and the first year to keep.
COLUMNS = ['DATE', 'LATITUDE', 'LONGITUDE', 'EMNT']
FIRST_YEAR = 2014

# Collect one filtered frame per file and concatenate once at the end.
# Calling pd.concat inside the loop would re-copy the accumulated frame on
# every iteration, which is quadratic in the number of files.
frames = []
for g in extractor():
    df = pd.read_csv(g)
    # Files without an EMNT column have nothing to contribute; skip them.
    if 'EMNT' not in df.columns:
        continue
    # Keep rows from FIRST_YEAR onward, only the needed columns, and drop
    # any row missing one of them. Original row indices are preserved.
    frames.append(df.loc[df['DATE'] >= FIRST_YEAR, COLUMNS].dropna())

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame with the expected columns when no file matched.
df_main = pd.concat(frames) if frames else pd.DataFrame(columns=COLUMNS)
    

In [3]:
# Sanity check: (rows, columns) of the accumulated frame.
df_main.shape

(36254, 4)

In [4]:
# Preview the first few filtered rows.
df_main.head()

Unnamed: 0,DATE,LATITUDE,LONGITUDE,EMNT
41,2014,32.9452,-85.948,-15.6
42,2015,32.9452,-85.948,-12.8
43,2016,32.9452,-85.948,-6.7
44,2017,32.9452,-85.948,-10.0
45,2018,32.9452,-85.948,-12.2


In [5]:
# Write the filtered data to gsoy.csv in the working directory. The index is
# written too (to_csv default), so it appears as the first, unnamed column.
df_main.to_csv('gsoy.csv')