In [1]:
import pandas as pd
import numpy as np

In [2]:
import sys 
sys.path.insert(0, '../Key')
from NOAA_key import key

In [3]:
import requests

def make_request(endpoint, payload=None, timeout=30):
    """
    Make a GET request to the NOAA weather API, passing the token header
    and an optional payload.

    Params:
        - endpoint: endpoint of the API to make a GET request to
        - payload: a dict of data to pass along with the request
                   (sent as query-string parameters)
        - timeout: seconds to wait for the server before giving up

    Returns:
        The response object produced by `requests.get`; callers read
        `.status_code` and `.json()` from it.

    Raises:
        requests.exceptions.Timeout: if the server does not answer
        within `timeout` seconds.
    """
    url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/"

    # requests applies no timeout by default, so an unresponsive server
    # would block the notebook kernel indefinitely without this argument.
    return requests.get(
        f'{url}{endpoint}',
        headers={'token': key},
        params=payload,
        timeout=timeout,
    )

In [4]:
# Smoke-test the API: request the 'datasets' endpoint, keep the parsed JSON
# body in json_response, and display the HTTP status code as the cell output.
response = make_request('datasets')
json_response = response.json()
response.status_code

200

In [6]:
# List the top-level keys of the parsed JSON response.
json_response.keys()

dict_keys(['metadata', 'results'])

In [7]:
# Inspect the 'metadata' section of the parsed JSON response.
json_response['metadata']

{'resultset': {'offset': 1, 'count': 11, 'limit': 25}}

In [9]:
# Look at the first entry in 'results' to see which fields a record carries.
json_response['results'][0]

{'uid': 'gov.noaa.ncdc:C00861',
 'mindate': '1763-01-01',
 'maxdate': '2020-09-15',
 'name': 'Daily Summaries',
 'datacoverage': 1,
 'id': 'GHCND'}

In [14]:
# Collect an (id, name) pair for every record in the response's 'results' list.
[(entry['id'], entry['name']) for entry in json_response['results']]

[('GHCND', 'Daily Summaries'),
 ('GSOM', 'Global Summary of the Month'),
 ('GSOY', 'Global Summary of the Year'),
 ('NEXRAD2', 'Weather Radar (Level II)'),
 ('NEXRAD3', 'Weather Radar (Level III)'),
 ('NORMAL_ANN', 'Normals Annual/Seasonal'),
 ('NORMAL_DLY', 'Normals Daily'),
 ('NORMAL_HLY', 'Normals Hourly'),
 ('NORMAL_MLY', 'Normals Monthly'),
 ('PRECIP_15', 'Precipitation 15 Minute'),
 ('PRECIP_HLY', 'Precipitation Hourly')]

In [27]:
# Request the data categories for the GHCND dataset and display the
# full parsed JSON response.
response = make_request('datacategories', payload={'datasetid' : 'GHCND'})
json_response = response.json()
json_response

{'metadata': {'resultset': {'offset': 1, 'count': 9, 'limit': 25}},
 'results': [{'name': 'Evaporation', 'id': 'EVAP'},
  {'name': 'Land', 'id': 'LAND'},
  {'name': 'Precipitation', 'id': 'PRCP'},
  {'name': 'Sky cover & clouds', 'id': 'SKY'},
  {'name': 'Sunshine', 'id': 'SUN'},
  {'name': 'Air Temperature', 'id': 'TEMP'},
  {'name': 'Water', 'id': 'WATER'},
  {'name': 'Wind', 'id': 'WIND'},
  {'name': 'Weather Type', 'id': 'WXTYPE'}]}

In [38]:
# Request the data types in the 'temp' data category, asking for up to 100 records.
response = make_request(
    'datatypes',
    payload={'datacategoryid': 'temp', 'limit': 100},
)
json_response = response.json()

# Build (id, name) pairs for every record, then display only the last five.
[(entry['id'], entry['name']) for entry in json_response['results']][-5:]

[('MNTM', 'Monthly mean temperature'),
 ('TAVG', 'Average Temperature.'),
 ('TMAX', 'Maximum temperature'),
 ('TMIN', 'Minimum temperature'),
 ('TOBS', 'Temperature at the time of observation')]

In [40]:
# Pretty-print the full 'results' list of the current json_response so each
# record's fields are readable.
# NOTE(review): this import sits mid-notebook; it would conventionally live
# with the other imports in the first cell.
import pprint
pprint.pprint(json_response['results'])

[{'datacoverage': 1,
  'id': 'CDSD',
  'maxdate': '2020-08-01',
  'mindate': '1763-01-01',
  'name': 'Cooling Degree Days Season to Date'},
 {'datacoverage': 1,
  'id': 'DATN',
  'maxdate': '2020-09-15',
  'mindate': '1863-05-04',
  'name': 'Number of days included in the multiday minimum temperature (MDTN)'},
 {'datacoverage': 1,
  'id': 'DATX',
  'maxdate': '2020-09-14',
  'mindate': '1863-05-04',
  'name': 'Number of days included in the multiday maximum temperature (MDTX)'},
 {'datacoverage': 1,
  'id': 'DLY-DUTR-NORMAL',
  'maxdate': '2010-12-31',
  'mindate': '2010-01-01',
  'name': 'Long-term averages of daily diurnal temperature range'},
 {'datacoverage': 1,
  'id': 'DLY-DUTR-STDDEV',
  'maxdate': '2010-12-31',
  'mindate': '2010-01-01',
  'name': 'Long-term standard deviations of daily diurnal temperature range'},
 {'datacoverage': 1,
  'id': 'DLY-TAVG-NORMAL',
  'maxdate': '2010-12-31',
  'mindate': '2010-01-01',
  'name': 'Long-term averages of daily average temperature'},
 

In [41]:
# Fetch NYC Central Park records from the GHCND dataset: October 2018
# temperature data types (TOBS, TMIN, TMAX) in metric units, up to 100 rows.
response = make_request(
    endpoint='data',
    payload=dict(
        datasetid='GHCND',
        stationid='GHCND:USW00094728',
        locationid='CITY:US360019',
        startdate='2018-10-01',
        enddate='2018-10-31',
        datatypeid=['TOBS', 'TMIN', 'TMAX'],
        units='metric',
        limit=100,
    ),
)

In [42]:
# Build a DataFrame from the 'results' list of the JSON response and
# preview its first rows.
df = pd.DataFrame(response.json()['results'])
df.head()

Unnamed: 0,date,datatype,station,attributes,value
0,2018-10-01T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",24.4
1,2018-10-01T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",17.2
2,2018-10-02T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",25.0
3,2018-10-02T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",18.3
4,2018-10-03T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",23.3


In [49]:
# Give the 'attributes' and 'value' columns more descriptive names.
# Reassigning the result instead of using inplace=True keeps the call
# chainable and follows the recommended pandas idiom.
df = df.rename(columns={
    "attributes": 'Flags',
    "value": 'Temp_Celcius',
})
df.head()

Unnamed: 0,DATE,DATATYPE,STATION,FLAGS,TEMP_CELCIUS
0,2018-10-01T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",24.4
1,2018-10-01T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",17.2
2,2018-10-02T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",25.0
3,2018-10-02T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",18.3
4,2018-10-03T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",23.3


In [50]:
# Convert every column name to lowercase by applying str.lower to each label.
# Reassigning the result instead of using inplace=True keeps the call
# chainable and follows the recommended pandas idiom.
df = df.rename(columns=str.lower)
df.head()

Unnamed: 0,date,datatype,station,flags,temp_celcius
0,2018-10-01T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",24.4
1,2018-10-01T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",17.2
2,2018-10-02T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",25.0
3,2018-10-02T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",18.3
4,2018-10-03T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",23.3


In [51]:
# Check the dtype of each column.
df.dtypes

date             object
datatype         object
station          object
flags            object
temp_celcius    float64
dtype: object

In [52]:
# Parse the 'date' column with pd.to_datetime, then re-check the dtypes
# to confirm the conversion.
df["date"] = pd.to_datetime(df['date'])
df.dtypes

date            datetime64[ns]
datatype                object
station                 object
flags                   object
temp_celcius           float64
dtype: object

In [53]:
# Show summary statistics for the 'date' column.
df['date'].describe()

count                      62
unique                     31
top       2018-10-01 00:00:00
freq                        2
first     2018-10-01 00:00:00
last      2018-10-31 00:00:00
Name: date, dtype: object

In [58]:
# Produce the typed frame in one chain: parse the dates, add a Fahrenheit
# column computed from the Celsius readings, and cast the station and
# datatype labels to the category dtype.
df = (
    df
    .assign(
        date=pd.to_datetime(df['date']),
        temp_f=df['temp_celcius'] * 9 / 5 + 32,
    )
    .astype({'station': 'category', 'datatype': 'category'})
)
df.dtypes

date            datetime64[ns]
datatype              category
station               category
flags                   object
temp_celcius           float64
temp_f                 float64
dtype: object