# NOAA data

API documentation for the NOAA Climate Data Online (CDO) web services (v2) is here: https://www.ncdc.noaa.gov/cdo-web/webservices/v2#gettingStarted

## Setup

In [1]:
# Notebook-wide setup: IPython extensions, plotting defaults, warning policy,
# and the core Python imports.

# rpy2's IPython extension provides the %%R cell magic used by the R cells.
%load_ext rpy2.ipython
# autoreload in mode 2 re-imports modules before each execution, so edits to
# imported .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline  
from matplotlib import rcParams
# Default figure size as (width, height) in inches; note the very tall height.
rcParams['figure.figsize'] = (16, 100)

import warnings
# RRuntimeWarning is only referenced by the commented-out, narrower filter below.
from rpy2.rinterface import RRuntimeWarning
# Blanket filter: suppresses every Python warning from here on.
warnings.filterwarnings("ignore") # Ignore all warnings
# Alternative: silence only R runtime warnings and keep all other warnings visible.
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [2]:
%%javascript
// Disable auto-scrolling
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

This is a Python notebook, but the cell below is an R cell. The `%%R` magic at the top of a cell (provided by the `rpy2.ipython` extension loaded in the setup cell) indicates that the code in that cell is R code.

In [3]:
%%R

# Commonly used R imports.
#
# library() is used instead of require(): if the package is not installed,
# library() stops with an error in this cell, whereas require() only emits a
# warning and returns FALSE, letting later cells fail with less obvious
# "could not find function" errors.
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Loading required package: tidyverse


## Getting data from API

In [4]:
import dotenv 
import os 

# Load variables defined in a local .env file into the process environment.
dotenv.load_dotenv()

# Read the NOAA API token. A missing *or empty* value is rejected here, with a
# message that says how to fix it, rather than surfacing later as a failed
# API request. KeyError is kept as the exception type for compatibility.
noaa_api_token = os.environ.get('NOAA_API_TOKEN')
if not noaa_api_token:
    raise KeyError(
        "NOAA_API_TOKEN is not set (or is empty); "
        "define it in your .env file or in the environment"
    )


In [5]:
import requests

# Query parameters sent to the `data` endpoint: one dataset, one location,
# one station, and a single-day date range (start and end are the same day).
params = {
    "datasetid": "GHCND",
    "locationid": "ZIP:28801",
    "startdate": "2010-05-01",
    "enddate": "2010-05-01",
    "stationid": "GHCND:USW00013872",
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

url = "https://www.ncei.noaa.gov/cdo-web/api/v2/data?"
response = requests.get(
    url,
    params=params,
    headers={'token': noaa_api_token},  # the API token is passed as a request header
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Raise requests.HTTPError on 4xx/5xx responses (e.g. a rejected token or rate
# limiting) instead of parsing an error payload as if it were data.
response.raise_for_status()

# Parsed JSON body; the next cell reads its 'results' entry.
data = response.json()

In [6]:
# Build a DataFrame from the 'results' entry of the parsed API response and,
# as the last expression in the cell, display it as the cell output.
# Raises KeyError if the response contains no 'results' key.
pd.DataFrame(data['results'])

Unnamed: 0,date,datatype,station,attributes,value
0,2010-05-01T00:00:00,PRCP,GHCND:USW00013872,",,0,2400",3
1,2010-05-01T00:00:00,SNOW,GHCND:USW00013872,",,0,",0
2,2010-05-01T00:00:00,SNWD,GHCND:USW00013872,",,0,",0
3,2010-05-01T00:00:00,TMAX,GHCND:USW00013872,",,0,2400",267
4,2010-05-01T00:00:00,TMIN,GHCND:USW00013872,",,0,2400",139
5,2010-05-01T00:00:00,TOBS,GHCND:USW00013872,",,0,2400",206
