In [1]:
import json
import requests
import pandas as pd
from config import api_key
from city_list import clist,alist

## Use the NOAA API
## Find city id based on name. Offset is 1001 (with a limit of 1000) so that the request returns results 1001 through the end of the dataset, which is where all of the US cities lie.

In [2]:
# Ask the NOAA CDO "locations" endpoint for city-level locations.
# limit/offset select one page of the result set: 1000 rows starting at record 1001.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/locations"
params = {
    "locationcategoryid":"City",
    "limit":"1000",
    "offset":"1001"
}
# The API token travels in a request header; later cells reuse `headers`.
headers = {
    "token": api_key
}
# timeout: do not let a stalled connection hang the kernel indefinitely.
response = requests.get(base_url, params=params, headers=headers, timeout=30)
# Surface a rejected token / rate limit / server error here, with its HTTP status,
# rather than as a KeyError on "results" in a later cell.
response.raise_for_status()
response.json()

{'metadata': {'resultset': {'offset': 1001, 'count': 1987, 'limit': 1000}},
 'results': [{'mindate': '2003-11-01',
   'maxdate': '2019-09-07',
   'name': 'Mus, TU',
   'datacoverage': 0.9973,
   'id': 'CITY:TU000042'},
  {'mindate': '2003-11-01',
   'maxdate': '2019-09-07',
   'name': 'Nevsehir, TU',
   'datacoverage': 1,
   'id': 'CITY:TU000043'},
  {'mindate': '2001-07-01',
   'maxdate': '2019-09-07',
   'name': 'Nigde, TU',
   'datacoverage': 0.9999,
   'id': 'CITY:TU000044'},
  {'mindate': '1927-03-01',
   'maxdate': '2019-09-07',
   'name': 'Rize, TU',
   'datacoverage': 1,
   'id': 'CITY:TU000046'},
  {'mindate': '2003-11-01',
   'maxdate': '2019-09-07',
   'name': 'Sakarya, TU',
   'datacoverage': 0.9986,
   'id': 'CITY:TU000047'},
  {'mindate': '1951-05-08',
   'maxdate': '2019-09-07',
   'name': 'Samsun, TU',
   'datacoverage': 0.9986,
   'id': 'CITY:TU000048'},
  {'mindate': '1963-01-01',
   'maxdate': '2019-09-07',
   'name': 'Siirt, TU',
   'datacoverage': 0.9993,
   'id': 

## Put response json into dataframe

In [3]:
# Parse the locations payload once and tabulate its "results" records
# (one row per location).
response_json = response.json()
df = pd.DataFrame(data=response_json["results"])
df.head(n=5)

Unnamed: 0,datacoverage,id,maxdate,mindate,name
0,0.9973,CITY:TU000042,2019-09-07,2003-11-01,"Mus, TU"
1,1.0,CITY:TU000043,2019-09-07,2003-11-01,"Nevsehir, TU"
2,0.9999,CITY:TU000044,2019-09-07,2001-07-01,"Nigde, TU"
3,1.0,CITY:TU000046,2019-09-07,1927-03-01,"Rize, TU"
4,0.9986,CITY:TU000047,2019-09-07,2003-11-01,"Sakarya, TU"


## Use imported city list to put city lookup into format necessary for this API

In [4]:
# Pair each city with its state and build a "<city>, <state> US" lookup string,
# which is the form the location names take in the API results.
city_df = (
    pd.DataFrame(clist, columns=["city"])
    .assign(state=alist)
    .assign(lookup=lambda frame: frame["city"] + ", " + frame["state"] + " US")
)
city_df.head(n=5)

Unnamed: 0,city,state,lookup
0,Birmingham,AL,"Birmingham, AL US"
1,Mobile,AL,"Mobile, AL US"
2,Montgomery,AL,"Montgomery, AL US"
3,Huntsville,AL,"Huntsville, AL US"
4,Tuscaloosa,AL,"Tuscaloosa, AL US"


## Reduce columns from weather location dataframe into just id lookup and name

In [5]:
# Only the location id and its display name are needed from here on.
df_ids = df.loc[:, ["id", "name"]]
df_ids.head(n=5)

Unnamed: 0,id,name
0,CITY:TU000042,"Mus, TU"
1,CITY:TU000043,"Nevsehir, TU"
2,CITY:TU000044,"Nigde, TU"
3,CITY:TU000046,"Rize, TU"
4,CITY:TU000047,"Sakarya, TU"


## Reduce columns from city dataframe into just lookup value

In [6]:
# Series of lookup strings for the cities we care about.
cities = city_df.loc[:, "lookup"]
cities.head(n=5)

0    Birmingham, AL US
1        Mobile, AL US
2    Montgomery, AL US
3    Huntsville, AL US
4    Tuscaloosa, AL US
Name: lookup, dtype: object

## Reduce dataframe of cities to just the cities in our list

In [7]:
# Keep only the API locations whose name exactly matches one of our lookup strings.
# Cities without an exact match are dropped.
df_our_cities = df_ids.loc[df_ids["name"].isin(cities)]
df_our_cities.head(n=5)

Unnamed: 0,id,name
72,CITY:US010004,"Birmingham, AL US"
80,CITY:US010012,"Huntsville, AL US"
82,CITY:US010014,"Mobile, AL US"
83,CITY:US010015,"Montgomery, AL US"
87,CITY:US010019,"Tuscaloosa, AL US"


In [9]:
# Index the matched locations by city name; the climate columns fetched in the
# following cells are attached to this frame.
df_index = df_our_cities.set_index(keys="name")
df_index.head(n=5)

Unnamed: 0_level_0,id
name,Unnamed: 1_level_1
"Birmingham, AL US",CITY:US010004
"Huntsville, AL US",CITY:US010012
"Mobile, AL US",CITY:US010014
"Montgomery, AL US",CITY:US010015
"Tuscaloosa, AL US",CITY:US010019


## Loop through cities and get data and add to dataframe

In [10]:
# Fetch the ANN-CLDD-NORMAL value (annual cooling degree days) for every matched
# city and store it in a new "CDD" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
CDD_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-CLDD-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    CDD = results[0]["value"] if results else None
    CDD_list.append(CDD)
df_index["CDD"] = CDD_list
df_index.head()

Unnamed: 0_level_0,id,CDD
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Birmingham, AL US",CITY:US010004,1933
"Huntsville, AL US",CITY:US010012,1540
"Mobile, AL US",CITY:US010014,2537
"Montgomery, AL US",CITY:US010015,2113
"Tuscaloosa, AL US",CITY:US010019,2348


In [11]:
# Fetch the ANN-HTDD-NORMAL value (annual heating degree days) for every matched
# city and store it in a new "HDD" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
HDD_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-HTDD-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    HDD = results[0]["value"] if results else None
    HDD_list.append(HDD)
df_index["HDD"] = HDD_list
df_index.head()

Unnamed: 0_level_0,id,CDD,HDD
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Birmingham, AL US",CITY:US010004,1933,2770
"Huntsville, AL US",CITY:US010012,1540,3517
"Mobile, AL US",CITY:US010014,2537,1656
"Montgomery, AL US",CITY:US010015,2113,2278
"Tuscaloosa, AL US",CITY:US010019,2348,2338


In [12]:
# Fetch the ANN-TAVG-NORMAL value (annual average temperature) for every matched
# city and store it, converted to Fahrenheit, in a new "Tavg" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Tavg_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-TAVG-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid,
        "units":"metric"
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    # Metric value is in degrees Celsius; convert to Fahrenheit.
    Tavg = (results[0]["value"]*9/5+32) if results else None
    Tavg_list.append(Tavg)
df_index["Tavg"] = Tavg_list
df_index.head()

Unnamed: 0_level_0,id,CDD,HDD,Tavg
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Birmingham, AL US",CITY:US010004,1933,2770,62.78
"Huntsville, AL US",CITY:US010012,1540,3517,59.54
"Mobile, AL US",CITY:US010014,2537,1656,67.46
"Montgomery, AL US",CITY:US010015,2113,2278,64.58
"Tuscaloosa, AL US",CITY:US010019,2348,2338,64.94


In [None]:
# Fetch the ANN-DUTR-NORMAL value (annual diurnal temperature range) for every
# matched city and store it, in Fahrenheit degrees, in a new "Trange" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Trange_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-DUTR-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid,
        # A range is a temperature *difference*, so the absolute-temperature
        # formula C*9/5+32 is not a valid conversion for it. Ask the API for
        # standard (Fahrenheit) units and use the value as returned.
        # NOTE(review): confirm against a known station that the result is
        # close to Tmax - Tmin.
        "units":"standard"
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    Trange = results[0]["value"] if results else None
    Trange_list.append(Trange)
df_index["Trange"] = Trange_list
df_index.head()

In [None]:
# Fetch the ANN-TMAX-NORMAL value (annual maximum-temperature normal) for every
# matched city and store it, converted to Fahrenheit, in a new "Tmax" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Tmax_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-TMAX-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid,
        "units":"metric"
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    # Metric value is in degrees Celsius; convert to Fahrenheit.
    Tmax = (results[0]["value"]*9/5+32) if results else None
    Tmax_list.append(Tmax)
df_index["Tmax"] = Tmax_list
df_index.head()

In [None]:
# Fetch the ANN-TMIN-NORMAL value (annual minimum-temperature normal) for every
# matched city and store it, converted to Fahrenheit, in a new "Tmin" column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Tmin_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-TMIN-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid,
        "units":"metric"
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    # Metric value is in degrees Celsius; convert to Fahrenheit.
    Tmin = (results[0]["value"]*9/5+32) if results else None
    Tmin_list.append(Tmin)
df_index["Tmin"] = Tmin_list
df_index.head()

In [None]:
# Fetch the ANN-PRCP-NORMAL value (annual precipitation normal) for every matched
# city and store it in a new "Prcp" column.
# NOTE(review): no "units" parameter is sent, so the value is stored exactly as the
# API returns it — confirm which unit/scale that is before using the column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Prcp_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-PRCP-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    Prcp = results[0]["value"] if results else None
    Prcp_list.append(Prcp)
df_index["Prcp"] = Prcp_list
df_index.head()

In [None]:
# Fetch the ANN-SNOW-NORMAL value (annual snowfall normal) for every matched city
# and store it in a new "Snow" column.
# NOTE(review): no "units" parameter is sent, so the value is stored exactly as the
# API returns it — confirm which unit/scale that is before using the column.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/data"
Snow_list = []
for cityid in df_index["id"]:
    params = {
        "datasetid":"NORMAL_ANN",
        "datatypeid":"ANN-SNOW-NORMAL",
        "startdate":"2010-01-01",
        "enddate":"2010-01-01",
        "locationid":cityid
    }
    # timeout: a stalled connection must not hang the whole loop.
    response = requests.get(base_url, params=params, headers=headers, timeout=30)
    # Stop on HTTP errors (bad token, rate limit, ...) so they are not mistaken
    # for "this city has no data" below.
    response.raise_for_status()
    # A successful response without a "results" key means no value for this city;
    # record a missing value so the list stays aligned with df_index.
    results = response.json().get("results", [])
    Snow = results[0]["value"] if results else None
    Snow_list.append(Snow)
df_index["Snow"] = Snow_list
df_index.head()

## Reset index

In [None]:
# Move the city name out of the index and back into an ordinary "name" column.
# Running this cell a second time would reset the new integer index as well.
df_index = df_index.reset_index(drop=False)

## Save to CSV

In [None]:
# Persist the assembled table next to the notebook. The DataFrame index is
# written as the first, unnamed CSV column (pandas default).
df_index.to_csv(path_or_buf="weather_data.csv")