### Notebook Overview
1. #### Automatically download county weather data from NOAA at: ftp://ftp.ncdc.noaa.gov/pub/data/cirs/climdiv/
  - Data: precipitation, Tmax, Tmin, Tavg

  - These county-level files are downloaded to your working directory
  
    - climdiv-pcpncy-vx.y.z-YYYYMMDD

    - climdiv-tmaxcy-vx.y.z-YYYYMMDD

    - climdiv-tmincy-vx.y.z-YYYYMMDD
    
    - climdiv-tmpccy-vx.y.z-YYYYMMDD  
      

2. ####  Read in FIPS transforms: NOAA uses different state FIPS codes than the Census Bureau
  
  - You will need these files in a `noaa_to_census` subfolder of your working directory for the required FIPS transforms

    - `noaa_fips.txt`

    - `noaa_states.txt`

    - `state_fips.txt`
    
3. ####  Read in NOAA data (dataframes)

4. ####  Format dataframes (year filter and pivot to usable form)

5. ####  Write formatted data file to working directory

In [None]:
# ALL IMPORTS
from ftplib import FTP
import os
import pandas as pd
import itertools
import time
import numpy as np
import random as rd

pd.set_option('display.max_rows', 500)

### Enter the start and end years of the weather data you would like (max range: 1895-2020)

In [None]:
# First and last year of weather data to keep; both ends are included.
start_user, end_user = 2010, 2020

### 1. Download county data from NOAA

In [None]:
def countdown(t):
    """Print a once-per-second countdown on a single console line.

    Parameters
    ----------
    t : int
        Number of whole seconds to wait. Zero or a negative value returns
        immediately instead of looping.

    Returns
    -------
    None
    """
    remaining = t
    # Compare against zero explicitly: a plain truthiness test never ends
    # when the starting value is negative.
    while remaining > 0:
        mins, secs = divmod(remaining, 60)
        timer = '{:02d}:{:02d}'.format(mins, secs)
        # "\r" rewinds to the start of the line so each tick overwrites the last.
        print("Retry in: " + timer + " seconds", end="\r")
        time.sleep(1)
        remaining -= 1

def _fetch_county_files():
    """Download the four county climdiv files into the working directory.

    Opens one anonymous FTP session, lists the climdiv directory, and fetches
    every entry whose name contains a dot and one of the four county
    prefixes. A local file with the same name is removed first so that a
    partial download from an earlier attempt is never kept.

    Returns
    -------
    list of str
        Names of the files that were downloaded.
    """
    prefixes = ("climdiv-pcpncy", "climdiv-tmaxcy",
                "climdiv-tmincy", "climdiv-tmpccy")

    # The context manager closes the connection even when a transfer fails.
    with FTP('ftp.ncdc.noaa.gov') as ftp:
        ftp.login()                       # anonymous ftp login
        ftp.cwd('pub/data/cirs/climdiv')  # change directory

        targets = [
            name for name in ftp.nlst()
            if len(name.split('.')) > 1
            and any(prefix in name for prefix in prefixes)
        ]

        for name in targets:
            # Only the files about to be downloaded are deleted; other local
            # files are left alone.
            if os.path.exists(name):
                os.remove(name)
                print(f"Deleted {name}")

            with open(name, 'wb') as fp:
                print(f'Downloading: {name.split("/")[-1]}')
                ftp.retrbinary('RETR ' + name, fp.write)

    return targets


def download_noaa(max_tries, min_delay, max_delay):
    """Download the NOAA county weather files, retrying on failure.

    Parameters
    ----------
    max_tries : int
        Maximum number of download attempts.
    min_delay, max_delay : int
        Bounds (seconds, inclusive) of the random wait between a failed
        attempt and the next one.

    Returns
    -------
    list of str
        Names of the downloaded files, or an empty list when every attempt
        failed (or ``max_tries`` is less than 1).
    """
    from ftplib import all_errors  # FTP protocol errors, OSError and EOFError

    for attempt in range(1, max_tries + 1):
        try:
            files_to_download = _fetch_county_files()
        except all_errors as e:
            print(f'Exception: {e}')
            # Every failure counts as an attempt; no waiting after the last one.
            if attempt < max_tries:
                countdown(rd.randint(min_delay, max_delay))
        else:
            print("\n")
            print(f"Complete. Files downloaded to: {os.getcwd()}")
            return files_to_download

    print(f'Exceeded {max_tries} max download attempts')
    return []

In [None]:
# Fetch the county files: up to 5 tries, with a random 30-60 second delay range.
files_to_download = download_noaa(max_tries=5, min_delay=30, max_delay=60)

### 2. Read in FIPS transforms

In [None]:
# Load the three lookup tables used to translate NOAA state codes to census FIPS.
# Code columns go through a str converter so they are kept as text.

# Folder holding the FIPS lookup files
dir_ = f'{os.getcwd()}/noaa_to_census'

# File locations: NOAA state codes (from the NOAA README), census state codes,
# and NOAA county codes with county names
noaa = f"{dir_}/noaa_states.txt"
state_fips = f"{dir_}/state_fips.txt"
noaa_fn = f"{dir_}/noaa_fips.txt"

# NOAA state-level table (comma separated)
noaa_conv = pd.read_csv(noaa, sep=",", engine='python',
                        converters={'code_noaa': str})

# Census state-level table (tab separated); the full state name is dropped
# because only the abbreviation is used
census_conv = pd.read_csv(state_fips, sep="\t", engine='python',
                          converters={'code': str})
census_conv = census_conv.drop(columns="Name")

# NOAA county-level table (tab separated)
noaa_fips = pd.read_csv(noaa_fn, sep="\t", engine='python',
                        converters={'noaa_fips': str})

In [None]:
# Put the NOAA and census state tables side by side, then pull the four
# columns needed for the mapping out as plain lists.
fips_rs = pd.concat([noaa_conv, census_conv], axis=1)
noaa_code, noaa_state, census_state, census_code = (
    list(fips_rs[column])
    for column in ("code_noaa", "state_noaa", "state", "code")
)

In [None]:
# Step 1: state abbreviation -> [census state FIPS]
trans = {abbr: [code] for abbr, code in zip(census_state, census_code)}

# Step 2: append the NOAA state code(s) listed under the same abbreviation
for abbr, codes in trans.items():
    codes.extend(
        noaa_value
        for noaa_abbr, noaa_value in zip(noaa_state, noaa_code)
        if abbr == noaa_abbr
    )

# Step 3: drop census states that picked up no NOAA code
del_keys = [abbr for abbr, codes in trans.items() if len(codes) == 1]
for abbr in del_keys:
    trans.pop(abbr)

# Step 4: re-key by NOAA state code -> [census state FIPS, state abbreviation]
transformer = {codes[1]: [codes[0], abbr] for abbr, codes in trans.items()}

In [None]:
# Preview the first 10 entries of the transform
# KEY = NOAA state FIPS :: VALUE = [census state FIPS, state abbreviation]
print({key: value for key, value in itertools.islice(transformer.items(), 10)})

In [None]:
# Spot-check the transform against two known pairs:
#   NOAA Ohio = 33   -> Census Ohio = 39
#   NOAA Oregon = 35 -> Census Oregon = 41
tests = ["33", "35"]
for test in tests:
    mapped = transformer[test]
    print(f'NOAA {test} = Census {mapped[0]} = {mapped[1]}')

### 3. Read in NOAA Data

In [None]:
# Work out which years to drop: the years that appear in exactly one of the
# two ranges (full 1895-2020 span vs. the years the user asked for).
base_years = list(range(1895, 2021))
user_years = list(range(start_user, end_user + 1))
yr_filter = set(base_years) ^ set(user_years)
yr_filter_str = list(map(str, yr_filter))

def year_filter(df, yr_filter_str):
    """Drop the rows whose year is listed in ``yr_filter_str``.

    The year is taken from the last four characters of ``noaa_code``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``noaa_code`` column. It is not modified.
    yr_filter_str : iterable of str
        Four-digit years to remove. An empty iterable removes nothing.

    Returns
    -------
    pandas.DataFrame
        The remaining rows with a fresh 0..n-1 index.
    """
    years = df["noaa_code"].str[-4:]
    # Exact membership instead of a joined regex: an empty list would give the
    # pattern '' which matches every row and would discard all the data.
    keep = ~years.isin(list(yr_filter_str))
    return df[keep].reset_index(drop=True)

# Read in NOAA data and apply year filter
def read_filter_data(file):
    """Load one whitespace-delimited NOAA county file and keep the requested years.

    Parameters
    ----------
    file : str
        Path of the NOAA file; the part after the last "/" is used in the
        progress messages.

    Returns
    -------
    pandas.DataFrame
        Column ``noaa_code`` (string) followed by columns 1..12, with the
        rows filtered by ``year_filter`` using the module-level
        ``yr_filter_str``.
    """
    fn = file.split("/")[-1]
    print(f'Reading:    {fn}')

    names = ['noaa_code', *range(1, 13)]
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    df = pd.read_csv(file, sep=r'\s+',
                     converters={'noaa_code': str},
                     engine='python',
                     names=names,
                     header=None)

    # Filter by selected years:
    print(f"Filtering:  {fn}")
    return year_filter(df, yr_filter_str)

# pivot wx data from column to row
def restack_df(df, fn):
    """Reshape a wide frame (one column per month) into long form.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``noaa_code`` column plus one column per month.
    fn : str
        Two-character element code: "01" precipitation, "02" Tavg,
        "27" Tmax, "28" Tmin.

    Returns
    -------
    pandas.DataFrame
        Columns ``noaa_code``, ``month`` and one value column named after
        the weather element.

    Raises
    ------
    ValueError
        If ``fn`` is not one of the four known element codes.
    """
    element_names = {
        "01": "precipitation",
        "02": "Tavg",
        "27": "Tmax",
        "28": "Tmin",
    }
    try:
        wx = element_names[fn]
    except KeyError:
        raise ValueError(f"Unknown NOAA element code: {fn!r}") from None

    stacked = df.set_index('noaa_code').stack()
    # After reset_index the month labels sit in 'level_1' and the values in 0.
    return (pd.DataFrame(stacked)
            .reset_index()
            .rename(columns={'level_1': 'month', 0: wx}))

# Build the full census county FIPS for one row
def census_fip(row):
    """Return the row's census state FIPS followed by the last three
    characters of its NOAA FIPS (the county part)."""
    return row.census_state_fips + row.noaa_fips[-3:]
    
# Build a "YYYY-MM-28" timestamp string for one row
def gen_ts(row):
    """Return year-month-day text for a row.

    The year is the last four characters of ``row.noaa_code``, the month is
    ``row.month`` with a leading zero for values up to 9, and the day is
    always "28".
    """
    month_text = str(row.month)
    if int(row.month) <= 9:
        month_text = "0" + month_text
    year_text = row.noaa_code[-4:]
    return "-".join([year_text, month_text, "28"])

# Strip the word "County" from a county name
def format_county(name):
    """Return ``name`` without "County" and without surrounding whitespace.

    Names that do not contain "County" are returned untouched.
    """
    if "County" not in name:
        return name
    return name.replace("County", "").strip()

# To avoid 4 columns of noaa_codes, replace the wx-type with "wx"
def replace_it(x):
    """Replace the two-character element code at positions 5-6 with "wx".

    Only that slice is replaced; the same two characters appearing elsewhere
    in the code (for example inside the FIPS or the year) are left intact.

    Parameters
    ----------
    x : str
        NOAA code string.

    Returns
    -------
    str
        The code with positions 5-6 set to "wx". Strings shorter than seven
        characters have no element code and are returned unchanged.
    """
    if len(x) < 7:
        return x
    return x[:5] + "wx" + x[7:]

In [None]:
# Back-up if ftp site fails; must have these files already in the directory.
# The hard-coded names are used only when the download step produced no file
# list, so names returned by a successful download are not overwritten.
try:
    files_to_download
except NameError:
    files_to_download = []

if not files_to_download:
    files_to_download = ["climdiv-pcpncy-v1.0.0-20201104",
                         "climdiv-tmaxcy-v1.0.0-20201104",
                         "climdiv-tmincy-v1.0.0-20201104",
                         "climdiv-tmpccy-v1.0.0-20201104"]

# Full paths of the files in the working directory
starter = f"{os.getcwd()}/"
files = [starter + file for file in files_to_download]

In [None]:
# Load every NOAA file and keep only the requested years
df_list = [read_filter_data(path) for path in files]

print("Complete")

In [None]:
# restack wx data column-to-row
df_stack = []
for df in df_list:

    # Characters 5-6 of the NOAA code identify the weather element of the file
    fn = df.noaa_code.iloc[0][5:7]

    df_ = restack_df(df, fn)

    # Drop rows whose NOAA code starts with '50'
    # NOTE(review): presumably NOAA's state code for Alaska - confirm against the NOAA README
    keep = ~df_['noaa_code'].astype(str).str.startswith('50')
    # Explicit copy so the column assignment below acts on an independent
    # frame rather than a slice (avoids SettingWithCopyWarning).
    df_ = df_[keep].copy()

    # First five characters = NOAA state + county code
    df_['noaa_fips'] = df_['noaa_code'].str[:5]

    df_stack.append(df_)

In [None]:
# Convert NOAA to Census FIPS
transformer_df = pd.DataFrame.from_dict(transformer).transpose().rename(columns = {0:'census_state_fips', 1: 'state'})
noaa_fips['county_name'] = noaa_fips['county_name'].apply(lambda x: format_county(x))

df_aug = []
for df in df_stack:
    
    df_ = df.join(noaa_fips.set_index('noaa_fips'), how='left', on='noaa_fips')
    df_['noaa_state_fips'] = df_.noaa_fips.apply(lambda x: x[:2])
    df_ = df_.join(transformer_df, how='left', on='noaa_state_fips')
    df_['census_county_fips'] = df_.apply(lambda row: census_fip(row), axis=1)
    %time df_['timestamp'] = df_.apply(lambda row: gen_ts(row), axis=1)
    
    df_aug.append(df_)

# Final clean-up of each frame before the frames are combined.
# Every step returns a new frame, so the frames in df_aug stay untouched and
# this cell can be re-run without a KeyError on already-removed columns.
df_join = []
for df in df_aug:
    df = df.drop(columns=["census_state_fips", "noaa_state_fips"])
    df = df.rename(columns={'noaa_fips': 'noaa_county_fips'})

    # -99.90 and -9.99 are converted to NaN (np.nan: the np.NaN alias was
    # removed in NumPy 2.0)
    df = df.replace([-99.90, -9.99], np.nan)

    df_join.append(df)
print("Complete with join")

In [None]:
# Combine the wx dataframes into one df
column_order = ["timestamp", "county_name", "state", "census_county_fips",
                "noaa_county_fips", "precipitation", "Tavg", "Tmin", "Tmax",
                "noaa_code"]

result = pd.concat(df_join, axis=1)

# Column names repeat across the frames; keep the first occurrence of each name
_, i = np.unique(result.columns, return_index=True)
res = result.iloc[:, i][column_order]

# Masking the element code inside noaa_code (replace_it) is currently disabled:
#res['noaa_code'] = res.noaa_code.apply(lambda x: replace_it(x))

In [None]:
# Show the combined table as the cell output
res

In [None]:
# Save the combined table as a CSV in the working directory (row index omitted)
out_path = fr'{os.getcwd()}/county_wx_{start_user}_{end_user}.csv'
res.to_csv(out_path, index=False)