In [2]:
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import numpy.ma as ma
import os, sys
import itertools
from datetime import datetime, timezone
import calendar
import json
from operator import itemgetter
import textwrap

In [113]:
def _month_range(year):
    '''
    This function will define a range of months, based on the entered param, year.

    Parameters
    ----------
    year : int, the year for which a range of months is being defined

    Returns
    ----------
    months : list, a list of months as integers

    '''

    # set up current date parameters to check with
    current_date = datetime.now()
    current_year = current_date.year
    current_month = current_date.month

    # check if current year
    # if it is current year, then we need to automatically define a
    # range of months based on the current month
    if year == current_year:
        return [x for x in range(current_month + 1)][2:]  # set up the months to be the last most recently completed month, starting with Feb for Jan product

    # if it is not the current year, then set up a list with all months
    elif year != current_year:
        return [x for x in range(13)][2:]  # start in feb for jan CLM product

In [119]:
def _get_cf6_product_links(pil, months, y):
    '''
    A function that will query the IEM API for CF6 products, and return the corresponding API links to those products

    Parameters
    ----------
    pil : str, 6 character pil for CLM products, e.g., CLMLIT
    months : list, a list of months as integers, for which to find urls for
    y : int, year in YYYY format

    Returns
    ----------
    api_links : list, a list of API link strings

    '''

    print(f'\nSearching for {y} CF6{pil[3:]} Sheets...')
    print('*NOTE*, Final CF6 sheet for any given month are issued on the first of the following month...\n')

    # y = datetime.now().year
    # months = [x for x in range(13)][2:]  # start in feb for jan CLM product
    #days = [x for x in range(15)][1:]

    # empty list to store api_links for each monthly product
    api_links = []

    for m in months:
        print(f'---{calendar.month_name[m - 1][:3]} {y}---')
        #for d in days:

        # this is the api link that returns a json file with info, either including product issuance info, or little info, in json format
        # url = 'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil=CLMLIT&date=2023-02-04'
        url = f'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil=CF6&date={y}-{m:02}-01'

        # A GET request to the API
        response = requests.get(url)

        # turn the request into a json, and subsequently a dictionary
        response = response.json()

        for item in response['data']:
            if pil[3:] in item['pil']:
                api_links.append(item['text_link'])
                break

    # ------------------------------------------------------------
    # Check if we need December
    # small section to do dec, because the dec CLM product is issued in jan of the following year,
    # but only run for non current year settings, if it is the current year,
    # then its possible we are not there yet
    if y != datetime.now().year:

        m = 1
        print(f'---{calendar.month_name[12][:3]} {y}---')
        #for d in days:

        url = f'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil=CF6&date={y + 1}-{m:02}-01'

        # A GET request to the API
        response = requests.get(url)

        # turn the request into a json, and subsequently a dictionary
        response = response.json()

        for item in response['data']:
            if pil[3:] in item['pil']:
                api_links.append(item['text_link'])
                break

    print(f'Finished searching... Found {len(api_links)} {y} CF6{pil[3:]} Sheets...\n')
    return api_links

In [120]:
# issuance months to query for 2023
months = _month_range(2023)

# API links to the 2023 CF6 sheets; only the last three characters of the pil ('HRO') are used for matching
cf6_links = _get_cf6_product_links('CLMHRO', months, 2023)

# display the links that were found
cf6_links


Searching for 2023 CF6HRO Sheets...
*NOTE*, Final CF6 sheet for any given month are issued on the first of the following month...

---Jan 2023---
---Feb 2023---
---Mar 2023---
---Apr 2023---
---May 2023---
---Jun 2023---
---Jul 2023---
---Aug 2023---
---Sep 2023---
---Oct 2023---
---Nov 2023---
---Dec 2023---
Finished searching... Found 12 2023 CF6HRO Sheets...



['https://mesonet.agron.iastate.edu/api/1/nwstext/202302011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202303011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202304011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202305010626-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202306010629-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202307011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202308011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202309011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202310010641-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202311011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202312011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202401011330-KLZK-CXUS54-C

In [147]:
year = 2023

In [223]:
# three-letter month keys (JAN, FEB, ...), one per entry in months, counted from January
month_keys = [calendar.month_name[n][:3].upper() for n in range(1, len(months) + 1)]

# any year other than the current one also gets a DEC entry
if year != datetime.now().year and 'DEC' not in month_keys:
    month_keys.append('DEC')

# establish our main working dictionary, every month gets its own fresh set of containers
data_dict = {
    key: {
        'monthly_max_min': [],
        'high_dates': None,
        'low_dates': None,
        'monthly_avg_temps_dfn': [],  # avg high, low, mean, mean dfn
        'monthly_rain_and_dfn': [],  # monthly total rain and dfn
        'max_clndr_24hr_rain': [],  # [max 24 hr rainfall (float), 24 hr rainfall dates, max calendar day rainfall (float), dates]
        # monthly total snow, total snow dfn, greatest snow depth, and date(s) of greatest snow depth
        'monthly_snow_sdepth_dfn': [],
        'max_clndr_24hr_snow': [],  # max 24 hr total snow and date(s) of 24 hr total snow
    }
    for key in month_keys
}

# add in a sub-dictionary for the misc temp data
data_dict['misc_temp_data'] = {
    'n days - minT <= 32': 0,
    'n days - minT <= 20': 0,
    'n days - minT <= 0': 0,
    'n days - maxT <= 32': 0,
    'n days - maxT >= 90': 0,
    'n days - maxT >= 100': 0,
    'n days - maxT >= 105': 0,
    'n days - maxT >= 110': 0,
}

# the CF6 sub-dictionary uses the same month keys
cf6_dict = {
    key: {
        'max_clndr_24hr_snow': [],  # max 24 hr snow val, and date(s) as taken from the CF6 sheet
        'max_sdepth': [],  # greatest snow depth, and date(s) as taken from the CF6 sheet
    }
    for key in month_keys
}

cf6_dict['pres'] = {
    'min_pres': [],  # min pres (inHg) and the date of occurrence
    'max_pres': [],  # max pres (inHg) and the date of occurrence
}

cf6_dict['wind'] = {
    'max_gst': [],  # wind gusts >= 50 mph
    'max_wdr': [],  # corresponding wind directions for the significant wind speeds
    'max_wd_dates': [],  # corresponding dates for the max wind gusts
}

data_dict['cf6_data'] = cf6_dict

In [151]:
data_dict

{'JAN': {'monthly_max_min': [],
  'high_dates': None,
  'low_dates': None,
  'monthly_avg_temps_dfn': [],
  'monthly_rain_and_dfn': [],
  'max_clndr_24hr_rain': [],
  'monthly_snow_sdepth_dfn': [],
  'max_clndr_24hr_snow': []},
 'FEB': {'monthly_max_min': [],
  'high_dates': None,
  'low_dates': None,
  'monthly_avg_temps_dfn': [],
  'monthly_rain_and_dfn': [],
  'max_clndr_24hr_rain': [],
  'monthly_snow_sdepth_dfn': [],
  'max_clndr_24hr_snow': []},
 'MAR': {'monthly_max_min': [],
  'high_dates': None,
  'low_dates': None,
  'monthly_avg_temps_dfn': [],
  'monthly_rain_and_dfn': [],
  'max_clndr_24hr_rain': [],
  'monthly_snow_sdepth_dfn': [],
  'max_clndr_24hr_snow': []},
 'APR': {'monthly_max_min': [],
  'high_dates': None,
  'low_dates': None,
  'monthly_avg_temps_dfn': [],
  'monthly_rain_and_dfn': [],
  'max_clndr_24hr_rain': [],
  'monthly_snow_sdepth_dfn': [],
  'max_clndr_24hr_snow': []},
 'MAY': {'monthly_max_min': [],
  'high_dates': None,
  'low_dates': None,
  'monthly_av

In [126]:
def _get_wind_data(data_dict, pil, year, max_wd):

    '''
    This function will utilize the daily climate page API from IEM, to get max wind values
    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM and CF6 product data
    pil : The product ID, this is passed down to the function through main, e.g. 'CLMLZK'
    year : int, the climate year
    max_wd : int, the max wind threshold to make an entry for.

    Returns
    ----------
    None

    '''

    # the IEM API for daily climate data, for a given site and year
    url = f"https://mesonet.agron.iastate.edu/json/cli.py?station=K{pil[3:]}&year={year}"
    
    # A GET request to the API
    response = requests.get(url)
    
    # turn the request into a json, and subsequently a dictionary
    response = response.json()
    
    for item in response['results']:
        # max wind gust, direction, and date
        if str(item['highest_gust_speed']) not in 'M' and item['highest_gust_speed'] >= max_wd:
            data_dict['cf6_data']['wind']['max_gst'].append(item['highest_gust_speed']) # add the wind gust
            data_dict['cf6_data']['wind']['max_wdr'].append(item['highest_gust_direction']) # add the wind gust direction
            data_dict['cf6_data']['wind']['max_wd_dates'].append(item['valid'])
    return



In [132]:
response['results']

[{'station': 'KHRO',
  'valid': '2024-01-01',
  'state': 'AR',
  'wfo': 'LZK',
  'link': '/api/1/nwstext/202401020730-KLZK-CDUS44-CLIHRO',
  'product': '202401020730-KLZK-CDUS44-CLIHRO',
  'name': 'HARRISON/BOONE CO.',
  'high': 40,
  'high_record': 73,
  'high_record_years': [2023],
  'high_normal': 47,
  'high_depart': -7,
  'high_time': '354 PM',
  'low': 24,
  'low_record': -4,
  'low_record_years': [1928, 1974],
  'low_normal': 28,
  'low_depart': -4,
  'low_time': '1159 PM',
  'precip': 0.0,
  'precip_normal': 0.1,
  'precip_month': 0.0,
  'precip_month_normal': 0.1,
  'precip_jan1': 0.0,
  'precip_jan1_normal': 0.1,
  'precip_jun1': 'M',
  'precip_jun1_normal': 'M',
  'precip_jul1': 'M',
  'precip_dec1': 0.85,
  'precip_dec1_normal': 3.04,
  'precip_record': 2.42,
  'precip_record_years': [1966],
  'snow': 0.0,
  'snowdepth': 0.0,
  'snow_normal': 'M',
  'snow_month': 0.0,
  'snow_jun1': 'M',
  'snow_jul1': 'T',
  'snow_dec1': 'T',
  'snow_record': 3.0,
  'snow_record_years': [1

In [139]:
for item in response['results']:
    gust = item['highest_gust_speed']

    # missing gusts come through as 'M'; only a real number can be compared against the threshold,
    # so anything non-numeric is passed over instead of raising on the comparison
    if isinstance(gust, bool) or not isinstance(gust, (int, float)):
        continue

    # max wind gust, direction, and date
    if gust >= 50.0:
        data_dict['cf6_data']['wind']['max_gst'].append(gust) # add the wind gust
        data_dict['cf6_data']['wind']['max_wdr'].append(item['highest_gust_direction']) # add the wind gust direction
        data_dict['cf6_data']['wind']['max_wd_dates'].append(item['valid'])



{'max_gst': [55], 'max_wdr': [150], 'max_wd_dates': ['2024-01-08']}

In [137]:
data_dict['cf6_data']['wind']

{'max_gst': [], 'max_wdr': [], 'max_wd_dates': []}

In [104]:
'''
Lets try to get some data from the CF6 instead... its a little more consistent


'''

url = 'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KHRO&pil=CF6&date=2024-02-01'

# query the API and decode the json body into a dictionary in one step
response = requests.get(url).json()

#response


In [94]:
response['data']

[{'index': 0,
  'entered': '2024-03-01T07:42:00Z',
  'pil': 'CF6HRO',
  'product_id': '202403010742-KLZK-CXUS54-CF6HRO',
  'cccc': 'KLZK',
  'count': 1,
  'link': 'https://mesonet.agron.iastate.edu/p.php?pid=202403010742-KLZK-CXUS54-CF6HRO',
  'text_link': 'https://mesonet.agron.iastate.edu/api/1/nwstext/202403010742-KLZK-CXUS54-CF6HRO'},
 {'index': 1,
  'entered': '2024-03-01T07:42:00Z',
  'pil': 'CF6LIT',
  'product_id': '202403010742-KLZK-CXUS54-CF6LIT',
  'cccc': 'KLZK',
  'count': 1,
  'link': 'https://mesonet.agron.iastate.edu/p.php?pid=202403010742-KLZK-CXUS54-CF6LIT',
  'text_link': 'https://mesonet.agron.iastate.edu/api/1/nwstext/202403010742-KLZK-CXUS54-CF6LIT'},
 {'index': 2,
  'entered': '2024-03-01T07:42:00Z',
  'pil': 'CF6LZK',
  'product_id': '202403010742-KLZK-CXUS54-CF6LZK',
  'cccc': 'KLZK',
  'count': 1,
  'link': 'https://mesonet.agron.iastate.edu/p.php?pid=202403010742-KLZK-CXUS54-CF6LZK',
  'text_link': 'https://mesonet.agron.iastate.edu/api/1/nwstext/202403010742

In [105]:
pil = 'CLMHRO'

# walk the listed products and keep the text link of each one whose pil
# contains the three character site ID (the last match is the one that sticks)
for item in response['data']:
    if pil[3:] not in item['pil']:
        continue
    cf6_url = item['text_link']

In [177]:
# fetch the first CF6 sheet; the context manager closes the connection once the body is read
with urlopen(cf6_links[0]) as page:
    html = page.read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()  # rip it out

# get text
text = soup.get_text()

In [178]:
text.splitlines()

['264 ',
 'CXUS54 KLZK 011330',
 'CF6HRO',
 'PRELIMINARY LOCAL CLIMATOLOGICAL DATA (WS FORM: F-6)',
 '',
 '                                          STATION:   HARRISON',
 '                                          MONTH:     JANUARY',
 '                                          YEAR:      2023',
 '                                          LATITUDE:   36 15 N      ',
 '                                          LONGITUDE:  93  9 W    ',
 '',
 '  TEMPERATURE IN F:       :PCPN:    SNOW:  WIND      :SUNSHINE: SKY     :PK WND ',
 '1   2   3   4   5  6A  6B    7    8   9   10  11  12  13   14  15   16   17  18',
 '                                     12Z  AVG MX 2MIN',
 'DY MAX MIN AVG DEP HDD CDD  WTR  SNW DPTH SPD SPD DIR MIN PSBL S-S WX    SPD DR',
 '',
 ' 1  73  42  58  21   7   0 0.00  0.0    M  8.6 17 170   M    M   2 12     22 140',
 ' 2  75  50  63  26   2   0 0.37  0.0    0  8.1 20 150   M    M   6 123    26 120',
 ' 3  65  42  54  17  11   0 0.13  0.0    0  6.2 18 240   M    M   1 

In [182]:
# keep what follows the page 2 header, then what follows the snow legend line
cf6_snow_text = text.rpartition('\nPRELIMINARY LOCAL CLIMATOLOGICAL DATA (WS FORM: F-6) , PAGE 2\n\n')[2]
cf6_snow_text = cf6_snow_text.rpartition('\n                        SNOW, ICE PELLETS, HAIL    4 = ICE PELLETS              \n')[2]

# the greatest depth field sits between its label and the start of the next legend entry
depth_field = cf6_snow_text.rpartition('GRTST DEPTH:')[2].partition('7 = DUSTSTORM OR SANDSTORM:')[0]

# split on single spaces and drop the empty strings left behind by runs of spaces
max_sd = [tok for tok in depth_field.split(' ') if tok]
max_sd

['10', 'ON', '25']

In [None]:
# alright, lets get the max snow data

In [225]:



def _get_cf6_snow_data(data_dict, url, idx)
    
    html = urlopen(url).read()
    soup = BeautifulSoup(html, features="html.parser")
    
    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out
    
    # get text
    text = soup.get_text()

    # parse out the snow info in the text
    cf6_snow_text = text.split('\nPRELIMINARY LOCAL CLIMATOLOGICAL DATA (WS FORM: F-6) , PAGE 2\n\n')[-1]
    cf6_snow_text = cf6_snow_text.split('\n                        SNOW, ICE PELLETS, HAIL    4 = ICE PELLETS              \n')[-1]
    #new_text.splitlines()
    
    # get the max 24 hr snow text
    max24hr_sn = cf6_snow_text.split('GRTST 24HR')[-1].split('6 = FREEZING RAIN OR DRIZZLE')[0]
    max24hr_sn = list(filter(None, (max24hr_sn.split(' '))))

    # get the max snow depth text
    max_sd = cf6_snow_text.split('GRTST DEPTH:')[-1].split('7 = DUSTSTORM OR SANDSTORM:')[0]
    max_sd = list(filter(None, (max_sd.split(' '))))
    print(max_sd)
    
    # establish our idx
    #idx = calendar.month_name[idx+1][:3].upper()

    # mini function for getting dates from the text
    def _find_date(lst, sd = False, sn = True):

        # this will take care of odd syntax issues that come up, e.g. 2, 1 on the dates when they span multiple dates
        if len(lst) > 3:
            _lst = lst[:2]
            lst_ = lst[2:]
            _lst.append("".join(lst_))
            lst = _lst

        
        dt = lst[2] # should be index value 2, sometimes, 'nn-nn', or can be 'nn-', 'nn', if one day

        # ----------------------------
        # for max 24 hr snow
        if sn:
            # syntax for a single date range
            if '-' in dt:
                dt = dt.split('-')[0]
                dt = f'{(idx+1):02}/{dt}'
                data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_clndr_24hr_snow'].append(dt) 
                return
            else:
                return

        # ----------------------------
        # for max snow depth
        elif sd:
            if ',' in dt: # for multiple days

                # split the string and sort if necessary
                days = sorted([int(dt.split(',')[0]), int(dt.split(',')[-1])])
                days = np.arange(days[0], days[1]+1)

                days = [f'{idx:02}/{d:02}' for d in days] # convert the days to 'mm/dd'
                data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_sdepth'].append(days)  
            
            else: # this should be single day stats
                #dt = dt.split('-')[0]
                dt = f'{(idx+1):02}/{int(dt):02}'
                data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_sdepth'].append(dt)             
    
    # ---------------------------------------------------
    # Max 24 hr snow values/date(s)
    # for when max 24 hr snow is trace
    if max24hr_sn[0] in 'T':
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_clndr_24hr_snow'].append(max24hr_sn[0])
        # get the dates
        _find_date(max24hr_sn, sd = False, sn = True)

    # for missing data
    elif max24hr_sn[0] in 'M':
        blank_data = [np.nan, np.nan] # for missing max 24 hr snow data, and no date
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()].update({'max_clndr_24hr_snow' : blank_data})      
    
    # for when max 24 hr snow is 0.0, then no dates either...
    elif float(max24hr_sn[0]) == 0.0:
        blank_data = [0.0, np.nan] # no max 24 hr snow data, and no date
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()].update({'max_clndr_24hr_snow' : blank_data}) 

    # for when max 24 hr snow is > 0.0
    elif float(max24hr_sn[0]) > 0.0:
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_clndr_24hr_snow'].append(float(max24hr_sn[0]))
        _find_date(max24hr_sn, sd = False, sn = True)

    # ---------------------------------------------------
    # Greatest Snow Depth values/date(s)

    # for when max snow depth is marked as missing... not sure how much this will come up...
    if max_sd[0] in 'M':
        blank_data = [np.nan, np.nan] # for missing max 24 hr snow data, and no date
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()].update({'max_sdepth' : blank_data})   

    if max_sd[0] in 'T':
        blank_data = [0, np.nan] # for weird exceptions where snow depth was marked as trace... should not be accurate...
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()].update({'max_sdepth' : blank_data})   

    # for when max snow depth is 0, then no dates either
    elif int(max_sd[0]) == 0:
        blank_data = [0, np.nan] # for missing max 24 hr snow data, and no date
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()].update({'max_sdepth' : blank_data})  

    elif int(max_sd[0]) > 0:
        data_dict['cf6_data'][calendar.month_name[idx+1][:3].upper()]['max_sdepth'].append(int(max_sd[0]))
        _find_date(max_sd, sd = True, sn = False)









['10', 'ON', '25']
['1', 'ON', '2,', '1']
['0']
['0']
['0']
['0']
['0']
['0']
['0']
['T']
['0']
['0']


In [226]:
data_dict['cf6_data']


{'JAN': {'max_clndr_24hr_snow': [9.0, '01/24', 9.0, '01/24'],
  'max_sdepth': [10, 10, '01/25']},
 'FEB': {'max_clndr_24hr_snow': [0.0, nan],
  'max_sdepth': [1, ['01/01', '01/02']]},
 'MAR': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'APR': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'MAY': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'JUN': {'max_clndr_24hr_snow': ['T', '06/8'], 'max_sdepth': [0, nan]},
 'JUL': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'AUG': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'SEP': {'max_clndr_24hr_snow': [nan, nan], 'max_sdepth': [0, nan]},
 'OCT': {'max_clndr_24hr_snow': ['T', '10/29'], 'max_sdepth': [0, nan]},
 'NOV': {'max_clndr_24hr_snow': [0.0, nan], 'max_sdepth': [0, nan]},
 'DEC': {'max_clndr_24hr_snow': ['T', '12/25'], 'max_sdepth': [0, nan]},
 'pres': {'min_pres': [], 'max_pres': []},
 'wind': {'max_gst': [], 'max_wdr': [], 'max_wd_dates': []}}

In [196]:
cf6_links

['https://mesonet.agron.iastate.edu/api/1/nwstext/202302011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202303011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202304011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202305010626-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202306010629-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202307011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202308011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202309011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202310010641-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202311011230-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202312011330-KLZK-CXUS54-CF6HRO',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202401011330-KLZK-CXUS54-C

In [176]:
text.splitlines()

['534 ',
 'CXUS54 KLZK 011330',
 'CF6HRO',
 'PRELIMINARY LOCAL CLIMATOLOGICAL DATA (WS FORM: F-6)',
 '',
 '                                          STATION:   HARRISON',
 '                                          MONTH:     DECEMBER',
 '                                          YEAR:      2023',
 '                                          LATITUDE:   36 15 N      ',
 '                                          LONGITUDE:  93  9 W    ',
 '',
 '  TEMPERATURE IN F:       :PCPN:    SNOW:  WIND      :SUNSHINE: SKY     :PK WND ',
 '1   2   3   4   5  6A  6B    7    8   9   10  11  12  13   14  15   16   17  18',
 '                                     12Z  AVG MX 2MIN',
 'DY MAX MIN AVG DEP HDD CDD  WTR  SNW DPTH SPD SPD DIR MIN PSBL S-S WX    SPD DR',
 '',
 ' 1  56  43  50   7  15   0 0.00  0.0    0  7.8 17 160   M    M  10        25 250',
 ' 2  52  41  47   4  18   0 0.02  0.0    0  4.3 12 120   M    M   7        16 130',
 ' 3  63  36  50   7  15   0 0.00  0.0    0  7.6 22 270   M    M   1

In [109]:

# get the max 24 hr snow text: the field sits between its label and the next legend entry
max24hr_sn = cf6_snow_text.rpartition('GRTST 24HR')[2].partition('6 = FREEZING RAIN OR DRIZZLE')[0]

# split on single spaces and drop the empty strings left behind by runs of spaces
max24hr_sn = [tok for tok in max24hr_sn.split(' ') if tok]

# a zero total gets recorded as a float
if float(max24hr_sn[0]) == 0.0:
    sn_data_lst.append(float(max24hr_sn[0]))

'  0.0            '

In [110]:
max24hr_sn = list(filter(None, (max24hr_sn.split(' '))))

In [111]:
max24hr_sn

['0.0']

In [None]:
# accumulator for the parsed max 24 hr snow values
sn_data_lst = []

# NOTE(review): an unfinished `sn_data_lst.append(` call was left here; it is a syntax
# error as written, so it is disabled until its argument is decided
# sn_data_lst.append(

In [112]:
if float(max24hr_sn[0]) == 0.0:
    sn_data_lst.append(float(max24hr_sn[0]))

true


In [98]:
# monthly max wind speed logic, for significant wind gusts in the additional events space


new_text = text.split('SPD DR\n================================================================================\n\n')[-1]
new_text = new_text.split('\n================================================================================\nNOTES')[0]
new_text = new_text.split('\n================================================================================\n')[0]



# peak wind speeds, their directions, and the day of the month they occurred on
wsp, wdr, date = [], [], []

for idx, line in enumerate(new_text.splitlines()):
    # everything from column 72 on holds the peak wind speed and direction
    wnd_data = [tok for tok in line[72:].split(' ') if tok]
    peak = wnd_data[0]

    # pass over missing values and anything under 45
    if peak == 'M' or int(peak) < 45:
        continue

    wsp.append(int(peak))
    wdr.append(int(wnd_data[1]))
    date.append(idx + 1)  # rows are counted from zero, days from one


wsp, wdr, date

([46], [240], [27])

In [None]:
# dict_constructor.py script below, as of Aug 4, 2024

In [None]:
# Import our modules

# BEAUTIFUL SOUP
from bs4 import BeautifulSoup

# BUILT IN
import calendar
import itertools
from operator import itemgetter
import os, sys
import requests
from urllib.request import urlopen

# MISC
from datetime import datetime, timezone
import json
import numpy.ma as ma
import numpy as np
import pandas as pd
import textwrap

###########################################################################################

'''
dict_constructor.py
Written by: Erik Green, WFO Little Rock, Aug 2024

dict_constructor.py fetches the API urls for each CLM product for a site, 
through the IEM API. From there, a main data dictionary is constructed.

Notes, per Thomas:
On the CLM, the greatest 24 hr total is the true 24 hr total (can overlap days)
            the greatest storm total is the actual calendar day max
'''

###########################################################################################
# dict_constructor.py - Get the URLs for each CLM product, and construct a dictionary with that data
###########################################################################################
# ----------------------------------------------------------
# Main Working Functions
# ----------------------------------------------------------
def _get_product_links(pil, y):
    '''
    A function that will query the IEM API for CLM products, and return the corresponding API links to those products

    Parameters
    ----------
    pil : str, 6 character pil for CLM products, e.g., CLMLIT
    y : int, year in YYYY format

    Returns
    ----------
    api_links : list, a list of API link strings

    '''

    print(f'Searching for {y} {pil} products...\n')

    # declare year and months to query
    # for a year in review, need to get Feb (Jan) to Dec (Nov), then Jan (Dec) of the new year, to complete one calendar year

    # y = datetime.now().year
    months = [x for x in range(13)][2:]  # start in feb for jan CLM product
    days = [x for x in range(15)][1:]

    # empty list to store api_links for each monthly product
    api_links = []

    for m in months:
        print(f'---{calendar.month_name[m][:3]} {y}---')
        for d in days:

            # this is the api link that returns a json file with info, either including product issuance info, or little info, in json format
            # url = 'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil=CLMLIT&date=2023-02-04'
            url = f'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil={pil}&date={y}-{m:02}-{d:02}'

            # A GET request to the API
            response = requests.get(url)

            # turn the request into a json, and subsequently a dictionary
            response = response.json()

            if not response['data']:
                print(f'   -No {pil} product issued on {m:02}/{d:02}/{y}')
                continue

            elif response['data']:
                print(f'   -{pil} Product issued on {m:02}/{d:02}/{y}\n')
                api_links.append(response['data'][0]['text_link'])
                break

    # ------------------------------------------------------------
    # small section to do dec, because the dec CLM product is issued in jan of the following year
    m = 1
    print(f'---{calendar.month_name[m][:3]} {y}---')
    for d in days:

        url = f'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil={pil}&date={y + 1}-{m:02}-{d:02}'

        # A GET request to the API
        response = requests.get(url)

        # turn the request into a json, and subsequently a dictionary
        response = response.json()

        if not response['data']:
            print(f'   -No {pil} product issued on {m:02}/{d:02}/{y}')
            continue

        elif response['data']:
            print(f'   -{pil} Product issued on {m:02}/{d:02}/{y}\n')
            api_links.append(response['data'][0]['text_link'])
            break

    print(f'Finished searching for {y} {pil} products...\n')
    return api_links

# ----------------------------------------------------------
def _parse_timestamp(url):
    '''
    This function will take a url string, and create a datetime object from the timestamp in the url.

    Parameters
    ----------
    url : str, the API url with the timestamp in the url

    Returns
    ----------
    timestamp : a datetime object with the url timestamp
    station : str, the three letter station ID

    '''

    timestamp = url.split('nwstext/')[-1].split('-KLZK')[0]

    y = int(timestamp[:4])
    m = int(timestamp[4:6])
    # if the month of the timestamp is january, then this is the dec product
    if m == 1:
        m = 12
    # otherwise, apply the standard correction of one month subtracted
    else:
        m = m - 1
    d = int(timestamp[6:8])
    hh = int(timestamp[8:10])
    mm = int(timestamp[10:])

    station = url.split('CLM')[-1]

    return datetime(y, m, d, hh, mm), station

# ----------------------------------------------------------
def _parse_clm_text(url):
    '''
    This function will parse the API url text for climate data, and return the unfiltered text info to keys in a dictionary.

    Parameters
    ----------
    url : str, the API url

    Returns
    ----------
    text_dict : dictionary, the dictionary with all unfiltered climate text info

    '''

    html = urlopen(url).read()
    soup = BeautifulSoup(html, features="html.parser")

    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out

    # get text
    text = soup.get_text()

    # parse out the highest and lowest temp data for the month from the text
    high_temp_data = text.split('HIGHEST         ')[-1].split('LOWEST')[0]
    low_temp_data = text.split('LOWEST           ')[-1].split('AVG. MAXIMUM')[0]

    # parse out the avg high and low temp data, and the avg monthly temp data
    avg_high_temp_data = text.split('AVG. MAXIMUM  ')[-1].split('\nAVG. MINIMUM  ')[0]
    avg_low_temp_data = text.split('AVG. MINIMUM  ')[-1].split('\nMEAN')[0]
    avg_monthly_data = text.split('MEAN  ')[-1].split('\nDAYS MAX >= 90')[0]

    # parse out the precip and snow data
    monthly_precip_data = text.split('SNOWFALL (INCHES)')[0].split('PRECIPITATION (INCHES)')[-1].split('\nTOTALS')[-1]
    monthly_snow_data = text.split('SNOWFALL (INCHES)')[-1].split('\nTOTALS')[-1].split('\nDEGREE DAYS')[0]

    # misc temp data
    misc_temp_data = text.split('\nTEMPERATURE (F)\n')[-1].split('\n\nPRECIPITATION (INCHES)')[0]

    values = [high_temp_data, low_temp_data,
                avg_high_temp_data, avg_low_temp_data, avg_monthly_data,
                monthly_precip_data, monthly_snow_data,
                misc_temp_data]

    keys = ['high_temp_data_text', 'low_temp_data_text',
            'avg_high_temp_data_text', 'avg_low_temp_data_text', 'avg_monthly_data_text',
            'monthly_precip_data_text', 'monthly_snow_data_text',
            'misc_temp_data']

    text_dict = dict(zip(keys, values))

    return text_dict

# ----------------------------------------------------------
def _get_temp_data(data_dict, high_temp_data, low_temp_data,
                   avg_high_temp_data, avg_low_temp_data,
                   avg_monthly_data, month):
    '''
    Worker function to extract high and low data/dates from the CLM products.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    high_temp_data : str, a parsed string that contains the monthly high temp data from the CLM
    low_temp_data : str, a parsed string that contains the monthly low temp data from the CLM
    avg_high_temp_data : str, a parsed string that contains the avg monthly high temp data from the CLM
    avg_low_temp_data : str, a parsed string that contains the avg monthly low temp data from the CLM
    avg_monthly_data : str, a parsed string that contains the avg monthly mean temp data from the CLM
    (no longer needed) station : str, the three character station ID for each climate site
    month : int, the numerical month number, can be obtained from the timestamp variable

    Returns
    ----------
    None

    '''

    # Helper functions
    # --------------------------------------------------------
    def _count_spaces(line):

        # this should be the date in the line
        split_date = list(filter(None, (line.split(' '))))[0]

        # this is the number of spaces for it to be a date occurred that month
        if line.split(split_date)[0].count(' ') < 30:
            return True

        else:
            return False

    # --------------------------------------------------------

    month = calendar.month_name[month][:3].upper()

    # first filter the line of temp data
    filtered_low_temp_data = list(filter(None, low_temp_data.split(' ')))  # low temp data
    filtered_high_temp_data = list(filter(None, high_temp_data.split(' ')))  # high temp data

    # lets add the high and low values to the dictionary
    data_dict[month]['monthly_max_min'].append(int(filtered_high_temp_data[0]))
    data_dict[month]['monthly_max_min'].append(int(filtered_low_temp_data[0]))

    # now add the monthly avg high, low, and avg temps, and dfn
    data_dict[month]['monthly_avg_temps_dfn'].append(
        float(list(filter(None, (avg_high_temp_data.split(' '))))[0]))  # monthly avg high
    data_dict[month]['monthly_avg_temps_dfn'].append(
        float(list(filter(None, (avg_low_temp_data.split(' '))))[0]))  # monthly avg low
    data_dict[month]['monthly_avg_temps_dfn'].append(
        float(list(filter(None, (avg_monthly_data.split(' '))))[0]))  # monthly avg mean temp
    data_dict[month]['monthly_avg_temps_dfn'].append(
        float(list(filter(None, (avg_monthly_data.split(' '))))[2]))  # monthly avg mean temp dfn

    ## LOW TEMP DATA DATES ##
    # ---------------------------------------------------------------
    # check how many lines exist in the list
    # if only one line, then just one date to grab
    if len(low_temp_data.splitlines()) == 1:
        data_dict[month].update({'low_dates': str(filtered_low_temp_data[1])})

    # if there are multiple lines of low temp data, then check if these are the dates (occurrence dates, not records) that we want
    elif len(low_temp_data.splitlines()) > 1 and not _count_spaces(low_temp_data.splitlines()[1]):
        data_dict[month].update({'low_dates': str(filtered_low_temp_data[1])})

    # if there are multiple rows of low temp data, then multiple dates to grab that are valid
    else:
        # grab the first date in the line
        data_dict[month].update({'low_dates': [filtered_low_temp_data[1]]})

        # now iterate and find the rest of the dates
        for line in low_temp_data.splitlines()[1:]:
            if _count_spaces(line):
                data_dict[month]['low_dates'].append(list(filter(None, (line.split(' '))))[0])

    ## HIGH TEMP DATA DATES ##
    # ---------------------------------------------------------------
    # check how many lines exist in the list
    # if only one line, then just one date to grab
    if len(high_temp_data.splitlines()) == 1:
        data_dict[month].update({'high_dates': str(filtered_high_temp_data[1])})

    # if there are multiple lines of high temp data, then check if these are the dates (occurrence dates, not records) that we want
    elif len(high_temp_data.splitlines()) > 1 and not _count_spaces(high_temp_data.splitlines()[1]):
        data_dict[month].update({'high_dates': str(filtered_high_temp_data[1])})

    # if there are multiple rows of high temp data, then multiple dates to grab
    else:

        # grab the first date in the line
        data_dict[month].update({'high_dates': [filtered_high_temp_data[1]]})

        # now iterate and find the rest of the dates
        for line in high_temp_data.splitlines()[1:]:
            if _count_spaces(line):
                data_dict[month]['high_dates'].append(list(filter(None, (line.split(' '))))[0])

# ----------------------------------------------------------
def _get_precip_data(data_dict, monthly_precip_data, month):
    '''
    This function will parse the precip text info from the API url, and will append precip data
    to the main working dictionary.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    monthly_precip_data : str, the parsed monthly precip data as a string
    month : int, the month as an integer

    Returns
    ----------
    None

    '''

    # Helper Sub-function of the main function
    # --------------------------------------------------------

    def _check_record(param):
        if 'R' in str(param):
            return param.split('R')[0]
        else:
            return param

    # --------------------------------------------------------

    # convert the integer month to the three letter month name
    month = calendar.month_name[month][:3].upper()

    # ---- Monthly Rain Data -----
    monthly_total_rain = _check_record(list(filter(None, (monthly_precip_data.split(' '))))[0])  # monthly total rain
    monthly_rain_dfn = _check_record(list(filter(None, (monthly_precip_data.split(' '))))[2])

    # add the monthly precip total and dfn
    data_dict[month]['monthly_rain_and_dfn'].append(float(monthly_total_rain))  # monthly total
    data_dict[month]['monthly_rain_and_dfn'].append(float(monthly_rain_dfn))  # monthly dfn

    # now lets get the calendar max, 24 hr max values and dates for rain
    # Note: On the CLM,
    # the greatest 24 hr total is the true 24 hr total(can overlap days)
    # the greatest storm total is the actual calendar day max
    for idx, line in enumerate(monthly_precip_data.split('\nGREATEST\n')[-1].splitlines()):
        # print(list(filter(None, (line.split(' ')))))
        # the first line is the true 24 hr total (can overlap days)
        if idx == 0:
            max_24hr_rain = float(list(filter(None, (line.split(' '))))[3])
            max_24hr_rain_dates = " ".join(list(filter(None, (line.split(' '))))[4:])

        # this is the true calendar day max (reads as storm total on the CLM)
        elif idx == 1:
            max_clndr_day_rain = float(list(filter(None, (line.split(' '))))[-1])

    # check for a different max storm total rainfall date
    lst = monthly_precip_data.split('\nGREATEST\n')[-1][:-2].splitlines()
    empty_date = '(MM/DD(HH))'  # string we are searching for that designates no storm total date i.e. it equals the 24 hr total

    # if we find the empty_date search string, then add the 24 hr total dates to storm total dates
    if any(empty_date in x for x in lst):
        # max_stormtotal_rain_dates = " ".join(list(filter(None, (monthly_precip_data.split('\nGREATEST\n')[-1][:-2].splitlines()[0].split(' '))))[4:])
        max_clndr_day_rain_dates = max_24hr_rain_dates

    # if we do not find the empty_date search string, then handle adding a potentially different storm total date
    else:
        max_clndr_day_rain_dates = " ".join(
            list(filter(None, (monthly_precip_data.split('\nGREATEST\n')[-1][:-2].splitlines()[1].split(' '))))[3:])

    # 24 hour (calendar day max) rainfall data
    data_dict[month]['max_clndr_24hr_rain'].append(max_24hr_rain)  # 24 hr max (can overlap dates)
    data_dict[month]['max_clndr_24hr_rain'].append(max_24hr_rain_dates)  # 24 hr max dates

    data_dict[month]['max_clndr_24hr_rain'].append(max_clndr_day_rain)  # max storm total (calendar day max)
    data_dict[month]['max_clndr_24hr_rain'].append(max_clndr_day_rain_dates)  # max storm total dates (set equal to 24 hr max for now)

# ----------------------------------------------------------
def _get_snow_data(data_dict, monthly_snow_data, month):
    '''
    This function will parse the precip text info from the API url, and will append snow data
    to the main working dictionary.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    monthly_snow_data : str, the parsed monthly snow data as a string
    month : int, the month as an integer

    Returns
    ----------
    None

    '''

    # Helper Sub-functions of the main function
    # --------------------------------------------------------
    def _check_record(param):
        if 'R' in str(param):
            return param.split('R')[0]
        else:
            return param

    # --------------------------------------------------------
    def _check_missing_snow(param, month):
        try:
            data_dict[month]['monthly_snow_sdepth_dfn'].append(float(param))

        except ValueError:
            if 'T' in str(param):
                data_dict[month]['monthly_snow_sdepth_dfn'].append(param)
            else:
                data_dict[month]['monthly_snow_sdepth_dfn'].append(0.0)

    # --------------------------------------------------------

    # first, if month is not in a primary cool season month, then auto fill values and end the function call
    if month not in [1, 2, 3, 4, 10, 11, 12]:
        # convert the integer month to the three letter month name
        month = calendar.month_name[month][:3].upper()

        # this will add [0.0, 0.0, 0.0, np.nan]
        blank_values = [0.0, 0.0, 0.0, np.nan]
        data_dict[month].update({'monthly_snow_sdepth_dfn': blank_values})

        # this will add [0.0, np.nan]
        blank_values = [0.0, np.nan]
        data_dict[month].update({'max_clndr_24hr_snow': blank_values})

        return

    # if we do have cool season month, then proceed

    # convert the integer month to the three letter month name
    month = calendar.month_name[month][:3].upper()

    # ---- Monthly Snow Data ----
    # add the monthly snow total and dfn
    monthly_total_snow = _check_record(list(filter(None, (monthly_snow_data.split(' '))))[0])  # monthly total snow
    monthly_grtst_sdepth = _check_record(
        list(filter(None, (monthly_snow_data.split('\nGREATEST\n SNOW DEPTH')[-1].splitlines()[0].split(' '))))[
            0])  # monthly greatest snow depth
    monthly_snow_dfn = _check_record(list(filter(None, (monthly_snow_data.split(' '))))[2])  # monthly total snow dfn
    monthly_grtst_24hr_snow = _check_record(list(filter(None, (
        monthly_snow_data.split('\nGREATEST\n')[-1].split(' 24 HR TOTAL')[-1].splitlines()[0].split(' '))))[0])

    # Add the data to the main dictionary
    # for the non winter months, monthly snow may just be set to 'MM' in the CLM product
    _check_missing_snow(monthly_total_snow, month)  # this will add the monthly total snow
    _check_missing_snow(monthly_snow_dfn, month)  # this will add the monthly total snow dfn
    _check_missing_snow(monthly_grtst_sdepth, month)  # this will add the greatest monthly snow depth

    # ---- Greatest Snow Depth Dates ----
    # Now lets check for dates on greatest snow depth and 24 max total
    lst = monthly_snow_data.split('\nGREATEST\n')[-1].splitlines()

    # if monthly max snow depth is missing, set dates to nan
    if monthly_grtst_sdepth in ['0', 'MM']:
        # if monthly_grtst_sdepth == 'MM':
        monthly_grtst_sdepth_dates = np.nan

    # for greatest snow depth date (multiple dates)
    if any('/' in x for x in lst[0]) and len(lst) > 2:

        monthly_grtst_sdepth_dates = []

        # get the first date and make sure it is valid
        split_date = list(filter(None, (lst[0].split(' '))))[-1]
        if lst[0].split(split_date)[0].split(monthly_grtst_sdepth)[-1].count(' ') <= 3:
            monthly_grtst_sdepth_dates.append(split_date)

        # now iterate over the remainder of possible dates
        for idx, item in enumerate(
                lst[1:-1]):  # skip the first, and the last line because the last line is the 24 hr max data
            split_date = list(filter(None, (item.split(' '))))[0]
            if lst[idx].split(split_date).count(' ') <= 25:
                monthly_grtst_sdepth_dates.append(split_date)

    # for greatest snow depth date (only one date)
    elif any('/' in x for x in lst[0]) and len(lst) == 2:

        # get the first date and make sure it is valid
        split_date = list(filter(None, (lst[0].split(' '))))[-1]
        if lst[0].split(split_date)[0].split(monthly_grtst_sdepth)[-1].count(' ') <= 3:
            monthly_grtst_sdepth_dates = split_date
    else:
        monthly_grtst_sdepth_dates = np.nan

    # ---- Max 24 hr Snow Dates ----
    # lets follow similar logic here...
    # if monthly max 24 hr snow is missing, then set dates to nan
    if monthly_grtst_24hr_snow in ['0.0', 'MM']:
        # if monthly_grtst_24hr_snow == 'MM':
        monthly_grtst_24hr_snow_dates = np.nan

    # for greatest snow depth date
    elif any('/' in x for x in lst[-1]):

        # lets get the first occurrence of a date and use that to split
        first_date = list(filter(None, (lst[-1].split(monthly_grtst_24hr_snow)[-1].split('TO')[0].split(' '))))
        first_date = list(filter(lambda k: 'R' not in k,
                                 first_date))  # in case there is a record value for 24 hr snow, there will still be an 'R'

        # now lets split based on the first date and count the spaces
        if lst[-1].split(first_date[0]).count(' ') <= 4:
            monthly_grtst_24hr_snow_dates = " ".join(
                list(filter(None, (monthly_snow_data.split('\nGREATEST\n')[-1].splitlines()[1].split(' '))))[4:7])

        else:
            monthly_grtst_24hr_snow_dates = np.nan

    else:
        monthly_grtst_24hr_snow_dates = np.nan

    # now add the data to the main dictionary
    data_dict[month]['monthly_snow_sdepth_dfn'].append(
        monthly_grtst_sdepth_dates)  # add the greatest snow depth date(s) to the dictionary

    try:
        data_dict[month]['max_clndr_24hr_snow'].append(
            float(monthly_grtst_24hr_snow))  # add the greatest 24 hr snow total to the dictionary
    except ValueError:
        if monthly_grtst_24hr_snow in ['0.0', 'MM']:
            data_dict[month]['max_clndr_24hr_snow'].append(0.0)  # add the greatest 24 hr snow total to the dictionary
        elif monthly_grtst_24hr_snow in ['T']:
            data_dict[month]['max_clndr_24hr_snow'].append(
                'T')  # add trace for the greatest 24 hr snow total to the dictionary

    data_dict[month]['max_clndr_24hr_snow'].append(
        monthly_grtst_24hr_snow_dates)  # add the greatest 24 hr snow total dates to the dictionary

# ----------------------------------------------------------
def _get_misc_temp_data(data_dict, misc_temp_data):

    '''
    This function will parse the misc temp days info from the API url, and will append
    the data to the main working dictionary.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    misct_temp_data : str, the parsed monthly misc temp data as a string

    Returns
    ----------
    None

    '''
    # -------------------------------------------
    # gather the data from the url link
    # days with minT <= 32
    d_mnT_lt_32 = misc_temp_data.split('\nDAYS MIN <= 32')[-1].split(' ')
    d_mnT_lt_32 = int(list(filter(None, (d_mnT_lt_32)))[0])

    # days with minT <= 0
    d_mnT_lt_0 = misc_temp_data.split('\nDAYS MIN <= 0')[-1].split(' ')
    d_mnT_lt_0 = int(list(filter(None, (d_mnT_lt_0)))[0])

    # days with maxT <= 32
    d_mxT_lt_32 = misc_temp_data.split('\nDAYS MAX <= 32')[-1].split(' ')
    d_mxT_lt_32 = int(list(filter(None, (d_mxT_lt_32)))[0])

    # days with maxT >= 90
    d_mxT_gt_90 = misc_temp_data.split('\nDAYS MAX >= 90')[-1].split(' ')
    d_mxT_gt_90 = int(list(filter(None, (d_mxT_gt_90)))[0])

    # -------------------------------------------
    # now lets add them to the main dictionary
    data_dict['misc_temp_data'].update(
        {'n days - minT <= 32': (data_dict['misc_temp_data']['n days - minT <= 32'] + d_mnT_lt_32)})
    data_dict['misc_temp_data'].update(
        {'n days - minT <= 0': (data_dict['misc_temp_data']['n days - minT <= 0'] + d_mnT_lt_0)})
    data_dict['misc_temp_data'].update(
        {'n days - maxT <= 32': (data_dict['misc_temp_data']['n days - maxT <= 32'] + d_mxT_lt_32)})
    data_dict['misc_temp_data'].update(
        {'n days - maxT >= 90': (data_dict['misc_temp_data']['n days - maxT >= 90'] + d_mxT_gt_90)})


# -----------------------------------------------------------
# Main Script - construct_data_dict()
# -----------------------------------------------------------
def construct_data_dict(pil, year):

    '''
    Build the main climate data dictionary for one site and year. An empty skeleton is
    created for every month, then each CLM product link is parsed and its data written in.

    Parameters
    ----------
    pil : str, the six letter climate product pil for a site, e.g. 'CLMLZK'
    year : int, year, formatted as YYYY

    Returns
    -------
    data_dict : dict, the dictionary that includes all of our main climate data from the CLM products
    '''

    def _empty_month():
        # a fresh set of containers for one month, every month gets its own lists
        return {
            'monthly_max_min': [],
            'high_dates': None,
            'low_dates': None,
            'monthly_avg_temps_dfn': [],  # avg high, low, mean, mean dfn
            'monthly_rain_and_dfn': [],  # monthly total rain and dfn
            'max_clndr_24hr_rain': [],  # [max 24 hr rainfall (float), 24 hr rainfall dates, max calendar day rainfall (float), dates]
            'monthly_snow_sdepth_dfn': [],  # monthly total snow, total snow dfn, greatest snow depth, and date(s) of greatest snow depth
            'max_clndr_24hr_snow': [],  # max 24 hr total snow and date(s) of 24 hr total snow
        }

    # establish our main working dictionary, keyed by three letter month name (JAN ... DEC)
    data_dict = {calendar.month_name[number][:3].upper(): _empty_month() for number in range(1, 13)}

    # the misc temp day counts are running totals over all the products
    data_dict['misc_temp_data'] = {
        'n days - minT <= 32': 0,
        'n days - minT <= 0': 0,
        'n days - maxT <= 32': 0,
        'n days - maxT >= 90': 0,
    }

    # -----------------------------------------------------------
    # get the API links for the station, one per CLM product
    api_links = _get_product_links(pil, year)

    print('Writing Data to Dictionary...\n')

    for url in api_links:
        print('...')

        # the timestamp tells us which month this product belongs to (the station is not needed here)
        timestamp, _ = _parse_timestamp(url)

        # the unfiltered text sections of the product, keyed by section name
        text_dict = _parse_clm_text(url)

        # high/low temps, averages and extreme dates
        _get_temp_data(
            data_dict,
            text_dict['high_temp_data_text'],
            text_dict['low_temp_data_text'],
            text_dict['avg_high_temp_data_text'],
            text_dict['avg_low_temp_data_text'],
            text_dict['avg_monthly_data_text'],
            timestamp.month,
        )

        # precip, snow, then the misc temp day counts
        _get_precip_data(data_dict, text_dict['monthly_precip_data_text'], timestamp.month)
        _get_snow_data(data_dict, text_dict['monthly_snow_data_text'], timestamp.month)
        _get_misc_temp_data(data_dict, text_dict['misc_temp_data'])

    print(f'Done Writing Data to Dictionary for {year} {pil[3:]} Annual Summary...\n')

    return data_dict


###########################################################################################
# Below is for testing, otherwise, construct_data_dict will simply be imported
###########################################################################################


if __name__ == '__main__':

    pil = 'CLMLZK'
    year = 2021
    data_dict = construct_data_dict(pil, year)

    # for quickly saving a dictionary to a text file, for easier testing
    # the with-block closes the file when it exits, so no explicit close() is needed
    with open(f'./{year}_{pil[3:]}_Annual_Summary.txt', 'w') as f:
        f.write(json.dumps(data_dict))

In [None]:
# cla_formatter.py script below, as of Aug 4, 2024

In [None]:
# Import our modules

# BEAUTIFUL SOUP
from bs4 import BeautifulSoup

# BUILT IN
import calendar
import itertools
from operator import itemgetter
import os, sys
import requests
from urllib.request import urlopen

# MISC
from datetime import datetime, timezone
import json
import numpy.ma as ma
import numpy as np
import pandas as pd
import textwrap

# DICT CONSTRUCTOR
from dict_constructor import construct_data_dict

###########################################################################################

'''
cla_formatter.py
Written by: Erik Green, WFO Little Rock, Aug 2024

CLA Formatter utilizes the functions:
    dict_constructor.py
    (api script for xmacis record info)
    (api script for iem record info)
    
The script first utilizes dict_constructor.py, where it fetches the API urls 
for each CLM product for a site, through the IEM API. From there, a main data 
dictionary is constructed.

In cla_formatter.py, the main data dictionary is utilized to generate a text file that contains
all annual supplemental data to be included in the CLA product for a climate site.



'''


###########################################################################################
# cla_formatter.py - Now that we have the dictionary constructed, lets write the text file...
###########################################################################################
# ----------------------------------------------------------
# Helping Sub-Functions
# ----------------------------------------------------------

# ----------------------------------------------------------
def _find_numeric_suffix(myDate):
    '''
    This function will take a string date, formatted as 'nn', e.g. '05', and assign a suffix based on the number.

    Parameters
    ----------
    myDate : str, a date number, formatted as 'nn', e.g. '05'

    Returns
    ----------
    myDate : str, formatted as 'nnTH', 'nnST', 'nnND', 'nnRD'

    '''

    date_suffix = ["TH", "ST", "ND", "RD"]

    if int(myDate) % 10 in [1, 2, 3] and int(myDate) not in [11, 12, 13]:
        return f'{myDate}{date_suffix[int(myDate) % 10]}'
    else:
        return f'{myDate}{date_suffix[0]}'

# -----------------------------------------------------------
def _get_station_name(pil):

    '''
    Parameters
    ----------
    pil : str, the six letter climate product pil, e.g. 'CLMLZK'

    Returns
    -------
    stn_name : str, the three letter station ID, parsed from pil, e.g. 'LZK'
    '''

    if pil[3:] == 'LZK':
        stn_name = 'NORTH LITTLE ROCK'
    elif pil[3:] == 'LIT':
        stn_name = 'LITTLE ROCK'
    elif pil[3:] == 'HRO':
        stn_name = 'HARRISON'
    elif pil[3:] == 'PBF':
        stn_name = 'PINE BLUFF'

    return stn_name


# ----------------------------------------------------------
# Main Working Functions
# ----------------------------------------------------------
def _make_temp_table(data_dict, f, year, stn_name):
    '''
    This function will write in the annual temperature summary table to the supplemental CLA product text file.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    f : file, the text file that is currently open that is being written to
    year : int, the climate year
    stn_name : str, the three letter climate station ID

    Returns
    ----------
    None

    '''

    # put together the data for iterating quickly
    months = [calendar.month_name[i][:3].upper() for i in range(1, 13)]

    avg_high_temps = [data_dict[m]['monthly_avg_temps_dfn'][0] for m in months]
    avg_low_temps = [data_dict[m]['monthly_avg_temps_dfn'][1] for m in months]
    avg_temps = [data_dict[m]['monthly_avg_temps_dfn'][2] for m in months]
    avg_temps_dfn = [data_dict[m]['monthly_avg_temps_dfn'][3] for m in months]

    high_temps = [data_dict[m]['monthly_max_min'][0] for m in months]
    high_temp_dates = [data_dict[m]['high_dates'] for m in months]

    low_temps = [data_dict[m]['monthly_max_min'][1] for m in months]
    low_temp_dates = [data_dict[m]['low_dates'] for m in months]

    # now lets write the table into the text file
    f.write(f'\n{year} TEMPERATURE AVERAGES AND EXTREMES                     {stn_name}, ARKANSAS\n')
    f.write(table_sep)
    f.write('             AVERAGE TEMPERATURES             |            TEMPERATURE EXTREMES\n')
    f.write('MONTH     HIGH     LOW     MONTHLY     DFN    |    MAX      DATE(S)         MIN      DATE(S)\n')
    f.write(table_sep)

    # order is as follows: month, avg max, avg min, avg mean, avg dfn, max, max dates, min, min dates
    for m, avgmx, avgmn, avgt, dfn, mx, mxdt, mn, mndt in zip(
                                                            months,
                                                            avg_high_temps, avg_low_temps, avg_temps, avg_temps_dfn,
                                                            high_temps, high_temp_dates,
                                                            low_temps, low_temp_dates
                                                             ):
        # do some formatting here...
        # start with constants for certain spaces in the sequence
        space1 = space * 4  # the space between the dfn, and the | separator
        space2 = space * 7  # the space between the monthly high temp, and high temp dates
        space3 = space * 14  # the space b/w the max temp dates, and the monthly min temp
        space4 = space * 7  # the space b/w the min temp, and min temp dates

        # format the dfn spacing and positive dfn's
        if dfn >= 10.0 or dfn <= -10.0:
            space1 = space * 3

        if dfn > 0.0:
            dfn = f'+{dfn}'

        # format the max temp spacing if > 100
        if mx >= 100:
            space2 = space * 6

        # format the min temp spacing if < -10
        if mn <= -10:
            space4 = space * 6

        # format the max temp dates
        if isinstance(mxdt, list):
            adj = (len(mxdt) * 3) - 3
            space3 = space * (14 - adj)
            mxdt = [i.split('/')[-1] for i in mxdt]
            mxdt = "/".join(sorted(mxdt, key=int))  # sort the days in ascending order

        else:
            mxdt = mxdt.split('/')[-1]

        # format the min temp dates
        if isinstance(mndt, list):
            mndt = [i.split('/')[-1] for i in mndt]
            mndt = "/".join(sorted(mndt, key=int))  # sort the days in ascending order
        else:
            mndt = mndt.split('/')[-1]

        f.write(
            f'{m}       {avgmx}    {avgmn}     {avgt}       {dfn}{space1}|    {mx}{space2}{mxdt}{space3}{mn}{space4}{mndt}\n')

    # -------------------------------------------------
    # next add in the annual summary data
    f.write(table_sep)  # add a table separator

    # constant space values we will need to adjust
    space1 = space * 4  # space b/w avg dfn and the | separator
    space2 = space * 7  # space b/w the yearly max temp and the yearly max temp dates
    space3 = space * 10  # space b/w the yearly max temp dates and the yearly min
    space4 = space * 7  # space b/w the yearly min and yearly min dates

    yrly_avgmx = np.round(np.mean(avg_high_temps), 1)
    yrly_avgmn = np.round(np.mean(avg_low_temps), 1)
    yrly_avg = np.round(np.mean(avg_temps), 1)

    yrly_avg_dfn = np.round(np.sum(avg_temps_dfn), 1)
    if yrly_avg_dfn >= 10.0 or yrly_avg_dfn <= -10.0:
        space1 = space * 3

    if yrly_avg_dfn > 0.0:
        yrly_avg_dfn = f'+{yrly_avg_dfn}'

    yrly_mx = np.max(high_temps)
    if yrly_mx >= 100.0:
        space2 = space * 6

    yrly_mn = np.min(low_temps)
    if yrly_mn <= -10.0:
        space4 = space * 6

    # -------------------------------------------------
    def _find_annual_extreme_dates(temp_dates_lst, idx, max_temp=False):

        '''
        This function will parse a temp dates list with a given index and return a string of the extreme dates

        Parameters
        ----------
        temp_dates_lst : list, a list of the extreme temperature dates for a month
        idx : int, the index value of the extreme temperature date for the year
        max_temp : bool, default = False, if max_temp, then edit space3 variable if necessary

        Returns
        ----------
        dt : str, a string of the formatted extreme temp dates
        _space3 : str, a string of formatted spaces for the space 3 variable in the annual values line

        '''

        _space3 = space3  # we only return this for the max temp dates

        if isinstance(temp_dates_lst[idx], list):
            if max_temp:
                adj = (len(temp_dates_lst[idx]) * 3) - 3
                _space3 = space * (10 - adj)

            dt = [i.split('/')[-1] for i in temp_dates_lst[idx]]
            dt = "/".join(sorted(dt, key=int)),  # sort the days in ascending order
        else:
            dt = temp_dates_lst[idx].split('/')[-1]

        return dt, _space3

    # -------------------------------------------------
    # format the annual max temp date(s)
    yrly_mx = np.max(high_temps)
    idx = high_temps.index(yrly_mx)

    mxdt, space3 = _find_annual_extreme_dates(high_temp_dates, idx, max_temp=True)
    yrly_mxdt = f'{calendar.month_name[idx + 1][:3].upper()} {mxdt}'

    # format the annual min temp date(s)
    yrly_mn = np.min(low_temps)
    idx = low_temps.index(yrly_mn)

    mndt, _ = _find_annual_extreme_dates(low_temp_dates, idx, max_temp=False)
    yrly_mndt = f'{calendar.month_name[idx + 1][:3].upper()} {mndt}'

    # now lets write in the data
    f.write(
        f'ANNUAL    {yrly_avgmx}    {yrly_avgmn}     {yrly_avg}       {yrly_avg_dfn}{space1}|    {yrly_mx}{space2}{yrly_mxdt}{space3}{yrly_mn}{space4}{yrly_mndt}\n')

# ----------------------------------------------------------
def _make_precip_table(data_dict, f, year, stn_name):
    '''
    This function will write in the annual rain summary table to the supplemental CLA product text file.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    f : file, the text file that is currently open that is being written to
    year : int, the climate year
    stn_name : str, the three letter climate station ID

    Returns
    ----------
    None

    '''

    # # --------------------------------------------
    # add table headers and column information

    f.write(f'\n\n{year} RAINFALL, DEPARTURES, AND EXTREMES                    {stn_name}, ARKANSAS\n')
    f.write(table_sep)
    f.write('MONTH        RAINFALL       DFN               MAX/CALENDAR DAY            MAX/24 HOURS\n')
    f.write(table_sep)

    # --------------------------------------------
    # now lets compile and organize the data
    # put together the data for iterating quickly
    months = [calendar.month_name[i][:3].upper() for i in range(1, 13)]

    precip = [data_dict[m]['monthly_rain_and_dfn'][0] for m in months]
    precip_dfn = [data_dict[m]['monthly_rain_and_dfn'][1] for m in months]

    # the calendar day max is actually index 3 and 4
    precip_mx_clndr = [data_dict[m]['max_clndr_24hr_rain'][2] for m in months] # previously idx 0
    precip_mx_clndr_dates = [data_dict[m]['max_clndr_24hr_rain'][3] for m in months] # previously idx 1

    # these are the true 24 hr totals, can overlap days
    precip_mx_storm_total = [data_dict[m]['max_clndr_24hr_rain'][0] for m in months] # previously idx 2
    precip_mx_storm_total_dates = [data_dict[m]['max_clndr_24hr_rain'][1] for m in months] # previously idx 3

    # for loop begins
    for m, pcp, dfn, pcp_mx_clndr, pcp_mx_clndr_dates, pcp_mx_storm_total, pcp_mx_storm_total_dates in zip(
            months,
            precip, precip_dfn,
            precip_mx_clndr, precip_mx_clndr_dates,
            precip_mx_storm_total, precip_mx_storm_total_dates
    ):
        # --------------------------------------------
        # lets do some formatting here
        # start with constants for certain spaces in the sequence
        space1 = space * 10  # the space b/w the precip and the departure
        space2 = space * 14  # the space b/w the precip dfn and the precip calendar day max
        space3 = space * 19  # space b/w precip calendar day max date and the max precip storm total

        # edit space 1 if monthly precip is greater than 10 inches
        if pcp >= 10.00:
            space1 = space * 9

        # edit space 2 if monthly precip dfn is >= 10.0 or <= -10.0
        if dfn >= 10.0 or dfn <= -10.0:
            space2 = space * 12

            # assign a sign to dfn if it is positive
        if dfn >= 0.0:
            dfn = f'+{dfn:.2f}'

        # --------------------------------------------
        # lets format the max calendar day rainfall dates here...
        # for date in pcp_mx_clndr_dates:
        bdt = list(filter(None, (pcp_mx_clndr_dates.split('TO')[0].split(' '))))[0]
        edt = list(filter(None, (pcp_mx_clndr_dates.split('TO')[-1].split(' '))))[0]

        # if the dates are the same, then format for one day
        if bdt == edt:
            clndr_mxdt = _find_numeric_suffix(bdt.split('/')[-1])

        # if the dates are not the same, then format for multiple dates
        elif bdt != edt:
            bdt = _find_numeric_suffix(bdt.split('/')[-1])
            edt = _find_numeric_suffix(edt.split('/')[-1])
            clndr_mxdt = f'{bdt}-{edt}'
            space3 = space * 14

        # --------------------------------------------
        # lets format the max storm total rainfall dates here...
        bdt = list(filter(None, (pcp_mx_storm_total_dates.split('TO')[0].split(' '))))[0]
        edt = list(filter(None, (pcp_mx_storm_total_dates.split('TO')[-1].split(' '))))[0]

        # if the dates are the same, then format for one day
        if bdt == edt:
            stormtotal_mxdt = _find_numeric_suffix(bdt.split('/')[-1])

        # if the dates are not the same, then format for multiple dates
        elif bdt != edt:
            bdt = _find_numeric_suffix(bdt.split('/')[-1])
            edt = _find_numeric_suffix(edt.split('/')[-1])
            stormtotal_mxdt = f'{bdt}-{edt}'

        # write in the line of monthly data
        f.write(
            f'{m}          {pcp:.2f}{space1}{dfn}{space2}{pcp_mx_clndr:.2f}/{clndr_mxdt}{space3}{pcp_mx_storm_total:.2f}/{stormtotal_mxdt}\n')
        # end of for loop

    # --------------------------------------------
    # constant space values that may be edited based on the data
    space1 = space * 14  # space b/w yearly dfn, and yrly calendar day max precip
    space2 = space * 17  # space b/w yearly calendar day max precip date, and yearly daily max storm total precip value

    # now lets add the annual data
    yrly_rain = np.round(np.sum(precip), 2)
    yrly_dfn = np.round(np.sum(precip_dfn), 2)

    # adjust space1 if yearly precip dfn is >= 10.0 or <= -10.0
    if yrly_dfn >= 10.0 or yrly_dfn <= -10.0:
        space1 = space * 13

    # if the yrly precip dfn is >= 0.0, then assign a positive sign in the string
    if yrly_dfn >= 0.0:
        yrly_dfn = f'+{yrly_dfn}'

    # get the max calendar day and storm total precip values
    # mx_clndr_pcp = np.max(precip_mx_clndr)
    # mx_stormtotal_pcp = np.max(precip_mx_storm_total)

    # need to format the annual precip extreme dates...

    # -------------------------------------------------
    def _find_annual_extreme_dates(pcp_dates_lst, idx, clndr_day_max=False):

        '''
        This function will parse a precip dates list with a given index and return a string of the extreme dates

        pcp_dates_lst : list, a list of the extreme precip dates for a month
        idx : int, the index value of the extreme precip date for the year
        clndr_day_max : bool, default = False, if clndr_day_max, then edit space2 variable if necessary

        returns:
        yrly_mxdt : str, a string of the formatted extreme precip dates
        _space2 : str, a string of formatted spaces for the space 2 variable in the annual values line

        '''
        _space2 = space2  # we only edit this for the calendar day max precip date, if it is two dates

        bdt = list(filter(None, (pcp_dates_lst[idx].split('TO')[0].split(' '))))[0]
        edt = list(filter(None, (pcp_dates_lst[idx].split('TO')[-1].split(' '))))[0]

        if bdt == edt:
            yrly_mxdt = bdt.split('/')[-1]

        elif bdt != edt:
            bdt = bdt.split('/')[-1]
            edt = edt.split('/')[-1]
            yrly_mxdt = f'{bdt}-{edt}'

            if clndr_day_max:
                _space2 = space * 14

        return yrly_mxdt, _space2

    # -------------------------------------------------

    # yearly calendar day max precip date
    clndr_pcp_mx = np.max(precip_mx_clndr)
    idx = precip_mx_clndr.index(clndr_pcp_mx)  # index value of the calendar day max precip

    clndr_max_pcp_dt, space2 = _find_annual_extreme_dates(precip_mx_clndr_dates, idx, clndr_day_max=True)
    yrly_clndr_pcp_mx = f'{calendar.month_name[idx + 1][:3].upper()} {clndr_max_pcp_dt}'

    # -------------------------------------------------

    # yearly daily storm total max precip date
    storm_total_pcp_mx = np.max(precip_mx_storm_total)
    idx = precip_mx_storm_total.index(storm_total_pcp_mx)

    storm_total_pcp_max_dt, _ = _find_annual_extreme_dates(precip_mx_storm_total_dates, idx, clndr_day_max=False)
    yrly_storm_total_pcp_max_dt = f'{calendar.month_name[idx + 1][:3].upper()} {storm_total_pcp_max_dt}'

    # -------------------------------------------------
    # write in the annual precip summary data
    f.write(table_sep)
    f.write(
        f'ANNUAL       {yrly_rain}         {yrly_dfn}{space1}{clndr_pcp_mx}/{yrly_clndr_pcp_mx}{space2}{storm_total_pcp_mx}/{yrly_storm_total_pcp_max_dt}\n')

# ----------------------------------------------------------
def _make_snow_table(data_dict, f, year, stn_name):
    '''
    This function will write in the annual snow summary table to the supplemental CLA product text file.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    f : file, the text file that is currently open that is being written to
    year : int, the climate year
    stn_name : str, the three letter climate station ID

    Returns
    ----------
    None

    '''

    # --------------------------------------------
    # add table headers and column information
    f.write(f'\n\n{year} SNOWFALL, DEPARTURES, AND EXTREMES                    {stn_name}, ARKANSAS\n')
    f.write(table_sep)
    f.write('MONTH   SNOW      DFN       MAX/CALENDAR DAY       MAX/24 HOUR       GREATEST DEPTH/DATE\n')
    f.write(table_sep)

    # --------------------------------------------
    # now lets compile and organize the data
    # put together the data for iterating quickly
    snow_months = ['JAN', 'FEB', 'MAR', 'APR', 'OCT', 'NOV', 'DEC']  # these are the months we need for the snow table summary

    snow = [data_dict[m]['monthly_snow_sdepth_dfn'][0] for m in snow_months]
    snow_dfn = [data_dict[m]['monthly_snow_sdepth_dfn'][1] for m in snow_months]

    mx_sdepth = [data_dict[m]['monthly_snow_sdepth_dfn'][2] for m in snow_months]
    mx_sdepth_dt = [data_dict[m]['monthly_snow_sdepth_dfn'][3] for m in snow_months]

    mx_24hr_snow = [data_dict[m]['max_clndr_24hr_snow'][0] for m in snow_months]
    mx_24hr_snow_dt = [data_dict[m]['max_clndr_24hr_snow'][1] for m in snow_months]

    # for loop begins
    for m, sn, dfn, mx_sd, mx_sd_dt, mx_24hr_sn, mx_24hr_sn_dt in zip(
            snow_months,
            snow, snow_dfn,
            mx_sdepth, mx_sdepth_dt,
            mx_24hr_snow, mx_24hr_snow_dt
    ):
        # constant space values that may need to be edited
        space1 = space * 6  # space b/w the monthly snow, and the dfn
        space2 = space * 7  # space b/w the dfn and the calendar day max snow/date
        space3 = space * 20  # space b/w the calendar day max snow/date and the 24 hr max snow date
        space4 = space * 15  # space b/w 24 hour max, and

        # lets do some formatting here
        # account for when there is trace monthly snow
        if sn == 'T':
            sn = 'T  '

        # edit space1 when snow is >= 10.0
        if isinstance(sn, float) and sn >= 10.0:  # used isinstance to keep value error from popping up when making comparison
            space1 = space * 5

        # adjust dfn parameters
        if dfn >= 10.0:  # snow dfn will never less than 10 inches for our climate sites
            dfn = f'+{dfn}'
            space2 = space * 6

        elif dfn == 0.0:
            dfn = f' {dfn}'

        # --------------------------------------------
        # lets format the max calendar day and 24 hr snowfall dates here...
        if isinstance(mx_24hr_sn_dt, str):  # if no max snowfall value, then date values are set to nan, so check for this

            bdt = list(filter(None, (mx_24hr_sn_dt.split('TO')[0].split(' '))))[0]
            edt = list(filter(None, (mx_24hr_sn_dt.split('TO')[-1].split(' '))))[0]

            # if the dates are the same, then format for one day
            if bdt == edt:
                mx_24hr_sn_dt = _find_numeric_suffix(bdt.split('/')[-1])

                if isinstance(mx_24hr_sn, float) and mx_24hr_sn >= 10.0:
                    space3 = space * 9
                    space4 = space * 6

                elif isinstance(mx_24hr_sn, float) and mx_24hr_sn <= 10.0:
                    space3 = space * 15
                    space4 = space * 10

                elif mx_24hr_sn == 'T':
                    space3 = space * 17
                    space4 = space * 12

            # if the dates are not the same, then format for multiple dates
            elif bdt != edt:
                bdt = _find_numeric_suffix(bdt.split('/')[-1])
                edt = _find_numeric_suffix(edt.split('/')[-1])
                mx_24hr_sn_dt = f'{bdt}-{edt}'

                if isinstance(mx_24hr_sn, float) and mx_24hr_sn >= 10.0:
                    space3 = space * 9
                    space4 = space * 4

                elif isinstance(mx_24hr_sn, float) and mx_24hr_sn <= 10.0:
                    space3 = space * 10
                    space4 = space * 5

                elif mx_24hr_sn == 'T':
                    space3 = space * 11
                    space4 = space * 6

            # format the max calendar day, and 24 hour snow fall values and dates (if applicable)
            mx_24hr_sn = f'{mx_24hr_sn}/{mx_24hr_sn_dt}'

        # lets format the max snow depth/dates
        if mx_sd > 0.0:
            dt = _find_numeric_suffix(mx_sd_dt.split('/')[-1])
            mx_sd = f'{int(mx_sd)}/{dt}'

        else:
            mx_sd = '0'

        f.write(f'{m}     {sn}{space1}{dfn}{space2}{mx_24hr_sn}{space3}{mx_24hr_sn}{space4}{mx_sd}\n')
        # end of for loop

    # -----------------------------------------------------------
    # now lets add the annual data max values at the bottom of the table

    # constant space values
    space1 = space * 6  # space b/w the yearly snow and the yearly dfn
    space2 = space * 7  # space b/w the yearly snow dfn and the calendar day max snow value/date
    space3 = space * 13 # space b/w the annual calendar day max snow/date and annual 24 hr snow max/date
    space4 = space * 15 # space b/w the 24 hr snow max/dates and the annual greatest snow depth

    # -------------------------------------------
    # get the annual snowfall total
    sn_filtered = [s for s in snow if s != 'T']  # filter out trace values

    # if the two lists are equal after filtering, then we filtered no T's
    if len(snow) == len(sn_filtered):
        yrly_sn = np.sum(sn_filtered)

    # if the two lists are not equal after filtering, then we filtered T's out...
    elif len(snow) != len(sn_filtered):
        if np.sum(sn_filtered) == 0.0:
            yrly_sn = 'T'

        elif np.sum(sn_filtered) > 0.0:
            yrly_sn = np.sum(sn_filtered)

    if isinstance(yrly_sn, float) and yrly_sn >= 10.0:
        space1 = space * 5

    # -------------------------------------------
    # get the annual snow dfn
    yrly_sn_dfn = np.sum(snow_dfn)
    if yrly_sn_dfn > 10.0:
        yrly_sn_dfn = f'+{yrly_sn_dfn}'
        space2 = space * 6

    elif yrly_sn_dfn == 0.0:
        yrly_sn_dfn = ' 0.0'

    # -------------------------------------------
    # get the max calendar day snow value and date, which will also be the 24 hr max value...
    # use our find annual extreme function here...
    # -------------------------------------------------
    def _find_annual_extreme_dates(sn_dates_lst, idx, clndr_day_max = False):
        '''

        This function will parse a precip dates list with a given index and return a string of the extreme dates

        Parameters
        ----------
        sn_dates_lst : list, a list of the extreme snow dates for a month
        idx : int, the index value of the extreme precip date for the year
        clndr_day_max : bool, default = False, if clndr_day_max, then edit space2 variable if necessary

        Returns
        ----------
        yrly_mxdt : str, a string of the formatted extreme snow dates
        _space3 : str, a string of formatted spaces for the space 3 variable in the annual values line
        _space4 : str, a string of formatted spaces for the space 4 variable in the annual values line

        '''
        _space3 = space3  # we only edit this for the calendar day max snow date, if it is two dates
        _space4 = space4

        bdt = list(filter(None, (sn_dates_lst[idx].split('TO')[0].split(' '))))[0]
        edt = list(filter(None, (sn_dates_lst[idx].split('TO')[-1].split(' '))))[0]

        if bdt == edt:
            yrly_mxdt = bdt.split('/')[-1]

        elif bdt != edt:
            bdt = bdt.split('/')[-1]
            edt = edt.split('/')[-1]
            yrly_mxdt = f'{bdt}-{edt}'

            if clndr_day_max:
                _space3 = space * 10
                _space4 = space * 5

        return yrly_mxdt, _space3, _space4

    # -------------------------------------------------
    # get the annual max calendar day snow
    sn_filtered = [s for s in mx_24hr_snow if s != 'T']  # filter out trace values

    # if the two lists are equal after filtering, then we filtered no T's
    if len(mx_24hr_snow) == len(sn_filtered):
        sn_clndr_mx = np.max(sn_filtered)

    # if the two lists are not equal after filtering, then we filtered T's out...
    elif len(mx_24hr_snow) != len(sn_filtered):
        if np.sum(sn_filtered) == 0.0:
            sn_clndr_mx = 'T'

        elif np.sum(sn_filtered) > 0.0:
            sn_clndr_mx = np.max(sn_filtered)

    # -------------------------------------------------
    # if the annual calendar day max snow value is > 0.0, and not trace
    if isinstance(sn_clndr_mx, float) and sn_clndr_mx > 0.0:
        idx = mx_24hr_snow.index(sn_clndr_mx)  # index value of the calendar day max snow
        clndr_snow_mx_dt, space3, space4 = _find_annual_extreme_dates(mx_24hr_snow_dt, idx, clndr_day_max=True)
        yrly_clndr_sn_mx = f'{sn_clndr_mx}/{calendar.month_name[idx + 1][:3].upper()} {clndr_snow_mx_dt}'

        if sn_clndr_mx >= 10.0:
            n1 = len(space3)
            n2 = len(space4)
            space3 = space*(n1-1)
            space4 = space*(n2-1)

    elif isinstance(sn_clndr_mx, float) and sn_clndr_mx == 0.0:
        yrly_clndr_sn_mx = '0.0'
        space3 = space*13
        space4 = space*15

    elif isinstance(sn_clndr_mx, str):
        yrly_clndr_sn_mx = 'T/|*Check Dates*|'
        space3 = space*6
        space4 = space*8

    # -------------------------------------------------
    # get the annual max snow depth and the date
    yrly_mx_sd = np.max(mx_sdepth)

    if yrly_mx_sd > 0:
        idx = mx_sdepth.index(yrly_mx_sd)
        yrly_mx_sd_dt, _, _ = _find_annual_extreme_dates(mx_sdepth_dt, idx, clndr_day_max=False)
        yrly_mx_sd = f'{int(yrly_mx_sd)}/{calendar.month_name[idx + 1][:3].upper()} {yrly_mx_sd_dt}'

    else:
        yrly_mx_sd = '0'


    # -------------------------------------------------
    f.write(table_sep)
    f.write(f'ANN.    {yrly_sn}{space1}{yrly_sn_dfn}{space2}{yrly_clndr_sn_mx}{space3}{yrly_clndr_sn_mx}{space4}{yrly_mx_sd}\n')

# ----------------------------------------------------------
def _make_misc_data(data_dict, f):

    '''

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    f : file, the text file that is currently open that is being written to

    Returns
    -------
    None

    '''

    # lets assemble some data
    n1 = data_dict['misc_temp_data']['n days - minT <= 32']
    n2 = data_dict['misc_temp_data']['n days - minT <= 0']
    n3 = data_dict['misc_temp_data']['n days - maxT <= 32']
    n4 = data_dict['misc_temp_data']['n days - maxT >= 90']

    f.write(f'\n\n{table_sep}')
    f.write('MISCELLANEOUS DATA (FIRST/LAST DATES, ETC.)\n')
    f.write(table_sep)

    # add in the misc data
    f.write(f'DAYS WITH MINIMUMS AT OR BELOW 32 DEGREES........................{n1:02}\n')
    f.write(f'DAYS WITH MINIMUMS AT OR BELOW 20 DEGREES........................|*nn*|\n')
    f.write(f'DAYS WITH MINIMUMS AT OR BELOW 0 DEGREES.........................{n2:02}\n')
    f.write(f'DAYS WITH MAXIMUMS AT OR BELOW 32 DEGREES........................{n3:02}\n')
    f.write(f'DAYS WITH MAXIMUMS AT OR ABOVE 90 DEGREES........................{n4:02}\n')
    f.write('DAYS WITH MAXIMUMS AT OR ABOVE 100 DEGREES........................|*nn*|\n')
    f.write('DAYS WITH MAXIMUMS AT OR ABOVE 105 DEGREES........................|*nn*|\n')
    f.write('DAYS WITH MAXIMUMS AT OR ABOVE 110 DEGREES........................|*nn*|\n')
    f.write('LAST FREEZE.......................................................|*Date*|\n')
    f.write('FIRST 80-DEGREE DAY...............................................|*Date*|\n')
    f.write('FIRST 90-DEGREE DAY...............................................|*Date*|\n')
    f.write('FIRST 100-DEGREE DAY..............................................|*Date*|\n')
    f.write('LAST 100-DEGREE DAY...............................................|*Date*|\n')
    f.write('LAST 90-DEGREE DAY................................................|*Date*|\n')
    f.write('LAST 80-DEGREE DAY................................................|*Date*|\n')
    f.write('FIRST FREEZE......................................................|*Date*|\n')


# -----------------------------------------------------------
# Main Script - write_textfile()
# -----------------------------------------------------------
def write_textfile(data_dict, pil, year):
    '''
    Write the supplemental annual climate (CLA) text file for one station and year.

    The file is written to ./_output/Supplemental_{year}_CLA{pil[3:]}_Data.txt and holds a
    title block followed by the temperature, precipitation, snow and miscellaneous sections.

    Parameters
    ----------
    data_dict : dict, the main dictionary that includes all our main CLM product data
    pil : str, the product pil, e.g., CLMLZK; everything after the first three characters is
          used in the file name
    year : int, the climate year

    Returns
    ----------
    None

    '''

    # get the station name from the pil
    stn_name = _get_station_name(pil)

    # make sure the output folder exists, otherwise open() fails on a fresh checkout
    out_path = f'./_output/Supplemental_{year}_CLA{pil[3:]}_Data.txt'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # ----------------------------------------------------------
    # here we're actually writing the text file; the with-statement closes it when done
    with open(out_path, 'w') as f:

        f.write(f'{year} SUPPLEMENTAL ANNUAL CLIMATE DATA FOR {stn_name}\n')
        f.write('\n(DFN = DEPARTURE FROM NORMAL)\n')

        # first up, the annual temperature summary table
        _make_temp_table(data_dict, f, year, stn_name)

        # next, write the annual precip summary table
        _make_precip_table(data_dict, f, year, stn_name)

        # next, write the annual snow summary table
        _make_snow_table(data_dict, f, year, stn_name)

        # make the misc data summary
        _make_misc_data(data_dict, f)

###########################################################################################
###########################################################################################

# -----------------------------------------------------------
# lets import a text file structured as a dictionary to make our lives easier
# (the with-statement closes the file on exit, so no explicit close() is needed)
with open('./2021_LZK_Annual_Summary.txt', 'r') as f:
    json_str = f.read()

data_dict = json.loads(json_str)


# -----------------------------------------------------------
# will need to incorporate these into a function call,
# preferably as sys arg's for command line operations
pil = 'CLMLZK'
year = 2021

# ----------------------------------------------------------
# declare some globals as constants that will be used in the
# function calls: the horizontal rule between table sections
# and the single character used to build column padding
table_sep = '-' * 93 + '\n'
space = ' '

# ----------------------------------------------------------
# For the function call
if __name__ == '__main__':

    # this is for when we're going to run the whole function
    #data_dict = construct_data_dict(pil, year)

    # now write the text file
    write_textfile(data_dict, pil, year)




In [4]:
# working on getting max/min pressure from the cf6 here...

url = 'https://mesonet.agron.iastate.edu/api/1/nwstext/202302011330-KLZK-CXUS54-CF6HRO'

# close the HTTP response as soon as it has been read, and give up after 30 seconds
# instead of hanging the kernel on a stalled connection
with urlopen(url, timeout=30) as response:
    html = response.read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

# get text
text = soup.get_text()



In [5]:
# view the downloaded product text one line at a time
text.splitlines()

['264 ',
 'CXUS54 KLZK 011330',
 'CF6HRO',
 'PRELIMINARY LOCAL CLIMATOLOGICAL DATA (WS FORM: F-6)',
 '',
 '                                          STATION:   HARRISON',
 '                                          MONTH:     JANUARY',
 '                                          YEAR:      2023',
 '                                          LATITUDE:   36 15 N      ',
 '                                          LONGITUDE:  93  9 W    ',
 '',
 '  TEMPERATURE IN F:       :PCPN:    SNOW:  WIND      :SUNSHINE: SKY     :PK WND ',
 '1   2   3   4   5  6A  6B    7    8   9   10  11  12  13   14  15   16   17  18',
 '                                     12Z  AVG MX 2MIN',
 'DY MAX MIN AVG DEP HDD CDD  WTR  SNW DPTH SPD SPD DIR MIN PSBL S-S WX    SPD DR',
 '',
 ' 1  73  42  58  21   7   0 0.00  0.0    M  8.6 17 170   M    M   2 12     22 140',
 ' 2  75  50  63  26   2   0 0.37  0.0    0  8.1 20 150   M    M   6 123    26 120',
 ' 3  65  42  54  17  11   0 0.13  0.0    0  6.2 18 240   M    M   1 

In [24]:
idx = 0  # zero-based month index, used below as calendar.month_name[idx + 1]

# the lines holding the monthly max/min SLP: everything after the label, up to the line break
max_slp_line = text.split('HIGHEST SLP')[-1].split('\n')[0]
min_slp_line = text.split('LOWEST  SLP')[-1].split('\n')[0]

# break each line into its non-empty, space separated tokens
max_slp_text = [token for token in max_slp_line.split(' ') if token]
min_slp_text = [token for token in min_slp_line.split(' ') if token]

# first token is the pressure value, last token is used as the day
max_slp, min_slp = float(max_slp_text[0]), float(min_slp_text[0])

# dates are written as '<MON> <last token>', e.g. 'JAN 15' for idx = 0
max_slp_date = f'{calendar.month_name[idx + 1][:3].upper()} {max_slp_text[-1]}'
min_slp_date = f'{calendar.month_name[idx + 1][:3].upper()} {min_slp_text[-1]}'




30.45

In [23]:
# first token after the 'HIGHEST SLP' label, i.e. the max SLP value still as text
max_slp_text[0]

'30.45'

In [None]:
# establish our main working dictionary:
# one (initially empty) entry per month, keyed by the upper-case three letter abbreviation
data_dict = {}

for i in range(1, 13):
    data_dict[calendar.month_name[i][:3].upper()] = {}


# now give every month its own set of empty containers
# (the dict literal is rebuilt on each pass, so no lists are shared between months)
for key in data_dict:
    data_dict[key].update({
        'monthly_max_min': [],
        'high_dates': None,
        'low_dates': None,
        'monthly_avg_temps_dfn': [],  # avg high, low, mean, mean dfn
        'monthly_rain_and_dfn': [],  # monthly total rain and dfn
        # rainfall maxima and their dates, calendar day and 24 hr
        # NOTE(review): _make_precip_table reads the 24 hr max at index 0/1 and the calendar
        # day max at index 2/3 - confirm the order used when this list is filled
        'max_clndr_24hr_rain': [],
        'monthly_snow_sdepth_dfn': [],  # monthly total snow, total snow dfn, greatest snow depth, and date(s) of greatest snow depth
        'max_clndr_24hr_snow': [],  # max 24 hr total snow and date(s) of 24 hr total snow
    })

# add in a sub-dictionary for the misc temp data (day counters, all starting at zero)
_data_dict = {
    'n days - minT <= 32': 0,
    'n days - minT <= 0': 0,
    'n days - maxT <= 32': 0,
    'n days - maxT >= 90': 0,
}
data_dict['misc_temp_data'] = _data_dict

data_dict

In [488]:
# display the collected API links
api_links

['https://mesonet.agron.iastate.edu/api/1/nwstext/202102020519-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202103050058-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202104020212-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202105012111-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202106010952-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202107020205-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202108011806-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202109010950-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202110020032-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202111021719-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202112020820-KLZK-CXUS54-CLMLZK',
 'https://mesonet.agron.iastate.edu/api/1/nwstext/202201040104-KLZK-CXUS54-C

In [50]:
# collect the API links of the 2021 CLMLZK products
api_links = get_product_links('CLMLZK', 2021) # these variables will be replaced with place holders in the main function

Searching for 2021 CLMLZK products...

---Feb 2021---
   -No CLMLZK product issued on 02/01/2021
   -CLMLZK Product issued on 02/02/2021

---Mar 2021---
   -No CLMLZK product issued on 03/01/2021
   -No CLMLZK product issued on 03/02/2021
   -No CLMLZK product issued on 03/03/2021
   -No CLMLZK product issued on 03/04/2021
   -CLMLZK Product issued on 03/05/2021

---Apr 2021---
   -No CLMLZK product issued on 04/01/2021
   -CLMLZK Product issued on 04/02/2021

---May 2021---
   -CLMLZK Product issued on 05/01/2021

---Jun 2021---
   -CLMLZK Product issued on 06/01/2021

---Jul 2021---
   -No CLMLZK product issued on 07/01/2021
   -CLMLZK Product issued on 07/02/2021

---Aug 2021---
   -CLMLZK Product issued on 08/01/2021

---Sep 2021---
   -CLMLZK Product issued on 09/01/2021

---Oct 2021---
   -No CLMLZK product issued on 10/01/2021
   -CLMLZK Product issued on 10/02/2021

---Nov 2021---
   -No CLMLZK product issued on 11/01/2021
   -CLMLZK Product issued on 11/02/2021

---Dec 2021---

In [489]:

# this allows us to quickly generate a text str for an api clm product
#url = 'https://mesonet.agron.iastate.edu/api/1/nwstext/202302051153-KLZK-CXUS54-CLMLZK'
url = 'https://mesonet.agron.iastate.edu/api/1/nwstext/202103050058-KLZK-CXUS54-CLMLZK'
#url = 'https://mesonet.agron.iastate.edu/api/1/nwstext/202303041555-KLZK-CXUS54-CLMHRO' # multiple dates of greatest snow depth

# close the HTTP response as soon as it has been read, and give up after 30 seconds
# instead of hanging the kernel on a stalled connection
with urlopen(url, timeout=30) as response:
    html = response.read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

# get text
text = soup.get_text()

# cut the product text into its sections; each slice is the text between two labels
avg_high_temp_data = text.split('AVG. MAXIMUM  ')[-1].split('\nAVG. MINIMUM  ')[0]   
avg_low_temp_data = text.split('AVG. MINIMUM  ')[-1].split('\nMEAN')[0]  
avg_monthly_data = text.split('MEAN  ')[-1].split('\nDAYS MAX >= 90')[0]

monthly_precip_data = text.split('SNOWFALL (INCHES)')[0].split('PRECIPITATION (INCHES)')[-1].split('\nTOTALS')[-1]
monthly_snow_data = text.split('SNOWFALL (INCHES)')[-1].split('\nTOTALS')[-1].split('\nDEGREE DAYS')[0]

misc_temp_data = text.split('\nTEMPERATURE (F)\n')[-1].split('\n\nPRECIPITATION (INCHES)')[0]



In [464]:
# write our dictionary to a text file, so we can operate easily in pycharm without having to re acquire the data everytime...
# (the with-statement closes the file on exit, so no explicit close() is needed)
with open('./2021_LZK_Annual_Summary.txt', 'w') as f:
    f.write(json.dumps(data_dict))

In [467]:
# read the saved summary back in and rebuild the dictionary from its JSON text
# (the with-statement closes the file on exit, so no explicit close() is needed)
with open('./2021_LZK_Annual_Summary.txt', 'r') as f:
    json_str = f.read()

_new_data_dict = json.loads(json_str)

In [366]:
# per-month rainfall records, in the order of `months`
_rain = [data_dict[m]['monthly_rain_and_dfn'] for m in months]
_rain_mx = [data_dict[m]['max_clndr_24hr_rain'] for m in months]

# monthly rainfall and its departure from normal
pcp = [rec[0] for rec in _rain]
pcp_dfn = [rec[1] for rec in _rain]

# index 0/1 -> value and dates that this cell treats as the calendar day max
# NOTE(review): _make_precip_table reads the calendar day max from index 2/3 and the
# 24 hr storm total from index 0/1 - confirm which ordering is current
pcp_mx_clndr = [rec[0] for rec in _rain_mx]
pcp_mx_clndr_dates = [rec[1] for rec in _rain_mx]

# index 2/3 -> value and dates that this cell treats as the 24 hr storm total max
precip_mx_storm_total = [rec[2] for rec in _rain_mx]
precip_mx_storm_total_dates = [rec[3] for rec in _rain_mx]

In [434]:
# only these months are kept for the snow lists (order follows `months`)
_snow_season = ('JAN', 'FEB', 'MAR', 'APR', 'OCT', 'NOV', 'DEC')
_snow_mos = [m for m in months if m in _snow_season]

# index 0/1 -> monthly snow and its departure from normal
snow = [data_dict[m]['monthly_snow_sdepth_dfn'][0] for m in _snow_mos]
snow_dfn = [data_dict[m]['monthly_snow_sdepth_dfn'][1] for m in _snow_mos]

# index 2/3 -> greatest snow depth and its date
mx_sdepth = [data_dict[m]['monthly_snow_sdepth_dfn'][2] for m in _snow_mos]
mx_sdepth_dt = [data_dict[m]['monthly_snow_sdepth_dfn'][3] for m in _snow_mos]

# max 24 hr snow and its date(s)
mx_24hr_snow = [data_dict[m]['max_clndr_24hr_snow'][0] for m in _snow_mos]
mx_24hr_snow_dt = [data_dict[m]['max_clndr_24hr_snow'][1] for m in _snow_mos]

# show all six lists
snow, snow_dfn, mx_sdepth, mx_sdepth_dt, mx_24hr_snow, mx_24hr_snow_dt

(['T', 18.7, 'T', 0.0, 0.0, 0.0, 0.0],
 [-1.7, 16.9, -0.4, 0.0, 0.0, -0.1, -0.6],
 [0.0, 14.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [nan, '02/18', nan, nan, nan, nan, nan],
 ['T', 10.9, 'T', 0.0, 0.0, 0.0, 0.0],
 ['01/15 TO 01/15', '02/14 TO 02/15', '03/31 TO 03/31', nan, nan, nan, nan])

In [None]:
# lets work on the daily record information here...

'''
Notes, working here last on 8/4, we had just finished 
pulling in the new record data, and getting the old 
record data (high/low/rain/snow) from the IEM daily CLI API...

Need to add that data to the dictionary, and investigate
avenues for getting high-low and low-high, if possible...

Otherwise, we're onto incorporating the record data into the text file
'''

In [92]:
# ----------------------------------------------------------
def _parse_rec_dates(rec_dt, year):
    '''
    This function will convert our record date format from 'mm/dd' to 'mm-dd'.
    We have to add year, to check on leap years, otherwise, without declaring a year, 
    datetime defaults the year to 1990, and this can raise errors if we have records
    on a leap day.
    
    
    Parameters
    ----------
    rec_dt : list, the list of daily record dates
    year : int, the year, formated as YYYY

    Returns
    ----------
    rec_dt : list, the list with newly formatted dates

    '''
    
    if rec_dt:
        return [datetime.strptime(f'{dt}/{year}', '%m/%d/%Y').strftime('%m-%d') for dt in rec_dt]
    else:
        return rec_dt

# ----------------------------------------------------------
def _get_iem_records(r, rec_dt, rec_key):
    '''
    This function will take a dictionary of daily climate data, obtained from the IEM API, and 
    obtain all the previous daily record data, based on the input new daily record dates.

    Parameters
    ----------
    r : dict, this is the json text we get from the IEM API, which contains daily climate summary info for a given year
    rec_dt : list, a list of new daily record dates that we need previous record info for
    rec_key : str, the keyword for which record we are parsing, args include:
                   'low_record', 'high_record', 'precip_record', 'snow_record'
    Returns
    ----------
    old_records : list, a list of lists with previous record values, and years it was set, e.g. record lows [[1, 2009, 2011], [5, 2005]]

    '''

    old_records = []
    
    if rec_dt:

        # iterate over the new record dates
        for rec_dt in rec_dt:
            # iterate through each day in cli, which is a dictionary with all days
            for item in r['results']:
                # find each day in the cli json that has a new daily record 
                if rec_dt in item['valid']:
    
                    # get the record value and years based on keyword arguments
                    _old_record = [] # blank list to add record values/years to

                    # for snow, sometimes during the summer months, it comes back as 'M'
                    if item[rec_key] == 'M' and rec_key == 'snow_record':
                        _old_record.append(0.0)

                    else:
                        _old_record.append(item[rec_key])
                    
                    for yr in item[f'{rec_key}_years']:
                        _old_record.append(yr)

                    old_records.append(_old_record)
        return old_records
    else:
        return old_records # should be an empty list if no record dates

# ----------------------------------------------------------
# Load the station's record text file. The parsing below treats it as a set of
# sections, each introduced by a header line ('Record High', 'Record Low High',
# 'Record High Low', 'Record Low', 'Record Rain', 'Record Snow').
with open('./records/mylzkrecs.txt') as f:
    recs = f.read()

# Each section is the text after the LAST occurrence of its own header and
# before the FIRST occurrence of the next header, split into lines.
rec_mx = recs.rpartition('\nRecord High\n')[2].partition('\nRecord Low High\n')[0].splitlines()  # record high
rec_lwmx = recs.rpartition('\nRecord Low High\n')[2].partition('\nRecord High Low\n')[0].splitlines()  # record low high
rec_mxlw = recs.rpartition('\nRecord High Low\n')[2].partition('\nRecord Low\n')[0].splitlines()  # record high low
rec_lw = recs.rpartition('\nRecord Low\n')[2].partition('\nRecord Rain\n')[0].splitlines()  # record low
rec_pcp = recs.rpartition('\nRecord Rain\n')[2].partition('\nRecord Snow\n')[0].splitlines()  # record rain

# record snow is the final section; its last line is always discarded
# NOTE(review): confirm the file really ends with a throw-away line, otherwise
# this drops the last snow record
rec_sn = recs.rpartition('\nRecord Snow\n')[2].splitlines()[:-1]

# pair every record type key with its list of new-record lines
keys = ['rec_mx', 'rec_lwmx', 'rec_lw', 'rec_mxlw', 'rec_pcp', 'rec_sn']
clm_recs = [rec_mx, rec_lwmx, rec_lw, rec_mxlw, rec_pcp, rec_sn]

# records dictionary: new records come from the file, old records are filled in later
recs_dict = {
    'new_recs' : dict(zip(keys, clm_recs)),
    'old_recs' : {}
}

# ----------------------------------------------------------
# Pull the date out of every new-record line (the text before the first comma)
# and convert it from 'mm/dd' to 'mm-dd', the form matched against the IEM API dates.
year = 2024

rec_mx_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_mx']],
    year,
)
rec_lwmx_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_lwmx']],
    year,
)
rec_lw_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_lw']],
    year,
)
rec_mxlw_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_mxlw']],
    year,
)
rec_pcp_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_pcp']],
    year,
)
rec_sn_dt = _parse_rec_dates(
    [entry.partition(',')[0] for entry in recs_dict['new_recs']['rec_sn']],
    year,
)


# ----------------------------------------------------------
# Fetch the previous record values/years from the IEM API.
# Open question: how to store them? Perhaps with the same key structure as the
# new records, e.g. 'rec_mx': [[(old record value), yyyy, yyyy, etc.], [(old record value), yyyy, yyyy, etc.]]

# IEM daily climate endpoint for one site and one year
url = f"https://mesonet.agron.iastate.edu/json/cli.py?station=KLZK&year={year}"

# GET the data and decode the JSON body in one step
r = requests.get(url).json()

# previous records for each type that the API reports directly
old_mx_rec = _get_iem_records(r, rec_mx_dt, 'high_record')
old_lw_rec = _get_iem_records(r, rec_lw_dt, 'low_record')
old_pcp_rec = _get_iem_records(r, rec_pcp_dt, 'precip_record')
old_sn_rec = _get_iem_records(r, rec_sn_dt, 'snow_record')


# not sure about low-high and high-low data/dates, will have to think on this one...
# only solution for now seems to be searching for RERs, but this will be time consuming and difficult...

# can use the iem api for daily climate to also get misc daily high and low data









In [88]:
# inspect the decoded IEM daily climate JSON: a dict whose 'results' list holds one dict per day
r

{'results': [{'station': 'KLZK',
   'valid': '2024-01-01',
   'state': 'AR',
   'wfo': 'LZK',
   'link': '/api/1/nwstext/202401020730-KLZK-CDUS44-CLILZK',
   'product': '202401020730-KLZK-CDUS44-CLILZK',
   'name': 'NORTH LITTLE ROCK',
   'high': 41,
   'high_record': 73,
   'high_record_years': [2006],
   'high_normal': 49,
   'high_depart': -8,
   'high_time': 'MM',
   'low': 28,
   'low_record': 9,
   'low_record_years': [2018],
   'low_normal': 33,
   'low_depart': -5,
   'low_time': 'MM',
   'precip': 0.0,
   'precip_normal': 0.14,
   'precip_month': 0.0,
   'precip_month_normal': 0.14,
   'precip_jan1': 0.0,
   'precip_jan1_normal': 0.14,
   'precip_jun1': 'M',
   'precip_jun1_normal': 'M',
   'precip_jul1': 'M',
   'precip_dec1': 1.45,
   'precip_dec1_normal': 5.24,
   'precip_record': 2.07,
   'precip_record_years': [1999],
   'snow': 0.0,
   'snowdepth': 0.0,
   'snow_normal': 0.0,
   'snow_month': 0.0,
   'snow_jun1': 'M',
   'snow_jul1': 0.0,
   'snow_dec1': 0.0,
   'snow_re

In [138]:
# IEM daily climate endpoint for station KLZK and the year currently held in `year`
url = f"https://mesonet.agron.iastate.edu/json/cli.py?station=KLZK&year={year}"

# GET the data and decode the JSON body in one step
r = requests.get(url).json()



In [142]:
# lets write a function that takes a keyword argument and integer argument

# iterate through each day in cli, which is a dictionary with all days


def _parse_iem_cli_api(r, key, t, gt = False):

    '''
    This function will utilize the IEM Daily Climate API, and parse 
    for misc temp info such as days above/below n temp.

    Parameters
    ----------
    r : dict, the json dictionary from the IEM API
    key : str, the key value that is being queried in r
    t : int, the temperature that is being queried
    gt : bool, default is False, a boolean that determines the sign operator

    Returns
    ----------
    n : int, the number of days of a threshold that is queried

    '''
    
    n = 0
    for item in r['results']:
        if gt:
            if item[key] >= t:
                n += 1
        else:
            if item[key] <= t:
                n += 1
    return n

# count the days in the fetched data whose 'low' value is at or below 20
n = _parse_iem_cli_api(r, 'low', 20, gt = False)
n

11

In [100]:
# inspect the decoded IEM daily climate JSON again (same structure as before: 'results' holds one dict per day)
r

{'results': [{'station': 'KLZK',
   'valid': '2024-01-01',
   'state': 'AR',
   'wfo': 'LZK',
   'link': '/api/1/nwstext/202401020730-KLZK-CDUS44-CLILZK',
   'product': '202401020730-KLZK-CDUS44-CLILZK',
   'name': 'NORTH LITTLE ROCK',
   'high': 41,
   'high_record': 73,
   'high_record_years': [2006],
   'high_normal': 49,
   'high_depart': -8,
   'high_time': 'MM',
   'low': 28,
   'low_record': 9,
   'low_record_years': [2018],
   'low_normal': 33,
   'low_depart': -5,
   'low_time': 'MM',
   'precip': 0.0,
   'precip_normal': 0.14,
   'precip_month': 0.0,
   'precip_month_normal': 0.14,
   'precip_jan1': 0.0,
   'precip_jan1_normal': 0.14,
   'precip_jun1': 'M',
   'precip_jun1_normal': 'M',
   'precip_jul1': 'M',
   'precip_dec1': 1.45,
   'precip_dec1_normal': 5.24,
   'precip_record': 2.07,
   'precip_record_years': [1999],
   'snow': 0.0,
   'snowdepth': 0.0,
   'snow_normal': 0.0,
   'snow_month': 0.0,
   'snow_jun1': 'M',
   'snow_jul1': 0.0,
   'snow_dec1': 0.0,
   'snow_re

In [None]:
# XMACIS API Stuff Here...

In [56]:
'''
Here we were working with the xmacis API to obtain data, we have working code to get annual avg data and departure from normal,
now we need some logic to compare a yearly value 

'''


# notes, for a list of yearly 'maxT' in this case, average monthly temperatures, use this syntax
#[{"name":"maxt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"}]



def get_annual_xmacis_data(station):
    '''
    This function will query the xmacis (RCC-ACIS) StnData API for a station's
    period-of-record data, using the request that is pre-defined for that station.

    The active 'lit' and 'hro' requests ask for values with interval [0,1] grouped by year;
    the 'pbf' request asks for yearly ('yly') values. The 'lit' request has no
    departure-from-normal elements, 'hro' and 'pbf' do.

    Parameters
    ----------
    station : str, the station to query, case-insensitive; must be one of the keys
              defined in json_dict ('lit', 'hro', 'pbf')

    Returns
    ----------
    dict, the decoded json response from the API

    Raises
    ----------
    KeyError : if no request is defined for `station`
    requests.HTTPError : if the API answers with an error status

    '''

    # a dictionary that contains the json syntax to the xmacis api, that will return yearly avg values for the por, and the departures from normal
    json_dict = {
        #'lit' : '{"sid":"LITthr","sdate":"por","edate":"por","output":"json","elems":[{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","normal":"departure","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","normal":"departure","maxmissing":"7","prec":1}],"output":"json"}',
        'lit' : '{"sid":"LITthr","sdate":"por","edate":"por","output":"json","elems":[{"name":"maxt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"},{"name":"mint","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"},{"name":"avgt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year", "prec":0},{"name":"pcpn","interval":[0,1],"duration":1,"reduce":"sum","maxmissing":"7","prec":2,"groupby":"year"},{"name":"snow","interval":[0,1],"duration":1,"reduce":"sum","maxmissing":"7","groupby":"year","prec":1}],"output":"json"}',
        #'lzk' : '{"sid":"LZKthr","sdate":"por","edate":"por","output":"json","elems":[{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","normal":"departure","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","normal":"departure","maxmissing":"7","prec":1}],"output":"json"}',
        'hro' : '{"sid":"HROthr","sdate":"por","edate":"por","output":"json","elems":[{"name":"maxt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"},{"name":"maxt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","groupby":"year"},{"name":"mint","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"},{"name":"mint","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","groupby":"year"},{"name":"avgt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","groupby":"year"},{"name":"avgt","interval":[0,1],"duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","groupby":"year"},{"name":"pcpn","interval":[0,1],"duration":1,"reduce":"sum","maxmissing":"7","prec":1,"groupby":"year"},{"name":"pcpn","interval":[0,1],"duration":1,"reduce":"sum","maxmissing":"7","normal":"departure","prec":1,"groupby":"year"},{"name":"snow","interval":[0,1],"duration":1,"reduce":"sum","maxmissing":"7","groupby":"year"},{"name":"snow","interval":[0,1],"duration":1,"reduce":"sum","normal":"departure","maxmissing":"7","groupby":"year"}],"output":"json"}',
        'pbf' : '{"sid":"PBFthr","sdate":"por","edate":"por","output":"json","elems":[{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"maxt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"mint","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","prec":1},{"name":"avgt","interval":"yly","duration":1,"reduce":"mean","maxmissing":"7","normal":"departure","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"pcpn","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","normal":"departure","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","maxmissing":"7","prec":1},{"name":"snow","interval":"yly","duration":1,"reduce":"sum","normal":"departure","maxmissing":"7","prec":1}],"output":"json"}'
    }

    json_text = json_dict[station.lower()]

    # the xmacis api server for station data
    url = "https://data.rcc-acis.org/StnData"

    # A GET request to the API. The json is handed over through `params` so that
    # requests URL-encodes it, rather than pasting raw json into the query string.
    # The timeout keeps a stalled server from hanging the notebook.
    r = requests.get(url, params = {'params': json_text}, timeout = 30)

    # fail here with the HTTP status instead of trying to decode an error page
    r.raise_for_status()
    return r.json()

# pull the period-of-record data for LIT from the xmacis API
data = get_annual_xmacis_data('LIT')

# --------------------------------------------------------------------

# this will take the json dictionary from the api, and create lists of lists for each annual avg or total parameter
# NOTE(review): the commented-out slicing below indexes positions 0-10 of each row, which
# looks like the value/departure layout of the commented-out 'yly' request; the row shown
# in the output below has only a year plus five lists -- confirm before re-enabling
# maxt = [[x for idx, x in enumerate(sub_lst) if idx in [0, 1, 2]] for sub_lst in data['data']]
# mint = [[x for idx, x in enumerate(sub_lst) if idx in [0, 3, 4]] for sub_lst in data['data']]
# avgt = [[x for idx, x in enumerate(sub_lst) if idx in [0, 5, 6]] for sub_lst in data['data']]
# pcpn = [[x for idx, x in enumerate(sub_lst) if idx in [0, 7, 8]] for sub_lst in data['data']]
# snow = [[x for idx, x in enumerate(sub_lst) if idx in [0, 9, 10]] for sub_lst in data['data']]

# show the second-to-last row: [year, maxt list, mint list, avgt list, pcpn list, snow list]
data['data'][-2]


['2023',
 ['58.6',
  '62.1',
  '65.6',
  '73.9',
  '84.4',
  '91.6',
  '92.8',
  '95.7',
  '91.1',
  '78.2',
  '65.7',
  '59.8'],
 ['39.6',
  '41.7',
  '46.5',
  '52.5',
  '63.4',
  '70.2',
  '74.3',
  '75.3',
  '67.0',
  '56.6',
  '43.9',
  '37.9'],
 ['49.1',
  '51.9',
  '56.0',
  '63.2',
  '73.9',
  '80.9',
  '83.6',
  '85.5',
  '79.0',
  '67.4',
  '54.8',
  '48.8'],
 ['8.07',
  '7.62',
  '8.44',
  '9.87',
  '1.72',
  '4.49',
  '3.71',
  '1.81',
  '0.46',
  '5.24',
  '4.03',
  '1.40'],
 ['T',
  'T',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0',
  '0.0']]

In [None]:
'''
Data comes out as follows, a list of lists:

[yr, [monthly avg max T's], [monthly avg max T dfn], 
     [monthly avg min T's], [monthly avg min T dfn],
     [monthly avg mean T's], [monthly avg mean T dfn], 
     [monthly total precip], [monthly total precip dfn],
     [monthly total snow], [monthly total snow dfn]
     ]

# we will now be importing normals data from a local csv file, and only getting the observed values from the xmacis API

'''

In [60]:
# let's import normals data

# read the LIT normals table; pd.read_csv already returns a DataFrame,
# so the pd.DataFrame(...) call just wraps it again under the name nrml_df
nrml_file = pd.read_csv('./normals/LIT_Normals.csv')
nrml_df = pd.DataFrame(nrml_file)

# the LAST row of each column is taken as the annual normal
# (presumably the csv ends with an annual summary row -- TODO confirm against the file)
annual_mx_nrml = nrml_df['MAX'].values[-1]
annual_mn_nrml = nrml_df['MIN'].values[-1]
annual_avg_nrml = nrml_df['MEAN'].values[-1]

annual_pcp_nrml = nrml_df['PCPN'].values[-1]
annual_sn_nrml = nrml_df['SNOW'].values[-1]



72.3

In [92]:
def _calculate_snow_annuals_and_dfn(data):
    
    # handle trace values inside of snow, if there are any
    sn_filtered = [float(s) for s in data if s != 'T']  # filter out trace values

    # if the two lists are equal after filtering, then we filtered no T's
    if len(data) == len(sn_filtered):
        ann_sn = np.round(np.sum(sn_filtered), 1)

    # if the two lists are not equal after filtering, then we filtered T's out...
    elif len(data) != len(sn_filtered):
        # if the sum is still 0.0, then annual snowfall was only trace 'T'
        if np.sum(sn_filtered) == 0.0:
            ann_sn = 'T'

        # if the sum is not 0.0 after filtering out trace values, find the sum
        elif np.sum(sn_filtered) > 0.0:
            ann_sn = np.round(np.sum(sn_filtered), 1)

    # find the normal value for the parameter
    annual_nrml = nrml_df['SNOW'].values[-1]
    
    # if not trace, then proceed
    if ann_sn != 'T':
        # get the observed annual average value, and the departure from normal
        #obs_annual_avg = np.round(np.sum([float(val) for val in data]),1)
        obs_annual_avg_dfn = np.round((ann_sn - annual_nrml),1)
    
        return ann_sn, obs_annual_avg_dfn

    else:
        obs_annual_avg_dfn = annual_nrml * -1
        return ann_sn, obs_annual_avg_dfn



    

In [93]:
# annual snowfall total and its departure from normal for the selected year
# NOTE(review): sn_data is assigned in the year-selection cell that appears further down
# the notebook; that cell has to run first for a top-to-bottom run to work
obs_annual_sn, obs_annual_sn_dfn = _calculate_snow_annuals_and_dfn(sn_data)

In [74]:
def _calculate_temp_annuals_and_dfn(data, key):

    # find the normal value for the parameter
    annual_nrml = nrml_df[key].values[-1]

    # get the observed annual average value, and the departure from normal
    obs_annual_avg = np.round(np.mean([float(val) for val in data]),1)
    obs_annual_avg_dfn = np.round((obs_annual_avg - annual_nrml),1)

    return obs_annual_avg, obs_annual_avg_dfn
    

In [86]:




# annual average of the max-temperature values and its departure from the 'MAX' normal
# NOTE(review): `_calculate_annuals_and_dfn` is not defined in the visible part of the
# notebook; the helper defined nearby is `_calculate_temp_annuals_and_dfn`. Confirm which
# name is intended -- a stale name would raise NameError on a fresh kernel.
obs_annual_avg_mx, obs_annual_avg_dfn = _calculate_annuals_and_dfn(mxt_data, key = 'MAX')

76.6

In [82]:
# the year whose values are pulled out of the xmacis response
year = 2023

# each row of data['data'] starts with the year as a string; positions 1-5 follow the
# element order of the 'lit' request (maxt, mint, avgt, pcpn, snow).
# the trailing [0] takes the first matching row and raises IndexError if the year is absent
mxt_data = [val[1] for val in data['data'] if val[0] == str(year)][0]
mnt_data = [val[2] for val in data['data'] if val[0] == str(year)][0]
avgt_data = [val[3] for val in data['data'] if val[0] == str(year)][0]

pcp_data = [val[4] for val in data['data'] if val[0] == str(year)][0]
sn_data = [val[5] for val in data['data'] if val[0] == str(year)][0]

In [21]:
def sort_annual_temps(temps, mintemps = False):

    '''
    This function will sort a list of annual avg temp values, and delete missing years

    Parameters
    ----------
    temps : list of lists, a list of lists of annual temps for a station, including the year, avg value, and departure from normal;
            missing entries are given as 'M'
    mintemps : boolean, default set to False, if True, then returned order will be reversed to represent the coldest temps at the top

    Returns
    ----------
    param_filtered : a sorted and filtered list of annual avg temps, each row as [int year, float value, float departure]

    '''

    def _to_number(val):
        # missings become nan, nans arise from the 7 missing days in a year threshold
        return np.nan if val == 'M' else np.float64(val)

    def _is_missing(val):
        # An identity test against np.nan only catches that one object; a nan that
        # arrives by any other route would slip through and scramble the sort.
        return isinstance(val, float) and np.isnan(val)

    param_filtered = []
    for subl in temps:
        row = []
        for i, val in enumerate(subl):
            if i == 0:
                # the year, converted to an integer
                row.append(val if val == 'M' else int(val))
            elif i in (1, 2):
                # the avg value and its departure from normal
                row.append(_to_number(val))
            else:
                row.append(val)

        # a year without a value cannot be ranked, so it is dropped
        if not _is_missing(row[1]):
            param_filtered.append(row)

    # largest values first by default, smallest first for mintemps
    return sorted(param_filtered, key=itemgetter(1), reverse = not mintemps)


# rank the annual values: warmest first for max/avg temps, coldest first for min temps
# NOTE(review): in the visible part of the notebook maxt, mint and avgt are only assigned
# in commented-out code; confirm where they are built before relying on a fresh-kernel run
maxt_filtered = sort_annual_temps(maxt)
mint_filtered = sort_annual_temps(mint, mintemps = True)
avgt_filtered = sort_annual_temps(avgt)




In [23]:
# inspect the ranked list; rows are [year, annual value, departure from normal], largest value first
maxt_filtered

[[1954, 74.7, 5.7],
 [1901, 73.6, 4.6],
 [2012, 73.3, 4.3],
 [1934, 72.7, 3.7],
 [1952, 72.7, 3.7],
 [1956, 72.6, 3.6],
 [1941, 72.1, 3.1],
 [1964, 72.1, 3.1],
 [1909, 72.0, 3.0],
 [1955, 71.6, 2.6],
 [1998, 71.2, 2.2],
 [2006, 71.2, 2.2],
 [1962, 71.0, 2.0],
 [2017, 71.0, 2.0],
 [2023, 71.0, 2.0],
 [1959, 70.9, 1.9],
 [2016, 70.8, 1.8],
 [1999, 70.7, 1.7],
 [1957, 70.6, 1.6],
 [1974, 70.5, 1.5],
 [1904, 70.4, 1.4],
 [1914, 70.4, 1.4],
 [1990, 70.3, 1.3],
 [2005, 70.3, 1.3],
 [1916, 70.2, 1.2],
 [2007, 70.1, 1.1],
 [2011, 70.1, 1.1],
 [1967, 70.0, 1.0],
 [1917, 69.9, 0.9],
 [1911, 69.8, 0.8],
 [1966, 69.8, 0.8],
 [2022, 69.8, 0.8],
 [1902, 69.7, 0.7],
 [1973, 69.7, 0.7],
 [1986, 69.7, 0.7],
 [1960, 69.6, 0.6],
 [1972, 69.6, 0.6],
 [2021, 69.6, 0.6],
 [1906, 69.4, 0.4],
 [1961, 69.4, 0.4],
 [1991, 69.4, 0.4],
 [1987, 69.3, 0.3],
 [2010, 69.3, 0.3],
 [2018, 69.3, 0.3],
 [2020, 69.2, 0.2],
 [1988, 69.1, 0.1],
 [2015, 69.1, 0.1],
 [1980, 69.0, -0.0],
 [2001, 68.9, -0.1],
 [1910, 68.8, -0.2

In [28]:
def _find_numeric_suffix(myDate):
    '''
    This function will take a string date, formatted as 'nn', e.g. '05', and assign a suffix based on the number.

    Parameters
    ----------
    myDate : str, a date number, formatted as 'nn', e.g. '05'

    Returns
    ----------
    myDate : str, formatted as 'nnTH', 'nnST', 'nnND', 'nnRD'

    '''

    date_suffix = ["TH", "ST", "ND", "RD"]

    if int(myDate) % 10 in [1, 2, 3] and int(myDate) not in [11, 12, 13]:
        return f'{myDate}{date_suffix[int(myDate) % 10]}'
    else:
        return f'{myDate}{date_suffix[0]}'

In [None]:
'''
Need to see if there's a way to specify how the averaging is done through xmacis api...

perhaps getting monthly avg temps, then getting the avg of that?

'''

In [40]:
searchyear = 2023

# the search year's entry in the ranked list, rows are [year, value, departure from normal]
yrly_val = [item[1] for item in maxt_filtered if item[0] == searchyear][0] # the yearly value, e.g. annual max T, min T, avg T
yrly_dfn = [item[2] for item in maxt_filtered if item[0] == searchyear][0] # the yearly value departure from normal

# positive departures are written with an explicit plus sign
if float(yrly_dfn) > 0.0:
    yrly_dfn = f'+{yrly_dfn}'

# rank from the warm side: one more than the number of strictly warmer years,
# so that tied years share the same rank
rank = 1 + sum(1 for item in maxt_filtered if item[1] > yrly_val)

percentile = (rank/len(maxt_filtered))*100

if percentile <= 50:
    # upper half of the ranking: describe the year as one of the warmest
    text_key = 'warmest_avghigh_since'

    # earlier years that were at least as warm
    filtered_data = [item for item in maxt_filtered if item[0] < searchyear and item[1] >= yrly_val]

else:
    # lower half of the ranking: describe the year as one of the coldest,
    # which needs the rank counted from the cold side
    text_key = 'coldest_avghigh_since'
    rank = 1 + sum(1 for item in maxt_filtered if item[1] < yrly_val)

    # earlier years that were at least as cold
    filtered_data = [item for item in maxt_filtered if item[0] < searchyear and item[1] <= yrly_val]

# "since" refers to the most recent EARLIER year that matched or beat the search year;
# later years do not count, and when no earlier year qualifies the search year stands alone
if filtered_data:
    next_closest_year = max(item[0] for item in filtered_data)
else:
    next_closest_year = 'RECORDS BEGAN'

# words to fill into the default text strings
replacement_words = [f'{yrly_val}', f'{yrly_dfn}', f'{_find_numeric_suffix(rank)}', f'{next_closest_year}']

# both branches fill the placeholders; an unformatted template would show raw '{}' marks
_text = text_dict['temperatures'][text_key].format(*replacement_words).upper()

_text



'THE YEARLY AVERAGE HIGH TEMPERATURE OF 71.0 (+2.0) WAS THE 13TH WARMEST YEAR, AND THE WARMEST SINCE 2017.'

In [33]:
 # Dictionary of text templates for the annual summary sentences.
 # Each template has four '{}' placeholders; the ranking cell fills them, in order, with
 # the yearly value, its departure from normal, the rank with its suffix, and a year.
 # NOTE(review): the ranking cell reads text_dict, so this cell has to run before it.

text_dict = {
    'temperatures' : {
        'warmest_avghigh_since' : 'THE YEARLY AVERAGE HIGH TEMPERATURE OF {} ({}) WAS THE {} WARMEST YEAR, AND THE WARMEST SINCE {}.',
        'coldest_avghigh_since' : 'THE YEARLY AVERAGE HIGH TEMPERATURE OF {} ({}) WAS THE {} COLDEST YEAR, AND THE COLDEST SINCE {}.',

        'warmest_avglow_since' : 'THE YEARLY AVERAGE LOW TEMPERATURE OF {} ({}) WAS THE {} WARMEST YEAR, AND THE WARMEST SINCE {}.',
        'coldest_avglow_since' : 'THE YEARLY AVERAGE LOW TEMPERATURE OF {} ({}) WAS THE {} COLDEST YEAR, AND THE COLDEST SINCE {}.',
    }
    

    
}


In [206]:
# scratch check of the "closest year" lookup
# (this bare name is not the last expression of the cell, so it displays nothing)
filtered_data

# absolute distance, in years, between the search year and every row of filtered_data
res = []
for sublst in filtered_data:
    res.append(abs(searchyear - sublst[0]))

# position of the row whose year is closest to the search year
print(res.index(min(res)))








13


In [162]:
def sort_annual_precip_value(param):

    '''
    This function will sort a list of annual total snowfall or precip values, converting Trace values to 0.005, and deleting missing years

    Parameters
    ----------
    param : list of lists, a list of lists of snowfall or precip for a station, including the year, total value, and departure from normal;
            missing entries are given as 'M' and trace amounts as 'T'

    Returns
    ----------
    param_filtered : a sorted and filtered list of annual param totals, largest total first,
                     each row as [int year, float total, float departure]

    '''

    def _to_number(val):
        # missings become nan, nans arise from the 7 missing days in a year threshold
        if val == 'M':
            return np.nan
        # trace is given a small value so that it sorts just above 0.0
        if val == 'T':
            return np.float64(0.005)
        return np.float64(val)

    def _is_missing(val):
        # An identity test against np.nan only catches that one object; a nan that
        # arrives by any other route would slip through and scramble the sort.
        return isinstance(val, float) and np.isnan(val)

    param_filtered = []
    for subl in param:
        row = []
        for i, val in enumerate(subl):
            if i == 0:
                # the year, converted to an integer
                row.append(val if val == 'M' else int(val))
            elif i in (1, 2):
                # the total value and its departure from normal
                row.append(_to_number(val))
            else:
                row.append(val)

        # a year without a total cannot be ranked, so it is dropped
        if not _is_missing(row[1]):
            param_filtered.append(row)

    # this will create a sorted list, with the largest values first
    return sorted(param_filtered, key=itemgetter(1), reverse = True)


# rank the annual snowfall totals, largest first
# NOTE(review): sort_annual_precip_value iterates over rows of [year, total, departure];
# the `snow` assigned in the monthly snow cell earlier in this notebook is a flat list of
# monthly values, so confirm `snow` is rebuilt in the row layout before this cell runs
filtered_snow = sort_annual_precip_value(snow)

In [81]:
# IEM API link for a single CLM text product
url = 'https://mesonet.agron.iastate.edu/api/1/nwstext/202302051153-KLZK-CXUS54-CLMLZK'

# download the page and hand it to BeautifulSoup
# NOTE(review): the handle returned by urlopen is never closed; consider a `with` block
html = urlopen(url).read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

# get text
text = soup.get_text()


# display the product text
text

"254 \nCXUS54 KLZK 081205\nCLMLZK\n\nCLIMATE REPORT...UPDATED WITH SUPPLEMENTAL DATA \nNATIONAL WEATHER SERVICE LITTLE ROCK AR\n553 AM CST SUN FEB 05 2023\n\n...................................\n\n...THE NORTH LITTLE ROCK CLIMATE SUMMARY FOR THE MONTH OF JANUARY 2023...\n\nCLIMATE NORMAL PERIOD 1991 TO 2020\nCLIMATE RECORD PERIOD 1975 TO 2023\n\nWEATHER         OBSERVED          NORMAL  DEPART   LAST YEAR'S         \n                VALUE   DATE(S)   VALUE   FROM     VALUE DATE(S)       \n                                          NORMAL                       \n................................................................\nTEMPERATURE (F)\nRECORD\n HIGH             81   01/30/2002                                      \n LOW              -6   01/20/1985                                      \nHIGHEST           72   01/11         49      23       68  01/01        \nLOWEST            24   01/31         31      -7       13  01/21        \n                       01/14                      

In [59]:
# first entry of the product list; the output below shows its fields (pil, product_id, text_link, ...)
# NOTE(review): `response` is not assigned in the visible cells -- confirm where it comes from
response['data'][0]

{'index': 0,
 'entered': '2023-02-05T11:53:00Z',
 'pil': 'CLMLIT',
 'product_id': '202302051153-KLZK-CXUS54-CLMLIT',
 'cccc': 'KLZK',
 'count': 2,
 'link': 'https://mesonet.agron.iastate.edu/p.php?pid=202302051153-KLZK-CXUS54-CLMLIT',
 'text_link': 'https://mesonet.agron.iastate.edu/api/1/nwstext/202302051153-KLZK-CXUS54-CLMLIT'}

In [29]:
# IEM AFOS list endpoint: products for office KLZK with pil CLMLIT on one date
url = 'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil=CLMLIT&date=2023-02-05'
#url = f'https://mesonet.agron.iastate.edu/api/1/nws/afos/list.json?cccc=KLZK&pil={station}&date={y}-{m:02}-{d:02}'


# download the response and run it through BeautifulSoup to get plain text
# NOTE(review): the endpoint name ends in .json and the text is parsed with json.loads
# below, so decoding the response directly may be simpler than an HTML parser -- confirm
html = urlopen(url).read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

text = soup.get_text()

#text.split('Skip to cccc:')[-1]

# wrap the text in a list so it parses as a one-element json array, then take that element
text = f'[{text}]'
json_item = json.loads(text)[0]

# if not json_item['data']:
#     print('no product issued')

# elif json_item['data']:
#     print(json_item['data']['text_link'])

In [35]:
# API text link of the first product in the list
json_item['data'][0]['text_link']

'https://mesonet.agron.iastate.edu/api/1/nwstext/202302051153-KLZK-CXUS54-CLMLIT'

In [24]:
# wrap the text in a list so it parses as a one-element json array, then take that element
# NOTE(review): if `text` was already wrapped by the cell that builds json_item, this wraps
# it a second time and the [0] returns a list rather than a dict -- confirm the run order
text_ = f'[{text}]'
#print(text_)
json1_data = json.loads(text_)[0]

# the list of products; it is empty when nothing was issued for the queried date
json1_data['data']

[]

In [26]:
# an empty 'data' list means no product came back for the queried date
if not json1_data['data']:
    print('no product issued')

no product issued


In [151]:
# use the api page to parse the actual text

#api_url = f'https://mesonet.agron.iastate.edu/api/1/nwstext/202302051153-KLZK-CXUS54-CLMLZK'
api_url = f'https://mesonet.agron.iastate.edu/api/1/nwstext/202303041555-KLZK-CXUS54-CLMLIT'

html = urlopen(api_url).read()
soup = BeautifulSoup(html, features="html.parser")

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

# get text
text = soup.get_text()