In [1]:
# Average last freeze date: collect NOAA daily low temperatures for the analysis
import pandas as pd
import noaa_datasets

In [2]:
# Pull down the station master data. Per the log printed below, this retrieves
# and reads ghcnd-stations.txt, ghcnd-states.txt and ghcnd-countries.txt and
# then applies data prep operations to enrich the dataset.
stations = noaa_datasets.Stations()

Retrieving ghcnd-stations.txt
Retrieving ghcnd-states.txt
Retrieving ghcnd-countries.txt
Reading ghcnd-stations.txt
Reading ghcnd-countries.txt
Reading ghcnd-states.txt
Applying data prep operations to enrich dataset
Success!


In [3]:
# Build the inventory dataset from the stations object. Per the log printed
# below, this retrieves ghcnd-inventory.txt and merges it with the Stations
# dataset.
inventory = noaa_datasets.Inventory(stations)

Retrieving ghcnd-inventory.txt
Reading ghcnd-inventory.txt
Merging with Stations dataset
Success!


In [4]:
# Figure out the best station inventories to use for our requirement:
#   * low temperatures only, which is Element == 'TMIN'
#   * more than 30 years of data (YearCount > 30)
#   * still reporting in 2019 (LastYear == 2019)
#   * CountryCode == 'US' and State == 'NE'
# The whole world has 8,000+ matching results and the United States alone
# still has 4,311, so we narrow to a single state before sampling.
SAMPLE_SIZE = 25  # number of stations whose daily files will be downloaded
SAMPLE_SEED = 42  # fixed seed so Restart & Run All picks the same stations

inventory_df = inventory.df
df = inventory_df[
    (inventory_df['Element'] == 'TMIN')
    & (inventory_df['YearCount'] > 30)
    & (inventory_df['LastYear'] == 2019)
    & (inventory_df['CountryCode'] == 'US')
    & (inventory_df['State'] == 'NE')
]

# Pick a random subset of the matching stations. The size is capped at the
# number of rows available so sample() cannot raise ValueError when the
# filter leaves fewer than SAMPLE_SIZE stations.
df_samples = df.sample(min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)

In [5]:
# Now we need to pull the daily file for each of the sampled stations.
# Downloading takes some time, which is why we reduced the station list
# to a small random sample in the first place!

# Define the per-station processing (temperature conversion) first,
# then iterate over the sampled stations and concatenate the results.
def c_to_f(row):
    """Convert a record's ``VALUE`` field to degrees Fahrenheit.

    ``VALUE`` is divided by 10 before the Celsius-to-Fahrenheit formula is
    applied, i.e. it is treated as tenths of a degree Celsius.

    Args:
        row: Anything indexable by ``'VALUE'`` (for example a DataFrame row).

    Returns:
        ``((VALUE / 10) * 9) / 5 + 32``; a VALUE of 0 gives 32.
    """
    celsius = row['VALUE'] / 10
    scaled = (celsius * 9) / 5
    return scaled + 32

# Download the daily file for every sampled station, keep only the TMIN
# observations from years after MIN_YEAR_EXCLUSIVE, and add a Fahrenheit
# column to each station's frame.
MIN_YEAR_EXCLUSIVE = 2010  # rows are kept when YEAR > this value

# Collect one frame per station and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated data on every pass.
station_frames = []
for station in df_samples['StationID']:
    df_daily = noaa_datasets.DailyFile(station)
    daily = df_daily.df

    # Filter first so the conversion only runs on rows we keep, and take an
    # explicit copy so the new column is not assigned into a filtered slice.
    recent_tmin = daily[
        (daily['ELEMENT'] == 'TMIN') & (daily['YEAR'] > MIN_YEAR_EXCLUSIVE)
    ].copy()

    # c_to_f only indexes ['VALUE'] and does arithmetic on it, so passing the
    # whole frame converts the entire column at once instead of row by row.
    recent_tmin['Low Temp (f)'] = c_to_f(recent_tmin)
    station_frames.append(recent_tmin)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no stations were sampled.
full_df = pd.concat(station_frames) if station_frames else pd.DataFrame()

Retrieving USC00254110.dly
Transposing USC00254110 dataset
Applying data prep operations
Success!
Retrieving USC00255362.dly
Transposing USC00255362 dataset


  return _read(filepath_or_buffer, kwds)


Applying data prep operations
Success!
Retrieving USC00258915.dly
Transposing USC00258915 dataset
Applying data prep operations
Success!
Retrieving USC00255565.dly
Transposing USC00255565 dataset
Applying data prep operations
Success!
Retrieving USC00253365.dly
Transposing USC00253365 dataset
Applying data prep operations
Success!
Retrieving USC00254985.dly
Transposing USC00254985 dataset
Applying data prep operations
Success!
Retrieving USC00250070.dly
Transposing USC00250070 dataset
Applying data prep operations
Success!
Retrieving USC00251345.dly
Transposing USC00251345 dataset
Applying data prep operations
Success!
Retrieving USC00250435.dly
Transposing USC00250435 dataset
Applying data prep operations
Success!
Retrieving USC00256200.dly
Transposing USC00256200 dataset
Applying data prep operations
Success!
Retrieving USC00256135.dly
Transposing USC00256135 dataset
Applying data prep operations
Success!
Retrieving USC00258628.dly
Transposing USC00258628 dataset
Applying data prep o

In [6]:
# Attach station details to every daily record: the 'StationID' column of
# full_df is matched against the index of stations.df. No `how` is given, so
# pandas uses its default inner join and unmatched rows are dropped.
full_df = full_df.merge(
    stations.df,
    left_on='StationID',
    right_index=True,
)

In [7]:
# Write the merged dataset to disk. Note that the file is tab-delimited
# (sep='\t') even though its name ends in .csv.
output_path = 'NOAA Low Daily Temperatures Analysis.csv'
full_df.to_csv(output_path, sep='\t')