Note: This notebook uses f-strings, so it can only run in Python >= 3.6

In [1]:
import os
import pandas
import numpy as np
from pathlib import Path

In [2]:
# Directory that holds the raw weather data files.
base_path = Path('wx_data/')
# Bare entry names (no directory prefix), sorted so the processing order is
# deterministic across runs and platforms.
weather_files = sorted(entry.name for entry in base_path.iterdir())

In [3]:
OUTPUT_PATH = 'answers/YearlyAverages.out'

# Make sure the destination directory exists; without this, the open() below
# raises FileNotFoundError when `answers/` has not been created yet.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# clear or create the output file
with open(OUTPUT_PATH, 'w'):
    pass

def write_outfile(line):
    ''' Append text to the output data file.

    Parameters
    ----------
    line : str
        The text to append. It is written verbatim; no newline is added.

    Returns
    -------
    None

    Notes
    -----
    The file at `OUTPUT_PATH` is opened in append mode and closed again on
    every call.
    '''
    with open(OUTPUT_PATH, 'a') as out_file:
        out_file.write(line)

In [4]:
def filter_year(frame, year):
    ''' Mask out every row of a data frame whose date is not in the given year.

    Parameters
    ----------
    frame : pandas.core.frame.DataFrame
        The data frame to filter. Column 0 must hold the date as a number
        in YYYYMMDD format.

    year : int
        The year to keep.

    Returns
    -------
    pandas.core.frame.DataFrame
        A frame of the same shape as `frame`. Rows dated within `year` keep
        their values; all other rows are filled with NaN.

    Notes
    -----
    Both bounds are exclusive: a row is kept when its date is strictly
    between `year`0000 and `year + 1`0000.
    '''
    dates = frame[0]
    lower_bound = year * 10000
    upper_bound = (year + 1) * 10000

    in_year = dates.gt(lower_bound) & dates.lt(upper_bound)
    return frame.where(in_year)

In [5]:
def process_file(file_path, fname):
    ''' Process a single file, computing the yearly means of the max temperature,
        min temperature, and precipitation and writing them to the output file.

    Parameters
    ----------
    file_path : Union[str, pathlib.Path]
        The directory that contains the file to process.

    fname : str
        The name of the file inside `file_path`. It is also written as the
        first field of every output line.

    Returns
    -------
    None

    Notes
    -----
    The input is read as headerless, tab-separated data. Column 0 is the
    date (see `filter_year`); columns 1, 2 and 3 are treated as the max
    temperature, min temperature and precipitation. Entries equal to -9999
    are excluded from the means.

    One tab-separated line per year from 1985 through 2014 is passed to
    `write_outfile`, in the format
        fname 1985 max_tmp min_tmp precip
        fname 1986 max_tmp min_tmp precip
        fname 1987 max_tmp min_tmp precip
        ...
        fname 2014 max_tmp min_tmp precip
    with every mean formatted to two decimals. A mean with no valid values
    for the year is written as -9999.00.
    '''
    # Path() accepts both str and Path, matching the documented parameter type.
    frame = pandas.read_csv(str(Path(file_path) / fname), sep='\t', header=None)

    missing = -9999  # marks an absent reading on input and an absent mean on output
    lines = []
    for year in range(1985, 2015):
        # Filter once per year and reuse the result for all three columns.
        in_year = filter_year(frame, year)

        means = []
        for column in (1, 2, 3):
            values = in_year[column]
            # Rows outside the year are NaN here and are skipped by mean().
            mean = values[values != missing].mean()
            means.append(missing if np.isnan(mean) else mean)

        tmp_max, tmp_min, precip = means
        lines.append(f'{fname}\t{year}\t{tmp_max:.2f}\t{tmp_min:.2f}\t{precip:.2f}\n')

    # Write all years in one call so the output file is opened once per input
    # file rather than once per year.
    write_outfile(''.join(lines))

In [6]:
# Run the yearly-average computation on every entry listed in `weather_files`,
# in sorted order; each call writes that file's results to the output file.
for fname in weather_files:
    process_file(base_path, fname)