**Table of contents**<a id='toc0_'></a>    
- [Set-up](#toc1_)    
      - [Libraries](#toc1_1_1_1_)    
      - [Functions](#toc1_1_1_2_)    
- [Analysis](#toc2_)    
  - [Precipitation](#toc2_1_)    
      - [Total](#toc2_1_1_1_)    
      - [Rain](#toc2_1_1_2_)    
      - [Snow](#toc2_1_1_3_)    
  - [Temperature](#toc2_2_)    
      - [Mean](#toc2_2_1_1_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Set-up](#toc0_)

#### <a id='toc1_1_1_1_'></a>[Libraries](#toc0_)

In [8]:
import re
import pandas as pd
import numpy as np
import os


#### <a id='toc1_1_1_2_'></a>[Functions](#toc0_)

In [6]:
def stackDates(df, value_name, year_col_name='Year', month_col_name='Mo'):
    """Reshape a wide year/month-by-day table into one value per date string.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per year/month. Every column other than the year and month
        columns is treated as a day column, and its label is used verbatim as
        the day part of the date. Year values and day labels must be strings
        because they are joined with '-'.
    value_name : str
        Name given to the single value column of the result.
    year_col_name, month_col_name : str
        Labels of the year and month columns in `df`.

    Returns
    -------
    pandas.DataFrame
        One column named `value_name`, indexed by 'yyyy-mm-dd' strings
        (index name 'yyyy-mm-dd').

    Notes
    -----
    The input frame is not modified; all reshaping happens on a copy.
    No calendar validation is performed on the resulting date strings.
    """
    # Work on a copy so the caller's DataFrame keeps its columns and index.
    wide = df.copy()

    # Months are zero-padded to two characters; years are used as-is.
    padded_month = wide[month_col_name].apply(lambda x: str(x).zfill(2))
    wide['yyyy-mm'] = wide[year_col_name] + '-' + padded_month
    wide = wide.drop(columns=[year_col_name, month_col_name]).set_index('yyyy-mm')

    # stack() moves the day columns into rows. After reset_index() the
    # unnamed column level shows up as 'level_1' and the values as column 0.
    stacked_df = wide.stack().reset_index().rename(
        columns={'level_1': 'day', 0: value_name}
    )
    stacked_df['yyyy-mm-dd'] = stacked_df['yyyy-mm'] + '-' + stacked_df['day']
    return stacked_df.drop(columns=['yyyy-mm', 'day']).set_index('yyyy-mm-dd')

def readTempTextFile(path, maxWidth=8):
    """Parse a fixed-width daily temperature text file into a wide table.

    Layout assumed by the parser:
      * line 1: comma-separated station details,
      * line 3: column header (every 'Day ' prefix is removed, then the line
        is split on whitespace),
      * line 4 up to, but not including, the last line: data rows. Rows
        containing 'Jour' are skipped (presumably the French header line).

    Each data row is stripped, then cut by position: the first 5 characters
    are the year, the next 3 the month, and the rest is sliced into chunks of
    `maxWidth` characters, each stripped of surrounding whitespace.

    Parameters
    ----------
    path : str
        Path of the text file to read.
    maxWidth : int
        Width in characters of each daily value field.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The table of raw string cells with the header-derived column names,
        and the stripped comma-separated fields of the first line.
    """
    # The context manager closes the file; no explicit close is needed.
    with open(path) as f:
        lines = f.readlines()

    details = [item.strip() for item in lines[0].split(',')]

    # The split yields empty strings at both ends (leading whitespace and the
    # trailing newline), hence the [1:-1].
    header_line = lines[2].replace('Day ', '')
    headers = re.split(r'\s+', header_line)[1:-1]

    df_list = []
    # NOTE(review): the last line of the file is always skipped — confirm it
    # is never a data row.
    for line in lines[3:-1]:
        if 'Jour' in line:
            continue
        # Strip before slicing: the fixed offsets below are relative to the
        # first non-blank character of the row.
        line = line.strip()
        year = line[:5].strip()
        month = line[5:8].strip()
        remainder = line[8:]
        segments = [
            remainder[i:i + maxWidth].strip()
            for i in range(0, len(remainder), maxWidth)
        ]
        df_list.append([year, month] + segments)

    return pd.DataFrame(df_list, columns=headers), details

def readPrecipTextFile(path, maxWidth=9):
    """Parse a fixed-width daily precipitation text file into a wide table.

    The first line is split on commas into the station details. Lines from
    the fourth up to, but not including, the last are treated as data rows.
    Each row is stripped, then cut by position: the first 5 characters are
    the year, the next 3 the month, and the rest is sliced into chunks of
    `maxWidth` characters, each stripped of surrounding whitespace.

    Column names are fixed: 'Year', 'Mo', then '01' through '31'.

    Parameters
    ----------
    path : str
        Path of the text file to read.
    maxWidth : int
        Width in characters of each daily value field.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The table of raw string cells, and the stripped comma-separated
        fields of the first line.
    """
    # The context manager closes the file; no explicit close is needed.
    with open(path) as f:
        lines = f.readlines()

    details = [item.strip() for item in lines[0].split(',')]

    df_list = []
    # NOTE(review): the last line of the file is always skipped — confirm it
    # is never a data row.
    for line in lines[3:-1]:
        # Strip before slicing: the fixed offsets below are relative to the
        # first non-blank character of the row.
        line = line.strip()
        year = line[:5].strip()
        month = line[5:8].strip()
        remainder = line[8:]
        segments = [
            remainder[i:i + maxWidth].strip()
            for i in range(0, len(remainder), maxWidth)
        ]
        df_list.append([year, month] + segments)

    headers = ['Year', 'Mo'] + [str(num).zfill(2) for num in range(1, 32)]
    return pd.DataFrame(df_list, columns=headers), details

# <a id='toc2_'></a>[Analysis](#toc0_)

## <a id='toc2_1_'></a>[Precipitation](#toc0_)

#### <a id='toc2_1_1_1_'></a>[Total](#toc0_)

In [10]:
# Convert every file of the adjusted daily total-precipitation folder into a
# date-indexed CSV under ../results/.
folder_name = 'Adj_Daily_Total_v2017'
input_folder = f'../data/{folder_name}/'
output_folder = f'../results/{folder_name}/'
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    input_path = f'{input_folder}{filename}'
    precip_df, details = readPrecipTextFile(input_path)
    # Column label built from header fields 4 and 5 of the file's first line
    # (presumably the measured quantity and its unit — verify against data).
    value_name = f'{details[4].title()} [{details[5]}]'
    # Drop the last four characters of the name (the extension) before
    # appending '.csv'.
    output_path = f'{output_folder}{filename[:-4]}.csv'
    output_df = stackDates(precip_df, value_name)
    output_df.to_csv(output_path)
    

#### <a id='toc2_1_1_2_'></a>[Rain](#toc0_)

In [15]:
# Convert every file of the adjusted daily rain folder into a date-indexed
# CSV under ../results/.
folder_name = 'Adj_Daily_Rain_v2017'
input_folder = f'../data/{folder_name}/'
output_folder = f'../results/{folder_name}/'
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    input_path = f'{input_folder}{filename}'
    precip_df, details = readPrecipTextFile(input_path)
    # Column label built from header fields 4 and 5 of the file's first line
    # (presumably the measured quantity and its unit — verify against data).
    value_name = f'{details[4].title()} [{details[5]}]'
    # Drop the last four characters of the name (the extension) before
    # appending '.csv'.
    output_path = f'{output_folder}{filename[:-4]}.csv'
    output_df = stackDates(precip_df, value_name)
    output_df.to_csv(output_path)
    

#### <a id='toc2_1_1_3_'></a>[Snow](#toc0_)

In [14]:
# Convert every file of the adjusted daily snow folder into a date-indexed
# CSV under ../results/.
folder_name = 'Adj_Daily_Snow_v2017'
input_folder = f'../data/{folder_name}/'
output_folder = f'../results/{folder_name}/'
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    input_path = f'{input_folder}{filename}'
    precip_df, details = readPrecipTextFile(input_path)
    # Column label built from header fields 4 and 5 of the file's first line
    # (presumably the measured quantity and its unit — verify against data).
    value_name = f'{details[4].title()} [{details[5]}]'
    # Drop the last four characters of the name (the extension) before
    # appending '.csv'.
    output_path = f'{output_folder}{filename[:-4]}.csv'
    output_df = stackDates(precip_df, value_name)
    output_df.to_csv(output_path)
    

## <a id='toc2_2_'></a>[Temperature](#toc0_)

#### <a id='toc2_2_1_1_'></a>[Mean](#toc0_)

In [None]:
# Convert every file of the homogenized daily mean-temperature folder into a
# date-indexed CSV under ../results/.
folder_name = 'Homog_daily_mean_temp_v2022_Gen3'
input_folder = '../data/' + folder_name + '/'
output_folder = '../results/' + folder_name + '/'
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Build the path from input_folder, the folder actually being listed;
    # the previous name `folder_path` is not defined in this notebook.
    input_path = input_folder + filename
    temp_df, details = readTempTextFile(input_path)
    # Column label built from header fields 4 and 5 of the file's first line
    # (presumably the measured quantity and its unit — verify against data).
    value_name = details[4].title() + ' [' + details[5] + ']'
    # Drop the last four characters of the name (the extension) before
    # appending '.csv'.
    output_path = output_folder + filename[:-4] +'.csv'
    output_df = stackDates(temp_df, value_name)
    output_df.to_csv(output_path)
