# Filter reformatted GHCN files by year and convert measurement units to match LCD

Convert units to match LCD weather units.

| Measurement Type | GHCN | LCD |
| --- | --- | --- |
| Temp (dry bulb, wet bulb, dew point) | 1/10th deg C | F |
| Precipitation | 1/10th mm | inch |
| Wind speed | 1/10th m/s | mi/h |

## Imports and env

In [None]:
import csv
import os
import pandas as pd
from tqdm import tqdm    # For nice loading graphic

In [None]:
# Colab-only: mount Google Drive at /content/drive so the data folders used
# below are reachable through the local file system.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Globals

In [None]:
# Root data folder on the mounted Drive. The input/output folder names are
# appended to it by plain string concatenation, so the trailing "/" is required.
BASE_FOLDER_PATH = "/content/drive/My Drive/ML6140  - Project/Raw Data/NOAA_GHCN-Daily/"
!ls "{BASE_FOLDER_PATH}"

ca-2013-2023-station-data					melio-ca-stations-2013-2023.csv
ca-2013-2023-station-data-reformatted				melio-ca-stations.csv
ca-2013-2023-station-data-reformatted-filtered-units-converted	README.gdoc
filterAndChangeUnitGHCNFiles.ipynb				readme.txt
ghcnd-inventory.txt						reformatGHCNFiles.ipynb
ghcnd-stations.txt						retrieveData.ipynb


In [None]:
# Folder names relative to BASE_FOLDER_PATH (trailing "/" required because file
# names are appended by string concatenation).
# INFILE_FOLDER: files that are read, filtered and converted.
# OUTFILE_FOLDER: where the converted files are written; it is removed and
# recreated before the conversion loop runs.
INFILE_FOLDER = "ca-2013-2023-station-data-reformatted/"
OUTFILE_FOLDER = "ca-2013-2023-station-data-reformatted-filtered-units-converted/"

In [None]:
# First year to keep (inclusive): get_filtered_df_by_year retains rows whose
# YEAR is >= this value.
MEASUREMENT_START_YEAR = 2014

In [None]:
# Maps each kind of unit conversion to the GHCN element columns that need it
# (the lists are used directly as column selectors in make_conversions).
# Elements not listed here are left unchanged: they are unique to GHCN files
# or are already in a format common to GHCN and LCD.
ELEMENT_CONVERSIONS = {
    "precipitation": ["PRCP"],
    "temperature": ["TMAX", "TMIN", "ADPT", "AWBT", "TAVG"],
    "speed": ["AWND"]
}

In [None]:
# WEATHER_ELEMENTS = [
#     "PRCP", # precipitation (.1 mm)
#     "SNOW", # snowfall (mm)
#     "SNWD", # snow depth (mm)
#     "TMAX", # max temp (1/10 C)
#     "TMIN", # min temp (1/10 C)
#     "ADPT", # avg dew point temp (1/10 C)
#     "AWBT", # avg wet bulb temp (1/10 C)
#     "AWND", # avg wind speed (.1 m/s)
#     "EVAP", # evap from pan (.1 mm)
#     "FMTM", # time of fastest wind (HHMM)
#     "FRGB", # frozen ground bottom (cm)
#     "FRGT", # frozen ground top (cm)
#     "FRTH", # frozen ground height (cm)
#     "RHAV", # avg rel humid (%)
#     "RHMN", # min rel humid (%)
#     "RHMX", # max rel humid (%)
#     "TAVG", # avg temp (1/10 C)
# ]

## Helpers

### Filter by year

In [None]:
def get_filtered_df_by_year(df, start_year=None):
  """Return the rows of ``df`` whose ``YEAR`` is at or after a start year.

  Args:
    df: DataFrame with a ``YEAR`` column.
    start_year: First year to keep (inclusive). When omitted, the
      notebook-level ``MEASUREMENT_START_YEAR`` is used.

  Returns:
    A new DataFrame holding only the matching rows. It is an independent
    copy, so modifying it does not affect ``df``.
  """
  if start_year is None:
    start_year = MEASUREMENT_START_YEAR
  # Copy the filtered rows: the result is modified column-by-column afterwards,
  # and assigning into a boolean-mask slice triggers SettingWithCopyWarning.
  return df[df["YEAR"] >= start_year].copy()

### Unit conversions

Element

In [None]:
def convert_temp(temp_10th_C):
  """Convert a temperature from tenths of a degree Celsius to degrees Fahrenheit.

  Uses only arithmetic operators, so it accepts a plain number as well as a
  pandas Series (which is how ``DataFrame.apply`` calls it, column by column).
  """
  celsius = temp_10th_C / 10
  # F = C * 9/5 + 32
  return celsius * 9/5 + 32

In [None]:
def convert_prcp(prcp_10th_mm):
  """Convert a precipitation amount from tenths of a millimetre to inches.

  Uses only arithmetic operators, so it accepts a plain number as well as a
  pandas Series (which is how ``DataFrame.apply`` calls it, column by column).
  """
  mm_per_inch = 25.4
  return (prcp_10th_mm / 10) / mm_per_inch

In [None]:
def convert_speed(speed_10th_m_p_s):
  """Convert a speed from tenths of a metre per second to miles per hour.

  Uses only arithmetic operators, so it accepts a plain number as well as a
  pandas Series (which is how ``DataFrame.apply`` calls it, column by column).
  """
  mph_per_m_p_s = 2.237  # rounded conversion factor, m/s -> mi/h
  return (speed_10th_m_p_s / 10) * mph_per_m_p_s

## Main

In [None]:
def make_conversions(df):
  """Convert the GHCN measurement columns of ``df`` to LCD units.

  Temperature columns go from 1/10 deg C to deg F, precipitation columns from
  1/10 mm to inches, and wind-speed columns from 1/10 m/s to mi/h. The
  columns affected are those listed in ``ELEMENT_CONVERSIONS``; all other
  columns are left untouched.

  Args:
    df: DataFrame containing every column named in ``ELEMENT_CONVERSIONS``.
      It is modified in place.

  Returns:
    The same ``df`` object, with the listed columns converted.

  Raises:
    KeyError: if a column named in ``ELEMENT_CONVERSIONS`` is missing.
  """
  converters = {
      "temperature": convert_temp,
      "precipitation": convert_prcp,
      "speed": convert_speed,
  }
  for kind, convert in converters.items():
    columns = ELEMENT_CONVERSIONS[kind]
    # Replace the columns wholesale instead of writing through
    # ``df.loc[:, columns]``: the converted values are floats, and setting
    # floats in place into integer-typed columns is deprecated in newer
    # pandas versions (incompatible-dtype setitem).
    df[columns] = df[columns].apply(convert)
  return df

In [None]:
import shutil

# Start from an empty output folder so files from an earlier run cannot linger.
output_dir = BASE_FOLDER_PATH + OUTFILE_FOLDER
# Only remove the folder when it exists, so the very first run does not fail.
if os.path.isdir(output_dir):
  shutil.rmtree(output_dir)
os.makedirs(output_dir)

In [None]:
# Names of all entries in the input folder; each one is processed as a file.
filenames = os.listdir(f"{BASE_FOLDER_PATH}{INFILE_FOLDER}")
# Shown as the cell output: how many entries will be processed.
len(filenames)

641

In [None]:
# For every input file: keep only the rows from the start year onwards, convert
# the measurement units, and write the result under the same file name in the
# output folder.
for filename in tqdm(filenames):
  # Read the file named by the loop variable. Reading a fixed list index here
  # would write one station's data into every output file.
  df = pd.read_csv(BASE_FOLDER_PATH + INFILE_FOLDER + filename)
  df = get_filtered_df_by_year(df)
  df = make_conversions(df)
  df.to_csv(BASE_FOLDER_PATH + OUTFILE_FOLDER + filename, index=False)