In [None]:
# This notebook merges the per-year typhoon info CSV files into a single dataset and appends the station location mapping to each record.

# Disclaimer: Generative AI was used to convert typhoon report screenshots (Source: PAGASA) into CSV format
# and to generate a mapping from unique weather stations to provinces and regions.
# The authors reviewed and edited the content as needed and take full responsibility for the content.

In [None]:
# Import relevant python modules
import glob
import os

import numpy as np
import pandas as pd

In [None]:

# Build the compiled typhoon dataset: read every per-year CSV, stack them into
# one DataFrame, then left-join the station location mapping on 'Location'.

# 1. Grab the paths of all CSV files in the per-year folder.
#    glob returns matches in arbitrary (OS-dependent) order, so sort them to
#    keep the row order of the compiled dataset reproducible across machines.
path = './data/typhoon-info/by-year'
all_files = sorted(glob.glob(path + "/*.csv"))

# Fail early with a clear message; otherwise pd.concat raises a generic
# "No objects to concatenate" error when the folder is empty or missing.
if not all_files:
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# 2. Read each CSV into its own DataFrame
li = [pd.read_csv(filename) for filename in all_files]

# 3. Concatenate them into one master DataFrame with a fresh 0..n-1 index
df = pd.concat(li, axis=0, ignore_index=True)

# Load the station location mapping used to enrich the typhoon records
mapping_df = pd.read_csv('data/geospatial-data/station_location_mapping.csv')

# Remove geo columns from the raw data if they exist, so the merge does not
# produce suffixed duplicates (the "_x/_y" issue).
# NOTE(review): presumes the mapping file supplies 'Province' and 'Region' - confirm.
df = df.drop(columns=['Province', 'Region'], errors='ignore')

# A left join leaves NaN geo columns for stations absent from the mapping;
# surface those instead of letting them pass silently.
unmapped = df.loc[~df['Location'].isin(mapping_df['Location']), 'Location'].unique()
if len(unmapped) > 0:
    print(f"Warning: {len(unmapped)} location(s) have no mapping entry: {list(unmapped)}")

# Merge. validate='many_to_one' raises pandas.errors.MergeError if the mapping
# has duplicate 'Location' keys, which would otherwise silently duplicate
# typhoon rows in the result.
df = pd.merge(df, mapping_df, on='Location', how='left', validate='many_to_one')

In [None]:
# Save the merged DataFrame to the compiled-output folder.
output_path = 'data/merged/typhoon-info/compiled_2019-2025.csv'

# DataFrame.to_csv does not create missing parent directories (it raises
# OSError), so make sure the target folder exists on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# 'index=False' prevents pandas from adding an extra column of numbers at the start
df.to_csv(output_path, index=False)