In [4]:
import shapefile
import json
from pathlib import Path
import re

# Root directory that is searched for input shapefiles
# (presumably a local mirror of the ECCC hurricane trajectory feed — confirm).
source_dir = "../../data_exploration/eccc/dd.weather.gc.ca/trajectoires/hurricane/shapefile/"
# Glob pattern, relative to source_dir, matching .shp files at any directory depth.
source_search_pattern = "**/*.shp"

# Root directory under which the converted JSON files and per-storm indexes are written.
destination_dir = "./data/forecasts/eccc"

# Splits a shapefile name such as "20220909_0600Z_EARL.err.shp" into the named
# groups date ("20220909"), time ("0600"), storm ("EARL") and type ("err").
filename_parser = re.compile(r'(?P<date>\d+)_(?P<time>\d+)Z_(?P<storm>\w+)\.(?P<type>\w+).*')


In [5]:
def shp_to_json(shp_file:Path) -> dict:
    """Read a shapefile and return its contents as a GeoJSON-style dict.

    Args:
        shp_file: Path to the ``.shp`` file to read.

    Returns:
        The value of the reader's ``__geo_interface__`` attribute for the file.
    """
    # Use the reader as a context manager so the underlying file handles are
    # closed as soon as the data has been extracted, instead of leaking one
    # set of open handles per converted file.
    with shapefile.Reader(shp_file.as_posix()) as shp_reader:
        return shp_reader.__geo_interface__


In [12]:
# Convert every shapefile found under source_dir to a JSON file under
# destination_dir/<storm>/<date>/<time>/<type>.json, and collect an index of the
# converted files. storm_index maps each storm name to a list of
# {"date", "time", "type", "path"} entries, one per converted file.
storm_index = {}

for shp_file in Path(source_dir).glob(source_search_pattern):
    name_match = filename_parser.match(shp_file.name)
    if name_match is None:
        # A file that does not follow the <date>_<time>Z_<storm>.<type> naming
        # scheme cannot be placed in the output tree; report it and move on
        # rather than aborting the whole run with an AttributeError.
        print(f"Skipping unrecognised filename: {shp_file.name}")
        continue

    # Read the groups by name so the code does not depend on their order in the regex.
    storm_date = name_match["date"]
    storm_time = name_match["time"]
    storm_name = name_match["storm"]
    data_type = name_match["type"]

    print(f"Filename: {shp_file.name} Parts: {storm_date} {storm_time} {storm_name} {data_type}")

    json_data = shp_to_json(shp_file=shp_file)

    output_path = f"{storm_name}/{storm_date}/{storm_time}"

    final_output_path = Path(f"{destination_dir}/{output_path}/{data_type}.json")
    print(f"Output path: {final_output_path}")

    storm_index.setdefault(storm_name, []).append(
        {
        "date":storm_date,
        "time":storm_time,
        "type":data_type,
        # Stored with forward slashes so the index is identical on every OS.
        "path":final_output_path.as_posix()
        }
    )

    final_output_path.parent.mkdir(parents=True, exist_ok=True)
    # The context manager guarantees the JSON is flushed and the handle closed
    # before the next file is processed.
    with open(final_output_path, mode="w", encoding="UTF-8") as json_file:
        json.dump(json_data, json_file)


Filename: 20220909_0600Z_EARL.err.shp Parts: 20220909 0600 EARL err
Output path: data\forecasts\eccc\EARL\20220909\0600\err.json
Filename: 20220909_0600Z_EARL.lin.shp Parts: 20220909 0600 EARL lin
Output path: data\forecasts\eccc\EARL\20220909\0600\lin.json
Filename: 20220909_0600Z_EARL.pts.shp Parts: 20220909 0600 EARL pts
Output path: data\forecasts\eccc\EARL\20220909\0600\pts.json
Filename: 20220909_0600Z_EARL.rad.shp Parts: 20220909 0600 EARL rad
Output path: data\forecasts\eccc\EARL\20220909\0600\rad.json
Filename: 20220909_1200Z_EARL.err.shp Parts: 20220909 1200 EARL err
Output path: data\forecasts\eccc\EARL\20220909\1200\err.json
Filename: 20220909_1200Z_EARL.lin.shp Parts: 20220909 1200 EARL lin
Output path: data\forecasts\eccc\EARL\20220909\1200\lin.json
Filename: 20220909_1200Z_EARL.pts.shp Parts: 20220909 1200 EARL pts
Output path: data\forecasts\eccc\EARL\20220909\1200\pts.json
Filename: 20220909_1200Z_EARL.rad.shp Parts: 20220909 1200 EARL rad
Output path: data\forecasts\e

In [14]:
# Write one index.json per storm, listing every converted file recorded for it
# in storm_index.
for storm, index_entries in storm_index.items():
    output_path = Path(f"{destination_dir}/{storm}/index.json").as_posix()
    print(output_path)
    print(index_entries)
    # The context manager guarantees the index is flushed and the handle closed.
    with open(output_path, mode="w", encoding="UTF-8") as index_file:
        json.dump(index_entries, index_file)

data/forecasts/eccc/EARL/index.json
[{'date': '20220909', 'time': '0600', 'type': 'err', 'path': 'data/forecasts/eccc/EARL/20220909/0600/err.json'}, {'date': '20220909', 'time': '0600', 'type': 'lin', 'path': 'data/forecasts/eccc/EARL/20220909/0600/lin.json'}, {'date': '20220909', 'time': '0600', 'type': 'pts', 'path': 'data/forecasts/eccc/EARL/20220909/0600/pts.json'}, {'date': '20220909', 'time': '0600', 'type': 'rad', 'path': 'data/forecasts/eccc/EARL/20220909/0600/rad.json'}, {'date': '20220909', 'time': '1200', 'type': 'err', 'path': 'data/forecasts/eccc/EARL/20220909/1200/err.json'}, {'date': '20220909', 'time': '1200', 'type': 'lin', 'path': 'data/forecasts/eccc/EARL/20220909/1200/lin.json'}, {'date': '20220909', 'time': '1200', 'type': 'pts', 'path': 'data/forecasts/eccc/EARL/20220909/1200/pts.json'}, {'date': '20220909', 'time': '1200', 'type': 'rad', 'path': 'data/forecasts/eccc/EARL/20220909/1200/rad.json'}, {'date': '20220909', 'time': '1800', 'type': 'err', 'path': 'data/f