### Getting Storm Data

- This notebook demonstrates how to use the `data_source` helper functions to load the storm features and labels, merge them into a single pandas DataFrame, and export the result to CSV.

In [2]:
import numpy as np

import pandas as pd

from data_source import (
    get_subfolder_names_from_root,
    create_storm_feature_and_label_dfs,
    merge_df2_onto_df1,
)

In [4]:
# Root folder that is scanned for storm sub-folders.
ROOT_DIR = "data/"

# Columns used to line up each feature row with its label row.
merge_keys = ["storm_name", "image_number"]

# Discover the storms available under ROOT_DIR.
storm_names = get_subfolder_names_from_root(root_dir=ROOT_DIR)

# Build one frame of features and one frame of labels for those storms.
df_features, df_labels = create_storm_feature_and_label_dfs(
    storm_names=storm_names,
    root_dir=ROOT_DIR,
)

# Left-merge the labels (df2) onto the features (df1) using the merge keys.
df = merge_df2_onto_df1(
    df1=df_features,
    df2=df_labels,
    columns_to_match=merge_keys,
    how="left",
)

df.head()

Unnamed: 0,storm_id,relative_time,ocean,storm_name,image_number,file_suffix_x,file_path_x,wind_speed,file_suffix_y,file_path_y
0,gme,0,1,gme,0,features.json,data/gme/gme_000.jpg,30,label.json,data/gme/gme_000.jpg
1,gme,1800,1,gme,1,features.json,data/gme/gme_001.jpg,30,label.json,data/gme/gme_001.jpg
2,gme,3600,1,gme,2,features.json,data/gme/gme_002.jpg,30,label.json,data/gme/gme_002.jpg
3,gme,5400,1,gme,3,features.json,data/gme/gme_003.jpg,30,label.json,data/gme/gme_003.jpg
4,gme,7200,1,gme,4,features.json,data/gme/gme_004.jpg,30,label.json,data/gme/gme_004.jpg


In [5]:
# Give the "_x" image-path column from the merged frame a descriptive name.
df = df.rename(columns={"file_path_x": "img_file_path"})

# Sanity check after the left join: storm_id must equal storm_name on every row.
# NOTE(review): storm_name is one of the merge keys, so this comparison may not
# exercise the label side of the join at all -- confirm it checks what is intended.
assert np.array_equal(df["storm_id"].to_numpy(), df["storm_name"].to_numpy())

# Keep only the columns wanted in the exported table, in this order.
df = df.loc[
    :,
    [
        "storm_id",
        "relative_time",
        "ocean",
        "image_number",
        "img_file_path",
        "wind_speed",
    ],
]

df.head()

Unnamed: 0,storm_id,relative_time,ocean,image_number,img_file_path,wind_speed
0,gme,0,1,0,data/gme/gme_000.jpg,30
1,gme,1800,1,1,data/gme/gme_001.jpg,30
2,gme,3600,1,2,data/gme/gme_002.jpg,30
3,gme,5400,1,3,data/gme/gme_003.jpg,30
4,gme,7200,1,4,data/gme/gme_004.jpg,30


In [6]:
# Write the trimmed storm table to CSV: header row included, row index omitted.
export_file_name = "data/raw_storm_data.csv"
df.to_csv(path_or_buf=export_file_name, index=False, header=True)

### END OF SCRIPT