# Storing Batch Artefacts

In [1]:
import os
import yaml
import pandas as pd 

In [4]:
# Setting up our feature store: create the store directory and write its
# YAML configuration (store id, date watermarks, offsets, file paths, schema).

# Directory for feature store
directory = "feature_store"
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

# Feature definitions
# Version tag for this store. Named `store_id` rather than `id` so the builtin
# id() is not shadowed for the rest of the kernel session.
store_id = "v1"
config_data = {
    'feature_store': {
        'id': store_id,
        # Initial watermark dates; presumably placeholders that the update step
        # advances as batches are stored -- TODO confirm against feature_store.
        'latest_feature': "1999-01-01",
        'latest_target': "1999-01-01",
        # Offsets are kept as strings exactly as before; presumably they mirror
        # the largest lag (13) and the number of target horizons (3) -- verify
        # against the consumer before changing their type.
        'feature_offset': "13",
        'target_offset': "3",
        # Paths use forward slashes (not os.path.join) so the values written to
        # the YAML file are identical on every platform.
        'features_path': f"{directory}/features_{store_id}.csv",
        'targets_path': f"{directory}/targets_{store_id}.csv",
        'schema': {
            'features': [
                {'name': 'lag_1', 'type': 'float'},
                {'name': 'lag_4', 'type': 'float'},
                {'name': 'lag_5', 'type': 'float'},
                {'name': 'lag_6', 'type': 'float'},
                {'name': 'lag_11', 'type': 'float'},
                {'name': 'lag_12', 'type': 'float'},
                {'name': 'lag_13', 'type': 'float'},
                {'name': 'rolling_mean_7', 'type': 'float'},
                {'name': 'rolling_std_7', 'type': 'float'}
            ],
            'targets': [
                {'name': 'target_1d', 'type': 'float'},
                {'name': 'target_2d', 'type': 'float'},
                {'name': 'target_3d', 'type': 'float'}
            ]
        }
    }
}

# Path to the YAML configuration file
yaml_file_path = os.path.join(directory, f"config_{store_id}.yaml")

# Write the configuration data to a YAML file (block style, explicit encoding)
with open(yaml_file_path, 'w', encoding="utf-8") as config_file:
    yaml.dump(config_data, config_file, default_flow_style=False)
    
  

In [5]:
from scripts import feature_processing, feature_store

# Read the raw CSV, parsing 'period' as dates, and use that column as the index
csv_file_path = 'data/energy_data.csv'
df = pd.read_csv(csv_file_path, parse_dates=['period']).set_index('period')

# Feed the indexed frame through the project's feature pipeline and display the result
batch_df = feature_processing.feature_pipeline(df)
batch_df


Unnamed: 0_level_0,lag_1,lag_4,lag_5,lag_6,lag_11,lag_12,lag_13,rolling_mean_7,rolling_std_7
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-14,119134.0,129372.0,134119.0,132752.0,131883.0,128866.0,116406.0,127822.29,5524.62
2024-01-15,121604.0,129308.0,129372.0,134119.0,131606.0,131883.0,128866.0,128386.86,6264.72
2024-01-16,136704.0,128467.0,129308.0,129372.0,136960.0,131606.0,131883.0,130031.00,8953.19
2024-01-17,145628.0,119134.0,128467.0,129308.0,130213.0,136960.0,131606.0,133167.71,12008.71
2024-01-18,151329.0,121604.0,119134.0,128467.0,126933.0,130213.0,136960.0,135579.14,12774.85
...,...,...,...,...,...,...,...,...,...
2024-03-26,126118.0,130956.0,129138.0,124910.0,119478.0,120293.0,120013.0,125303.14,4217.08
2024-03-27,125493.0,122277.0,130956.0,129138.0,109879.0,119478.0,120293.0,125439.86,4217.72
2024-03-28,125867.0,118230.0,122277.0,130956.0,107680.0,109879.0,119478.0,124931.14,3900.13
2024-03-29,125577.0,126118.0,118230.0,122277.0,118592.0,107680.0,109879.0,123498.29,3072.37


In [None]:
# Hand the computed batch to the feature store, which is located through
# the v1 YAML configuration file.
yaml_file_path = "feature_store/config_v1.yaml"
feature_store.update_feature_store(
    batch_df,
    yaml_file_path,
)
