In [1]:
import pandas as pd
import numpy as np
import os 

In [2]:
def extract_yearly_highs_lows(
    file_path,
    drop_na=True,
    output_dir="/workspaces/Futures-First/Prediction/data",
    years=range(2004, 2025),
):
    """Split a wide per-year price CSV into one ``<year>.csv`` file per year.

    For each year the input is expected to hold a ``Timestamp.<year>`` column
    and a ``<year>`` column. Each such pair is written to
    ``<output_dir>/<year>.csv`` with the columns renamed to ``Date`` and
    ``Price``. Despite the function's name, no highs or lows are computed.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read with ``pd.read_csv``.
    drop_na : bool, default True
        If True, drop rows where either the timestamp or the price is missing.
        If False, missing timestamps are replaced with ``0`` and only rows
        with a missing price are dropped.
    output_dir : str or path-like, optional
        Directory that receives the per-year files. It is created if it does
        not exist. Defaults to the location this function originally wrote to.
    years : iterable of int, optional
        Years to look for. Defaults to 2004 through 2024 inclusive.

    Returns
    -------
    pandas.DataFrame
        The ``Date``/``Price`` frame of the last year (in iteration order)
        that had any rows. If no year produced rows, an empty frame with
        those two columns is returned.
    """
    data = pd.read_csv(file_path)

    # Pre-seed the result so the function still returns a frame (rather than
    # raising UnboundLocalError) when no year has usable data.
    data_df = pd.DataFrame(columns=['Date', 'Price'])

    for year in years:
        date_col = f'Timestamp.{year}'
        price_col = str(year)

        if date_col not in data.columns or price_col not in data.columns:
            print(f"Columns for the year {year} are missing in the data")
            continue

        year_data = data[[date_col, price_col]]
        if drop_na:
            year_data = year_data.dropna()
        else:
            # Keep rows with a missing timestamp (filled with 0); a missing
            # price still removes the row.
            year_data = year_data.fillna({date_col: 0}).dropna(subset=[price_col])

        if year_data.empty:
            continue

        data_df = year_data.rename(columns={date_col: 'Date', price_col: 'Price'})

        # Make sure the target directory exists before writing into it.
        os.makedirs(output_dir, exist_ok=True)
        data_df.to_csv(os.path.join(output_dir, f"{year}.csv"), index=False)

    return data_df

In [3]:
# Run the per-year split on the W_U24-Z24 CSV and display the frame it returns.
df = extract_yearly_highs_lows(
    file_path='/workspaces/Futures-First/data/W_U24-Z24.csv',
)
df

Unnamed: 0,Date,Price
0,2023-03-03,-6.25
1,2023-03-04,-6.00
2,2023-03-05,-6.00
3,2023-03-06,-6.00
4,2023-03-07,-6.50
...,...,...
494,2024-07-09,-23.50
495,2024-07-10,-23.50
496,2024-07-11,-23.75
497,2024-07-12,-25.00


In [8]:
# Load the 2024 file from the SB_nov-jan data folder and display it.
df = pd.read_csv(
    filepath_or_buffer='/workspaces/Futures-First/Prediction/data/SB_nov-jan/2024.csv',
)
df

Unnamed: 0,Timestamp,SBX24-F25.OPEN,SBX24-F25.HIGH,SBX24-F25.LOW,SBX24-F25.CLOSE
0,2023-08-14,-5.25,-5.25,-5.25,-5.25
1,2023-08-15,-5.50,-5.50,-5.50,-5.50
2,2023-08-16,-4.75,-4.75,-4.75,-4.75
3,2023-08-17,-5.00,-5.00,-5.00,-5.00
4,2023-08-18,-5.00,-5.00,-5.00,-5.00
...,...,...,...,...,...
247,2024-08-07,-16.00,-16.00,-16.00,-16.00
248,2024-08-08,-16.75,-16.75,-16.75,-16.75
249,2024-08-09,-16.75,-16.75,-16.75,-16.75
250,2024-08-12,-17.50,-17.50,-17.50,-17.50


In [12]:
# Replace the 'SBX24-F25.'-prefixed OHLC headers with plain names, then write
# the frame to the SB_nov-jan/2024.csv path without the index column.
df = df.rename(
    columns={
        'SBX24-F25.OPEN': 'OPEN',
        'SBX24-F25.HIGH': 'HIGH',
        'SBX24-F25.LOW': 'LOW',
        'SBX24-F25.CLOSE': 'CLOSE',
    }
)
df.to_csv(
    '/workspaces/Futures-First/Prediction/data/SB_nov-jan/2024.csv',
    index=False,
)