In [1]:
import pandas as pd
import numpy as np
import os 
import plotly.express as px
from datetime import timedelta
import openpyxl

In [2]:
def extract_yearly_highs_lows(file_path, drop_na=True,
                              output_dir="/workspaces/Futures-First/Prediction/data"):
    """Split a wide, multi-year price CSV into one ``<year>.csv`` file per year.

    The input is expected to hold, for each year from 2004 through 2024, a
    pair of columns named ``Timestamp.<year>`` and ``<year>``. For every year
    whose pair is present and non-empty, a two-column frame (``Date``,
    ``Price``) is written to ``output_dir``.

    Parameters
    ----------
    file_path : str
        Path of the wide CSV file to read.
    drop_na : bool, default True
        If True, rows with a missing timestamp or price are dropped. If
        False, missing timestamps are replaced with 0 and only rows with a
        missing price are dropped.
    output_dir : str, optional
        Directory that receives the per-year CSV files. It is created if it
        does not exist. Defaults to the project's prediction data folder.

    Returns
    -------
    pandas.DataFrame
        The ``Date``/``Price`` frame of the last year that was written, or an
        empty frame with those two columns when no year produced any rows.
    """
    data = pd.read_csv(file_path)

    # Fallback result: without it the final `return` raised UnboundLocalError
    # whenever no year had both columns present and at least one usable row.
    data_df = pd.DataFrame(columns=['Date', 'Price'])

    for year in range(2004, 2025):
        date_col = f'Timestamp.{year}'
        price_col = str(year)

        if date_col not in data.columns or price_col not in data.columns:
            print(f"Columns for the year {year} are missing in the data")
            continue

        if drop_na:
            year_data = data[[date_col, price_col]].dropna()
        else:
            # Keep rows whose timestamp is missing (filled with 0) but still
            # require a price.
            year_data = data[[date_col, price_col]].fillna({date_col: 0}).dropna(subset=[price_col])

        if year_data.empty:
            continue

        data_df = pd.DataFrame({
            'Date': year_data[date_col],
            'Price': year_data[price_col],
        })

        # Create the target folder lazily so nothing is created on disk when
        # there is nothing to write.
        os.makedirs(output_dir, exist_ok=True)
        data_df.to_csv(os.path.join(output_dir, f"{year}.csv"), index=False)

    return data_df

In [None]:
# Split the W U24-Z24 export into per-year CSV files. The function returns only
# the frame of the last year it wrote, which is what gets displayed here.
df = extract_yearly_highs_lows('/workspaces/Futures-First/data/W_U24-Z24.csv')
df

In [None]:
# Load the 2024 file from the SB_nov-jan data folder and display it.
# Note: this rebinds `df`, replacing whatever an earlier cell stored there.
df = pd.read_csv('/workspaces/Futures-First/Prediction/data/SB_nov-jan/2024.csv')
df

In [7]:
# Load the ZW Dec-Mar workbook into `df` (rebinding it again) and display it.
# The two commented-out lines below are earlier SB Nov-Jan variants of this
# cell; they are inactive.
# df = df.rename(columns={'SBX24-F25.OPEN':'OPEN','SBX24-F25.HIGH':'HIGH','SBX24-F25.LOW':'LOW','SBX24-F25.CLOSE':'CLOSE'})
# df = pd.read_csv('/workspaces/Futures-First/Prediction/data/SB Nov-Jan/SB Nov-Jan.csv')
df = pd.read_excel('/workspaces/Futures-First/Prediction/data/ZW Dec-Mar/ZW Dec-Mar.xlsx')
df

Unnamed: 0,Timestamp (UTC),Open,High,Low,Close,Unnamed: 5,Timestamp (UTC).1,Open.1,High.1,Low.1,...,Open.8,High.8,Low.8,Close.8,Unnamed: 53,Timestamp (UTC).9,Open.9,High.9,Low.9,Close.9
0,2022-03-08,-0.015,-0.015,-0.015,-0.015,,2021-07-08,0.0060,0.0060,0.0060,...,-0.0085,-0.0085,-0.0085,-0.0085,,2015-04-27,-0.0035,-0.0030,-0.0040,-0.0030
1,2023-01-06,0.005,0.005,0.005,0.005,,2021-10-12,0.0100,0.0100,0.0100,...,-0.0075,-0.0075,-0.0075,-0.0075,,2015-04-28,-0.0025,-0.0020,-0.0030,-0.0030
2,2023-01-17,0.005,0.005,0.005,0.005,,2021-10-21,0.0200,0.0200,0.0200,...,-0.0095,-0.0095,-0.0100,-0.0100,,2015-04-29,-0.0025,-0.0015,-0.0025,-0.0015
3,2023-01-18,0.005,0.005,0.005,0.005,,2021-11-19,0.0150,0.0150,0.0150,...,-0.0095,-0.0095,-0.0115,-0.0115,,2015-04-30,-0.0025,-0.0010,-0.0025,-0.0020
4,2023-01-19,0.006,0.006,0.006,0.006,,2021-12-15,0.0075,0.0075,0.0075,...,-0.0120,-0.0120,-0.0130,-0.0130,,2015-05-01,-0.0015,0.0015,-0.0015,0.0005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371,NaT,,,,,,NaT,,,,...,,,,,,NaT,,,,
372,NaT,,,,,,NaT,,,,...,,,,,,NaT,,,,
373,NaT,,,,,,NaT,,,,...,,,,,,NaT,,,,
374,NaT,,,,,,NaT,,,,...,,,,,,NaT,,,,


In [3]:
# Take the first five columns of the wide sheet, discard incomplete rows,
# relabel them as Date/Open/High/Low/Close, and store the result as 2024.csv.
temp = (
    df.iloc[:, :5]
    .dropna()
    .set_axis(['Date', 'Open', 'High', 'Low', 'Close'], axis=1)
)
temp.to_csv(
    '/workspaces/Futures-First/Prediction/data/ZW Dec-Mar/2024.csv',
    index=False,
)

In [8]:
# Export one CSV per earlier contract year (2023 back to 2015) from the wide
# `df` sheet, keeping only the last 365 days of each contract's history.
ls = ['2023','2022','2021','2020','2019','2018','2017','2016','2015']

# Sheet layout, as used by the slicing in this notebook: every contract takes
# five columns (Date, Open, High, Low, Close) followed by one spacer column,
# so consecutive blocks start six columns apart. Block 0 (columns 0-4) is
# exported separately as 2024.csv, which is why `ls` starts at block 1.
block_width = 5
block_stride = 6
output_dir = '/workspaces/Futures-First/Prediction/data/ZW Dec-Mar'

for block_number, i in enumerate(ls, start=1):
    # First column of this year's block, derived from its position instead of
    # a hand-incremented cursor.
    j = block_number * block_stride
    if j + block_width > df.shape[1]:
        # An out-of-range iloc slice silently returns fewer columns and only
        # fails later with an opaque length-mismatch; fail early and clearly.
        raise ValueError(
            f"No column block for year {i}: need columns {j}-{j + block_width - 1}, "
            f"but the sheet has only {df.shape[1]} columns"
        )

    # Explicit copy so the column assignment below writes to an independent
    # frame rather than to something derived from a slice of `df`.
    temp = df.iloc[:, j:j + block_width].dropna().copy()
    temp.columns = ['Date', 'Open','High','Low','Close']

    # Convert the 'Date' column to datetime format
    temp['Date'] = pd.to_datetime(temp['Date'])

    # Keep only rows within 365 days of the most recent date in this block.
    last_date = temp['Date'].max()
    one_year_ago = last_date - timedelta(days=365)
    temp = temp[temp['Date'] >= one_year_ago]

    temp.to_csv(os.path.join(output_dir, i + '.csv'), index=False)
    print(temp)

          Date    Open    High     Low   Close
84  2022-12-27  0.0050  0.0050  0.0030  0.0030
85  2022-12-28  0.0035  0.0065  0.0025  0.0025
86  2022-12-29  0.0020  0.0050  0.0010  0.0050
87  2022-12-30  0.0060  0.0060  0.0055  0.0055
88  2023-01-03  0.0035  0.0040 -0.0010  0.0000
..         ...     ...     ...     ...     ...
332 2023-12-20 -0.0035 -0.0020 -0.0065 -0.0065
333 2023-12-21 -0.0070 -0.0070 -0.0110 -0.0090
334 2023-12-22 -0.0090 -0.0075 -0.0135 -0.0100
335 2023-12-26 -0.0075 -0.0040 -0.0080 -0.0040
336 2023-12-27 -0.0040  0.0100 -0.0185 -0.0185

[253 rows x 5 columns]
          Date    Open    High     Low   Close
97  2021-12-28  0.0245  0.0245  0.0205  0.0205
98  2021-12-30  0.0190  0.0190  0.0175  0.0180
99  2021-12-31  0.0200  0.0200  0.0185  0.0185
100 2022-01-03  0.0175  0.0175  0.0170  0.0170
101 2022-01-04  0.0190  0.0220  0.0190  0.0210
..         ...     ...     ...     ...     ...
344 2022-12-21 -0.0020 -0.0020 -0.0050 -0.0045
345 2022-12-22 -0.0045 -0.0045 -0.00

In [37]:
# Pull columns 6-10 out of the wide `df` sheet, drop incomplete rows, and
# rename the 'ZSF24-H24' header (if present) to 'Date' before saving.
# NOTE(review): 'ZSF24-H24' is not among the headers shown for the ZW Dec-Mar
# workbook loaded earlier in this notebook, so this cell presumably ran against
# a different `df` — confirm which file it expects before re-running.
temp = df.iloc[:, 6:11].dropna()
temp = temp.rename(columns={'ZSF24-H24': 'Date'})

# The file name had been left out of the path expression (`'.../' ++ '.csv'`),
# which applies unary plus to a string and raises TypeError. '2023' matches the
# file this notebook's final cell writes for the same folder.
# NOTE(review): confirm 2023 is the intended year for this block.
temp.to_csv('/workspaces/Futures-First/Prediction/data/SB Jan-Mar/2023.csv', index=False)

Unnamed: 0,Date,Open.1,High.1,Low.1,Last.1
38,8-Apr-22,5.75,6.00,5.75,7.50
41,13-Apr-22,6.00,6.00,6.00,6.75
58,9-May-22,6.00,6.00,6.00,7.25
64,17-May-22,8.00,8.00,8.00,10.75
81,10-Jun-22,13.25,13.25,13.25,12.00
...,...,...,...,...,...
475,5-Jan-24,-4.75,-4.50,-6.75,-6.50
476,8-Jan-24,-5.00,-5.00,-8.00,-6.00
477,9-Jan-24,-7.25,-5.50,-8.00,-7.00
478,10-Jan-24,-8.00,-5.50,-8.00,-7.75


In [40]:
# Turn the 'Date' column into real datetimes and put the rows in
# chronological order, then show the result.
temp = (
    temp
    .assign(Date=pd.to_datetime(temp['Date']))
    .sort_values(by='Date')
)
temp

Unnamed: 0,Date,Open.1,High.1,Low.1,Last.1
38,2022-04-08,5.75,6.00,5.75,7.50
41,2022-04-13,6.00,6.00,6.00,6.75
58,2022-05-09,6.00,6.00,6.00,7.25
64,2022-05-17,8.00,8.00,8.00,10.75
81,2022-06-10,13.25,13.25,13.25,12.00
...,...,...,...,...,...
475,2024-01-05,-4.75,-4.50,-6.75,-6.50
476,2024-01-08,-5.00,-5.00,-8.00,-6.00
477,2024-01-09,-7.25,-5.50,-8.00,-7.00
478,2024-01-10,-8.00,-5.50,-8.00,-7.75


In [39]:
# Write the current `temp` frame to the SB Jan-Mar folder as 2023.csv.
# NOTE(review): this cell's execution count (In [39]) is lower than that of the
# date parse/sort cell above it (In [40]), so the file on disk may predate the
# sorting — re-run top to bottom to confirm the saved contents.
temp.to_csv('/workspaces/Futures-First/Prediction/data/SB Jan-Mar/2023.csv',index=False)