In [6]:
# Automatic check in folder

import pandas as pd
import numpy as np
import os
from itertools import combinations

def optimize_z_ratio(df):
    # Initialize tracking variables
    best_z = None
    lowest_variance = float('inf')
    lowest_standard_deviation = float('inf')
    
    # Iterate through potential z values with decimal precision
    # Using a range from 0 to 10 with 0.1 increments
    z_values = np.arange(0, 10.1, 0.1)
    
    results = []
    
    for z in z_values:
        # Calculate Z column
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate variance and standard deviation of Z
        z_variance = df['Z'].var()
        z_std = df['Z'].std()
        
        # Track results
        results.append({
            'z': z,
            'variance': z_variance,
            'standard_deviation': z_std
        })
        
        # Update best z if current z has lower variance
        if z_variance < lowest_variance:
            lowest_variance = z_variance
            best_z = z
            lowest_standard_deviation = z_std
    
    # Convert results to DataFrame for easy analysis
    results_df = pd.DataFrame(results)
    
    # Return optimization results
    return best_z

def process_files_in_directory(directory):
    # List all CSV files in the directory
    files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    
    # Initialize a list to store results for each pair
    results_list = []
    
    # Iterate through all pairs of files
    for file1, file2 in combinations(files, 2):
        df1 = pd.read_csv(os.path.join(directory, file1))
        df2 = pd.read_csv(os.path.join(directory, file2))

        df1 = df1.iloc[:, 1::2]
        df2 = df2.iloc[:, 1::2]

        optimization_results = []

        for i in range(df1.columns.size):
            df3 = pd.DataFrame()
            df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
            df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')

            # Remove rows with NaN values
            df3 = df3.dropna()
            df3 = df3[-250:]
            df3 = df3[:230]

            # Perform optimization
            optimization_results.append(optimize_z_ratio(df3))

        # Convert results to a Series for easy analysis
        optimization_results_series = pd.Series(optimization_results)

        # Calculate mean and median
        mean_z = optimization_results_series.mean()
        median_z = optimization_results_series.median()

        # Append results to the list
        results_list.append({
            'Contract 1': file1,
            'Contract 2': file2,
            'Mean': mean_z,
            'Median': median_z
        })

    # Convert the list of results to a DataFrame
    results_df = pd.DataFrame(results_list)
    
    return results_df

# Specify the directory containing the CSV files
directory = '/workspaces/Futures-First/BackTest/data/ZW Contracts'

# Process all files in the directory and get the results DataFrame
results_df = process_files_in_directory(directory)

# Display the results DataFrame
print(results_df)

             Contract 1          Contract 2   Mean  Median
0    ZW May25-Dec25.csv  ZW Sep25-Mar26.csv  1.800    1.50
1    ZW May25-Dec25.csv  ZW Dec25-May26.csv  1.095    1.20
2    ZW May25-Dec25.csv  ZW Mar26-Sep26.csv  0.350    0.20
3    ZW May25-Dec25.csv  ZW May25-Jul25.csv  1.895    1.80
4    ZW May25-Dec25.csv  ZW Sep25-May26.csv  0.865    0.95
..                  ...                 ...    ...     ...
185  ZW Jul25-Sep25.csv  ZW May26-Jul26.csv  0.170    0.10
186  ZW Jul25-Sep25.csv  ZW Sep25-Dec25.csv  0.695    0.70
187  ZW Dec25-Mar26.csv  ZW May26-Jul26.csv  0.235    0.20
188  ZW Dec25-Mar26.csv  ZW Sep25-Dec25.csv  0.845    0.80
189  ZW May26-Jul26.csv  ZW Sep25-Dec25.csv  1.315    0.80

[190 rows x 4 columns]


In [7]:
results_df.to_csv('/workspaces/Futures-First/BackTest/data/RatioResult/Wheat.csv',index=False)

In [4]:
# inclusion of new features
import pandas as pd
import numpy as np
import os
from itertools import combinations

def optimize_z_ratio(df):
    best_z = None
    best_sharpe = -float('inf')
    
    z_values = np.arange(0, 10.1, 0.1)
    
    for z in z_values:
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate returns
        df['Returns'] = df['Z'].pct_change().dropna()
        
        # Skip if there are no returns or standard deviation is zero
        if df['Returns'].empty or df['Returns'].std() == 0:
            continue
        
        # Calculate Sharpe ratio (assuming risk-free rate = 0)
        sharpe_ratio = df['Returns'].mean() / df['Returns'].std()
        
        if sharpe_ratio > best_sharpe:
            best_sharpe = sharpe_ratio
            best_z = z
    
    # Return best_z and best_sharpe (or default values if no valid z found)
    return best_z if best_z is not None else 0, best_sharpe

def process_files_in_directory(directory, window_size=230):
    files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    results_list = []
    
    for file1, file2 in combinations(files, 2):
        df1 = pd.read_csv(os.path.join(directory, file1))
        df2 = pd.read_csv(os.path.join(directory, file2))

        df1 = df1.iloc[:, 1::2]
        df2 = df2.iloc[:, 1::2]

        optimization_results = []

        for i in range(df1.columns.size):
            df3 = pd.DataFrame()
            df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
            df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')

            df3 = df3.dropna()
            df3 = df3[-250:]  # Use last 250 days
            df3 = df3[:window_size]  # Apply dynamic window

            z, sharpe = optimize_z_ratio(df3)
            optimization_results.append(z)

        # Filter out None values from optimization_results
        optimization_results = [z for z in optimization_results if z is not None]

        # Calculate mean and median only if there are valid results
        if optimization_results:
            mean_z = np.mean(optimization_results)
            median_z = np.median(optimization_results)
        else:
            mean_z, median_z = 0, 0  # Default values if no valid z found

        results_list.append({
            'Contract 1': file1,
            'Contract 2': file2,
            'Mean': mean_z,
            'Median': median_z,
            'Sharpe Ratio': sharpe
        })

    return pd.DataFrame(results_list)

# Specify the directory containing the CSV files
directory = '/workspaces/Futures-First/BackTest/data/ZC Contracts'

# Process all files in the directory and get the results DataFrame
results_df = process_files_in_directory(directory, window_size=230)

# Display the results DataFrame
print(results_df)

  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_num

             Contract 1          Contract 2   Mean  Median  Sharpe Ratio
0    ZC Dec25-May26.csv  ZC Mar26-May26.csv  3.060    3.10      0.148139
1    ZC Dec25-May26.csv  ZC Jul25-Mar26.csv  3.725    3.55      0.142470
2    ZC Dec25-May26.csv  ZC May25-Jul25.csv  3.760    3.35      0.122468
3    ZC Dec25-May26.csv  ZC Jul25-Sep25.csv  3.630    3.10      0.126574
4    ZC Dec25-May26.csv  ZC May25-Mar26.csv  3.875    2.65      0.187096
..                  ...                 ...    ...     ...           ...
205  ZC Sep25-Mar26.csv  ZC May25-Sep25.csv  2.200    2.05      0.121246
206  ZC Sep25-Mar26.csv  ZC May25-Dec25.csv  2.780    1.60      0.119340
207  ZC Sep25-May26.csv  ZC May25-Sep25.csv  3.085    1.90      0.141127
208  ZC Sep25-May26.csv  ZC May25-Dec25.csv  2.765    1.65      0.141127
209  ZC May25-Sep25.csv  ZC May25-Dec25.csv  1.050    0.75      0.120995

[210 rows x 5 columns]


  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


In [5]:
results_df

Unnamed: 0,Contract 1,Contract 2,Mean,Median,Sharpe Ratio
0,ZC Dec25-May26.csv,ZC Mar26-May26.csv,3.060,3.10,0.148139
1,ZC Dec25-May26.csv,ZC Jul25-Mar26.csv,3.725,3.55,0.142470
2,ZC Dec25-May26.csv,ZC May25-Jul25.csv,3.760,3.35,0.122468
3,ZC Dec25-May26.csv,ZC Jul25-Sep25.csv,3.630,3.10,0.126574
4,ZC Dec25-May26.csv,ZC May25-Mar26.csv,3.875,2.65,0.187096
...,...,...,...,...,...
205,ZC Sep25-Mar26.csv,ZC May25-Sep25.csv,2.200,2.05,0.121246
206,ZC Sep25-Mar26.csv,ZC May25-Dec25.csv,2.780,1.60,0.119340
207,ZC Sep25-May26.csv,ZC May25-Sep25.csv,3.085,1.90,0.141127
208,ZC Sep25-May26.csv,ZC May25-Dec25.csv,2.765,1.65,0.141127


In [47]:
# Manual Change

import pandas as pd
import numpy as np

def optimize_z_ratio(df):
    
    # Initialize tracking variables
    best_z = None
    lowest_variance = float('inf')
    lowest_standard_deviation = float('inf')
    
    # Iterate through potential z values with decimal precision
    # Using a range from 0 to 10 with 0.1 increments
    z_values = np.arange(0, 10.1, 0.1)
    
    results = []
    
    for z in z_values:
        # Calculate Z column
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate variance and standard deviation of Z
        z_variance = df['Z'].var()
        z_std = df['Z'].std()
        
        # Track results
        results.append({
            'z': z,
            'variance': z_variance,
            'standard_deviation': z_std
        })
        
        # Update best z if current z has lower variance
        if z_variance < lowest_variance:
            lowest_variance = z_variance
            best_z = z
            lowest_standard_deviation = z_std
    
    # Convert results to DataFrame for easy analysis
    results_df = pd.DataFrame(results)
    
    # Return optimization results
    return best_z
    # return {
    #     'best_z': best_z,
    #     'lowest_variance': lowest_variance,
    #     'lowest_standard_deviation': lowest_standard_deviation,
    #     'results_dataframe': results_df
    # }

optimization_results = []

# df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZC Contracts/ZC Dec25-Jul26.csv')
df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZC Contracts/ZC Mar26-May26.csv')

df1 = df1.iloc[:, 1::2]
df2 = df2.iloc[:, 1::2]
# df1 = df1.iloc[:,1:]
# df2 = df2.iloc[:,1:]

for i in range(df1.columns.size):
    df3 = pd.DataFrame()
    df3['X'] = pd.to_numeric(df1.iloc[:,i], errors='coerce')
    df3['Y'] = pd.to_numeric(df2.iloc[:,i], errors='coerce')

    # Remove rows with NaN values
    df3 = df3.dropna()
    df3 = df3[-250:]
    df3 = df3[:230]

    # Perform optimization
    # optimization_results = optimize_z_ratio(df)
    optimization_results.append(optimize_z_ratio(df3))

# Print results
# print("Optimization Results:")
# print(f"Best Z Value: {optimization_results['best_z']:.1f}")
# print(f"Lowest Variance: {optimization_results['lowest_variance']:.4f}")
# print(f"Lowest Standard Deviation: {optimization_results['lowest_standard_deviation']:.4f}")

# Optional: Display full results DataFrame for detailed analysis
# print("\nDetailed Z Value Analysis:")
# print(optimization_results['results_dataframe'])

# print(optimization_results)
optimization_results_series = pd.Series(optimization_results)

# Calculate mean and median
mean_z = optimization_results_series.mean()
median_z = optimization_results_series.median()

print(f"Mean of optimization results: {mean_z}")
print(f"Median of optimization results: {median_z}")

Mean of optimization results: 0.925
Median of optimization results: 0.9


In [None]:
# import pandas as pd
# import numpy as np
# from statsmodels.tsa.stattools import adfuller

# # Function to calculate correlation between X and Y
# def calculate_correlation(df):
#     return df['X'].corr(df['Y'])

# # Function to check mean reversion using ADF test
# def test_mean_reversion(df):
#     result = adfuller(df['Z'])
#     p_value = result[1]
#     return p_value < 0.05  # Mean-reverting if p-value < 0.05

# # Function to incorporate seasonality check
# def seasonality_check(df, seasonal_avg):
#     recent_avg = df['X'].mean()
#     return abs(recent_avg - seasonal_avg) < 0.05 * seasonal_avg  # Within 5% deviation

# # Function for dynamic optimization with rolling windows
# def dynamic_optimize_z_ratio(df, window_size):
#     rolling_results = []

#     for start in range(0, len(df) - window_size + 1):
#         # Slice rolling window
#         rolling_df = df.iloc[start:start + window_size]
#         best_z = optimize_z_ratio(rolling_df)
#         rolling_results.append(best_z)

#     return rolling_results

# # Function to optimize Z ratio
# # Reuses the original implementation

# def optimize_z_ratio(df):
#     best_z = None
#     lowest_variance = float('inf')

#     z_values = np.arange(0, 10.1, 0.1)
#     for z in z_values:
#         df['Z'] = df['X'] - (z * df['Y'])
#         z_variance = df['Z'].var()
#         if z_variance < lowest_variance:
#             lowest_variance = z_variance
#             best_z = z

#     return best_z

# # Main Code
# df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZS Spreads/ZS Jul25-Aug25.csv')
# df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZS Spreads/ZS Jul25-Aug25.csv')
# df1 = df1.iloc[:, 1::2]
# df2 = df2.iloc[:, 1::2]

# seasonal_avg = 100  # Placeholder for seasonal average, replace with actual historical data
# window_size = 250  # Rolling window size
# optimization_results = []

# for i in range(df1.columns.size):
#     df3 = pd.DataFrame()
#     df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
#     df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')
#     df3 = df3.dropna()[-250:][:230]

#     # Correlation Check
#     correlation = calculate_correlation(df3)
#     if abs(correlation) < 0.5:  # Filter weak correlations
#         print(f"Skipping column pair {i} due to low correlation ({correlation:.2f})")
#         continue

#     # Seasonality Check
#     if not seasonality_check(df3, seasonal_avg):
#         print(f"Skipping column pair {i} due to seasonal mismatch")
#         continue

#     # Perform Optimization
#     dynamic_results = dynamic_optimize_z_ratio(df3, window_size)
#     mean_z = np.mean(dynamic_results)

#     # Test Mean Reversion
#     df3['Z'] = df3['X'] - (mean_z * df3['Y'])
#     if not test_mean_reversion(df3):
#         print(f"Skipping column pair {i} as Z is not mean-reverting")
#         continue

#     # Store result
#     optimization_results.append(mean_z)

# # Calculate and print overall statistics
# optimization_results_series = pd.Series(optimization_results)
# mean_z = optimization_results_series.mean()
# median_z = optimization_results_series.median()

# print(f"Mean of optimization results: {mean_z}")
# print(f"Median of optimization results: {median_z}")


Skipping column pair 0 due to seasonal mismatch
Skipping column pair 1 due to seasonal mismatch
Skipping column pair 2 due to seasonal mismatch
Skipping column pair 3 due to seasonal mismatch
Skipping column pair 4 due to seasonal mismatch
Skipping column pair 5 due to seasonal mismatch
Skipping column pair 6 due to seasonal mismatch
Skipping column pair 7 due to seasonal mismatch
Skipping column pair 8 due to seasonal mismatch
Skipping column pair 9 due to seasonal mismatch
Skipping column pair 10 due to seasonal mismatch
Skipping column pair 11 due to seasonal mismatch
Skipping column pair 12 due to seasonal mismatch
Skipping column pair 13 due to seasonal mismatch
Skipping column pair 14 due to seasonal mismatch
Skipping column pair 15 due to seasonal mismatch
Skipping column pair 16 due to seasonal mismatch
Skipping column pair 17 due to seasonal mismatch
Skipping column pair 18 due to seasonal mismatch
Skipping column pair 19 due to seasonal mismatch
Mean of optimization results: 

In [3]:
import pandas as pd

df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/Data1.csv')
df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/Data2.csv')
df1 = df1.iloc[:, 1::2]
df2 = df2.iloc[:, 1::2]


Unnamed: 0,2025,2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014
0,,21.50,48.75,2.25,10.00,,,5.50,10.25,13.00,15.25,
1,,,48.75,2.25,10.00,15.25,,,10.25,12.25,15.25,9.75
2,,,,2.25,10.00,15.25,5.50,,10.25,12.25,15.25,9.75
3,,21.50,,,10.00,16.00,5.50,5.50,,12.25,15.25,9.75
4,,21.50,48.75,,10.00,16.00,5.00,5.50,,,15.25,9.75
...,...,...,...,...,...,...,...,...,...,...,...,...
743,,,60.00,179.75,99.25,6.50,,-4.75,-3.25,18.50,36.50,
744,,,64.00,189.25,58.50,7.50,,,-2.75,20.50,33.00,206.25
745,,40.75,,134.25,54.50,10.00,-5.25,,,4.25,32.50,195.75
746,,41.75,,,51.25,11.00,-4.25,-3.75,,,56.75,185.00


In [1]:
# import pandas as pd
# import numpy as np

# def optimize_z_ratio(df):
#     """
#     Optimize the z ratio to minimize risk in a futures trading structure.
    
#     Parameters:
#     df (pandas.DataFrame): DataFrame with 'X' and 'Y' price columns
    
#     Returns:
#     dict: Optimization results including best z value and associated metrics
#     """
#     # Initialize tracking variables
#     best_z = None
#     lowest_variance = float('inf')
#     lowest_standard_deviation = float('inf')
    
#     # Iterate through potential z values with decimal precision
#     # Using a range from 0 to 10 with 0.1 increments
#     z_values = np.arange(0, 10.1, 0.1)
    
#     results = []
    
#     for z in z_values:
#         # Calculate Z column
#         df['Z'] = df['X'] - (z * df['Y'])
        
#         # Calculate variance and standard deviation of Z
#         z_variance = df['Z'].var()
#         z_std = df['Z'].std()
        
#         # Track results
#         results.append({
#             'z': z,
#             'variance': z_variance,
#             'standard_deviation': z_std
#         })
        
#         # Update best z if current z has lower variance
#         if z_variance < lowest_variance:
#             lowest_variance = z_variance
#             best_z = z
#             lowest_standard_deviation = z_std
    
#     # Convert results to DataFrame for easy analysis
#     results_df = pd.DataFrame(results)
    
#     # Return optimization results
#     return {
#         'best_z': best_z,
#         'lowest_variance': lowest_variance,
#         'lowest_standard_deviation': lowest_standard_deviation,
#         'results_dataframe': results_df
#     }

# # Read the data
# df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
# # Convert to numeric, replacing 'NA' and empty strings with NaN
# df['X'] = pd.to_numeric(df['X'], errors='coerce')
# df['Y'] = pd.to_numeric(df['Y'], errors='coerce')

# # Remove rows with NaN values
# df = df.dropna()
# df = df[-150:]

# # Perform optimization
# optimization_results = optimize_z_ratio(df)

# # Print results
# print("Optimization Results:")
# print(f"Best Z Value: {optimization_results['best_z']:.1f}")
# print(f"Lowest Variance: {optimization_results['lowest_variance']:.4f}")
# print(f"Lowest Standard Deviation: {optimization_results['lowest_standard_deviation']:.4f}")

# # Optional: Display full results DataFrame for detailed analysis
# print("\nDetailed Z Value Analysis:")
# print(optimization_results['results_dataframe'])

Optimization Results:
Best Z Value: 2.2
Lowest Variance: 1.8206
Lowest Standard Deviation: 1.3493

Detailed Z Value Analysis:
        z     variance  standard_deviation
0     0.0   158.779276           12.600765
1     0.1   145.051513           12.043733
2     0.2   131.951681           11.487022
3     0.3   119.479780           10.930681
4     0.4   107.635810           10.374768
..    ...          ...                 ...
96    9.6  1704.280065           41.282927
97    9.7  1750.833692           41.842965
98    9.8  1798.015251           42.403010
99    9.9  1845.824741           42.963063
100  10.0  1894.262162           43.523122

[101 rows x 3 columns]
