In [1]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Automatic check in folder

import pandas as pd
import numpy as np
import os
from itertools import combinations

def optimize_z_ratio(df):
    # Initialize tracking variables
    best_z = None
    lowest_variance = float('inf')
    lowest_standard_deviation = float('inf')
    
    # Iterate through potential z values with decimal precision
    # Using a range from 0 to 10 with 0.1 increments
    z_values = np.arange(0, 10.1, 0.1)
    
    results = []
    
    for z in z_values:
        # Calculate Z column
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate variance and standard deviation of Z
        z_variance = df['Z'].var()
        z_std = df['Z'].std()
        
        # Track results
        results.append({
            'z': z,
            'variance': z_variance,
            'standard_deviation': z_std
        })
        
        # Update best z if current z has lower variance
        if z_variance < lowest_variance:
            lowest_variance = z_variance
            best_z = z
            lowest_standard_deviation = z_std
    
    # Convert results to DataFrame for easy analysis
    results_df = pd.DataFrame(results)
    
    # Return optimization results
    return best_z

def process_files_in_directory(directory):
    # List all CSV files in the directory
    files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    
    # Initialize a list to store results for each pair
    results_list = []
    
    # Iterate through all pairs of files
    for file1, file2 in combinations(files, 2):
        df1 = pd.read_csv(os.path.join(directory, file1))
        df2 = pd.read_csv(os.path.join(directory, file2))

        df1 = df1.iloc[:, 1::2]
        df2 = df2.iloc[:, 1::2]

        optimization_results = []

        for i in range(df1.columns.size):
            df3 = pd.DataFrame()
            df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
            df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')

            # Remove rows with NaN values
            df3 = df3.dropna()
            df3 = df3[-250:]
            df3 = df3[:230]

            # Perform optimization
            optimization_results.append(optimize_z_ratio(df3))

        # Convert results to a Series for easy analysis
        optimization_results_series = pd.Series(optimization_results)

        # Calculate mean and median
        mean_z = optimization_results_series.mean()
        median_z = optimization_results_series.median()

        # Append results to the list
        results_list.append({
            'Contract 1': file1,
            'Contract 2': file2,
            'Mean': mean_z,
            'Median': median_z
        })

    # Convert the list of results to a DataFrame
    results_df = pd.DataFrame(results_list)
    
    return results_df

# Specify the directory containing the CSV files
directory = '/workspaces/Futures-First/BackTest/data/ZW Contracts'

# Process all files in the directory and get the results DataFrame
results_df = process_files_in_directory(directory)

# Display the results DataFrame
print(results_df)

             Contract 1          Contract 2   Mean  Median
0    ZW May25-Dec25.csv  ZW Sep25-Mar26.csv  1.800    1.50
1    ZW May25-Dec25.csv  ZW Dec25-May26.csv  1.095    1.20
2    ZW May25-Dec25.csv  ZW Mar26-Sep26.csv  0.350    0.20
3    ZW May25-Dec25.csv  ZW May25-Jul25.csv  1.895    1.80
4    ZW May25-Dec25.csv  ZW Sep25-May26.csv  0.865    0.95
..                  ...                 ...    ...     ...
185  ZW Jul25-Sep25.csv  ZW May26-Jul26.csv  0.170    0.10
186  ZW Jul25-Sep25.csv  ZW Sep25-Dec25.csv  0.695    0.70
187  ZW Dec25-Mar26.csv  ZW May26-Jul26.csv  0.235    0.20
188  ZW Dec25-Mar26.csv  ZW Sep25-Dec25.csv  0.845    0.80
189  ZW May26-Jul26.csv  ZW Sep25-Dec25.csv  1.315    0.80

[190 rows x 4 columns]


In [7]:
results_df.to_csv('/workspaces/Futures-First/BackTest/data/RatioResult/Wheat.csv',index=False)

In [14]:
import pandas as pd
import numpy as np
import os
from itertools import combinations
from datetime import datetime

# Month names for display
MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

def optimize_z_ratio_monthly(df):
    best_z = 0
    best_sharpe = -np.inf
    
    z_values = np.arange(0, 10.1, 0.1)
    
    for z in z_values:
        df['Spread'] = df['Value_X'] - z * df['Value_Y']
        returns = df['Spread'].pct_change().dropna()
        
        # Skip if returns contain NaN or inf
        if len(returns) < 2 or returns.isnull().any() or np.isinf(returns).any():
            continue
        
        sharpe = returns.mean() / returns.std()
        if sharpe > best_sharpe:
            best_sharpe = sharpe
            best_z = z
            
    return best_z, best_sharpe

def process_contract_file(file_path):
    """Convert wide-format contract data to long format"""
    df = pd.read_csv(file_path)
    melted = pd.DataFrame()

    for i in range(0, len(df.columns), 2):
        if i+1 >= len(df.columns):
            break
            
        date_col = df.columns[i]
        value_col = df.columns[i+1]
        year = value_col
        
        temp_df = df[[date_col, value_col]].copy()
        temp_df.columns = ['Date', 'Value']
        temp_df['ContractYear'] = year
        melted = pd.concat([melted, temp_df])

    melted['Date'] = pd.to_datetime(melted['Date'], errors='coerce')
    melted['Value'] = pd.to_numeric(melted['Value'], errors='coerce')  # Ensure numeric values
    melted = melted.dropna(subset=['Date', 'Value'])  # Drop rows with invalid dates or values
    melted = melted.replace([np.inf, -np.inf], np.nan).dropna()  # Handle infinite values
    return melted


def process_files_monthly(directory):
    results = []
    
    for file1, file2 in combinations(os.listdir(directory), 2):
        if not (file1.endswith('.csv') and file2.endswith('.csv')):
            continue
            
        # Load and merge data
        df1 = process_contract_file(os.path.join(directory, file1))
        df2 = process_contract_file(os.path.join(directory, file2))
        
        merged = pd.merge(df1, df2, on='Date', suffixes=('_X', '_Y'))
        if merged.empty:
            continue
            
        merged['Month'] = merged['Date'].dt.month
        
        monthly_sharpe = {}

        # Calculate monthly Sharpe ratios
        for month, group in merged.groupby('Month'):
            group = group.sort_values('Date')
            if len(group) < 5:  # Minimum data threshold
                continue
                
            _, sharpe = optimize_z_ratio_monthly(group)
            if not np.isnan(sharpe) and not np.isinf(sharpe):  # Skip invalid Sharpe ratios
                monthly_sharpe[month] = sharpe

        if not monthly_sharpe:
            continue

        # Prepare result row
        max_sharpe = max(monthly_sharpe.values())
        best_month_num = max(monthly_sharpe, key=monthly_sharpe.get)
        
        row = {
            'Contract1': file1[:-4],
            'Contract2': file2[:-4],
            'Best': max_sharpe,
            'Average': np.mean(list(monthly_sharpe.values())),
            'Best Month': MONTH_NAMES[best_month_num - 1]
        }

        # Add monthly values
        for month_num in range(1, 13):
            month_name = MONTH_NAMES[month_num - 1]
            row[month_name] = monthly_sharpe.get(month_num, np.nan)

        results.append(row)
    
    return pd.DataFrame(results)


# Example usage
if __name__ == "__main__":
    results = process_files_monthly('/workspaces/Futures-First/BackTest/data/ZL Contracts')
    print("Monthly Optimization Results:")
    print(results.to_string(index=False))

Monthly Optimization Results:
     Contract1      Contract2     Best  Average Best Month  January  February    March    April      May     June     July   August  September  October  November  December
ZL Jan26-Jul26 ZL May25-Oct25 0.065111 0.065111     August      NaN       NaN      NaN      NaN      NaN      NaN      NaN 0.065111        NaN      NaN       NaN       NaN
ZL Jan26-Jul26 ZL Mar25-Aug25 0.083508 0.074592       June      NaN       NaN      NaN      NaN      NaN 0.083508 0.077328 0.062942        NaN      NaN       NaN       NaN
ZL Jan26-Jul26 ZL Mar25-Jul25 0.095101 0.082167      April      NaN       NaN      NaN 0.095101      NaN      NaN 0.069233      NaN        NaN      NaN       NaN       NaN
ZL Jan26-Jul26 ZL Aug25-Oct25 0.046196 0.046196     August      NaN       NaN      NaN      NaN      NaN      NaN      NaN 0.046196        NaN      NaN       NaN       NaN
ZL Jan26-Jul26 ZL Aug25-Jan26 0.059044 0.051772    October      NaN       NaN      NaN      NaN      NaN      

In [15]:
df = results
df = df.applymap(lambda x: round(x, 2) if isinstance(x, float) else x)

df.to_csv('/workspaces/Futures-First/BackTest/data/RatioResult/SoyOil_Zscore.csv',index=False)

  df = df.applymap(lambda x: round(x, 2) if isinstance(x, float) else x)


In [16]:
import pandas as pd
import numpy as np
import os
from itertools import combinations
from datetime import datetime

def optimize_z_ratio_monthly(df):
    best_z = 0
    best_sharpe = -np.inf
    
    z_values = np.arange(0, 10.1, 0.1)
    
    for z in z_values:
        df['Spread'] = df['Value_X'] - z * df['Value_Y']
        returns = df['Spread'].pct_change().dropna()
        
        if len(returns) < 2 or returns.std() == 0:
            continue
        
        sharpe = returns.mean() / returns.std()
        if sharpe > best_sharpe:
            best_sharpe = sharpe
            best_z = z
            
    return best_z, best_sharpe

def process_contract_file(file_path):
    """Convert wide-format contract data to long format"""
    df = pd.read_csv(file_path)
    melted = pd.DataFrame()

    # Process each year's data columns
    for i in range(0, len(df.columns), 2):
        if i+1 >= len(df.columns):
            break
            
        date_col = df.columns[i]
        value_col = df.columns[i+1]
        year = value_col  # Get contract year from column name
        
        temp_df = df[[date_col, value_col]].copy()
        temp_df.columns = ['Date', 'Value']
        temp_df['ContractYear'] = year
        melted = pd.concat([melted, temp_df])

    melted['Date'] = pd.to_datetime(melted['Date'], errors='coerce')
    return melted.dropna(subset=['Date', 'Value'])

def process_files_monthly(directory):
    results = []
    
    for file1, file2 in combinations(os.listdir(directory), 2):
        if not (file1.endswith('.csv') and file2.endswith('.csv')):
            continue
            
        # Load and merge contract data
        df1 = process_contract_file(os.path.join(directory, file1))
        df2 = process_contract_file(os.path.join(directory, file2))
        
        merged = pd.merge(df1, df2, on='Date', suffixes=('_X', '_Y'))
        if merged.empty:
            continue
            
        # Extract month for grouping
        merged['Month'] = merged['Date'].dt.month
        
        monthly_z = []
        monthly_sharpe = []
        
        # Optimize for each month
        for month, group in merged.groupby('Month'):
            group = group.sort_values('Date')
            if len(group) < 5:  # Minimum data threshold
                continue
                
            z, sharpe = optimize_z_ratio_monthly(group)
            monthly_z.append(z)
            monthly_sharpe.append(sharpe)
        
        if monthly_z:
            results.append({
                'Contract Pair': f"{file1[:-4]}-{file2[:-4]}",
                'Best Monthly Z': np.mean(monthly_z),
                'Max Sharpe': np.max(monthly_sharpe),
                'Min Sharpe': np.min(monthly_sharpe),
                'Avg Sharpe': np.mean(monthly_sharpe)
            })
    
    return pd.DataFrame(results)

# Example usage
if __name__ == "__main__":
    results = process_files_monthly('/workspaces/Futures-First/BackTest/data/ZS Contracts')
    print("Monthly Optimization Results:")
    print(results.sort_values('Avg Sharpe', ascending=False).to_string(index=False))

  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_num

KeyboardInterrupt: 

In [None]:
import pandas as pd
import numpy as np
import os
from itertools import combinations

def optimize_z_ratio(df):
    best_z = None
    best_sharpe = -float('inf')
    
    z_values = np.arange(0, 10.1, 0.1)
    
    for z in z_values:
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate returns
        df['Returns'] = df['Z'].pct_change()
        
        # Remove NaN and infinite values
        df['Returns'] = df['Returns'].replace([np.inf, -np.inf], np.nan).dropna()
        
        # Skip if no valid returns exist
        if df['Returns'].empty or df['Returns'].std() == 0:
            continue
        
        # Calculate Sharpe ratio (assuming risk-free rate = 0)
        sharpe_ratio = df['Returns'].mean() / df['Returns'].std()
        
        if sharpe_ratio > best_sharpe:
            best_sharpe = sharpe_ratio
            best_z = z
    
    return best_z if best_z is not None else 0, best_sharpe

def process_files_in_directory(directory, window_size=230):
    files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    results_list = []
    
    for file1, file2 in combinations(files, 2):
        df1 = pd.read_csv(os.path.join(directory, file1))
        df2 = pd.read_csv(os.path.join(directory, file2))

        df1 = df1.iloc[:, 1::2]  # Select every second column starting from index 1
        df2 = df2.iloc[:, 1::2]  # Select every second column starting from index 1

        optimization_results = []

        for i in range(df1.columns.size):
            df3 = pd.DataFrame()
            df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
            df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')

            # Drop NaN values
            df3 = df3.dropna(subset=['X', 'Y'])

            # Use last 250 rows, then apply window size
            df3 = df3[-250:].iloc[:window_size]

            # Skip empty dataframes
            if df3.empty:
                continue

            z, sharpe = optimize_z_ratio(df3)
            optimization_results.append(z)

        # Remove None values
        optimization_results = [z for z in optimization_results if z is not None]

        if optimization_results:
            mean_z = np.mean(optimization_results)
            median_z = np.median(optimization_results)
        else:
            mean_z, median_z = 0, 0

        results_list.append({
            'Contract 1': file1,
            'Contract 2': file2,
            'Mean': mean_z,
            'Median': median_z,
            'Sharpe Ratio': sharpe
        })

    return pd.DataFrame(results_list)

# Specify the directory containing the CSV files
directory = '/workspaces/Futures-First/BackTest/data/ZS Contracts'

# Process all files in the directory and get the results DataFrame
results_df = process_files_in_directory(directory, window_size=230)

# Display the results DataFrame
print(results_df)


In [3]:
results_df.to_csv('/workspaces/Futures-First/BackTest/data/RatioResult/ZS1-Test.csv',index=False)

In [6]:
# Monthly
import pandas as pd
import numpy as np
import os
from itertools import combinations
from datetime import datetime

def optimize_z_ratio_monthly(df_month):
    best_z = None
    best_sharpe = -float('inf')
    
    z_values = np.arange(0, 10.1, 0.1)
    
    for z in z_values:
        df_month['Z'] = df_month['Spread_X'] - (z * df_month['Spread_Y'])  # Assuming spread columns are named 'Spread_X' and 'Spread_Y'
        
        # Calculate daily returns of the spread
        df_month['Returns'] = df_month['Z'].pct_change().dropna()
        
        # Skip if no returns or zero volatility
        if df_month['Returns'].empty or df_month['Returns'].std() == 0:
            continue
        
        # Sharpe ratio (annualized)
        sharpe_ratio = df_month['Returns'].mean() / df_month['Returns'].std() * np.sqrt(252)  # 252 trading days
        
        if sharpe_ratio > best_sharpe:
            best_sharpe = sharpe_ratio
            best_z = z
    
    return best_z if best_z is not None else np.nan, best_sharpe

def process_monthly_spreads(directory):
    files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    results_list = []
    
    for file1, file2 in combinations(files, 2):
        # Load spread data for both contracts
        df1 = pd.read_csv(os.path.join(directory, file1), parse_dates=['Date'], index_col='Date')
        df2 = pd.read_csv(os.path.join(directory, file2), parse_dates=['Date'], index_col='Date')
        
        # Align dates and keep only common trading days
        common_dates = df1.index.intersection(df2.index)
        df1 = df1.loc[common_dates]
        df2 = df2.loc[common_dates]
        
        # Resample data by month
        monthly_groups = df1.groupby(pd.Grouper(freq='M'))
        
        for month, df1_month in monthly_groups:
            # Skip months with insufficient data (e.g., < 15 trading days)
            if len(df1_month) < 15:
                continue
            
            # Get corresponding month data for df2
            df2_month = df2.loc[month.strftime('%Y-%m')]
            
            # Create a DataFrame for the month's spreads
            df_month = pd.DataFrame({
                'Spread_X': df1_month['Spread'],  # Replace 'Spread' with your actual column name
                'Spread_Y': df2_month['Spread']
            }).dropna()
            
            # Skip if no data after alignment
            if df_month.empty:
                continue
            
            # Optimize Z ratio for the month
            best_z, sharpe = optimize_z_ratio_monthly(df_month)
            
            # Append results
            results_list.append({
                'Contract 1': file1,
                'Contract 2': file2,
                'Month': month.strftime('%Y-%m'),
                'Best_Z': best_z,
                'Sharpe': sharpe
            })
    
    results_df = pd.DataFrame(results_list)
    
    # Aggregate results by month (mean/median of Best_Z across pairs)
    monthly_summary = results_df.groupby('Month').agg({
        'Best_Z': ['mean', 'median'],
        'Sharpe': 'mean'
    }).reset_index()
    
    return monthly_summary, results_df

# Directory containing spread CSV files
directory = '/workspaces/Futures-First/BackTest/data/ZM Contracts'

# Process files and get results
monthly_summary, detailed_results = process_monthly_spreads(directory)

# Display results
print("Monthly Summary:")
print(monthly_summary)
print("\nDetailed Results:")
print(detailed_results)

ValueError: Missing column provided to 'parse_dates': 'Date'

In [47]:
# Manual Change

import pandas as pd
import numpy as np

def optimize_z_ratio(df):
    
    # Initialize tracking variables
    best_z = None
    lowest_variance = float('inf')
    lowest_standard_deviation = float('inf')
    
    # Iterate through potential z values with decimal precision
    # Using a range from 0 to 10 with 0.1 increments
    z_values = np.arange(0, 10.1, 0.1)
    
    results = []
    
    for z in z_values:
        # Calculate Z column
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate variance and standard deviation of Z
        z_variance = df['Z'].var()
        z_std = df['Z'].std()
        
        # Track results
        results.append({
            'z': z,
            'variance': z_variance,
            'standard_deviation': z_std
        })
        
        # Update best z if current z has lower variance
        if z_variance < lowest_variance:
            lowest_variance = z_variance
            best_z = z
            lowest_standard_deviation = z_std
    
    # Convert results to DataFrame for easy analysis
    results_df = pd.DataFrame(results)
    
    # Return optimization results
    return best_z
    # return {
    #     'best_z': best_z,
    #     'lowest_variance': lowest_variance,
    #     'lowest_standard_deviation': lowest_standard_deviation,
    #     'results_dataframe': results_df
    # }

optimization_results = []

# df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZC Contracts/ZC Dec25-Jul26.csv')
df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZC Contracts/ZC Mar26-May26.csv')

df1 = df1.iloc[:, 1::2]
df2 = df2.iloc[:, 1::2]
# df1 = df1.iloc[:,1:]
# df2 = df2.iloc[:,1:]

for i in range(df1.columns.size):
    df3 = pd.DataFrame()
    df3['X'] = pd.to_numeric(df1.iloc[:,i], errors='coerce')
    df3['Y'] = pd.to_numeric(df2.iloc[:,i], errors='coerce')

    # Remove rows with NaN values
    df3 = df3.dropna()
    df3 = df3[-250:]
    df3 = df3[:230]

    # Perform optimization
    # optimization_results = optimize_z_ratio(df)
    optimization_results.append(optimize_z_ratio(df3))

# Print results
# print("Optimization Results:")
# print(f"Best Z Value: {optimization_results['best_z']:.1f}")
# print(f"Lowest Variance: {optimization_results['lowest_variance']:.4f}")
# print(f"Lowest Standard Deviation: {optimization_results['lowest_standard_deviation']:.4f}")

# Optional: Display full results DataFrame for detailed analysis
# print("\nDetailed Z Value Analysis:")
# print(optimization_results['results_dataframe'])

# print(optimization_results)
optimization_results_series = pd.Series(optimization_results)

# Calculate mean and median
mean_z = optimization_results_series.mean()
median_z = optimization_results_series.median()

print(f"Mean of optimization results: {mean_z}")
print(f"Median of optimization results: {median_z}")

Mean of optimization results: 0.925
Median of optimization results: 0.9


In [None]:
# import pandas as pd
# import numpy as np
# from statsmodels.tsa.stattools import adfuller

# # Function to calculate correlation between X and Y
# def calculate_correlation(df):
#     return df['X'].corr(df['Y'])

# # Function to check mean reversion using ADF test
# def test_mean_reversion(df):
#     result = adfuller(df['Z'])
#     p_value = result[1]
#     return p_value < 0.05  # Mean-reverting if p-value < 0.05

# # Function to incorporate seasonality check
# def seasonality_check(df, seasonal_avg):
#     recent_avg = df['X'].mean()
#     return abs(recent_avg - seasonal_avg) < 0.05 * seasonal_avg  # Within 5% deviation

# # Function for dynamic optimization with rolling windows
# def dynamic_optimize_z_ratio(df, window_size):
#     rolling_results = []

#     for start in range(0, len(df) - window_size + 1):
#         # Slice rolling window
#         rolling_df = df.iloc[start:start + window_size]
#         best_z = optimize_z_ratio(rolling_df)
#         rolling_results.append(best_z)

#     return rolling_results

# # Function to optimize Z ratio
# # Reuses the original implementation

# def optimize_z_ratio(df):
#     best_z = None
#     lowest_variance = float('inf')

#     z_values = np.arange(0, 10.1, 0.1)
#     for z in z_values:
#         df['Z'] = df['X'] - (z * df['Y'])
#         z_variance = df['Z'].var()
#         if z_variance < lowest_variance:
#             lowest_variance = z_variance
#             best_z = z

#     return best_z

# # Main Code
# df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZS Spreads/ZS Jul25-Aug25.csv')
# df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/ZS Spreads/ZS Jul25-Aug25.csv')
# df1 = df1.iloc[:, 1::2]
# df2 = df2.iloc[:, 1::2]

# seasonal_avg = 100  # Placeholder for seasonal average, replace with actual historical data
# window_size = 250  # Rolling window size
# optimization_results = []

# for i in range(df1.columns.size):
#     df3 = pd.DataFrame()
#     df3['X'] = pd.to_numeric(df1.iloc[:, i], errors='coerce')
#     df3['Y'] = pd.to_numeric(df2.iloc[:, i], errors='coerce')
#     df3 = df3.dropna()[-250:][:230]

#     # Correlation Check
#     correlation = calculate_correlation(df3)
#     if abs(correlation) < 0.5:  # Filter weak correlations
#         print(f"Skipping column pair {i} due to low correlation ({correlation:.2f})")
#         continue

#     # Seasonality Check
#     if not seasonality_check(df3, seasonal_avg):
#         print(f"Skipping column pair {i} due to seasonal mismatch")
#         continue

#     # Perform Optimization
#     dynamic_results = dynamic_optimize_z_ratio(df3, window_size)
#     mean_z = np.mean(dynamic_results)

#     # Test Mean Reversion
#     df3['Z'] = df3['X'] - (mean_z * df3['Y'])
#     if not test_mean_reversion(df3):
#         print(f"Skipping column pair {i} as Z is not mean-reverting")
#         continue

#     # Store result
#     optimization_results.append(mean_z)

# # Calculate and print overall statistics
# optimization_results_series = pd.Series(optimization_results)
# mean_z = optimization_results_series.mean()
# median_z = optimization_results_series.median()

# print(f"Mean of optimization results: {mean_z}")
# print(f"Median of optimization results: {median_z}")


Skipping column pair 0 due to seasonal mismatch
Skipping column pair 1 due to seasonal mismatch
Skipping column pair 2 due to seasonal mismatch
Skipping column pair 3 due to seasonal mismatch
Skipping column pair 4 due to seasonal mismatch
Skipping column pair 5 due to seasonal mismatch
Skipping column pair 6 due to seasonal mismatch
Skipping column pair 7 due to seasonal mismatch
Skipping column pair 8 due to seasonal mismatch
Skipping column pair 9 due to seasonal mismatch
Skipping column pair 10 due to seasonal mismatch
Skipping column pair 11 due to seasonal mismatch
Skipping column pair 12 due to seasonal mismatch
Skipping column pair 13 due to seasonal mismatch
Skipping column pair 14 due to seasonal mismatch
Skipping column pair 15 due to seasonal mismatch
Skipping column pair 16 due to seasonal mismatch
Skipping column pair 17 due to seasonal mismatch
Skipping column pair 18 due to seasonal mismatch
Skipping column pair 19 due to seasonal mismatch
Mean of optimization results: 

In [3]:
import pandas as pd

df1 = pd.read_csv('/workspaces/Futures-First/BackTest/data/Data1.csv')
df2 = pd.read_csv('/workspaces/Futures-First/BackTest/data/Data2.csv')
df1 = df1.iloc[:, 1::2]
df2 = df2.iloc[:, 1::2]


Unnamed: 0,2025,2024,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014
0,,21.50,48.75,2.25,10.00,,,5.50,10.25,13.00,15.25,
1,,,48.75,2.25,10.00,15.25,,,10.25,12.25,15.25,9.75
2,,,,2.25,10.00,15.25,5.50,,10.25,12.25,15.25,9.75
3,,21.50,,,10.00,16.00,5.50,5.50,,12.25,15.25,9.75
4,,21.50,48.75,,10.00,16.00,5.00,5.50,,,15.25,9.75
...,...,...,...,...,...,...,...,...,...,...,...,...
743,,,60.00,179.75,99.25,6.50,,-4.75,-3.25,18.50,36.50,
744,,,64.00,189.25,58.50,7.50,,,-2.75,20.50,33.00,206.25
745,,40.75,,134.25,54.50,10.00,-5.25,,,4.25,32.50,195.75
746,,41.75,,,51.25,11.00,-4.25,-3.75,,,56.75,185.00


In [1]:
# import pandas as pd
# import numpy as np

# def optimize_z_ratio(df):
#     """
#     Optimize the z ratio to minimize risk in a futures trading structure.
    
#     Parameters:
#     df (pandas.DataFrame): DataFrame with 'X' and 'Y' price columns
    
#     Returns:
#     dict: Optimization results including best z value and associated metrics
#     """
#     # Initialize tracking variables
#     best_z = None
#     lowest_variance = float('inf')
#     lowest_standard_deviation = float('inf')
    
#     # Iterate through potential z values with decimal precision
#     # Using a range from 0 to 10 with 0.1 increments
#     z_values = np.arange(0, 10.1, 0.1)
    
#     results = []
    
#     for z in z_values:
#         # Calculate Z column
#         df['Z'] = df['X'] - (z * df['Y'])
        
#         # Calculate variance and standard deviation of Z
#         z_variance = df['Z'].var()
#         z_std = df['Z'].std()
        
#         # Track results
#         results.append({
#             'z': z,
#             'variance': z_variance,
#             'standard_deviation': z_std
#         })
        
#         # Update best z if current z has lower variance
#         if z_variance < lowest_variance:
#             lowest_variance = z_variance
#             best_z = z
#             lowest_standard_deviation = z_std
    
#     # Convert results to DataFrame for easy analysis
#     results_df = pd.DataFrame(results)
    
#     # Return optimization results
#     return {
#         'best_z': best_z,
#         'lowest_variance': lowest_variance,
#         'lowest_standard_deviation': lowest_standard_deviation,
#         'results_dataframe': results_df
#     }

# # Read the data
# df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
# # Convert to numeric, replacing 'NA' and empty strings with NaN
# df['X'] = pd.to_numeric(df['X'], errors='coerce')
# df['Y'] = pd.to_numeric(df['Y'], errors='coerce')

# # Remove rows with NaN values
# df = df.dropna()
# df = df[-150:]

# # Perform optimization
# optimization_results = optimize_z_ratio(df)

# # Print results
# print("Optimization Results:")
# print(f"Best Z Value: {optimization_results['best_z']:.1f}")
# print(f"Lowest Variance: {optimization_results['lowest_variance']:.4f}")
# print(f"Lowest Standard Deviation: {optimization_results['lowest_standard_deviation']:.4f}")

# # Optional: Display full results DataFrame for detailed analysis
# print("\nDetailed Z Value Analysis:")
# print(optimization_results['results_dataframe'])

Optimization Results:
Best Z Value: 2.2
Lowest Variance: 1.8206
Lowest Standard Deviation: 1.3493

Detailed Z Value Analysis:
        z     variance  standard_deviation
0     0.0   158.779276           12.600765
1     0.1   145.051513           12.043733
2     0.2   131.951681           11.487022
3     0.3   119.479780           10.930681
4     0.4   107.635810           10.374768
..    ...          ...                 ...
96    9.6  1704.280065           41.282927
97    9.7  1750.833692           41.842965
98    9.8  1798.015251           42.403010
99    9.9  1845.824741           42.963063
100  10.0  1894.262162           43.523122

[101 rows x 3 columns]
