In [None]:
import numpy as np
import pandas as pd
import scipy.optimize as optimize

def calculate_price_range_variance(z, X, Y):
    """
    Calculate the range of the transformed prices.
    
    Args:
    z (float): Scaling factor
    X (array-like): First price column
    Y (array-like): Second price column
    
    Returns:
    float: Range of transformed prices (X - z*Y)
    """
    transformed_prices = X - z * Y
    return np.ptp(transformed_prices)  # peak-to-peak, gives range

def optimize_price_range(X, Y):
    """
    Find the z that minimizes the price range, rounded to 1 decimal place.
    
    Args:
    X (array-like): First price column
    Y (array-like): Second price column
    
    Returns:
    tuple: Optimal z (rounded to 1 decimal) and the resulting minimal price range
    """
    # Use scipy's minimize_scalar to find the z that minimizes range
    result = optimize.minimize_scalar(
        calculate_price_range_variance, 
        args=(X, Y),
        method='brent'
    )
    
    # Round z to 1 decimal place
    rounded_z = round(result.x, 1)
    
    return rounded_z, calculate_price_range_variance(rounded_z, X, Y)

def process_csv(file_path):
    """
    Read CSV file, clean and extract numeric columns.
    
    Args:
    file_path (str): Path to the CSV file
    
    Returns:
    tuple: Numpy arrays of cleaned X and Y columns
    """
    # Read the CSV file
    df = pd.read_csv(file_path, header=None, names=['X', 'Y'])
    
    # Convert to numeric, replacing 'NA' and empty strings with NaN
    df['X'] = pd.to_numeric(df['X'], errors='coerce')
    df['Y'] = pd.to_numeric(df['Y'], errors='coerce')
    
    # Remove rows with NaN values
    df_cleaned = df.dropna()
    
    # Convert to numpy arrays
    X = df_cleaned['X'].to_numpy()
    Y = df_cleaned['Y'].to_numpy()
    
    return X, Y

def main(file_path):
    """
    Main function to process CSV and find optimal z
    
    Args:
    file_path (str): Path to the CSV file
    """
    # Read and clean data from CSV
    X, Y = process_csv(file_path)
    
    # Print number of valid data points
    print(f"Number of valid data points: {len(X)}")
    
    # Optimize price range
    optimal_z, min_range = optimize_price_range(X, Y)
    
    print(f"Optimal scaling factor (z): {optimal_z}")
    print(f"Minimal price range: {min_range}")
    print("\nFirst 10 Transformed prices:")
    transformed_prices = X - optimal_z * Y
    print(transformed_prices)
    
    return optimal_z, min_range

# Run the main function
if __name__ == "__main__":
    FILE_PATH = '/workspaces/Futures-First/BackTest/data/data.csv'
    main(FILE_PATH)

Number of valid data points: 297
Optimal scaling factor (z): 1.0
Minimal price range: 11.0

First 10 Transformed prices:
[ 0.   -4.75 -0.5   1.25  1.   -2.75 -2.5   1.    1.5   1.   -1.75 -3.25
 -0.75  3.75  3.5   0.75  0.5   3.    2.    2.5  -0.25  1.    2.75  2.75
  1.    2.25  0.5   0.   -2.   -0.5   1.75  5.75  1.25  2.    4.75  4.5
  3.75  4.5   3.75  3.    3.    4.    3.    0.75  3.25  2.    0.   -2.
 -3.5  -2.75 -4.5  -5.25 -4.   -1.5   0.25  2.    1.    1.5   3.5   3.
  2.5   0.75 -0.75  0.    0.5   0.75 -1.    0.   -0.75 -0.25 -2.75 -2.5
 -2.75 -5.25 -4.5  -1.5  -2.25 -1.5  -3.   -3.   -1.5   0.5   0.75  0.
  1.5   4.    2.    2.25  1.75  0.    0.5  -0.25 -0.5   0.25 -1.75 -0.75
 -1.    0.25  0.75 -1.    0.75  0.   -1.   -1.   -1.5  -2.75 -4.5  -1.25
 -2.   -2.   -2.75 -3.25 -3.   -0.25 -2.   -1.75 -0.25  0.5   0.25  0.
 -0.25 -0.75 -0.5  -0.25  0.5  -1.    0.25 -0.25 -0.75  0.   -1.25 -0.75
  0.    0.   -0.25  0.25  0.5   0.5  -0.75 -0.5   0.25 -0.25 -0.75  1.5
  1.25  1.25  

In [None]:
import pandas as pd

df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
    
    # Convert to numeric, replacing 'NA' and empty strings with NaN
df['X'] = pd.to_numeric(df['X'], errors='coerce')
df['Y'] = pd.to_numeric(df['Y'], errors='coerce')
    
    # Remove rows with NaN values
df = df.dropna()
z = 3

df['Z'] = df['X'] - (z*df['Y'])
df

Unnamed: 0,X,Y,Z
54,48.75,32.00,-47.25
73,49.50,32.00,-46.50
74,47.75,32.00,-48.25
75,48.00,32.00,-48.00
76,51.75,32.00,-44.25
...,...,...,...
494,13.00,12.25,-23.75
495,15.25,13.50,-25.25
496,15.00,13.25,-24.75
499,12.00,11.75,-23.25


In [31]:
import pandas as pd
import numpy as np

def optimize_z_ratio(df):
    """
    Optimize the z ratio to minimize risk in a futures trading structure.
    
    Parameters:
    df (pandas.DataFrame): DataFrame with 'X' and 'Y' price columns
    
    Returns:
    dict: Optimization results including best z value and associated metrics
    """
    # Initialize tracking variables
    best_z = None
    lowest_variance = float('inf')
    lowest_standard_deviation = float('inf')
    
    # Iterate through potential z values with decimal precision
    # Using a range from 0 to 10 with 0.1 increments
    z_values = np.arange(0, 10.1, 0.1)
    
    results = []
    
    for z in z_values:
        # Calculate Z column
        df['Z'] = df['X'] - (z * df['Y'])
        
        # Calculate variance and standard deviation of Z
        z_variance = df['Z'].var()
        z_std = df['Z'].std()
        
        # Track results
        results.append({
            'z': z,
            'variance': z_variance,
            'standard_deviation': z_std
        })
        
        # Update best z if current z has lower variance
        if z_variance < lowest_variance:
            lowest_variance = z_variance
            best_z = z
            lowest_standard_deviation = z_std
    
    # Convert results to DataFrame for easy analysis
    results_df = pd.DataFrame(results)
    
    # Return optimization results
    return {
        'best_z': best_z,
        'lowest_variance': lowest_variance,
        'lowest_standard_deviation': lowest_standard_deviation,
        'results_dataframe': results_df
    }

# Read the data
df = pd.read_csv('/workspaces/Futures-First/BackTest/data/data.csv', names=['X', 'Y'])
# Convert to numeric, replacing 'NA' and empty strings with NaN
df['X'] = pd.to_numeric(df['X'], errors='coerce')
df['Y'] = pd.to_numeric(df['Y'], errors='coerce')

# Remove rows with NaN values
df = df.dropna()
df = df[-150:]

# Perform optimization
optimization_results = optimize_z_ratio(df)

# Print results
print("Optimization Results:")
print(f"Best Z Value: {optimization_results['best_z']:.1f}")
print(f"Lowest Variance: {optimization_results['lowest_variance']:.4f}")
print(f"Lowest Standard Deviation: {optimization_results['lowest_standard_deviation']:.4f}")

# Optional: Display full results DataFrame for detailed analysis
print("\nDetailed Z Value Analysis:")
print(optimization_results['results_dataframe'])

Optimization Results:
Best Z Value: 2.2
Lowest Variance: 1.8206
Lowest Standard Deviation: 1.3493

Detailed Z Value Analysis:
        z     variance  standard_deviation
0     0.0   158.779276           12.600765
1     0.1   145.051513           12.043733
2     0.2   131.951681           11.487022
3     0.3   119.479780           10.930681
4     0.4   107.635810           10.374768
..    ...          ...                 ...
96    9.6  1704.280065           41.282927
97    9.7  1750.833692           41.842965
98    9.8  1798.015251           42.403010
99    9.9  1845.824741           42.963063
100  10.0  1894.262162           43.523122

[101 rows x 3 columns]


Unnamed: 0,X,Y,Z
54,48.75,32.00,-271.25
73,49.50,32.00,-270.50
74,47.75,32.00,-272.25
75,48.00,32.00,-272.00
76,51.75,32.00,-268.25
...,...,...,...
494,13.00,12.25,-109.50
495,15.25,13.50,-119.75
496,15.00,13.25,-117.50
499,12.00,11.75,-105.50
