### Calculating correlations

In [140]:
import numpy as np
import pandas as pd
from itertools import combinations
import warnings
warnings.filterwarnings('ignore')

# Load the price data
prices = np.loadtxt('prices.txt')
prices = prices.T  # Transpose to get (nInst, nt) shape

nInst, nt = prices.shape

# Calculate returns for all companies
returns = np.log(prices[:, 1:] / prices[:, :-1])

# Create tables for each company
tables = []
for i in range(nInst):
    # Create a dataframe for the current company's returns
    df = pd.DataFrame({'current_return': returns[i, 3:]})
    
    # Add lagged returns for all other companies
    for j in range(nInst):
        if i != j:
            for lag in range(1, 4):
                col_name = f'company_{j}_lag_{lag}'
                df[col_name] = returns[j, 3-lag:-lag]
    
    tables.append(df)

# Calculate correlations and find the highest
all_correlations = []

for i, table in enumerate(tables):
    correlations = table.corr()['current_return'].sort_values(ascending=False)
    
    for col, corr in correlations.items():
        if col != 'current_return':
            company, lag = col.split('_lag_')
            company = int(company.split('_')[1])
            lag = int(lag)
            all_correlations.append((i, company, lag, corr))

# Sort correlations and print the top 10
all_correlations.sort(key=lambda x: abs(x[3]), reverse=True)

print("Top 50 correlations:")
for target, predictor, lag, corr in all_correlations[:30]:
    #if int(target) != 38 and int(target) != 27 and int(target) != 39 and int(target) != 16 and int(target != 28):
    #if int(target) == 16:
    print(f"Target company {target} can be predicted using company {predictor} with lag {lag}. Correlation: {corr:.4f}")

# Identify meaningful correlations (e.g., abs(corr) > 0.5)
meaningful_correlations = [c for c in all_correlations if abs(c[3]) > 0.5]

print("\nMeaningful correlations (abs(corr) > 0.5):")
for target, predictor, lag, corr in meaningful_correlations:
    print(f"Target company {target} can be predicted using company {predictor} with lag {lag}. Correlation: {corr:.4f}")

Top 50 correlations:
Target company 38 can be predicted using company 22 with lag 1. Correlation: 0.3722
Target company 38 can be predicted using company 30 with lag 1. Correlation: 0.3407
Target company 27 can be predicted using company 22 with lag 1. Correlation: 0.3004
Target company 38 can be predicted using company 11 with lag 1. Correlation: 0.2950
Target company 38 can be predicted using company 27 with lag 1. Correlation: 0.2944
Target company 27 can be predicted using company 11 with lag 1. Correlation: 0.2930
Target company 39 can be predicted using company 11 with lag 1. Correlation: 0.2921
Target company 27 can be predicted using company 38 with lag 1. Correlation: 0.2841
Target company 27 can be predicted using company 25 with lag 1. Correlation: 0.2767
Target company 38 can be predicted using company 25 with lag 1. Correlation: 0.2568
Target company 27 can be predicted using company 37 with lag 1. Correlation: 0.2548
Target company 27 can be predicted using company 39 wit

In [25]:
threshold = 0.01
change = 0.1
max_pos_change = 10000 * change

### Trading Pos 38

In [50]:
# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for company 22
    prev_return_22 = prcSoFar[22, -1] / prcSoFar[22, -2] - 1
    prev_return_30 = prcSoFar[30, -1] / prcSoFar[30, -2] - 1
    prev_return_25 = prcSoFar[25, -1] / prcSoFar[25, -2] - 1

    # Determine the desired position for company 38
    if prev_return_22 > threshold and prev_return_30 > threshold: #  and prev_return_25 > threshold
        desired_position_38 = 10000
    elif prev_return_22 < threshold and prev_return_30 < threshold: # and prev_return_25 < threshold
        desired_position_38 = -10000
    else:
        desired_position_38 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[38] = desired_position_38
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = 10000 * change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

### Trading pos 27

In [63]:
# Target company 27 can be predicted using company 22 with lag 1. Correlation: 0.3004
# Target company 27 can be predicted using company 11 with lag 1. Correlation: 0.2930
# Target company 27 can be predicted using company 38 with lag 1. Correlation: 0.2841
# Target company 27 can be predicted using company 25 with lag 1. Correlation: 0.2767

# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for company 22
    prev_return_22 = prcSoFar[22, -1] / prcSoFar[22, -2] - 1
    prev_return_11 = prcSoFar[11, -1] / prcSoFar[11, -2] - 1
    prev_return_38 = prcSoFar[38, -1] / prcSoFar[38, -2] - 1

    # Determine the desired position for company 38
    if prev_return_22 > threshold and prev_return_11 > threshold: #  and prev_return_38 > threshold
        desired_position_27 = 10000
    elif prev_return_22 < threshold and prev_return_11 < threshold: # and prev_return_38 < threshold
        desired_position_27 = -10000
    else:
        desired_position_27 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[27] = desired_position_27
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = max_pos_change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

### Trading Pos 39

In [104]:
# Target company 39 can be predicted using company 11 with lag 1. Correlation: 0.2921
# Target company 39 can be predicted using company 22 with lag 1. Correlation: 0.2462
# Target company 39 can be predicted using company 27 with lag 1. Correlation: 0.2340
# Target company 39 can be predicted using company 24 with lag 1. Correlation: 0.2275
# Target company 39 can be predicted using company 38 with lag 1. Correlation: 0.2129
# Target company 39 can be predicted using company 30 with lag 1. Correlation: 0.2070
# Target company 39 can be predicted using company 25 with lag 1. Correlation: 0.2015

# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for companys of interest
    prev_return_22 = prcSoFar[22, -1] / prcSoFar[22, -2] - 1
    prev_return_11 = prcSoFar[11, -1] / prcSoFar[11, -2] - 1
    prev_return_27 = prcSoFar[27, -1] / prcSoFar[27, -2] - 1
    prev_return_24 = prcSoFar[24, -1] / prcSoFar[24, -2] - 1
    prev_return_38 = prcSoFar[38, -1] / prcSoFar[38, -2] - 1

    # Count how many indicators are above and below the threshold
    above_threshold = sum([
        prev_return_22 > threshold,
        prev_return_11 > threshold,
        prev_return_27 > threshold,
        prev_return_24 > threshold,
        prev_return_38 > threshold
    ])

    below_threshold = sum([
        prev_return_22 < threshold,
        prev_return_11 < threshold,
        prev_return_27 < threshold,
        prev_return_24 < threshold,
        prev_return_38 < threshold
    ])

    # Determine the desired position for company 39 based on majority vote
    if above_threshold >= 2:  # If 3 or more indicators are above threshold
        desired_position_39 = 10000
    elif below_threshold >= 2:  # If 3 or more indicators are below threshold
        desired_position_39 = -10000
    else:
        desired_position_39 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[39] = desired_position_39
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = max_pos_change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

### Trading Pos 16

In [123]:
# Target company 16 can be predicted using company 11 with lag 1. Correlation: 0.2491
# Target company 16 can be predicted using company 25 with lag 1. Correlation: 0.2049
# Target company 16 can be predicted using company 38 with lag 1. Correlation: 0.1892
# Target company 16 can be predicted using company 24 with lag 1. Correlation: 0.1575
# Target company 16 can be predicted using company 27 with lag 1. Correlation: 0.1494

# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for companys of interest
    prev_return_25 = prcSoFar[25, -1] / prcSoFar[25, -2] - 1
    prev_return_11 = prcSoFar[11, -1] / prcSoFar[11, -2] - 1
    prev_return_27 = prcSoFar[27, -1] / prcSoFar[27, -2] - 1
    prev_return_24 = prcSoFar[24, -1] / prcSoFar[24, -2] - 1
    prev_return_38 = prcSoFar[38, -1] / prcSoFar[38, -2] - 1

    # Count how many indicators are above and below the threshold
    above_threshold = sum([
        prev_return_25 > threshold,
        prev_return_11 > threshold,
        prev_return_27 > threshold,
        prev_return_24 > threshold,
        prev_return_38 > threshold
    ])

    below_threshold = sum([
        prev_return_25 < threshold,
        prev_return_11 < threshold,
        prev_return_27 < threshold,
        prev_return_24 < threshold,
        prev_return_38 < threshold
    ])

    # Determine the desired position for company 39 based on majority vote
    if above_threshold >= 3:  # If 3 or more indicators are above threshold
        desired_position_16 = 10000
    elif below_threshold >= 3:  # If 3 or more indicators are below threshold
        desired_position_16 = -10000
    else:
        desired_position_16 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[16] = desired_position_16
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = max_pos_change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

### Trading Pos 28

In [128]:
# Target company 28 can be predicted using company 44 with lag 1. Correlation: 0.2206
# Target company 28 can be predicted using company 22 with lag 1. Correlation: 0.2093
# Target company 28 can be predicted using company 27 with lag 1. Correlation: 0.2088

# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for companies 44, 22, and 27
    prev_return_44 = prcSoFar[44, -1] / prcSoFar[44, -2] - 1
    prev_return_22 = prcSoFar[22, -1] / prcSoFar[22, -2] - 1
    prev_return_27 = prcSoFar[27, -1] / prcSoFar[27, -2] - 1

    # Determine the desired position for company 28
    if prev_return_44 > threshold and prev_return_22 > threshold and prev_return_27 > threshold:
        desired_position_28 = 10000
    elif prev_return_44 < threshold and prev_return_22 < threshold and prev_return_27 < threshold:
        desired_position_28 = -10000
    else:
        desired_position_28 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[28] = desired_position_28
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = max_pos_change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

8, 29, 24

Target company 8 can be predicted using company 11 with lag 1. Correlation: 0.2380
Target company 2 can be predicted using company 27 with lag 1. Correlation: 0.2371
Target company 8 can be predicted using company 25 with lag 1. Correlation: 0.1954
Target company 29 can be predicted using company 22 with lag 1. Correlation: 0.1881
Target company 8 can be predicted using company 38 with lag 1. Correlation: 0.1880
Target company 24 can be predicted using company 11 with lag 1. Correlation: 0.1784
Target company 8 can be predicted using company 2 with lag 1. Correlation: 0.1767
Target company 29 can be predicted using company 11 with lag 1. Correlation: 0.1746
Target company 24 can be predicted using company 22 with lag 1. Correlation: 0.1744

### Trade pos 38 and 37

In [145]:
# python eval.py
import numpy as np

nInst = 50
currentPos = np.zeros(nInst)

def getPosition(prcSoFar):
    global currentPos
    (nins, nt) = prcSoFar.shape

    threshold = 0.02
    change = 0.5

    if (nt < 2):
        return np.zeros(nins)
    
    # Calculate the previous day's return for company 22
    prev_return_22 = prcSoFar[22, -1] / prcSoFar[22, -2] - 1
    prev_return_30 = prcSoFar[30, -1] / prcSoFar[30, -2] - 1
    # prev_return_25 = prcSoFar[25, -1] / prcSoFar[25, -2] - 1
    prev_return_11 = prcSoFar[11, -1] / prcSoFar[11, -2] - 1
    # prev_return_38 = prcSoFar[38, -1] / prcSoFar[38, -2] - 1

    # Determine the desired position for company 38
    if prev_return_22 > threshold and prev_return_30 > threshold: #  and prev_return_25 > threshold
        desired_position_38 = 10000
    elif prev_return_22 < threshold and prev_return_30 < threshold: # and prev_return_25 < threshold
        desired_position_38 = -10000
    else:
        desired_position_38 = 0
    
    # Determine the desired position for company 38
    if prev_return_22 > threshold and prev_return_11 > threshold: #  and prev_return_38 > threshold
        desired_position_27 = 10000
    elif prev_return_22 < threshold and prev_return_11 < threshold: # and prev_return_38 < threshold
        desired_position_27 = -10000
    else:
        desired_position_27 = 0
    
    # Create the new position array
    new_pos = np.zeros(nins)
    new_pos[38] = desired_position_38
    new_pos[27] = desired_position_27
    
    # Calculate the change in positions
    position_changes = new_pos - currentPos
    
    # Apply a maximum change of 20% of the maximum allowed position
    max_change = 10000 * change
    position_changes = np.clip(position_changes, -max_change, max_change)
    
    # Update current positions
    currentPos += position_changes.astype(int)

    return currentPos

### Evaluation

In [147]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
# from teamName import getMyPosition

nInst = 50
nt = 500
commRate = 0.0010
dlrPosLimit = 10000

def loadPrices(fn):
    global nt, nInst
    df=pd.read_csv(fn, sep='\s+', header=None, index_col=None)
    nt, nInst = df.shape
    return df.values.T

pricesFile="./prices.txt"
prcAll = loadPrices(pricesFile)
print ("Loaded %d instruments for %d days" % (nInst, nt))

def calcPL(prcHist):
    cash = 0
    curPos = np.zeros(nInst)
    totDVolume = 0
    totDVolumeSignal = 0
    totDVolumeRandom = 0
    value = 0
    todayPLL = []
    (_,nt) = prcHist.shape
    for t in range(250,501): # 250 in training period here 
        prcHistSoFar = prcHist[:,:t]
        newPosOrig = getPosition(prcHistSoFar)
        curPrices = prcHistSoFar[:,-1] 
        posLimits = np.array([int(x) for x in dlrPosLimit / curPrices])
        newPos = np.clip(newPosOrig, -posLimits, posLimits)
        deltaPos = newPos - curPos
        dvolumes = curPrices * np.abs(deltaPos)
        dvolume = np.sum(dvolumes)
        totDVolume += dvolume
        comm = dvolume * commRate
        cash -= curPrices.dot(deltaPos) + comm
        curPos = np.array(newPos)
        posValue = curPos.dot(curPrices)
        todayPL = cash + posValue - value
        todayPLL.append(todayPL)
        value = cash + posValue
        ret = 0.0
        if (totDVolume > 0):
            ret = value / totDVolume
        #print ("Day %d value: %.2lf todayPL: $%.2lf $-traded: %.0lf return: %.5lf" % (t,value, todayPL, totDVolume, ret))
    pll = np.array(todayPLL)
    (plmu,plstd) = (np.mean(pll), np.std(pll))
    annSharpe = 0.0
    if (plstd > 0):
        annSharpe = np.sqrt(250) * plmu / plstd
    return (plmu, ret, plstd, annSharpe, totDVolume)

(meanpl, ret, plstd, sharpe, dvol) = calcPL(prcAll)
score = meanpl - 0.1*plstd
print ("=====")
print ("mean(PL): %.1lf" % meanpl)
print ("return: %.5lf" % ret)
print ("StdDev(PL): %.2lf" % plstd)
print ("annSharpe(PL): %.2lf " % sharpe)
print ("totDvolume: %.0lf " % dvol)
print ("Score: %.2lf" % score)

Loaded 50 instruments for 500 days
=====
mean(PL): 2.8
return: 0.03058
StdDev(PL): 13.91
annSharpe(PL): 3.18 
totDvolume: 22991 
Score: 1.41


### GridSearch Eval

In [137]:
def run_evaluation(threshold, change):
    global currentPos
    currentPos = np.zeros(nInst)  # Reset currentPos for each evaluation
    max_pos_change = 10000 * change
    
    (meanpl, ret, plstd, sharpe, dvol) = calcPL(prcAll)
    score = meanpl - 0.1*plstd
    return {
        'threshold': threshold,
        'change': change,
        'mean_pl': meanpl,
        'return': ret,
        'std_dev': plstd,
        'ann_sharpe': sharpe,
        'tot_dvolume': dvol,
        'score': score
    }

In [138]:
import itertools
import pandas as pd

# Define the ranges for threshold and change
thresholds = np.arange(0.002, 0.031, 0.002)  # 0.001 to 0.02 in steps of 0.001
changes = np.arange(0.02, 0.51, 0.04)  # 0.05 to 0.5 in steps of 0.05

# Perform grid search
results = []
for threshold, change in itertools.product(thresholds, changes):
    result = run_evaluation(threshold, change)
    results.append(result)

# Convert results to DataFrame
df_results = pd.DataFrame(results)

# Sort results by score in descending order
df_results_sorted = df_results.sort_values('score', ascending=False)

# Display top 10 results
print("Top 10 Results:")
print(df_results_sorted.head(10))

# Save all results to CSV
df_results.to_csv('grid_search_results.csv', index=False)
print("\nAll results saved to 'grid_search_results.csv'")

# # Plot heatmap of scores
# import matplotlib.pyplot as plt
# import seaborn as sns

# pivot_table = df_results.pivot('threshold', 'change', 'score')
# plt.figure(figsize=(12, 8))
# sns.heatmap(pivot_table, annot=False, cmap='YlOrRd', fmt='.2f')
# plt.title('Score Heatmap')
# plt.xlabel('Change')
# plt.ylabel('Threshold')
# plt.show()

Top 10 Results:
     threshold  change   mean_pl    return   std_dev  ann_sharpe  tot_dvolume  \
97       0.016    0.26  2.801271  0.030583  13.91089     3.18398     22990.89   
146      0.024    0.14  2.801271  0.030583  13.91089     3.18398     22990.89   
124      0.020    0.30  2.801271  0.030583  13.91089     3.18398     22990.89   
125      0.020    0.34  2.801271  0.030583  13.91089     3.18398     22990.89   
126      0.020    0.38  2.801271  0.030583  13.91089     3.18398     22990.89   
127      0.020    0.42  2.801271  0.030583  13.91089     3.18398     22990.89   
128      0.020    0.46  2.801271  0.030583  13.91089     3.18398     22990.89   
129      0.020    0.50  2.801271  0.030583  13.91089     3.18398     22990.89   
130      0.022    0.02  2.801271  0.030583  13.91089     3.18398     22990.89   
131      0.022    0.06  2.801271  0.030583  13.91089     3.18398     22990.89   

        score  
97   1.410182  
146  1.410182  
124  1.410182  
125  1.410182  
126  1.41018