In [6]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from teamName import getMyPosition as getPosition

# Number of instruments and number of days. Both start as placeholders and are
# overwritten by loadPrices() from the shape of the price file it reads.
nInst = 0
nt = 0
# Commission rate: calcPL() charges commRate times the dollar volume traded.
commRate = 0.0010
# Dollar position limit per instrument: calcPL() divides it by the current
# price and truncates to an integer to get the maximum position size.
dlrPosLimit = 10000

def loadPrices(fn):
    """Read a whitespace-delimited price table and return it transposed.

    The file is read without a header row. Its rows are counted as days and
    its columns as instruments.

    Side effect: sets the module-level globals ``nt`` (number of rows) and
    ``nInst`` (number of columns) from the shape of the table.

    Args:
        fn: Path (or anything ``pandas.read_csv`` accepts) of the price file.

    Returns:
        numpy.ndarray of shape ``(nInst, nt)``: one row per instrument and
        one column per day.
    """
    global nt, nInst
    # Raw string: '\s' inside a normal literal is an invalid escape sequence
    # and makes current Python versions emit a warning at compile time.
    df = pd.read_csv(fn, sep=r'\s+', header=None, index_col=None)
    (nt, nInst) = df.shape
    return df.values.T


# Load the price history once at module level. loadPrices() returns the table
# transposed, so prcAll has one row per instrument and one column per day.
pricesFile = "./prices.txt"
prcAll = loadPrices(pricesFile)
# nInst and nt are module globals that loadPrices() fills in as a side effect.
print("Loaded %d instruments for %d days" % (nInst, nt))

def calcPL(prcHist, startDay=250, endDay=500):
    """Back-test ``getPosition`` over a window of days and summarise the P&L.

    For every day ``t`` in the window the strategy is given the first ``t``
    columns of ``prcHist``. The positions it returns are clipped to the dollar
    position limit, the change in position is traded at the latest price with
    commission, and the portfolio is marked to that same price.

    Args:
        prcHist: 2-D array of prices, one row per instrument and one column
            per day.
        startDay: Number of columns of history shown on the first evaluated
            day. Must be at least 1. Defaults to 250.
        endDay: Number of columns of history shown on the last evaluated
            day. Defaults to 500. It is clamped to the number of columns in
            ``prcHist`` so that a shorter history is not re-evaluated on its
            final day over and over.

    Returns:
        Tuple ``(plmu, ret, plstd, annSharpe, totDVolume)``:
        mean daily P&L, final portfolio value divided by total dollar volume
        traded (0.0 if nothing was traded), standard deviation of daily P&L,
        ``sqrt(250) * plmu / plstd`` (0.0 if ``plstd`` is 0), and the total
        dollar volume traded.

    Raises:
        ValueError: If the evaluation window contains no days.
    """
    # Size everything from the argument itself rather than from module globals.
    (numInst, numDays) = prcHist.shape
    lastDay = min(endDay, numDays)
    if startDay < 1 or startDay > lastDay:
        raise ValueError(
            "empty evaluation window: startDay=%d, endDay=%d, days available=%d"
            % (startDay, endDay, numDays))

    cash = 0
    curPos = np.zeros(numInst)
    totDVolume = 0
    value = 0
    ret = 0.0
    todayPLL = []
    for t in range(startDay, lastDay + 1):
        # The strategy only ever sees prices up to and including "today".
        prcHistSoFar = prcHist[:, :t]
        newPosOrig = getPosition(prcHistSoFar)
        curPrices = prcHistSoFar[:, -1]
        # Largest whole position whose value stays within the dollar limit.
        posLimits = np.array([int(x) for x in dlrPosLimit / curPrices])
        newPos = np.clip(newPosOrig, -posLimits, posLimits)
        deltaPos = newPos - curPos
        dvolume = np.sum(curPrices * np.abs(deltaPos))
        totDVolume += dvolume
        comm = dvolume * commRate
        # Pay for the shares bought (or receive for shares sold) plus commission.
        cash -= curPrices.dot(deltaPos) + comm
        curPos = np.array(newPos)
        posValue = curPos.dot(curPrices)
        todayPL = cash + posValue - value
        todayPLL.append(todayPL)
        value = cash + posValue
        ret = value / totDVolume if totDVolume > 0 else 0.0

    pll = np.array(todayPLL)
    (plmu, plstd) = (np.mean(pll), np.std(pll))
    annSharpe = 0.0
    if plstd > 0:
        annSharpe = np.sqrt(250) * plmu / plstd
    return (plmu, ret, plstd, annSharpe, totDVolume)


# Run the back-test over the loaded prices and derive the headline score:
# mean daily P&L penalised by a tenth of its standard deviation.
(meanpl, ret, plstd, sharpe, dvol) = calcPL(prcAll)
score = meanpl - 0.1*plstd

# Build the whole report first, then emit it in one go.
report_lines = [
    "=====",
    "mean(PL): %.1lf" % meanpl,
    "return: %.5lf" % ret,
    "StdDev(PL): %.2lf" % plstd,
    "annSharpe(PL): %.2lf " % sharpe,
    "totDvolume: %.0lf " % dvol,
    "Score: %.2lf" % score,
]
print("\n".join(report_lines))

Loaded 50 instruments for 500 days
=====
mean(PL): -0.2
return: -0.00140
StdDev(PL): 2.11
annSharpe(PL): -1.27 
totDvolume: 30289 
Score: -0.38


In [7]:
# Simple day-over-day return of every instrument (np.diff works along the
# last axis, i.e. along the days of each row).
daily_returns = np.diff(prcAll) / prcAll[:, :-1]
correlation_matrix = np.corrcoef(daily_returns)

# Blank out the self-correlations so they cannot win the max below. This is
# done on the NumPy array before it is wrapped: writing through
# `DataFrame.values` is unreliable because pandas may return a copy or a
# read-only view (Copy-on-Write).
np.fill_diagonal(correlation_matrix, np.nan)

# Convert the correlation matrix to a DataFrame
correlation_df = pd.DataFrame(correlation_matrix)

# Save the DataFrame as a CSV file
correlation_df.to_csv('correlation_matrix.csv', index=False)

# Find the maximum correlation in each column, excluding self-correlations
max_correlations = correlation_df.max()

# Identify the corresponding instrument for each maximum correlation
most_correlated_instruments = correlation_df.idxmax()

# Combine the results into a DataFrame
result_df = pd.DataFrame({
    'Max Correlation': max_correlations,
    'Most Correlated Instrument': most_correlated_instruments
})

print(result_df)

    Max Correlation  Most Correlated Instrument
0          0.104112                          37
1          0.153817                           8
2          0.242410                          27
3          0.106335                          11
4          0.169772                          38
5          0.106285                          38
6          0.095368                           3
7          0.077785                          37
8          0.383450                          27
9          0.112618                          34
10         0.080750                          18
11         0.146408                          27
12         0.315180                          38
13         0.091816                          31
14         0.106760                          39
15         0.089287                          27
16         0.401624                          38
17         0.117141                           1
18         0.190175                          38
19         0.089078                     

In [1]:
from itertools import cycle

instruments_to_plot = [7, 28, 43, 49]
colors = cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k'])
plt.figure(figsize=(20, 12))

# Rolling statistics are computed on the transposed table, so each DataFrame
# column holds one instrument and each row one day.
df = pd.DataFrame(prcAll.T)
window_size = 100
moving_averages = df.rolling(window=window_size).mean()
moving_std_devs = df.rolling(window=window_size).std()

for i in instruments_to_plot:
    color = next(colors)
    upper_band = moving_averages[i] + 2 * moving_std_devs[i]
    lower_band = moving_averages[i] - 2 * moving_std_devs[i]
    # Each line gets its own label so the legend can tell them apart.
    plt.plot(prcAll[i, :], color=color, label=f'Instrument {i}')
    plt.plot(moving_averages[i], color=color,
             label=f'Instrument {i} {window_size}-day mean')
    plt.plot(upper_band, color=color, linestyle=':',
             label=f'Instrument {i} mean ± 2 std')
    # A label starting with an underscore is left out of the legend, so the
    # pair of bands is listed only once.
    plt.plot(lower_band, color=color, linestyle=':', label='_nolegend_')

plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Price Trajectory of Each Instrument from Day 1 to 500')
plt.legend()
plt.tight_layout()
plt.show()

NameError: name 'plt' is not defined

In [None]:
instruments_to_plot = [0]

# Day-over-day simple return of every instrument, as a fraction.
daily_returns = np.diff(prcAll) / prcAll[:, :-1]

plt.figure(figsize=(20,12))
for instrument in instruments_to_plot:
    # Scale the fractional return to percent for the y-axis.
    percent_returns = daily_returns[instrument, :] * 100
    plt.plot(percent_returns)

plt.title('Daily Returns of Each Instrument from Day 1 to 500')
plt.ylabel('% Return')
plt.xlabel('Days')
plt.tight_layout()
plt.show()

In [1]:
from itertools import cycle

from statsmodels.tsa.stattools import acf

colors = cycle(['b', 'g', 'r', 'y'])
instruments_to_plot = [7,28,43,49]
plt.figure(figsize=(20, 12))
# Autocorrelation of the price series of each selected instrument, up to lag 250.
acf_values = [acf(prcAll[i, :], nlags=250) for i in instruments_to_plot]
# Pair every curve with the instrument it was computed from, so the label
# shows the real instrument id rather than its position in the list.
for instrument, values in zip(instruments_to_plot, acf_values):
    color = next(colors)
    plt.plot(values, color=color, label=f'Instrument {instrument}')

plt.title('ACF of Prices for Instruments ' + ', '.join(str(i) for i in instruments_to_plot))
plt.xlabel('Lags')
plt.ylabel('ACF')
plt.legend()
plt.show()

NameError: name 'prcAll' is not defined

In [None]:
from statsmodels.tsa.stattools import adfuller

def perform_adf_test(series, instrument_index):
    """Run ``adfuller`` on ``series`` and print the resulting p-value.

    Args:
        series: Sequence of observations, passed to ``adfuller`` unchanged.
        instrument_index: Identifier shown in the printed line.

    Returns:
        None. The p-value is only printed, not returned.
    """
    # The second element of the adfuller result is reported as the p-value.
    p_value = adfuller(series)[1]
    print(f'p-value for Instrument {instrument_index}: {p_value}')

# Run the ADF test on the price series of each of the first nInst rows of
# prcAll; zip stops at nInst even if prcAll were to hold more rows.
for i, price_series in zip(range(nInst), prcAll):
    perform_adf_test(price_series, i)