In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interactive,interact, HBox, Layout,VBox
from IPython.display import display, clear_output
import os


CSI_Header = ['Date', 'Source', 'Source-ID', 'Open', 'High', 'Low', 'AdjustedClose', 'Close', 'Volume', 'Contract']
data_files = os.listdir('./data')
subset = []
date_col = "Date"
ret_col = "LRN"
style = {'description_width': 'initial'}

In [2]:
def dfReader(file):
    global df, fileName
    fileName = file
    file_path = "./data/" + fileName
    # load csv into a dataframe
    df = pd.read_csv(file_path, names=CSI_Header)
    # Ensure the date column is in datetime format
    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)
    

In [3]:
def getColumns(dependentVar, windowSize):
    global value_col
    value_col = dependentVar
    # Calculate additional columns of interest
    df["SMA"] = df[value_col].rolling(window=windowSize).mean()
    df["EWMA"] = df[value_col].ewm(span=windowSize, adjust=False).mean()
    df["SRN"] = df[value_col].pct_change()
    df["LRN"] = np.log(df[value_col] / df[value_col].shift(1))
    df['cum_SRN'] = df["SRN"].cumsum()
    df['cum_LRN'] = df["LRN"].cumsum()
    
    
    ## Exponential moving average strategy calcualtions
    df["EWMA_100"] = df[value_col].ewm(span=100, adjust=False).mean()
    df['Position'] = np.where(df['EWMA_100'] > df['EWMA'], 1, 0) #1 if long, 0 if flat
    df['Position'] = df['Position'].shift()
    df['StrategyPct'] = df[value_col].pct_change(1) * df['Position']
    df['Strategy'] = (df['StrategyPct'] + 1).cumprod()
    df['BuyHold'] = (df[value_col].pct_change(1) + 1).cumprod()
    
    
    ## Simple moving average strategy calcualtions
    df["SMA100"] = df[value_col].rolling(window=100).mean()
    df['PositionSMA'] = np.where(df['SMA'] > df['SMA100'], 1, 0)
    df['PositionSMA'] = df['PositionSMA'].shift()
    df['StrategyPctSMA'] = df[value_col].pct_change(1) * df['PositionSMA']
    df['StrategySMA'] = (df['StrategyPctSMA'] + 1).cumprod()
    df['BuyHoldSMA'] = (df[value_col].pct_change(1) + 1).cumprod()
    

In [4]:
def plot_timeseries(MA_cols, strat_cols):
    
    clear_output(wait=True)
    # plot the main variable, plus additional moving averages or strategies
    plotCols = [value_col]
    plotCols.pop()
    
    for i in range (len(MA_cols)):
        plotCols.append(MA_cols[i])
    for j in range (len(strat_cols)):
        plotCols.append(strat_cols[j])
        
    subset[plotCols].plot(figsize=(10,6))    
    plt.title('Time Series Plot for ' + fileName)
    plt.xlabel(date_col)
    plt.ylabel(value_col)
    plt.grid(True)
    plt.tight_layout()
    plt.show()
    

In [5]:
def timeSeries(dependentVar, windowSize, MA_cols, strat_cols, start, end):
    global subset
    subset = df[start:end]
    getColumns(dependentVar, windowSize)
    plot_timeseries(MA_cols, strat_cols)
    

# Select file to analyse

In [6]:
# FILE SELECT WIDGET - has to run before rest of processing
file_select = widgets.Dropdown(options=data_files, value=data_files[0], description="Select file", disabled=False)
widgets.interactive(dfReader, file=file_select)


interactive(children=(Dropdown(description='Select file', options=('CSI_C_1.csv', 'CSI_ES_1.csv', 'CSI_HG_1.cs…

In [7]:
# WIDGETS
widget_info = widgets.Text(value="Use shift-click to multi-select moving averages", style=style, disabled=False)
value_widget = widgets.Dropdown(options=CSI_Header[3:len(CSI_Header)-1], value='Close', description="Select variable", style=style, disabled=False)
window_slider = widgets.IntSlider(min=10, max=500, step=1, description="MA Window Size", value=50, style=style)
MA_select = widgets.SelectMultiple(options=["SMA","EWMA"], rows=2, value=[], description="Select MA", disabled=False)
strat_widget = widgets.SelectMultiple(options=["Strategy", "BuyHold", "StrategySMA", "BuyHoldSMA"], rows=4, value=[], description="Select strategies", style=style, disabled=False)
start_widget = widgets.DatePicker(description="Start Date", value=df.index[0])
end_widget = widgets.DatePicker(description="End Date", value=df.index[-1])

display(widget_info)
widgets.interactive(timeSeries, dependentVar=value_widget, windowSize=window_slider, MA_cols=MA_select, strat_cols=strat_widget, start=start_widget, end=end_widget)
#HBox([value_widget, window_slider, MA_select, start_widget, end_widget]) - horizontal widget layout, not ideal

Text(value='Use shift-click to multi-select moving averages', style=TextStyle(description_width='initial'))

interactive(children=(Dropdown(description='Select variable', index=4, options=('Open', 'High', 'Low', 'Adjust…

In [8]:
def plot_cummulative_returns(start_date, end_date):
    
    subset = df[start_date:end_date]
    clear_output(wait=True)
    subset['cum_LRN'].plot(figsize=(10, 5))
    plt.title('Cumulative Return Plot for ' + fileName)
    plt.xlabel('Date')
    plt.ylabel("cum_LRN")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

start_widget = widgets.DatePicker(description='Start Date', value=df.index[0])
end_widget = widgets.DatePicker(description='End Date', value=df.index[-1])

widgets.interactive(plot_cummulative_returns, start_date=start_widget, end_date=end_widget)

interactive(children=(DatePicker(value=Timestamp('2000-01-03 00:00:00'), description='Start Date', step=1), Da…

In [9]:
# Function to plot based on date range
def plot_distribution(start_date, end_date):
    subset = df[start_date:end_date]
    clear_output(wait=True)
    sns.histplot(subset['LRN'], kde=True)
    plt.title(f'Distribution of LRN from {start_date} to {end_date}')
    plt.xlabel('LRN')
    plt.ylabel('Density')
    plt.show()
    subset.describe()

# Interactive widgets
start_widget = widgets.DatePicker(description='Start Date', value=df.index[0])
end_widget = widgets.DatePicker(description='End Date', value=df.index[-1])
widgets.interactive(plot_distribution, start_date=start_widget, end_date=end_widget)


interactive(children=(DatePicker(value=Timestamp('2000-01-03 00:00:00'), description='Start Date', step=1), Da…

In [10]:
subset.describe()

Unnamed: 0,Source-ID,Open,High,Low,AdjustedClose,Close,Volume
count,5909.0,5909.0,5909.0,5909.0,5909.0,5909.0,5909.0
mean,4.0,405.186664,407.583347,402.643595,405.130733,392.754739,22265.739211
std,0.0,160.87846,161.736171,159.894118,160.83932,124.45235,22928.38467
min,4.0,186.0,188.0,185.5,187.25,112.0,0.0
25%,4.0,255.5,257.5,253.25,255.25,301.5,7361.0
50%,4.0,373.75,375.0,372.25,373.75,397.75,13393.0
75%,4.0,509.0,515.0,506.0,509.25,473.75,30932.0
max,4.0,838.75,840.0,832.5,838.75,741.5,202092.0
