# 1.1 Getting Started: Install and Import

In [44]:
!pip install plotly
!pip install scipy



In [43]:
# Silence all Python warnings so cell outputs stay uncluttered.
# Note: this is a global filter and hides every warning category.
import warnings
warnings.filterwarnings('ignore')

# Numerical stack: NumPy, pandas, and SciPy's normal distribution
import numpy as np
import pandas as pd
from scipy.stats import norm

# Plotly: express API for quick charts, graph_objects for adding extra traces
import plotly.express as px
import plotly.graph_objects as go

# Default figure size (in pixels) for every plotly express chart
px.defaults.width = 1000
px.defaults.height = 600

11


# 1.2 Load Historical Price Data

In [35]:
# Load the CSV file, using its first column as a parsed DatetimeIndex, and keep
# only the rows dated 2020 through 2023.
# `.loc` makes the label-based row slice explicit, and `.copy()` gives an
# independent frame, so columns added later are not written into a slice of the
# full history (which can raise SettingWithCopyWarning).
df = (
    pd.read_csv('CQF_Jan_24_M1L1_Excel-1.csv', index_col=0, parse_dates=True)
    .loc['2020':'2023']
    .copy()
)

# Preview the most recent rows
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-22,4753.919922,4772.939941,4736.77002,4754.629883,4754.629883,3046770000
2023-12-26,4758.859863,4784.720215,4758.450195,4774.75,4774.75,2513910000
2023-12-27,4773.450195,4785.390137,4768.899902,4781.580078,4781.580078,2748450000
2023-12-28,4786.439941,4793.299805,4780.97998,4783.350098,4783.350098,2698860000
2023-12-29,4782.879883,4788.430176,4751.990234,4769.830078,4769.830078,3126060000


In [36]:
# Sanity-check the loaded data by charting the adjusted close against the date index
px.line(
    data_frame=df,
    y="Adj Close",
    title="SPY Historical Chart",
)

In [13]:
# Inspect the index to confirm the dates were parsed into a DatetimeIndex.
# NOTE(review): a saved output spanning 2011-2023 predates the 2020-2023 filter
# applied when the data is loaded; re-run this cell to refresh it.
df.index

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2023-12-15', '2023-12-18', '2023-12-19', '2023-12-20',
               '2023-12-21', '2023-12-22', '2023-12-26', '2023-12-27',
               '2023-12-28', '2023-12-29'],
              dtype='datetime64[ns]', name='Date', length=3270, freq=None)

In [None]:
# Show the index labels (dates) of the last 300 rows
df.index[-300:]

# 1.3 Calculate Returns

In [46]:
# Period-over-period simple return of the adjusted close, stored as a new column.
# The first row has no previous price, so its undefined return is filled with 0
# (this placeholder is included in any statistics computed from the column).
adj_close_returns = df['Adj Close'].pct_change()
df['Return'] = adj_close_returns.fillna(0)

# Preview the first 5 rows
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-02,3244.669922,3258.139893,3235.530029,3257.850098,3257.850098,3459930000,0.0
2020-01-03,3226.360107,3246.149902,3222.340088,3234.850098,3234.850098,3484700000,-0.00706
2020-01-06,3217.550049,3246.840088,3214.639893,3246.280029,3246.280029,3702460000,0.003533
2020-01-07,3241.860107,3244.909912,3232.429932,3237.179932,3237.179932,3435910000,-0.002803
2020-01-08,3238.590088,3267.070068,3236.669922,3253.050049,3253.050049,3726840000,0.004902


# 1.4 Calculate Mean, Sigma and Scaled Returns

In [47]:
# Mean and standard deviation of the returns.
# np.std defaults to ddof=0, i.e. the population standard deviation.
mu = np.mean(df['Return'])
sigma = np.std(df['Return'])

# Standardize the returns into z-scores: (return - mean) / sigma.
# Vectorized Series arithmetic replaces a per-element Python lambda and
# produces the same values.
df['Scaled_Return'] = (df['Return'] - mu) / sigma

# Check the output
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Return,Scaled_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-02,3244.669922,3258.139893,3235.530029,3257.850098,3257.850098,3459930000,0.0,-0.033455
2020-01-03,3226.360107,3246.149902,3222.340088,3234.850098,3234.850098,3484700000,-0.00706,-0.520925
2020-01-06,3217.550049,3246.840088,3214.639893,3246.280029,3246.280029,3702460000,0.003533,0.210518
2020-01-07,3241.860107,3244.909912,3232.429932,3237.179932,3237.179932,3435910000,-0.002803,-0.227013
2020-01-08,3238.590088,3267.070068,3236.669922,3253.050049,3253.050049,3726840000,0.004902,0.30505


# 1.5 Plot Histogram

In [52]:
# Pull out the z-scored returns as a Series; this is the sample plotted in the
# histogram below
data = df.loc[:, 'Scaled_Return']

# Display the Series (pandas truncates long output)
data

Date
2020-01-02   -0.033455
2020-01-03   -0.520925
2020-01-06    0.210518
2020-01-07   -0.227013
2020-01-08    0.305050
                ...   
2023-12-22    0.081169
2023-12-26    0.258735
2023-12-27    0.065316
2023-12-28   -0.007895
2023-12-29   -0.228617
Name: Scaled_Return, Length: 1006, dtype: float64

In [54]:
# Normal density evaluated on 200 evenly spaced points across the observed range
# of scaled returns. The curve uses the sample mean and standard deviation of
# `data`; since `data` holds z-scores, this is approximately the standard normal
# density y = (1/np.sqrt(2*np.pi)) * np.exp(-0.5*x**2).
x = np.linspace(start=data.min(), stop=data.max(), num=200)
y = norm.pdf(x, loc=data.mean(), scale=data.std())

# Empirical distribution: histogram of the scaled returns, normalized to a
# probability density so it is on the same scale as the normal curve
fig = px.histogram(
    data,
    x=data,
    histnorm='probability density',
    nbins=200,
    labels={"x": ""},
    title='Empirical Vs Normal Distribution',
)

# Overlay the normal density as a line trace for comparison
fig.add_trace(
    go.Scatter(
        x=x,
        y=y,
        mode='lines',
        name='Normal Distribution',
    )
)

# Render the figure. Author's observation: zooming in on both ends of the
# histogram shows fat tails relative to the normal curve.
fig.show()