In [41]:
import pandas as pd

file_name = 'canada.csv'

# Load the file, keep only the 'Date' and 'Close' columns, coerce both to
# proper dtypes (unparseable entries become NaT/NaN), drop incomplete rows
# and order the remaining rows by date, oldest first.
#
# For excel files, start the chain from:
# pd.read_excel(file_name, usecols=['Date', 'Close'])
df = (
    pd.read_csv(file_name)[['Date', 'Close']]
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'),
        Close=lambda frame: pd.to_numeric(frame['Close'], errors='coerce'),
    )
    .dropna()
    .sort_values(by='Date', ascending=True)
)

df

Unnamed: 0,Date,Close
0,1999-12-31,8413.799805
1,2000-01-03,8413.799805
2,2000-01-04,8202.599609
3,2000-01-05,8119.399902
4,2000-01-06,8114.200195
...,...,...
4814,2018-12-20,14141.799805
4815,2018-12-21,13935.400391
4816,2018-12-24,13780.200195
4817,2018-12-27,14165.200195


In [42]:
import math

# Calculate Log_Returns
# For every pair of consecutive rows the log return is
# ln(current close) - ln(previous close), so the result has one element
# fewer than the price series.
close_values = df['Close'].tolist()

Log_Returns = [
    math.log(current_close) - math.log(previous_close)
    for previous_close, current_close in zip(close_values, close_values[1:])
]

Log_Returns

[0.0,
 -0.025422062286262204,
 -0.01019488176805794,
 -0.0006406104803300394,
 0.03811000422113153,
 0.020327217200705405,
 -0.018762586962118633,
 -0.009831844630770803,
 0.006902010323599939,
 -0.007201098108092552,
 0.014468536917496166,
 0.01293595752767196,
 0.005271413269902325,
 0.005520372259157469,
 -0.0055434432493157715,
 -0.020604879504251272,
 0.005505716534722538,
 0.009023839273700318,
 0.007336778998183746,
 -0.029985389591319134,
 0.01075187052184745,
 0.006370369897354067,
 0.02706931004878932,
 0.014142116961254558,
 0.03478109638524529,
 -0.005793603472236342,
 0.01569099490868986,
 -0.004914820197230441,
 0.0070843422772401965,
 -0.01778404601578032,
 0.0170864526843868,
 0.004456169454615022,
 0.007719086597738567,
 0.0036947116968573113,
 -0.017911881408702257,
 -0.00694134006579894,
 -0.0039400469774193425,
 0.012976979008636036,
 -0.010880210512999255,
 -0.007954104331970413,
 0.008952086333009035,
 -0.010287615958647578,
 0.03048676989706145,
 0.01104213043429

In [43]:
bin_no = 12

# Calculate frequency of each bin
# The range [min_val, max_val] is split into `bin_no` equal-width bins.
# Bin i covers (edge[i-1], edge[i]]; the first bin additionally contains
# min_val itself.
min_val, max_val = min(Log_Returns), max(Log_Returns)

# Right edge of every bin, using the same formula for the interior edges.
bin_right_edges = [
    min_val + (max_val - min_val) / bin_no * (i + 1)
    for i in range(bin_no)
]

# Floating-point rounding can make the computed last edge land slightly
# below max_val, which would silently drop the largest return from the
# histogram. Pin the final edge to max_val so the last bin is closed exactly.
if bin_right_edges:
    bin_right_edges[-1] = max_val

frequency_array = [0] * bin_no

for log_return in Log_Returns:
    # Edges are non-decreasing, so the first edge that is >= the value
    # identifies the (L, R] bin the value belongs to.
    for i, right_edge in enumerate(bin_right_edges):
        if log_return <= right_edge:
            frequency_array[i] += 1
            break

# Every observation must land in exactly one bin.
assert sum(frequency_array) == len(Log_Returns), "some log returns were not binned"

print(frequency_array)

[4, 3, 7, 23, 173, 1475, 2829, 241, 34, 5, 3, 1]


In [44]:
# Calculate probabilities
# Each bin's probability is its relative frequency: count / total count.
total_frequency = sum(frequency_array)

probabilities = [bin_count / total_frequency for bin_count in frequency_array]

probabilities

[0.0008336807002917883,
 0.0006252605252188412,
 0.0014589412255106293,
 0.004793664026677783,
 0.03605669028761984,
 0.30741975823259693,
 0.5896206752813672,
 0.05022926219258024,
 0.0070862859524802,
 0.0010421008753647354,
 0.0006252605252188412,
 0.00020842017507294707]

In [45]:
# Calculate Shannon-Entropy
# H = -sum(p * log2(p)) over all bins; empty bins (p == 0) are skipped
# because log2(0) is undefined and such bins contribute nothing.
Entropy = 0

for probability in probabilities:
    if probability == 0:
        continue
    Entropy += probability * math.log2(probability)

# The accumulated sum is non-positive; flip the sign to obtain the entropy in bits.
Entropy = -Entropy

print(f"Shannon Entropy of '{file_name}' is = {Entropy}")

Shannon Entropy of 'canada.csv' is = 1.4981006723988957
