<a href="https://colab.research.google.com/github/bankehsaz/Box-Cox/blob/main/Box_Cox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.stats import boxcox
import yfinance as yf

In [2]:
# getting data from yahoo finance
df = yf.download(tickers='GLD', start='2023-01-01', end='2023-03-01')
df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-02-22,170.809998,170.960007,169.559998,169.660004,169.660004,4105700
2023-02-23,169.839996,170.130005,169.080002,169.570007,169.570007,5073500
2023-02-24,168.419998,168.839996,168.190002,168.350006,168.350006,6496500
2023-02-27,168.880005,169.229996,168.550003,169.009995,169.009995,4509200
2023-02-28,168.779999,170.25,168.589996,169.779999,169.779999,6761600


In [3]:
# plotting the close price of SPDR Gold Shares (GLD)
px.line(df, x=df.index, y='Close')

In [4]:
# Sqrt Transformation
df['Sqrt-Close'] = np.sqrt(df['Close'])
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Sqrt-Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-22,170.809998,170.960007,169.559998,169.660004,169.660004,4105700,13.02536
2023-02-23,169.839996,170.130005,169.080002,169.570007,169.570007,5073500,13.021905
2023-02-24,168.419998,168.839996,168.190002,168.350006,168.350006,6496500,12.974976
2023-02-27,168.880005,169.229996,168.550003,169.009995,169.009995,4509200,13.000384
2023-02-28,168.779999,170.25,168.589996,169.779999,169.779999,6761600,13.029965


In [5]:
# plotting the Sqrt-Close of SPDR Gold Shares (GLD)
px.line(df, x=df.index, y='Sqrt-Close')

In [6]:
# Log Transformation
df['Log-Close'] = np.log(df['Close'])
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Sqrt-Close,Log-Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-02-22,170.809998,170.960007,169.559998,169.660004,169.660004,4105700,13.02536,5.133796
2023-02-23,169.839996,170.130005,169.080002,169.570007,169.570007,5073500,13.021905,5.133266
2023-02-24,168.419998,168.839996,168.190002,168.350006,168.350006,6496500,12.974976,5.126045
2023-02-27,168.880005,169.229996,168.550003,169.009995,169.009995,4509200,13.000384,5.129958
2023-02-28,168.779999,170.25,168.589996,169.779999,169.779999,6761600,13.029965,5.134503


In [7]:
# plotting the Log-Close of SPDR Gold Shares (GLD)
px.line(df, x=df.index, y='Log-Close')

In [8]:
# Box-Cox Transformation
data, lam = boxcox(df['Close'])
df['BoxCox-Close'] = data
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Sqrt-Close,Log-Close,BoxCox-Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-02-22,170.809998,170.960007,169.559998,169.660004,169.660004,4105700,13.02536,5.133796,0.243533
2023-02-23,169.839996,170.130005,169.080002,169.570007,169.570007,5073500,13.021905,5.133266,0.243533
2023-02-24,168.419998,168.839996,168.190002,168.350006,168.350006,6496500,12.974976,5.126045,0.243533
2023-02-27,168.880005,169.229996,168.550003,169.009995,169.009995,4509200,13.000384,5.129958,0.243533
2023-02-28,168.779999,170.25,168.589996,169.779999,169.779999,6761600,13.029965,5.134503,0.243533


In [9]:
# show lambda
lam

-4.10621495909559

In [10]:
# plotting the BoxCox-Close of SPDR Gold Shares (GLD)
px.line(df, x=df.index, y='BoxCox-Close')

In [11]:
# compare transformations metrics
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=2, start_cell="bottom-left", subplot_titles=("Close", "Sqrt-Close", "Log-Close", "BoxCox-Close"))

fig.add_trace(go.Scatter(x=df.index, y=df['Close']),
              row=1, col=1)

fig.add_trace(go.Scatter(x=df.index, y=df['Sqrt-Close']),
              row=1, col=2)

fig.add_trace(go.Scatter(x=df.index, y=df['Log-Close']),
              row=2, col=1)

fig.add_trace(go.Scatter(x=df.index, y=df['BoxCox-Close']),
              row=2, col=2)

fig.show()