# Notebook

This notebook provides a basic methodology for building stock-return distributions using Python.

<img src="/static/images/normalstandard.png" alt="normal distribution" />

## Packages
* https://pypi.org/project/pandas/
* https://pypi.org/project/plotly/
* https://pypi.org/project/yfinance/

## Methodology
1. Get OHLC data
2. Transform and Manipulate data
3. Display data

**17th January 2024**

*By Jamal N.C Morris*

In [1]:
# Import python packages

# yf handles data retrieval
# pd transforms and manipulates the Open-High-Low-Close (OHLC) data
# px is a graphing library for plotting the data

import pandas as pd
import plotly.express as px
import yfinance as yf

In [2]:
# User defined variable

# Select the stock ticker; for example, AAPL is Apple Inc.'s stock ticker.
# Find stock tickers at https://www.investing.com/stock-screener/ .

# Read the ticker from the user. Surrounding whitespace is stripped and the
# symbol is upper-cased, so input such as " spy " is normalised to "SPY"
# instead of being passed on with stray spaces inside the symbol.
stock_ticker = input("Enter a Stock Ticker: ").strip().upper()
print(stock_ticker)

SPY


In [3]:
# Data Retrieval

# This connects to the unofficial https://finance.yahoo.com/ API
# and GETs the historical daily OHLC data for the specified stock over the last 10 years.
# The newest and oldest data points are then printed to provide a visual check.

def data_retrieval(stock_ticker=None, time_horizon="10y", time_frame="1d"):
    '''
        Fetch historical price data for one stock through yfinance.

        This function takes the following arguments:
            stock_ticker: The company identifier. When omitted (None), the
                notebook-level ``stock_ticker`` variable is looked up at call
                time, so re-entering a ticker takes effect without having to
                redefine this function.
            time_horizon: The length of the time series, i.e. how far back in
                time the dataset goes. Passed to ``history`` as ``period``
                (default "10y").
            time_frame: The interval of each data point. Passed to ``history``
                as ``interval`` (default "1d").

        The function returns the result of ``yf.Ticker(...).history(...)`` for
        the requested ticker, horizon and interval; the rest of the notebook
        reads its 'Close' column.
    '''
    if stock_ticker is None:
        # A default written as ``stock_ticker=stock_ticker`` is evaluated only
        # once, when the ``def`` runs, and would keep returning data for a
        # stale ticker. Reading the global here binds it at call time instead.
        stock_ticker = globals()["stock_ticker"]
    data = yf.Ticker(str(stock_ticker)).history(period=time_horizon, interval=time_frame)
    return data

# Download the price history using the function's default arguments.
data = data_retrieval()

# Visual check: the newest close, a blank line, then the oldest close.
print(data['Close'].tail(1), "", data['Close'].head(1), sep="\n")

Date
2024-01-17 00:00:00-05:00    472.290009
Name: Close, dtype: float64

Date
2014-01-21 00:00:00-05:00    153.720779
Name: Close, dtype: float64


In [4]:
# Transform and Manipulate data

# This ensures the data is transformed into a pandas DataFrame containing only the column we require (the closing price)
# It also adds columns for the % returns over 1D: 1 day, 1W: 1 week (5 periods), 1M: 1 month (20 periods), 1Q: 1 quarter (60 periods), 1Y: 1 year (240 periods)
# We also calculate the σ (sigma) column: the Z-score of each return, which shows how extreme each data point is
# By calculating the sigma we can understand where each data point lies on the distribution
# For example, a sigma of 1 means the return is 1 standard deviation above the mean; the Z-score is a good way to standardize data
# It gives us historical context for each data point and provides a comparable base

# Keep only the closing price; every derived column below is built from it.
data = pd.DataFrame(data['Close'])

# Look-back length, in rows (periods) of the series, for each return horizon.
horizon_periods = {"1D": 1, "1W": 5, "1M": 20, "1Q": 60, "1Y": 240}

for label, n_periods in horizon_periods.items():
    returns_col = f"Returns {label}"

    # Percentage change of the close over the look-back window.
    data[returns_col] = data['Close'].pct_change(periods=n_periods) * 100

    # Z-score of each return: its distance from the column mean, measured in
    # standard deviations of that same column.
    pct_returns = data[returns_col]
    data[f"{returns_col} σ"] = (pct_returns - pct_returns.mean()) / pct_returns.std()

# Show the ten most recent rows as the cell output.
data.tail(10)

Unnamed: 0_level_0,Close,Returns 1D,Returns 1D σ,Returns 1W,Returns 1W σ,Returns 1M,Returns 1M σ,Returns 1Q,Returns 1Q σ,Returns 1Y,Returns 1Y σ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-01-03 00:00:00-05:00,468.790009,-0.816669,-0.785567,-1.442234,-0.743363,3.065681,0.467283,9.58017,1.015105,22.456512,0.831102
2024-01-04 00:00:00-05:00,467.279999,-0.322108,-0.337689,-1.937003,-0.960152,2.753953,0.395954,8.532359,0.856057,19.829759,0.631612
2024-01-05 00:00:00-05:00,467.920013,0.136966,0.078051,-1.839768,-0.917548,3.311008,0.523417,8.118276,0.793203,18.571196,0.53603
2024-01-08 00:00:00-05:00,474.600006,1.427593,1.246851,-0.149374,-0.176878,3.992364,0.679322,9.214387,0.959582,20.393126,0.674397
2024-01-09 00:00:00-05:00,473.880005,-0.151707,-0.183373,0.260237,0.002599,3.390113,0.541518,9.717589,1.035964,20.165438,0.657106
2024-01-10 00:00:00-05:00,476.559998,0.565542,0.466172,1.657456,0.61481,3.571979,0.583131,10.890418,1.213988,19.531327,0.608948
2024-01-11 00:00:00-05:00,476.350006,-0.044064,-0.085891,1.941022,0.739058,3.05566,0.46499,9.687485,1.031394,19.204767,0.584147
2024-01-12 00:00:00-05:00,476.679993,0.069274,0.016748,1.87211,0.708864,1.724262,0.160345,9.768507,1.043692,20.803049,0.705529
2024-01-16 00:00:00-05:00,474.929993,-0.367123,-0.378455,0.069529,-0.080962,1.026579,0.000704,10.842512,1.206716,18.615495,0.539394
2024-01-17 00:00:00-05:00,472.290009,-0.555868,-0.549384,-0.335527,-0.258444,0.630691,-0.089881,11.203454,1.261503,16.715728,0.395116


In [5]:
# Graph the data and show the standard deviation ranges

# It produces a histogram (probability distribution) of the returns for the chosen time frame (1D, 1W, 1M, 1Q or 1Y)
# It also prints ±1, ±2 and ±3 standard deviations and the mean of the returns

# NOTE: The y axis is a probability density (histnorm='probability density'), so bar areas — not bar heights — sum to 1

def distribution_of_returns(time="1D"):
    '''
        Plot the distribution of one returns column and print its spread.

        This function takes the following argument:
            time: The suffix of the returns column to use, e.g. "1D" selects
                the "Returns 1D" column of the notebook-level ``data`` frame.

        The function shows a plotly histogram of that column (normalised as a
        probability density) and then prints seven lines: -3, -2 and -1 times
        the column's standard deviation, the column's mean, and +1, +2 and +3
        times the standard deviation. It returns None.

        NOTE: the printed σ levels are plain multiples of the standard
        deviation measured from zero; the mean is printed separately and is
        not added to them.
    '''
    column = f"Returns {time}"

    chart = px.histogram(data, x=column, histnorm='probability density', title=f"{stock_ticker} % Returns Probability Distribution")
    chart.update_layout(
        paper_bgcolor='#ffffff',  # White background around the plot
        plot_bgcolor='#ffffff'  # White background inside the plot area
    )
    chart.show()

    # Compute the statistics once. Besides avoiding repeated work, this keeps
    # the f-strings free of nested same-type quotes, which are a SyntaxError
    # on Python versions before 3.12.
    returns = data[column]
    std = returns.std()
    mean = returns.mean()

    print(f"{-std*3:.2f} σ -3")
    print(f"{-std*2:.2f} σ -2")
    print(f"{-std:.2f} σ -1")
    print(f" {mean:.2f} μ")
    print(f" {std:.2f} σ 1")
    print(f" {std*2:.2f} σ 2")
    print(f" {std*3:.2f} σ 3")


In [6]:
# Histogram and ±1/2/3 standard-deviation levels for the "Returns 1D" column (1-period returns).
distribution_of_returns(time="1D")

-3.31 σ -3
-2.21 σ -2
-1.10 σ -1
 0.05 μ
 1.10 σ 1
 2.21 σ 2
 3.31 σ 3


In [7]:
# Histogram and ±1/2/3 standard-deviation levels for the "Returns 1W" column (5-period returns).
distribution_of_returns(time="1W")

-6.85 σ -3
-4.56 σ -2
-2.28 σ -1
 0.25 μ
 2.28 σ 1
 4.56 σ 2
 6.85 σ 3


In [8]:
# Histogram and ±1/2/3 standard-deviation levels for the "Returns 1M" column (20-period returns).
distribution_of_returns(time="1M")

-13.11 σ -3
-8.74 σ -2
-4.37 σ -1
 1.02 μ
 4.37 σ 1
 8.74 σ 2
 13.11 σ 3


In [9]:
# Histogram and ±1/2/3 standard-deviation levels for the "Returns 1Q" column (60-period returns).
distribution_of_returns(time="1Q")

-19.76 σ -3
-13.18 σ -2
-6.59 σ -1
 2.89 μ
 6.59 σ 1
 13.18 σ 2
 19.76 σ 3


In [10]:
# Histogram and ±1/2/3 standard-deviation levels for the "Returns 1Y" column (240-period returns).
distribution_of_returns(time="1Y")

-39.50 σ -3
-26.33 σ -2
-13.17 σ -1
 11.51 μ
 13.17 σ 1
 26.33 σ 2
 39.50 σ 3
