# Importing Libraries

In [4]:
#!pip install pandas-datareader
#!pip install yfinance

Collecting yfinance
  Obtaining dependency information for yfinance from https://files.pythonhosted.org/packages/e6/b3/388ab967a387cc92926f70e97688dd9a7189b29a0773db815ffc5289e2b5/yfinance-0.2.31-py2.py3-none-any.whl.metadata
  Downloading yfinance-0.2.31-py2.py3-none-any.whl.metadata (11 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Using cached multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.3.8-py311-none-any.whl (14 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.0.tar.gz (2.9 MB)
     ---------------------------------------- 0.0/2.9 MB ? eta -:--:--
     ---------------------------------------- 0.0/2.9 MB ? eta -:--:--
     - -------------------------------------- 0.1/2.9 MB 2.0 MB/s eta 0:00:02
     -- ------------------------------------- 0.2/2.9 MB 2.4 MB/s eta 0:00:02
     --- ------------------------------------ 0.3/2.9 MB 1.8 MB/s eta 0:00:02
     ---- -----------------------

In [5]:
from pandas_datareader import data as pdr
import yfinance as yf
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import random as rd
import statsmodels
from datetime import datetime
yf.pdr_override()

# Method 1 - Getting data from Yahoo Finance

In [None]:
# y_symbols = ['PG']
# startdate = datetime(1995,1,1)
# enddate = datetime(2022,12,31)
# pg = pdr.get_data_yahoo(y_symbols, start=startdate) #end= enddate) - to specify end date use this
# pg

# Method 2 - Getting data from Yahoo Finance

In [None]:
# PcG = yf.download("PG", start = "1995-01-01") # end = "2022-12-31") - to specify end date use this
# PcG
# PcG.info()

# Method 3 - Getting data for multiple stocks from Yahoo Finance

In [6]:
# Build the price table `pf`: one column of adjusted closing prices per ticker,
# downloaded from Yahoo Finance starting at `startdate`.
tickers = ['PG', 'MSFT', 'T', 'F', 'GE']
startdate = datetime(1995, 1, 1)
# NOTE(review): `enddate` is defined but never passed to the download call,
# so the series run up to the latest available trading day.
enddate = datetime(2022, 12, 31)

pf = pd.DataFrame()
for symbol in tickers:
    history = pdr.get_data_yahoo(symbol, start=startdate)
    pf[symbol] = history['Adj Close']

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [7]:
# Sanity check of the downloaded table: index range, per-column non-null counts and dtypes.
pf.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7248 entries, 1995-01-03 to 2023-10-16
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   PG      7248 non-null   float64
 1   MSFT    7247 non-null   float64
 2   T       7248 non-null   float64
 3   F       7248 non-null   float64
 4   GE      7248 non-null   float64
dtypes: float64(5)
memory usage: 339.8 KB


In [None]:
# Keep the first and last five rows of the price table and print them one after the other.
h1, t1 = pf.head(), pf.tail()
print(h1, t1, sep="\n")

In [None]:
# Rich (HTML) preview of the first five rows of the price table.
pf.head()

# Normalization

In [None]:
# Rebase every price series to 100 on the first date: each row is divided by the
# first row and multiplied by 100, so the chart shows how each stock moved
# relative to a common starting value of 100.
pf.div(pf.iloc[0]).mul(100).plot(figsize=(15, 7))
plt.show()

# Without Normalization

In [None]:
# For comparison: the raw adjusted closing prices, each series on its own price scale
# (no rebasing to a common starting value).
pf.plot(figsize = (15,7))
plt.show()

# Calculating returns of a Portfolio of Securities

In [None]:
# Simple daily returns: today's price divided by the previous row's price, minus 1.
# The first row has no previous price and is therefore NaN.
previous_close = pf.shift(1)
returns = pf.div(previous_close).sub(1)
returns.head()

In [None]:
# Equal allocation across the five securities in the portfolio.
# Portfolio weights must sum to 1, so each of the five holdings gets 1/5 = 0.2
# (five weights of 0.25 would sum to 1.25 and overstate every weighted return).
weights = np.array([0.2, 0.2, 0.2, 0.2, 0.2])

In [None]:
# Weighted sum of the simple daily returns for every date: one portfolio return per row of `returns`.
# The first entry is NaN because the first row of `returns` is NaN.
np.dot(returns, weights)

In [None]:
# Annualize the average simple daily return of each security.
# 250 is presumably the assumed number of trading days per year.
mean_daily_return = returns.mean()
annual_returns = mean_daily_return.mul(250)
annual_returns

In [None]:
# Annualized portfolio return: weighted sum of the securities' annualized returns,
# rendered as a percentage string with two decimals.
# Scaling to percent *before* rounding (via the format spec) avoids binary
# floating-point noise such as '11.360000000000001 %' that `round(x, 4) * 100` produces.
pfolio_1 = f"{np.dot(annual_returns, weights) * 100:.2f} %"
print(pfolio_1)

# Calculating log returns

In [None]:
# Daily logarithmic returns, ln(P_t / P_{t-1}), computed per security.
# Log returns are used here because each company is examined on its own.
price_ratio = pf.div(pf.shift(1))
sec_returns = np.log(price_ratio)

In [None]:
# Preview of the daily log returns; the first row is NaN because there is no prior price to compare with.
sec_returns.head()

# Annualized Returns for each security in a Portfolio

In [None]:
# Per-security annualized log return: mean daily log return scaled by 250
# (presumably the assumed number of trading days per year).
mean_daily_log_return = sec_returns.mean()
ann_returns = mean_daily_log_return.mul(250)
print(ann_returns)

# Annualized Variance for each security in a Portfolio

In [None]:
# Annualized variance for each security: daily log-return variance scaled by 250
# (presumably the assumed number of trading days per year).
ann_var = sec_returns.var() * 250
# Print the value computed in this cell. `pf_var` (the portfolio variance) is a
# different quantity and is not assigned until a later cell, so printing it here
# raises NameError on a fresh kernel.
print(ann_var)

# Annualized Standard Deviation (SD) for each security in a Portfolio

In [None]:
# Per-security annualized volatility: daily log-return standard deviation
# scaled by the square root of 250.
daily_std = sec_returns.std()
ann_std = daily_std.mul(250 ** 0.5)
print(ann_std)

In [None]:
# Squaring PG's annualized standard deviation gives back its annualized variance.
pg_ann_std = ann_std.loc['PG']
pg = pg_ann_std ** 2
print(pg)

# Portfolio Covariance Matrix

In [None]:
# Covariance matrix of the securities' daily log returns.
# Note: this is the daily covariance; it is not scaled by 250 here.
cov_mx = sec_returns.cov()
print(cov_mx)

# Portfolio Correlation Matrix

In [None]:
# Pairwise correlation matrix of the securities' daily log returns.
corr_mx = sec_returns.corr()
print(corr_mx)

# Returns, Annualized returns, SD and Annualized SD for PG

In [None]:
def _as_percent(value):
    """Format a fractional value as a percentage string with two decimals (0.1136 -> '11.36 %').

    The value is scaled to percent before rounding, which avoids binary
    floating-point noise such as '11.360000000000001 %' that
    `round(value, 4) * 100` can produce.
    """
    return f"{value * 100:.2f} %"


# Daily log returns of PG, the only security summarized in this cell.
pg_log_returns = sec_returns['PG']

# Daily Average Return
mean = _as_percent(pg_log_returns.mean())
print(mean)

# Annualized Average Return (daily mean scaled by 250)
ann_mean = _as_percent(pg_log_returns.mean() * 250)
print(ann_mean)

# Daily Standard Deviation
sd = _as_percent(pg_log_returns.std())
print(sd)

# Annualized Standard Deviation (daily SD scaled by the square root of 250)
ann_sd = _as_percent(pg_log_returns.std() * 250 ** 0.5)
print(ann_sd)

# Procter & Gamble (PG) Summary Statistics

In [None]:
# Print the four pre-formatted PG statistics as a small text report.
# Each row is a (template, value) pair; the rendered rows are passed to print()
# as separate arguments, so print's default single-space separator precedes each row.
pg_report_header = (" SUMMARY STATISTICS\n"
                    "-------------------\n")
pg_report_rows = (
    ("Mean               =  {}\n", mean),
    ("Annualized Mean    =  {}\n", ann_mean),
    ("Standard Deviation =  {}\n", sd),
    ("Annualized SD      =  {}\n", ann_sd),
)
print(pg_report_header,
      *(template.format(value) for template, value in pg_report_rows))

# Portfolio Summary Statistics

In [None]:
# Print the per-security annualized mean, variance and standard deviation of the
# log returns, followed by the daily covariance and correlation matrices.
# The variance section prints `ann_var` (per-security annualized variance).
# `pf_var` is the scalar portfolio variance, assigned only in a later cell, so
# using it here raises NameError on a fresh kernel or shows the wrong quantity.
print(" SUMMARY STATISTICS\n"
     "-------------------\n",
    "Annualized Mean:\n",
      ann_returns,
      "\n",
      "\n",
     "Annualized Variance:\n",
      ann_var,
      "\n",
      "\n",
     "Annualized SD:\n",
      ann_std,
     "\n",
     "\n",
     "Covariance Matrix:\n",
      cov_mx,
     "\n",
     "\n",
     "Correlation Matrix:\n",
      corr_mx
     )

# Weights of securities or funds in a Portfolio

In [None]:
# Weights could be listed by hand instead, e.g.:
# weights = np.array([0.5, 0.5])

# Derive the equal weight from the data: one share per column (security) in `sec_returns`.
n_securities = len(sec_returns.columns)
sec_weight = 1 / n_securities
sec_weight

In [None]:
# Equal-weight vector: one entry of `sec_weight` per security column.
weights = np.full(sec_returns.shape[1], sec_weight)
weights

# Expected Portfolio Returns, Variance and Standard Deviation

In [None]:
# Expected annualized return, variance and standard deviation of the portfolio
# described by `weights`, based on daily log returns scaled by 250.
# Variance uses the quadratic form w' * Cov * w; `.T` transposes the weights
# vector (a no-op for a 1-D array, kept to mirror the formula).

# Annualized covariance matrix, computed once and reused below.
ann_cov = sec_returns.cov() * 250

# Scale to percent before rounding (via the format spec) to avoid float noise
# such as '7.140000000000001%' that `round(x, 4) * 100` produces.
port_ann_return = np.sum(weights * sec_returns.mean()) * 250
exp_return = f"{port_ann_return * 100:.2f}%"
print("Portfolio Return = ", exp_return)

pf_var = np.dot(weights.T, np.dot(ann_cov, weights))
print("Portfolio Variance = ", str(round(pf_var * 100, 2)) + "%")

# Standard deviation is the square root of the variance computed above
# (equivalently: np.sqrt(pf_var)).
pf_stdev = pf_var ** 0.5
print("Portfolio Standard Deviation = ", str(round(pf_stdev * 100, 2)) + "%")


In [None]:
# NOTE(review): this evaluates the quadratic form w' * M * w with M being the
# *correlation* matrix scaled by 250, not the covariance matrix.
# Despite the name `pf_sdev`, the result is not the portfolio standard deviation
# (that is `pf_stdev`, computed from the covariance matrix) — TODO confirm the intent.
pf_sdev = np.dot(weights.T, np.dot(sec_returns.corr() * 250, weights))
pf_sdev

# Regression on Portfolio

In [None]:
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
# Number of securities in the portfolio, taken from the ticker list; used as the
# length of each random weight vector in the simulation below.
num_assets = len(tickers)
print(num_assets)

# Obtaining the Efficient Frontier in Python

In [None]:
# Simulate random portfolios to trace out the efficient frontier.
# Each draw generates `num_assets` random non-negative weights, rescales them to
# sum to 1, and records the annualized expected log return and annualized
# volatility of that allocation.

N_PORTFOLIOS = 1000
# Fixed seed so that Restart Kernel -> Run All reproduces the same simulated portfolios.
rng = np.random.default_rng(42)

# Loop-invariant inputs, computed once instead of on every iteration.
daily_mean_returns = sec_returns.mean()
ann_cov = sec_returns.cov() * 250

pf_returns = []
pf_vols = []

for _ in range(N_PORTFOLIOS):
    # A dedicated name keeps the equal-weight `weights` vector from earlier cells
    # intact, so re-running those cells after this one gives the same results.
    sim_weights = rng.random(num_assets)
    sim_weights /= np.sum(sim_weights)
    pf_returns.append(np.sum(sim_weights * daily_mean_returns) * 250)
    pf_vols.append(np.sqrt(np.dot(sim_weights.T, np.dot(ann_cov, sim_weights))))

# Convert the collected lists to arrays for vectorized use and plotting.
pf_returns = np.array(pf_returns)
pf_vols = np.array(pf_vols)

pf_returns, pf_vols