## 1. Viewing in real time the value of the Dow Jones Industrial Average (^DJI) stock market index, together with the last share price and trading volume of each component company

In [1]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
#! pip install lxml

In [309]:
import requests 
#! pip install bs4
from bs4 import BeautifulSoup

# Fetch the ^DJI components page from Yahoo Finance and parse it with BeautifulSoup.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# reports an HTTP error here instead of as a confusing parsing failure below.
response = requests.get('https://finance.yahoo.com/quote/%5EDJI/components?p=%5EDJI', timeout=30)
response.raise_for_status()
soupObj = BeautifulSoup(response.text, 'html')

# Columns collected from the components table, one list per column:
# Symbol, Company Name, Last Price, Volume of shares
last_price = []
company = []
volume = []
symbol = []

# Locate the (first) table in the page
table = soupObj.find("table")
if table is None:
    raise ValueError("No <table> element found in the Yahoo Finance response; "
                     "the page layout may have changed or the request was blocked.")

# Walk over the table rows (the first row is skipped as the header) and fill the lists
for row in table.find_all("tr")[1:]:
    col = row.find_all("td")
    if len(col) < 6:
        # Not a full data row (the volume is read from the 6th cell), so skip it
        continue
    symbol.append(col[0].text)
    company.append(col[1].text)
    # Strip thousands separators before parsing, for prices just like for volumes
    last_price.append(float(col[2].text.replace(',', '')))
    volume.append(int(col[5].text.replace(',', '')))

# Data frame built from the scraped columns
components = pd.DataFrame({"Symbol":symbol, "Company Name": company, "Last Price": last_price, "Volume":volume})
components.head()

Unnamed: 0,Symbol,Company Name,Last Price,Volume
0,UNH,UnitedHealth Group Incorporated,334.64,2568915
1,HON,Honeywell International Inc.,206.58,2206213
2,JNJ,Johnson & Johnson,159.02,7611355
3,MCD,McDonald's Corporation,208.67,3016563
4,GS,"The Goldman Sachs Group, Inc.",330.94,2566140


In [310]:
# Real-time value of ^DJI: take the first <span> matched by the selector,
# drop the thousands separators from its text and parse it as a float.
djia_span = soupObj.select("div span[data-reactid*='32']")[0]
djia = float(djia_span.text.replace(',', ''))
djia

31391.52

### Since August 31, 2020, the Dow Divisor is 0.15198707565833

In [311]:
# Dow Divisor in effect since August 31, 2020
div = 0.15198707565833

# Recompute ^DJI from the scraped real-time prices:
# sum of the first 30 "Last Price" values divided by the divisor
first_thirty_prices = components["Last Price"][:30]
new_index_value = sum(first_thirty_prices) / div
new_index_value

31391.484962349878

In [312]:
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure

# Send Bokeh output to the notebook cell instead of a separate file/tab
output_notebook()

In [313]:
# Hover tooltip definitions as (label, field reference) pairs
hover_tooltips = [
    ("Company Name", "@{Company Name}"),
    ("Last Price", "@{Last Price}"),
    ("Volume", "@Volume"),
]

# Create the figure
p = figure(
    plot_width=600,
    plot_height=450,
    title="Last Price vs Volume for DJI components",
    toolbar_location='right',
    tooltips=hover_tooltips,
)

# Scatter of Last Price (x) against Volume (y), read directly from `components`
p.scatter(
    'Last Price',
    'Volume',
    source=components,
    size=10,
    color='green',
    fill_color='green',
    fill_alpha=0.6,
)

# Column source built from `components`, used by the labels below
source = ColumnDataSource(components)

# Ticker-symbol label next to every point, shifted slightly off the marker
labels = LabelSet(
    x='Last Price',
    y='Volume',
    text='Symbol',
    level='glyph',
    x_offset=5,
    y_offset=5,
    text_font_size='7pt',
    render_mode='canvas',
    source=source,
)
p.add_layout(labels)

# Axis titles, plain (non-scientific) tick formatting on the left axis, then render
p.xaxis.axis_label = 'Last Price'
p.yaxis.axis_label = 'Volume'
p.left[0].formatter.use_scientific = False
show(p)

## 2. Calculating the DJI stock index for yesterday and comparing it with its value on the day before yesterday

In [314]:
# Reference timestamp for "yesterday": the current local date/time minus one day.
# NOTE(review): when that day is a weekend or market holiday there is presumably
# no row for it in the download — confirm before relying on the result.
previous_day = datetime.datetime.today() - datetime.timedelta(days=1)

# Download the price history from `previous_day` onwards for every company that is
# a component of the DJI index, tagging each frame with its ticker symbol.
# The frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copies the accumulated frame on every iteration.
frames = []
for ticker_symbol in components["Symbol"].tolist():
    data = yf.download(ticker_symbol, start = previous_day)
    data['Symbol'] = ticker_symbol
    frames.append(data)

# pd.concat raises on an empty list, so fall back to an empty DataFrame
all_df = pd.concat(frames) if frames else pd.DataFrame()
    


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [315]:
# Build the cleaned frame in one chain:
#   1. reset_index   - Date is the index of the downloaded data; turn it into an ordinary column
#   2. drop_duplicates - keep the first row per (Date, Symbol) pair and renumber the index
#   3. sort_values   - order the rows by Symbol
new_df = (
    all_df
    .reset_index()
    .drop_duplicates(subset=['Date', 'Symbol'], keep='first', ignore_index=True)
    .sort_values(by=['Symbol'])
)
new_df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Symbol
28,2021-03-02,128.410004,128.720001,125.010002,125.120003,125.120003,102015300,AAPL
17,2021-03-02,227.0,227.889999,225.279999,225.690002,225.690002,2407500,AMGN
9,2021-03-02,138.240005,140.190002,138.080002,139.25,139.25,2928000,AXP
16,2021-03-02,224.789993,227.190002,222.149994,223.139999,223.139999,11094500,BA
25,2021-03-02,218.690002,219.339996,215.009995,215.820007,215.820007,2851700,CAT


In [316]:
# Ticker object for the Dow Jones Industrial Average index
tickerData = yf.Ticker('^DJI')

# Reference timestamp for the day before yesterday (current local date/time minus two days)
day_before_yesterday = datetime.datetime.today() - datetime.timedelta(days=2)

# Historical data for DJI starting from the day before yesterday
dataDJ = tickerData.history(start = day_before_yesterday)

# Fail with a clear message instead of an opaque indexing error when nothing came back
if dataDJ.empty:
    raise ValueError(f"No ^DJI history returned for start={day_before_yesterday:%Y-%m-%d}.")

# Closing value of the first returned row. `.iloc[0]` selects by position explicitly;
# `Close[0]` relied on integer-as-position lookup on a date-indexed Series, which is deprecated.
# NOTE(review): the first row is assumed to be the day before yesterday — confirm for
# weekends/holidays, where the first available trading day may be later.
djia_day_before_yesterday = dataDJ.Close.iloc[0]

#Printing the result
print(f'The value of ^DJI on {day_before_yesterday.year}-{day_before_yesterday.month}-{day_before_yesterday.day} is {round(djia_day_before_yesterday,2)}.')


The value of ^DJI on 2021-3-1 is 31535.51.



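$\text{New DJI} = \displaystyle\frac{\sum \limits _{i=1} ^{n} P_{i}}{D}$, where $P_{i}$ is the price of the $i$-th component, $n$ is the number of components and $D$ is the Dow Divisor.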


$D = 0.15198707565833$

### Since August 31, 2020, the Dow Divisor is 0.15198707565833

In [317]:
# Dow Divisor (see the note above this cell)
div = 0.15198707565833

# Add up the "Adj Close" values of all rows in new_df, then divide by the divisor
adj_close_total = sum(new_df["Adj Close"])
prv_dji = adj_close_total / div

# Report the computed index value together with the date it refers to
print(f'The value of ^DJI on {previous_day.year}-{previous_day.month}-{previous_day.day} is {round(prv_dji,2)}.')

The value of ^DJI on 2021-3-2 is 31391.55.


In [318]:
# r = requests.get('https://finance.yahoo.com/quote/%5EDJI?p=%5EDJI')
# soup = BeautifulSoup(r.text, 'lxml')

# #Retrieving the previous value of DJI (from the previous day)
# previous_dji = float(soup.findChild('td','Ta(end) Fw(600) Lh(14px)').text.replace(',', ''))
# previous_dji