# Webscraping some stock market data

## Libraries

In [1]:
import re

import requests
from bs4 import BeautifulSoup

## Scraping Daily FTSE 100 Data From London Stock Exchange Website

In [2]:
# Fetch the first page of the FTSE 100 constituents table and parse it.
url = 'https://www.londonstockexchange.com/indices/ftse-100/constituents/table'
response = requests.get(url, timeout=30)  # a timeout stops the cell hanging on a stalled connection
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
website = response.text
soup = BeautifulSoup(website, 'lxml')

In [3]:
# check if access to website
# (this sends a second GET request; the notebook displays the returned Response object)
requests.get(url)

<Response [200]>

#### Company code, name, currency, market cap, price, change, change %

In [4]:
# One independent, initially empty list per scraped column.
code, name, currency, mc, price, change, p_change = [[] for _ in range(7)]

In [5]:
# get all data
# Each find_all returns the cells of one table column, in page order.
company_code = soup.find_all(class_='clickable bold-font-weight instrument-tidm gtm-trackable td-with-link')
company_name = soup.find_all(class_='clickable instrument-name gtm-trackable td-with-link')
company_currency = soup.find_all(class_='instrument-currency hide-on-landscape')
company_mc = soup.find_all(class_='instrument-marketcapitalization hide-on-landscape')
company_price = soup.find_all(class_='instrument-lastprice')
# Match the net-change cells on their shared class prefix. An exact class string
# ending in 'positivechange' only matches cells that carry that trailing class
# (presumably rising prices only), which would leave this list shorter than the
# others and break the row-by-row append.
company_change = soup.find_all(class_=re.compile('instrument-netchange hide-on-landscape'))
company_p_change = soup.find_all(class_='instrument-percentualchange hide-on-landscape gtm-trackable')

In [6]:
# Copy the text of every scraped cell into its column list, one table row at a time.
scraped_columns = (
    (code, company_code),
    (name, company_name),
    (currency, company_currency),
    (mc, company_mc),
    (price, company_price),
    (change, company_change),
    (p_change, company_p_change),
)
for row in range(len(company_code)):
    for target, cells in scraped_columns:
        target.append(cells[row].text)

In [7]:
# Show the scraped codes and names, followed by how many of each were collected.
print(code, name, len(code), len(name), sep='\n')

['IHG', 'RR.', 'IAG', 'WTB', 'LAND', 'FLTR', 'CPG', 'MRO', 'HLMA', 'SKG', 'LLOY', 'ENT', 'TW.', 'ABF', 'INF', 'PSN', 'LGEN', 'RKT', 'III', 'SGRO']
['INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P', 'ROLLS-ROYCE HOLDINGS PLC ORD SHS 20P', 'INTL CONSOLIDATED AIRLINES GROUP SA ORD EUR0.10 (CDI)', 'WHITBREAD PLC ORD 76 122/153P', 'LAND SECURITIES GROUP PLC ORD 10 2/3P', 'FLUTTER ENTERTAINMENT PLC ORD EUR0.09 (CDI)', 'COMPASS GROUP PLC ORD 11 1/20P', 'MELROSE INDUSTRIES PLC ORDS 160/21P', 'HALMA PLC ORD 10P', 'SMURFIT KAPPA GROUP PLC ORD EUR0.001 (CDI)', 'LLOYDS BANKING GROUP PLC ORD 10P', 'ENTAIN PLC ORD EUR0.01', 'TAYLOR WIMPEY PLC ORD 1P', 'ASSOCIATED BRITISH FOODS PLC ORD 5 15/22P', 'INFORMA PLC ORD 0.1P', 'PERSIMMON PLC ORD 10P', 'LEGAL & GENERAL GROUP PLC ORD 2 1/2P', 'RECKITT BENCKISER GROUP PLC ORD 10P', '3I GROUP PLC ORD 73 19/22P', 'SEGRO PLC ORD 10P']
20
20


#### Now loop through all the pages

In [8]:
import re # regex matching: one of the class strings differs slightly between rows, so match by pattern to make sure every cell is found

In [9]:
# Rebind every column name to a fresh empty list.
code, name, currency, mc, price, change, p_change = [[] for _ in range(7)]

In [10]:
# create a function to scrape the data, which will be used over 5 different pages
def data_collection():
    """Append every table row found in the global ``soup`` to the column lists.

    Reads the module-level ``soup`` and appends cell text to the module-level
    lists ``code``, ``name``, ``currency``, ``mc``, ``price``, ``change`` and
    ``p_change``. The number of rows is taken from the company-code cells; the
    other columns are indexed by the same row number.
    """
    def cells(class_pattern):
        # Cells whose class attribute matches the given regular expression.
        return soup.find_all(class_=re.compile(class_pattern))

    # (target list, scraped cells) pairs, in the order the columns are filled.
    columns = [
        (code, cells('clickable bold-font-weight instrument-tidm gtm-trackable td-with-link')),
        (name, cells('clickable instrument-name gtm-trackable td-with-link')),
        (currency, cells('instrument-currency hide-on-landscape')),
        (mc, cells('instrument-marketcapitalization hide-on-landscape')),
        (price, cells('instrument-lastprice')),
        (change, cells('instrument-netchange hide-on-landscape')),
        (p_change, cells('instrument-percentualchange hide-on-landscape gtm-trackable')),
    ]

    row_count = len(columns[0][1])
    for row in range(row_count):
        for target, found in columns:
            target.append(found[row].text)

In [11]:
# Count the pagination links to learn how many pages to iterate over.
list_of_pages = soup.find_all('a', attrs={'class': 'page-number'})
print(len(list_of_pages))

5


In [12]:
# loop through each website page (page numbers start at 1)
for n in range(1, len(list_of_pages) + 1):
    url = f'https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page={n}'
    response = requests.get(url, timeout=30)
    # Fail on an HTTP error: an error page would parse to zero rows and
    # silently leave the column lists short.
    response.raise_for_status()
    website = response.text
    soup = BeautifulSoup(website, 'lxml')

    # run function to scrape
    data_collection()
    print(url)

https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=1
https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=2
https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=3
https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=4
https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=5


In [13]:
# verify: print the sizes of three of the column lists, then every code
print(*map(len, (code, change, name)))
print(code)

100 100 100
['IHG', 'RR.', 'IAG', 'WTB', 'LAND', 'FLTR', 'CPG', 'MRO', 'HLMA', 'SKG', 'LLOY', 'ENT', 'TW.', 'ABF', 'INF', 'PSN', 'LGEN', 'RKT', 'III', 'SGRO', 'SBRY', 'ICP', 'BRBY', 'STAN', 'BDEV', 'FERG', 'BARC', 'AV.', 'NXT', 'SPX', 'PSH', 'DGE', 'REL', 'HSBA', 'NWG', 'BA.', 'SDR', 'BKG', 'BLND', 'PRU', 'ANTO', 'BP.', 'RIO', 'AAL', 'CRDA', 'SHEL', 'RTO', 'ITRK', 'LSEG', 'TSCO', 'AUTO', 'VOD', 'MNDI', 'AZN', 'HIK', 'EXPN', 'ABDN', 'SSE', 'WPP', 'KGF', 'MNG', 'SGE', 'STJ', 'SN.', 'PHNX', 'MGGT', 'CRH', 'JD.', 'BATS', 'ECM', 'DCC', 'NG.', 'SVT', 'RMG', 'ADM', 'SMIN', 'ULVR', 'BNZL', 'AHT', 'AVST', 'BME', 'EDV', 'SMDS', 'GSK', 'AAF', 'UU.', 'SMT', 'IMB', 'HL.', 'GLEN', 'PSON', 'RMV', 'HWDN', 'BT.A', 'AVV', 'CCH', 'OCDO', 'FRES', 'ITV', 'DPH']


#### Find more information about each of the extracted companies

Clicking on the name of a company opens a page where you can get: volume, turnover, YTD return, 1-year return, earnings per share and issue date.

In [14]:
# load up soup again as it changed from the loop
# (the paging loop rebinds `soup` on every iteration, so it no longer holds page 1)
url = 'https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page=1'
website = requests.get(url).text
soup = BeautifulSoup(website, 'lxml')

In [15]:
# get links for each company: you can get it by clicking on the name: the links have the code in them so use that
company_name_link = soup.find_all('a')
# Escape the code so it is matched literally: tickers such as 'RR.' contain
# regex metacharacters ('.' would otherwise match any character).
ticker_pattern = re.escape(code[0])
for link in company_name_link:
    if re.search(ticker_pattern, link.text):
        print(link.get('href'))

stock/IHG/intercontinental-hotels-group-plc


In [16]:
# Start with no links collected and the code index at the first company.
links, counter = [], 0

In [17]:
# loop through each name in the list obtained and find the corresponding link on its page
company_name_link = soup.find_all('a')

# Number of companies on one page, assuming every page lists the same number.
per_page = len(code) // len(list_of_pages)

for _ in range(per_page):
    # Match the code literally; tickers such as 'RR.' contain regex metacharacters.
    ticker_pattern = re.escape(code[counter])
    for link in company_name_link:
        if re.search(ticker_pattern, link.text):
            links.append(link.get('href'))
    counter += 1

# remove any duplicates (dict keys keep first-seen order)
links = list(dict.fromkeys(links))

In [18]:
# Show the collected links, how many there are, and where the code index stopped.
print(links, len(links), counter, sep='\n')

['stock/IHG/intercontinental-hotels-group-plc', 'stock/RR./rolls-royce-holdings-plc', 'stock/IAG/international-consolidated-airlines-group-s-a', 'stock/WTB/whitbread-plc', 'stock/LAND/land-securities-group-plc', 'stock/FLTR/flutter-entertainment-plc', 'stock/CPG/compass-group-plc', 'stock/MRO/melrose-industries-plc', 'stock/HLMA/halma-plc', 'stock/SKG/smurfit-kappa-group-plc', 'stock/LLOY/lloyds-banking-group-plc', 'stock/ENT/entain-plc', 'stock/TW./taylor-wimpey-plc', 'stock/ABF/associated-british-foods-plc', 'stock/INF/informa-plc', 'stock/PSN/persimmon-plc', 'stock/LGEN/legal-general-group-plc', 'stock/RKT/reckitt-benckiser-group-plc', 'stock/III/3i-group-plc', 'stock/SGRO/segro-plc']
20
20


#### Now loop through all the pages

In [19]:
# empty array of links
# (rebinds `links` to a fresh list, so anything collected before this cell is dropped)
links = []

In [20]:
def more_data_collection(counter):
    """Collect link hrefs from the current page for one page's worth of codes.

    Reads the module-level ``soup``, ``code`` and ``list_of_pages`` and appends
    to the module-level ``links``. Every anchor whose text contains a code
    contributes its ``href`` (which is ``None`` when the anchor has none).

    Args:
        counter: index into ``code`` of the first company to look up on the
            page currently held in ``soup``.
    """
    company_name_link = soup.find_all('a')

    # Number of codes to look up per call, assuming every page lists the same number.
    per_page = len(code) // len(list_of_pages)

    for ticker in code[counter:counter + per_page]:
        # Match the code literally: tickers such as 'RR.' contain regex
        # metacharacters ('.' would otherwise match any character).
        ticker_pattern = re.escape(ticker)
        for link in company_name_link:
            if re.search(ticker_pattern, link.text):
                links.append(link.get('href'))

In [21]:
# loop through each website page
page_count = len(list_of_pages)
# Step through `code` by the same page size that more_data_collection() derives,
# rather than a hard-coded 20.
per_page = len(code) // page_count if page_count else 0
z = 0 # index in `code` of the first company on the current page

for n in range(1, page_count + 1):
    url = f'https://www.londonstockexchange.com/indices/ftse-100/constituents/table?page={n}'
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # an error page would silently contribute no links
    website = response.text
    soup = BeautifulSoup(website, 'lxml')

    # run function to scrape
    more_data_collection(z)

    z += per_page

# remove any duplicates (dict keys keep first-seen order)
links = list(dict.fromkeys(links))

In [22]:
# List every collected link together with its position in `links`.
for x, link in enumerate(links):
    print(f'{x}: {link}')

0: stock/IHG/intercontinental-hotels-group-plc
1: stock/RR./rolls-royce-holdings-plc
2: stock/IAG/international-consolidated-airlines-group-s-a
3: stock/WTB/whitbread-plc
4: stock/LAND/land-securities-group-plc
5: stock/FLTR/flutter-entertainment-plc
6: stock/CPG/compass-group-plc
7: stock/MRO/melrose-industries-plc
8: stock/HLMA/halma-plc
9: stock/SKG/smurfit-kappa-group-plc
10: stock/LLOY/lloyds-banking-group-plc
11: stock/ENT/entain-plc
12: stock/TW./taylor-wimpey-plc
13: stock/ABF/associated-british-foods-plc
14: stock/INF/informa-plc
15: stock/PSN/persimmon-plc
16: stock/LGEN/legal-general-group-plc
17: stock/RKT/reckitt-benckiser-group-plc
18: stock/III/3i-group-plc
19: stock/SGRO/segro-plc
20: stock/SBRY/sainsbury-j-plc
21: stock/ICP/intermediate-capital-group-plc
22: stock/BRBY/burberry-group-plc
23: stock/STAN/standard-chartered-plc
24: stock/BDEV/barratt-developments-plc
25: stock/FERG/ferguson-plc
26: stock/BARC/barclays-plc
27: stock/AV./aviva-plc
28: stock/NXT/next-plc
29:

In [23]:
# three links for LSEG. Clean data
# Keep only hrefs that start with 'stock/'. Anchors without an href were stored
# as None by link.get('href'); skip them instead of passing None to a string test.
final_link_list = [link for link in links if link and link.startswith('stock/')]

In [24]:
len(final_link_list)

100

#### Now that all the links have been collected, use them to get the relevant information

In [25]:
# One independent, initially empty list per value read from the company pages.
volume, turnover, eps, issue_date, date = [[] for _ in range(5)]

In [26]:
# get page of each link
# (trial run on a single company: n is the index into final_link_list)
n = 0
url = f'https://www.londonstockexchange.com/{final_link_list[n]}'
website = requests.get(url).text
soup = BeautifulSoup(website, 'lxml')

In [27]:
print(url)

https://www.londonstockexchange.com/stock/IHG/intercontinental-hotels-group-plc


In [28]:
# get relevant parameters for the variables list
# Look the element up by its id, then print each of its children to inspect the markup.
company_volume = soup.find(id='chart-table') # can only use id because of javascript
for x in company_volume:
    print(x)

 
 
 
<div _ngcontent-sc29="" class="chart-table-price-information"><div _ngcontent-sc29="" class="component-header"><h3 _ngcontent-sc29="" class="regular-font-size section-title"> Price information </h3><app-whats-this _ngcontent-sc29="" _nghost-sc26=""><!-- --><a _ngcontent-sc26="" class="whats-this" href="/help/whats-issuer-profile-overview#what-is-price-information-" target="_blank" title="What's this?">What's this?</a></app-whats-this></div><div _ngcontent-sc29="" class="flex-wrapper"><!-- --><!-- --><!-- --><app-index-item _ngcontent-sc29="" _nghost-sc30="" class="index-item"><!-- --><!-- --><span _ngcontent-sc30="" class="more-on-this"> Open price </span><!-- --><div _ngcontent-sc30="" class="bold-font-weight regular-font-size">5,020.00</div><!-- --><!-- --><!-- --><!-- --><!-- --><!-- --></app-index-item><!-- --><!-- --><!-- --><!-- --><app-index-item _ngcontent-sc29="" _nghost-sc30="" class="index-item"><!-- --><!-- --><span _ngcontent-sc30="" class="more-on-this"> Previous cl

In [29]:
# get relevant parameters for the variables list
# can only use id because of javascript, changing values: navigating down the tree
# select() takes a CSS selector and returns every matching element as a list.
company_volume = soup.select('#chart-table > div > div:nth-child(2) > .index-item ') # gets what I need: change last number to navigate to different values
for x in company_volume:
    # each item's text is its label followed by its value
    print(f'{x.text}\n')

 Open price 5,020.00

 Previous close price / date 5,192.00 / 14 April 2022

 Volume 859,263

 Turnover (on book) £22,694,359.08

 Market Main Market

 Instrument market cap (£m) 9,175.05

 Earnings per share 1.45

 Issue date 14 January 2019

 ISIN GB00BHJYC057

 SEDOL BHJYC05

 Market identifier code (MIC) XLON

 Country of share register GB

 Market segment SET1

 Trading service SETS



In [30]:
# Trial extraction for one company page. Each item's text is a label followed by
# its value, so split(' ')[-1] keeps the last space-separated token (the value).
company_volume = soup.select('#chart-table > div:nth-child(1) > div:nth-child(2) > .index-item')[2].text.split(' ')[-1]
print(company_volume)

company_eps = soup.select('#chart-table > div:nth-child(3) > div:nth-child(2) > .index-item')[2].text.split(' ')[-1]
print(company_eps)

company_turnover =  soup.select('#chart-table > div:nth-child(1) > div:nth-child(2) > .index-item')[3].text.split(' ')[-1]
print(company_turnover)

# The issue date itself contains spaces, so keep every token from index 3 on and re-join them.
company_issue_date = soup.select('#chart-table > div:nth-child(3) > div:nth-child(2) > .index-item')[3].text.split(' ')[3:]
company_issue_date = ' '.join(company_issue_date)
print(company_issue_date)

# Timestamp shown in the page's ticker area; select() returns a list, so loop over the matches.
date_of_stock_data = soup.select('#ticker > div > div > div.flex-wrapper.ticker-fields > div.ticker-item.delay > div > .bold-font-weight')
for x in date_of_stock_data:
    print(x.text)

859,263
1.45
£22,694,359.08
14 January 2019
16.04.22 17:49:25


#### loop for every link in list

In [31]:
def variables():
    """Append one company's details from the global ``soup`` to the detail lists.

    Reads the module-level ``soup`` (a parsed company page) and appends exactly
    one entry to each of the module-level lists ``volume``, ``eps``,
    ``turnover``, ``issue_date`` and ``date``, so the lists stay the same length.

    Raises:
        IndexError: if the page does not contain the expected items.
    """
    # Run each selector once and reuse the result for both values it provides.
    first_section_items = soup.select('#chart-table > div:nth-child(1) > div:nth-child(2) > .index-item')
    third_section_items = soup.select('#chart-table > div:nth-child(3) > div:nth-child(2) > .index-item')

    # Each item's text is a label followed by a value; the last
    # space-separated token is the value.
    volume.append(first_section_items[2].text.split(' ')[-1])
    eps.append(third_section_items[2].text.split(' ')[-1])
    turnover.append(first_section_items[3].text.split(' ')[-1])

    # The issue date contains spaces, so keep every token from index 3 on and re-join.
    issue_date.append(' '.join(third_section_items[3].text.split(' ')[3:]))

    # Append exactly one timestamp per company (None when the selector finds
    # nothing) so `date` cannot drift out of step with the other lists.
    timestamps = soup.select('#ticker > div > div > div.flex-wrapper.ticker-fields > div.ticker-item.delay > div > .bold-font-weight')
    date.append(timestamps[0].text if timestamps else None)

In [32]:
# Visit every company page and pull its details with variables().
for link in final_link_list:
    url = f'https://www.londonstockexchange.com/{link}'
    response = requests.get(url, timeout=30)
    # Report an HTTP error directly instead of failing later on a page that
    # lacks the expected elements.
    response.raise_for_status()
    website = response.text
    soup = BeautifulSoup(website, 'lxml')

    variables()

print('Data collected!')

Data collected!


In [33]:
print(len(volume))

100


#### Move lists to dataframe

In [34]:
import pandas as pd

In [35]:
# Assemble the scraped lists into one table; each list becomes a column.
df = pd.DataFrame({
    'Date': date,
    'Code': code,
    'Company Name': name,
    'Currency': currency,
    'Market Cap (million)': mc,
    'Price': price,
    'Change': change,
    'Percentage Change': p_change,
    'Volume': volume,
    'Turnover': turnover,
    'Earnings Per share': eps,
    'Issue Date': issue_date,
})

In [36]:
df

Unnamed: 0,Date,Code,Company Name,Currency,Market Cap (million),Price,Change,Percentage Change,Volume,Turnover,Earnings Per share,Issue Date
0,16.04.22 17:49:26,IHG,INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P,GBX,9175.05,5192.00,206.00,4.13%,859263,"£22,694,359.08",1.45,14 January 2019
1,16.04.22 17:49:27,RR.,ROLLS-ROYCE HOLDINGS PLC ORD SHS 20P,GBX,7533.35,93.43,3.40,3.78%,53922130,"£33,602,364.86",-0.53,23 May 2011
2,16.04.22 17:49:28,IAG,INTL CONSOLIDATED AIRLINES GROUP SA ORD EUR0.1...,GBX,6967.59,145.48,5.18,3.69%,25606326,"£23,225,393.33",-1.96,02 February 2011
3,16.04.22 17:49:29,WTB,WHITBREAD PLC ORD 76 122/153P,GBX,5680.74,2915.00,103.00,3.66%,2633231,"£19,018,713.51",-4.82,08 January 2007
4,16.04.22 17:49:30,LAND,LAND SECURITIES GROUP PLC ORD 10 2/3P,GBX,5549.30,770.80,22.40,2.99%,2057500,"£12,771,069.43",-1.88,28 September 2017
...,...,...,...,...,...,...,...,...,...,...,...,...
95,16.04.22 17:51:19,CCH,COCA-COLA HBC AG ORD CHF6.70 (CDI),GBX,5946.87,1583.00,-16.50,-1.03%,924445,"£10,467,604.66",1.14,29 April 2013
96,16.04.22 17:51:20,OCDO,OCADO GROUP PLC ORD 2P,GBX,8671.12,1138.50,-15.00,-1.30%,1864471,"£10,475,271.82",-0.18,21 July 2010
97,16.04.22 17:51:21,FRES,FRESNILLO PLC ORD USD0.50,GBX,6063.16,804.00,-18.80,-2.28%,1335202,"£6,922,502.96",0.51,22 September 2008
98,16.04.22 17:51:22,ITV,ITV PLC ORD 10P,GBX,3184.10,77.02,-2.08,-2.63%,13399626,"£7,506,304.75",0.09,22 September 2008


In [37]:
# Clean data
# Remove the unit symbols as literal text (the values remain strings).
df['Percentage Change'] = df['Percentage Change'].str.replace('%', '', regex=False)
df['Turnover'] = df['Turnover'].str.replace('£', '', regex=False)
# The scraped timestamps are day-first ('16.04.22 17:49:25'). Without
# dayfirst=True a day of 12 or less would be read as the month.
# normalize() keeps the datetime64 dtype and sets the time part to midnight.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True).dt.normalize()

df

Unnamed: 0,Date,Code,Company Name,Currency,Market Cap (million),Price,Change,Percentage Change,Volume,Turnover,Earnings Per share,Issue Date
0,2022-04-16,IHG,INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P,GBX,9175.05,5192.00,206.00,4.13,859263,22694359.08,1.45,14 January 2019
1,2022-04-16,RR.,ROLLS-ROYCE HOLDINGS PLC ORD SHS 20P,GBX,7533.35,93.43,3.40,3.78,53922130,33602364.86,-0.53,23 May 2011
2,2022-04-16,IAG,INTL CONSOLIDATED AIRLINES GROUP SA ORD EUR0.1...,GBX,6967.59,145.48,5.18,3.69,25606326,23225393.33,-1.96,02 February 2011
3,2022-04-16,WTB,WHITBREAD PLC ORD 76 122/153P,GBX,5680.74,2915.00,103.00,3.66,2633231,19018713.51,-4.82,08 January 2007
4,2022-04-16,LAND,LAND SECURITIES GROUP PLC ORD 10 2/3P,GBX,5549.30,770.80,22.40,2.99,2057500,12771069.43,-1.88,28 September 2017
...,...,...,...,...,...,...,...,...,...,...,...,...
95,2022-04-16,CCH,COCA-COLA HBC AG ORD CHF6.70 (CDI),GBX,5946.87,1583.00,-16.50,-1.03,924445,10467604.66,1.14,29 April 2013
96,2022-04-16,OCDO,OCADO GROUP PLC ORD 2P,GBX,8671.12,1138.50,-15.00,-1.30,1864471,10475271.82,-0.18,21 July 2010
97,2022-04-16,FRES,FRESNILLO PLC ORD USD0.50,GBX,6063.16,804.00,-18.80,-2.28,1335202,6922502.96,0.51,22 September 2008
98,2022-04-16,ITV,ITV PLC ORD 10P,GBX,3184.10,77.02,-2.08,-2.63,13399626,7506304.75,0.09,22 September 2008


In [38]:
df['Date'].dtype

dtype('<M8[ns]')

In [39]:
# Split the Date column into separate day, month and year columns.
date_parts = df['Date'].dt
df['Day'], df['Month'], df['Year'] = date_parts.day, date_parts.month, date_parts.year

Unnamed: 0,Date,Code,Company Name,Currency,Market Cap (million),Price,Change,Percentage Change,Volume,Turnover,Earnings Per share,Issue Date,Day,Month,Year
0,2022-04-16,IHG,INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P,GBX,9175.05,5192.00,206.00,4.13,859263,22694359.08,1.45,14 January 2019,16,4,2022
1,2022-04-16,RR.,ROLLS-ROYCE HOLDINGS PLC ORD SHS 20P,GBX,7533.35,93.43,3.40,3.78,53922130,33602364.86,-0.53,23 May 2011,16,4,2022
2,2022-04-16,IAG,INTL CONSOLIDATED AIRLINES GROUP SA ORD EUR0.1...,GBX,6967.59,145.48,5.18,3.69,25606326,23225393.33,-1.96,02 February 2011,16,4,2022
3,2022-04-16,WTB,WHITBREAD PLC ORD 76 122/153P,GBX,5680.74,2915.00,103.00,3.66,2633231,19018713.51,-4.82,08 January 2007,16,4,2022
4,2022-04-16,LAND,LAND SECURITIES GROUP PLC ORD 10 2/3P,GBX,5549.30,770.80,22.40,2.99,2057500,12771069.43,-1.88,28 September 2017,16,4,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2022-04-16,CCH,COCA-COLA HBC AG ORD CHF6.70 (CDI),GBX,5946.87,1583.00,-16.50,-1.03,924445,10467604.66,1.14,29 April 2013,16,4,2022
96,2022-04-16,OCDO,OCADO GROUP PLC ORD 2P,GBX,8671.12,1138.50,-15.00,-1.30,1864471,10475271.82,-0.18,21 July 2010,16,4,2022
97,2022-04-16,FRES,FRESNILLO PLC ORD USD0.50,GBX,6063.16,804.00,-18.80,-2.28,1335202,6922502.96,0.51,22 September 2008,16,4,2022
98,2022-04-16,ITV,ITV PLC ORD 10P,GBX,3184.10,77.02,-2.08,-2.63,13399626,7506304.75,0.09,22 September 2008,16,4,2022


In [119]:
# `daily_FTSE` is a second name for the same DataFrame object as `df` (no copy is made).
daily_FTSE = df
daily_FTSE

Unnamed: 0,Date,Code,Company Name,Currency,Market Cap (million),Price,Change,Percentage Change,Volume,Turnover,Earnings Per share,Issue Date,Day,Month,Year
0,2022-04-16,IHG,INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P,GBX,9175.05,5192.00,206.00,4.13,859263,22694359.08,1.45,14 January 2019,16,4,2022
1,2022-04-16,RR.,ROLLS-ROYCE HOLDINGS PLC ORD SHS 20P,GBX,7533.35,93.43,3.40,3.78,53922130,33602364.86,-0.53,23 May 2011,16,4,2022
2,2022-04-16,IAG,INTL CONSOLIDATED AIRLINES GROUP SA ORD EUR0.1...,GBX,6967.59,145.48,5.18,3.69,25606326,23225393.33,-1.96,02 February 2011,16,4,2022
3,2022-04-16,WTB,WHITBREAD PLC ORD 76 122/153P,GBX,5680.74,2915.00,103.00,3.66,2633231,19018713.51,-4.82,08 January 2007,16,4,2022
4,2022-04-16,LAND,LAND SECURITIES GROUP PLC ORD 10 2/3P,GBX,5549.30,770.80,22.40,2.99,2057500,12771069.43,-1.88,28 September 2017,16,4,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2022-04-16,CCH,COCA-COLA HBC AG ORD CHF6.70 (CDI),GBX,5946.87,1583.00,-16.50,-1.03,924445,10467604.66,1.14,29 April 2013,16,4,2022
96,2022-04-16,OCDO,OCADO GROUP PLC ORD 2P,GBX,8671.12,1138.50,-15.00,-1.30,1864471,10475271.82,-0.18,21 July 2010,16,4,2022
97,2022-04-16,FRES,FRESNILLO PLC ORD USD0.50,GBX,6063.16,804.00,-18.80,-2.28,1335202,6922502.96,0.51,22 September 2008,16,4,2022
98,2022-04-16,ITV,ITV PLC ORD 10P,GBX,3184.10,77.02,-2.08,-2.63,13399626,7506304.75,0.09,22 September 2008,16,4,2022


##### Use this daily data for daily analysis, and append it to a database every day so that charts built from it become more populated over time

In [125]:
import pyodbc # sql
from sqlalchemy import create_engine # sql to insert df to sql
import urllib.parse # for sql too: quote_plus() lives in the urllib.parse submodule

SERVER_NAME = 'DESKTOP-GR6T3L7'  # NOTE(review): machine-specific host name; change it to run elsewhere
DATABASE_NAME = 'stocks'

# One ODBC connection string shared by pyodbc and SQLAlchemy, so the two
# connections cannot drift apart. Built from adjacent literals so no source
# indentation ends up inside the string.
CONNECTION_STRING = (
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f'SERVER={SERVER_NAME};'
    f'DATABASE={DATABASE_NAME};'
    'Trusted_Connection=yes'
)

# connection used for reading data back
conn = pyodbc.connect(CONNECTION_STRING)

# cursor to extract data
cursor = conn.cursor()

# Use this to connect for saving data: URL-encode the connection string for the engine URL
quoted = urllib.parse.quote_plus(CONNECTION_STRING)

# save the dataset into SQL database (rows are appended to the existing table)
engine = create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))
daily_FTSE.to_sql('daily_FTSE', schema='dbo', con=engine, if_exists='append')
print('Dataset appended.')

Dataset updated.


##### Use SQL dataset to analyse

In [128]:
# Read the whole table back through the `conn` connection and show its first row.
# NOTE(review): the query has no ORDER BY, so which row comes first is up to the server.
query = """SELECT * FROM [dbo].[daily_FTSE]"""
query_result = pd.read_sql(query, conn)
query_result.head(1)

Unnamed: 0,index,Date,Code,Company Name,Currency,Market Cap (million),Price,Change,Percentage Change,Volume,Turnover,Earnings Per share,Issue Date,Day,Month,Year
0,0,2022-04-16,IHG,INTERCONTINENTAL HOTELS GROUP PLC ORD 20 340/399P,GBX,9175.05,5192.0,206.0,4.13,859263,22694359.08,1.45,14 January 2019,16,4,2022


## Historical FTSE 100 Analysis

####  Load data in

In [85]:
# NOTE(review): absolute path on one machine; the notebook only runs elsewhere after editing it.
historical = pd.read_csv(r"C:\Users\Abdulkadir\Documents\Programming\Personal\DataScience\historicalftse100.csv")
historical

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Oct 15, 2020",5832.52,5935.06,5935.06,5786.20,665.02M,-1.73%
1,"Oct 14, 2020",5935.06,5969.71,6009.81,5926.49,612.91M,-0.58%
2,"Oct 13, 2020",5969.71,6001.38,6008.04,5949.97,786.88M,-0.53%
3,"Oct 12, 2020",6001.38,6016.65,6037.08,5995.11,600.96M,-0.25%
4,"Oct 09, 2020",6016.65,5978.03,6040.47,5978.03,676.78M,0.65%
...,...,...,...,...,...,...,...
4994,"Jan 09, 2001",6088.10,6149.60,6195.90,6066.40,-,-1.00%
4995,"Jan 08, 2001",6149.60,6198.10,6212.40,6137.70,-,-0.78%
4996,"Jan 05, 2001",6198.10,6185.60,6239.60,6155.00,-,0.20%
4997,"Jan 04, 2001",6185.60,6039.90,6195.30,6039.90,-,2.41%


#### Clean data

In [86]:
# change date to datetime
# (no explicit format is passed, so pandas works it out from strings such as 'Oct 15, 2020')
historical['Date'] = pd.to_datetime(historical['Date'])

In [87]:
# make new columns for month and year
# (both are read from the parsed Date column through the .dt accessor)
historical['Month'] = historical['Date'].dt.month
historical['Year'] = historical['Date'].dt.year

#### Remove the M and B suffixes from Vol. and scale the values accordingly

In [108]:
def converter(x):
    """Expand a volume such as ``'665.02M'`` into a full, formatted figure.

    Args:
        x: a volume string ending in ``K`` (thousand), ``M`` (million) or
            ``B`` (billion), e.g. ``'665.02M'``.

    Returns:
        The scaled value as a string with thousands separators and two
        decimals (``'665,020,000.00'``), or ``None`` when ``x`` is not a
        string or has none of the known suffixes (for example ``'-'`` or NaN).

    Raises:
        ValueError: if the part before the suffix is not a number.
    """
    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}

    # Missing CSV cells arrive as float NaN; `in` / slicing would fail on them.
    if not isinstance(x, str):
        return None

    text = x.strip()
    # text[-1:] is '' for an empty string, which simply has no multiplier.
    factor = multipliers.get(text[-1:])
    if factor is None:
        return None
    return f"{float(text[:-1]) * factor:,.2f}"

In [109]:
# Run every 'Vol.' entry through converter() to expand its M/B suffix.
historical['Vol.'] = historical['Vol.'].apply(converter)

In [112]:
historical.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Month,Year
0,2020-10-15,5832.52,5935.06,5935.06,5786.2,665020000.0,-1.73%,10,2020
1,2020-10-14,5935.06,5969.71,6009.81,5926.49,612910000.0,-0.58%,10,2020
2,2020-10-13,5969.71,6001.38,6008.04,5949.97,786880000.0,-0.53%,10,2020
3,2020-10-12,6001.38,6016.65,6037.08,5995.11,600960000.0,-0.25%,10,2020
4,2020-10-09,6016.65,5978.03,6040.47,5978.03,676780000.0,0.65%,10,2020


##### remove percentage signs

In [113]:
# strip('%') removes percent signs from both ends of each entry; the values stay strings
historical['Change %'] = historical['Change %'].str.strip('%')

In [117]:
historical.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Month,Year
0,2020-10-15,5832.52,5935.06,5935.06,5786.2,665020000.0,-1.73,10,2020
1,2020-10-14,5935.06,5969.71,6009.81,5926.49,612910000.0,-0.58,10,2020
2,2020-10-13,5969.71,6001.38,6008.04,5949.97,786880000.0,-0.53,10,2020
3,2020-10-12,6001.38,6016.65,6037.08,5995.11,600960000.0,-0.25,10,2020
4,2020-10-09,6016.65,5978.03,6040.47,5978.03,676780000.0,0.65,10,2020


In [120]:
# `historical_FTSE` is a second name for the same DataFrame object as `historical` (no copy is made).
historical_FTSE = historical
historical_FTSE

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,Month,Year
0,2020-10-15,5832.52,5935.06,5935.06,5786.20,665020000.00,-1.73,10,2020
1,2020-10-14,5935.06,5969.71,6009.81,5926.49,612910000.00,-0.58,10,2020
2,2020-10-13,5969.71,6001.38,6008.04,5949.97,786880000.00,-0.53,10,2020
3,2020-10-12,6001.38,6016.65,6037.08,5995.11,600960000.00,-0.25,10,2020
4,2020-10-09,6016.65,5978.03,6040.47,5978.03,676780000.00,0.65,10,2020
...,...,...,...,...,...,...,...,...,...
4994,2001-01-09,6088.10,6149.60,6195.90,6066.40,,-1.00,1,2001
4995,2001-01-08,6149.60,6198.10,6212.40,6137.70,,-0.78,1,2001
4996,2001-01-05,6198.10,6185.60,6239.60,6155.00,,0.20,1,2001
4997,2001-01-04,6185.60,6039.90,6195.30,6039.90,,2.41,1,2001
