In [23]:
import datetime as dt

import pandas as pd
import plotly.offline as pyo
import yfinance as yf
from pandas_datareader import data as pdr

### Date range for the analysis

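We begin by defining the start and end dates of the analysis window with pandas (`pd.Timestamp` and `pd.DateOffset`): the window covers the three years up to the moment the notebook is run.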

In [69]:
# Analysis window: the LOOKBACK_YEARS calendar years ending at the moment this cell runs.
# NOTE: `end` is the current wall-clock time, so re-running on another day shifts the window.
LOOKBACK_YEARS = 3  # length of the analysis window, in calendar years

end = pd.Timestamp.now()
start = end - pd.DateOffset(years=LOOKBACK_YEARS)

### Selecting the Stocks / Tickers to analyse

In an earlier evaluation we determined that the following stocks showed the highest growth up to the end of 2022:

In [70]:
# Ticker symbols to analyse (these are the symbols handed to yfinance for download)
stocks = [
    'BIRG.IR',
    'BA.L',
    'PSON.L',
    'GLEN.L',
    'SHEL.L',
]

### Retrieving the data

In [71]:
# Fetch the price history of every ticker in `stocks` between `start` and `end`
df = yf.download(tickers=stocks, start=start, end=end)

[*********************100%***********************]  5 of 5 completed


In [72]:
# Display the downloaded frame; pandas truncates the rendering to the first and last rows/columns
df

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L,...,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-03-17,415.076996,1.806753,112.807144,481.530151,962.150146,482.899994,2.044,137.960007,525.799988,1080.000000,...,508.799988,2.252,136.000000,535.200012,1099.000000,22440084.0,9880546.0,67594527.0,7184629.0,34751944.0
2020-03-18,410.263489,1.628199,105.398956,484.277588,864.866089,477.299988,1.842,128.899994,528.799988,970.799988,...,465.200012,2.000,130.020004,519.599976,1029.000000,12571134.0,4251833.0,63782209.0,5147031.0,30796382.0
2020-03-19,426.852844,1.536270,95.913879,476.401642,909.231934,496.600006,1.738,117.300003,520.200012,1020.599976,...,476.399994,1.934,125.040001,531.000000,955.299988,12589360.0,9474110.0,72589508.0,5799751.0,32261882.0
2020-03-20,403.988770,1.570744,97.778198,457.536072,946.292542,470.000000,1.777,119.580002,499.600006,1062.199951,...,520.599976,1.800,122.599998,544.599976,1103.400024,30250684.0,9423720.0,109381986.0,7286976.0,37025839.0
2020-03-23,377.256744,1.458485,91.989006,416.233307,1004.377930,438.899994,1.650,112.500000,454.500000,1127.400024,...,444.500000,1.600,111.239998,470.000000,1036.000000,17117813.0,4801698.0,83192340.0,8686120.0,33312190.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-13,908.000000,9.230000,458.700012,842.000000,2438.000000,908.000000,9.230,458.700012,842.000000,2438.000000,...,932.799988,9.966,476.100006,853.000000,2531.500000,21226357.0,5279365.0,64728209.0,2326354.0,19306371.0
2023-03-14,938.000000,9.598000,461.450012,842.200012,2469.500000,938.000000,9.598,461.450012,842.200012,2469.500000,...,912.799988,9.100,457.049988,843.799988,2422.500000,13388298.0,3358168.0,35466058.0,1777764.0,19015334.0
2023-03-15,911.599976,8.834000,412.000000,832.799988,2259.500000,911.599976,8.834,412.000000,832.799988,2259.500000,...,938.400024,9.540,458.649994,844.799988,2459.000000,8325112.0,5605872.0,130623268.0,2159254.0,48427247.0
2023-03-16,916.799988,9.048000,423.200012,838.000000,2209.000000,916.799988,9.048,423.200012,838.000000,2209.000000,...,913.599976,9.000,426.000000,837.799988,2300.000000,8058044.0,4786755.0,88278865.0,1802750.0,26462458.0


In [73]:
# Drop every date on which at least one column is missing a value, so all tickers
# share the same set of dates. Rebinding (rather than `inplace=True`) keeps the
# cell idempotent and avoids mutating the object an earlier cell displayed.
df = df.dropna()

# Remaining nulls per column — every count should be 0 after the drop
df.isna().sum()

Adj Close  BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
Close      BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
High       BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
Low        BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
Open       BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
Volume     BA.L       0
           BIRG.IR    0
           GLEN.L     0
           PSON.L     0
           SHEL.L     0
dtype: int64

### Understanding the dataframe structure

In [74]:
# Inspect the row index: a DatetimeIndex named 'Date' (see the output below)
df.index

DatetimeIndex(['2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20',
               '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26',
               '2020-03-27', '2020-03-30',
               ...
               '2023-03-06', '2023-03-07', '2023-03-08', '2023-03-09',
               '2023-03-10', '2023-03-13', '2023-03-14', '2023-03-15',
               '2023-03-16', '2023-03-17'],
              dtype='datetime64[ns]', name='Date', length=750, freq=None)

In [75]:
# Inspect the column labels: a two-level MultiIndex of (price field, ticker) pairs
df.columns

MultiIndex([('Adj Close',    'BA.L'),
            ('Adj Close', 'BIRG.IR'),
            ('Adj Close',  'GLEN.L'),
            ('Adj Close',  'PSON.L'),
            ('Adj Close',  'SHEL.L'),
            (    'Close',    'BA.L'),
            (    'Close', 'BIRG.IR'),
            (    'Close',  'GLEN.L'),
            (    'Close',  'PSON.L'),
            (    'Close',  'SHEL.L'),
            (     'High',    'BA.L'),
            (     'High', 'BIRG.IR'),
            (     'High',  'GLEN.L'),
            (     'High',  'PSON.L'),
            (     'High',  'SHEL.L'),
            (      'Low',    'BA.L'),
            (      'Low', 'BIRG.IR'),
            (      'Low',  'GLEN.L'),
            (      'Low',  'PSON.L'),
            (      'Low',  'SHEL.L'),
            (     'Open',    'BA.L'),
            (     'Open', 'BIRG.IR'),
            (     'Open',  'GLEN.L'),
            (     'Open',  'PSON.L'),
            (     'Open',  'SHEL.L'),
            (   'Volume',    'BA.L'),
            

### Accessing the attributes using the close price information

In [76]:
# Select the 'Close' group from the column MultiIndex, leaving one column per ticker
Close = df['Close']
Close.head()

Unnamed: 0_level_0,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-17,482.899994,2.044,137.960007,525.799988,1080.0
2020-03-18,477.299988,1.842,128.899994,528.799988,970.799988
2020-03-19,496.600006,1.738,117.300003,520.200012,1020.599976
2020-03-20,470.0,1.777,119.580002,499.600006,1062.199951
2020-03-23,438.899994,1.65,112.5,454.5,1127.400024


### Quick Insights 

In [77]:
# Summary statistics of the close prices, reporting the 10th, 50th and 90th percentiles
# NOTE(review): the recorded BA.L minimum (7.748) sits far below its 10th percentile (~486);
# this looks like a bad data point — confirm before relying on min/std for BA.L.
Close.describe(percentiles=[0.1,0.5,0.9])

Unnamed: 0,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L
count,750.0,750.0,750.0,750.0,750.0
mean,619.037054,4.853028,346.587413,725.135178,1714.682798
std,141.229384,2.267092,132.267084,142.989003,458.364447
min,7.748,1.33,112.5,423.5,900.0
10%,486.080005,1.7859,167.118004,531.0,1182.619995
50%,554.799988,5.079,333.875,752.699982,1529.599976
90%,825.0,7.6384,519.220013,916.659979,2363.15
max,938.0,10.69,578.099976,995.200012,2604.5


In [78]:
# Summary statistics restricted to rows dated within the 100 calendar days before `end`
# (calendar days, not trading days, so fewer than 100 rows are selected)
recent_cutoff = end - dt.timedelta(days=100)
Close.loc[Close.index > recent_cutoff].describe(percentiles=[0.1, 0.5, 0.9])

Unnamed: 0,BA.L,BIRG.IR,GLEN.L,PSON.L,SHEL.L
count,67.0,67.0,67.0,67.0,67.0
mean,867.426865,9.375642,525.703732,912.785073,2413.171642
std,32.898515,0.696054,36.316061,29.373958,105.789405
min,821.0,7.746,412.0,832.799988,2209.0
10%,832.319983,8.3896,477.059998,863.120007,2288.5
50%,856.799988,9.51,537.400024,921.400024,2379.0
90%,913.63999,10.211,558.980005,939.280017,2575.4
max,938.0,10.69,578.099976,954.400024,2604.5


## Plotting the Stocks

In [79]:
# Enable inline rendering of plotly figures in the notebook
# (`pyo` is imported in the notebook's top import cell)
pyo.init_notebook_mode(connected=True)

# From here on, every pandas `.plot()` call is drawn by plotly
pd.options.plotting.backend = 'plotly'

# Close-price history, one line per ticker; the title and labels let the figure stand alone.
# NOTE(review): all tickers share one y-axis, so BIRG.IR (single-digit prices) appears
# almost flat next to SHEL.L (prices in the thousands) — consider normalising the series.
Close.plot(
    title='Daily close price per ticker',
    labels={'value': 'Close price', 'variable': 'Ticker'},
)

### Percentage Change

As the plot shows, Shell stock has outperformed over the recent period, so we now examine the day-to-day percentage change of this particular stock.

In [80]:
# Distribution of SHEL.L day-over-day changes in close price (as fractions, e.g. 0.01 = 1%).
# pct_change() yields NaN for the first row (no previous close), so drop it explicitly
# before plotting rather than relying on the plotting backend to ignore it.
shel_returns = Close['SHEL.L'].pct_change().dropna()
shel_returns.plot(kind='hist', title='SHEL.L daily change in close price (pct_change)')