In [58]:
#Pulls from dataset "EPL Standings 2000-2022.csv" and converts it into a dataframe
#This is a dataset of all the English Premier League standings from 2000 - 2022
#We import pandas for our use of dataframes
#The class colour defined below holds ANSI escape codes; we reference it wherever we need coloured, bold or
#underlined printed text in order to better convey our analysis

import pandas as pd
import numpy as np

class colour:
    """ANSI escape sequences used to colour and emphasise printed text.

    Put one or more of these codes in front of a string and finish the
    string with ``END`` to switch the formatting back off.
    """

    # Foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text emphasis
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset code that ends any formatting started above
    END = '\033[0m'

# Load the standings file; its first column becomes the row index.
PL = pd.read_csv(filepath_or_buffer="EPL Standings 2000-2022.csv", index_col=0)
# A bare name on the last line lets the notebook render the dataframe.
PL

Unnamed: 0_level_0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Qualification or relegation
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-01,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...
2000-01,2,Arsenal,38,20,10,8,63,38,25,70,Qualification for the Champions League first g...
2000-01,3,Liverpool,38,20,9,9,71,39,32,69,Qualification for the Champions League third q...
2000-01,4,Leeds United,38,20,8,10,64,43,21,68,Qualification for the UEFA Cup first round[a]
2000-01,5,Ipswich Town,38,20,6,12,57,42,15,66,Qualification for the UEFA Cup first round[a]
...,...,...,...,...,...,...,...,...,...,...,...
2021-22,16,Everton,38,11,6,21,43,66,-23,39,Not Applicable
2021-22,17,Leeds United,38,9,11,18,42,79,-37,38,Not Applicable
2021-22,18,Burnley,38,7,14,17,34,53,-19,35,Relegation to the EFL Championship
2021-22,19,Watford,38,6,5,27,34,77,-43,23,Relegation to the EFL Championship


In [3]:
# View of the dataframe: print its first five rows
print(PL.head(n=5))

         Pos               Team  Pld   W   D   L  GF  GA  GD  Pts  \
Season                                                              
2000-01    1  Manchester United   38  24   8   6  79  31  48   80   
2000-01    2            Arsenal   38  20  10   8  63  38  25   70   
2000-01    3          Liverpool   38  20   9   9  71  39  32   69   
2000-01    4       Leeds United   38  20   8  10  64  43  21   68   
2000-01    5       Ipswich Town   38  20   6  12  57  42  15   66   

                               Qualification or relegation  
Season                                                      
2000-01  Qualification for the Champions League first g...  
2000-01  Qualification for the Champions League first g...  
2000-01  Qualification for the Champions League third q...  
2000-01      Qualification for the UEFA Cup first round[a]  
2000-01      Qualification for the UEFA Cup first round[a]  


In [4]:
# Count of rows and columns, printed as a (rows, columns) tuple
print((len(PL.index), len(PL.columns)))

(440, 11)


In [5]:
# Checking if there are any data gaps: count the missing values in each column
missing_values_count = PL.isna().sum()

# Show the counts for the first eleven columns
print(missing_values_count.iloc[:11])


Pos                            0
Team                           0
Pld                            0
W                              0
D                              0
L                              0
GF                             0
GA                             0
GD                             0
Pts                            0
Qualification or relegation    0
dtype: int64


In [6]:
# Data Cleaning
# Build a frame without repeated rows and print both shapes side by side;
# identical shapes mean that no duplicate rows were found. PL itself is not modified.
drop_duplicates = PL.loc[~PL.duplicated()]
print(PL.shape, drop_duplicates.shape)

(440, 11) (440, 11)


In [7]:
# Sorting & Filtering
# Work on an independent copy of PL so the original dataframe stays untouched
# and PL_Seasons can be altered without fear of any data loss.
PL_Seasons = PL.copy(deep=True)
print(PL_Seasons)

         Pos               Team  Pld   W   D   L  GF  GA  GD  Pts  \
Season                                                              
2000-01    1  Manchester United   38  24   8   6  79  31  48   80   
2000-01    2            Arsenal   38  20  10   8  63  38  25   70   
2000-01    3          Liverpool   38  20   9   9  71  39  32   69   
2000-01    4       Leeds United   38  20   8  10  64  43  21   68   
2000-01    5       Ipswich Town   38  20   6  12  57  42  15   66   
...      ...                ...  ...  ..  ..  ..  ..  ..  ..  ...   
2021-22   16            Everton   38  11   6  21  43  66 -23   39   
2021-22   17       Leeds United   38   9  11  18  42  79 -37   38   
2021-22   18            Burnley   38   7  14  17  34  53 -19   35   
2021-22   19            Watford   38   6   5  27  34  77 -43   23   
2021-22   20       Norwich City   38   5   7  26  23  84 -61   22   

                               Qualification or relegation  
Season                                   

In [8]:
# Simple way of pulling the first season out of the dataframe:
# take the first 20 rows by position.
result = PL_Seasons.iloc[:20]
print("First Season of the DataFrame:")
print(result)

First Season of the DataFrame:
         Pos               Team  Pld   W   D   L  GF  GA  GD  Pts  \
Season                                                              
2000-01    1  Manchester United   38  24   8   6  79  31  48   80   
2000-01    2            Arsenal   38  20  10   8  63  38  25   70   
2000-01    3          Liverpool   38  20   9   9  71  39  32   69   
2000-01    4       Leeds United   38  20   8  10  64  43  21   68   
2000-01    5       Ipswich Town   38  20   6  12  57  42  15   66   
2000-01    6            Chelsea   38  17  10  11  68  45  23   61   
2000-01    7         Sunderland   38  15  12  11  46  41   5   57   
2000-01    8        Aston Villa   38  13  15  10  46  43   3   54   
2000-01    9  Charlton Athletic   38  14  10  14  50  57  -7   52   
2000-01   10        Southampton   38  14  10  14  40  48  -8   52   
2000-01   11   Newcastle United   38  14   9  15  44  50  -6   51   
2000-01   12  Tottenham Hotspur   38  13  10  15  47  54  -7   49   
200

In [9]:
# Selecting by position
# Pick the cell at row position 7 and column position 7; the one-element
# slices keep the result as a 1x1 DataFrame rather than a bare value.
PL_Seasons.iloc[7:8, 7:8]

Unnamed: 0_level_0,GA
Season,Unnamed: 1_level_1
2000-01,43


In [10]:
# Selecting by Label & Position
# Look up the label of the second column by position, then select that column by
# label. This lists the team for every row (every season) of the dataframe.
PL_Seasons[PL_Seasons.columns[1]]

Season
2000-01    Manchester United
2000-01              Arsenal
2000-01            Liverpool
2000-01         Leeds United
2000-01         Ipswich Town
                 ...        
2021-22              Everton
2021-22         Leeds United
2021-22              Burnley
2021-22              Watford
2021-22         Norwich City
Name: Team, Length: 440, dtype: object

In [11]:
# A view of every team that finished a season with over 90 points.
# The comparison produces a True/False mask, and selecting with it is Boolean indexing.
PL_Seasons.loc[PL_Seasons['Pts'].gt(90)]

Unnamed: 0_level_0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Qualification or relegation
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2004-05,1,Chelsea,38,29,8,1,72,15,57,95,Qualification for the Champions League group s...
2005-06,1,Chelsea,38,29,4,5,72,22,50,91,Qualification for the Champions League group s...
2016-17,1,Chelsea,38,30,3,5,85,33,52,93,Qualification for the Champions League group s...
2017-18,1,Manchester City,38,32,4,2,106,27,79,100,Qualification for the Champions League group s...
2018-19,1,Manchester City,38,32,2,4,95,23,72,98,Qualification for the Champions League group s...
2018-19,2,Liverpool,38,30,7,1,89,22,67,97,Qualification for the Champions League group s...
2019-20,1,Liverpool,38,32,3,3,85,33,52,99,Qualification for the Champions League group s...
2021-22,1,Manchester City,38,29,6,3,99,26,73,93,Qualification for the Champions League group s...
2021-22,2,Liverpool,38,28,8,2,94,26,68,92,Qualification for the Champions League group s...


In [46]:
# This returns the mean of values
# Here we use the mean function to find the average of the wins (W) column.
# astype(int) drops the decimal part; the result is a one-element Series labelled "W".
PL_Average_W = PL_Seasons.loc[:, ["W"]].mean().astype(int)

print(PL_Average_W)


W    14
dtype: int32


In [13]:
# Here we use the median function on Points and Goals Against so the medians
# can be compared with the corresponding means.
PL_Seasons.loc[:, ["Pts", "GA"]].median()

Pts    48.0
GA     51.0
dtype: float64

In [14]:
# Below we use Boolean indexing and the sum function to add up the points Chelsea
# earned across every season in the dataframe.
ChelseaTotalPoints = PL_Seasons.loc[PL_Seasons['Team'].eq('Chelsea'), 'Pts'].sum()
print("Chelseas total Premier League points scoring tally from 2000-2022:",ChelseaTotalPoints,"pts")

Chelseas total Premier League points scoring tally from 2000-2022: 1665 pts


In [15]:
# Below we use Boolean indexing with the min and max functions to find Liverpool's
# lowest and highest points totals across the seasons in the dataframe.
liverpool_points = PL_Seasons.loc[PL_Seasons['Team'] == 'Liverpool', 'Pts']
liverpoolmin = liverpool_points.min()
liverpoolmax = liverpool_points.max()
print("Liverpool's lowest points scoring season:",liverpoolmin,"pts")
# The maximum is the highest-scoring season (the label previously said "lowest").
print("Liverpool's highest points scoring season:",liverpoolmax,"pts")

Liverpool's lowest points scoring season: 52 pts
Liverpool's lowest points scoring season: 99 pts


In [16]:
# .describe() gives data analysts a statistical summary of a dataframe's numeric
# columns; the quartiles requested here are the ones it reports by default.
PL_Seasons.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Pos,Pld,W,D,L,GF,GA,GD,Pts
count,440.0,440.0,440.0,440.0,440.0,440.0,440.0,440.0,440.0
mean,10.5,38.0,14.261364,9.477273,14.261364,50.863636,50.863636,0.0,52.240909
std,5.772845,0.0,5.987638,2.856385,5.54808,15.778205,12.89345,26.02627,17.097234
min,1.0,38.0,1.0,2.0,0.0,20.0,15.0,-69.0,11.0
25%,5.75,38.0,10.0,7.0,10.0,40.0,42.0,-19.0,40.75
50%,10.5,38.0,13.0,9.0,15.0,47.0,51.0,-6.0,48.0
75%,15.25,38.0,18.0,11.0,18.0,60.0,59.0,16.0,63.25
max,20.0,38.0,32.0,17.0,29.0,106.0,89.0,79.0,100.0


In [17]:
# In order to alter the Season values (Season is the index by default) we use .reset_index().
# This moves Season into an ordinary column (column 0) and numbers the rows with a
# new default integer index running from 0 to the length of the dataframe.
# The frame is rebuilt from PL instead of being reset in place so the cell can be
# re-run safely: resetting an already-reset frame in place would push the integer
# index into an extra 'index' column.
PL_Seasons = PL.reset_index()

print(PL_Seasons)

      Season  Pos               Team  Pld   W   D   L  GF  GA  GD  Pts  \
0    2000-01    1  Manchester United   38  24   8   6  79  31  48   80   
1    2000-01    2            Arsenal   38  20  10   8  63  38  25   70   
2    2000-01    3          Liverpool   38  20   9   9  71  39  32   69   
3    2000-01    4       Leeds United   38  20   8  10  64  43  21   68   
4    2000-01    5       Ipswich Town   38  20   6  12  57  42  15   66   
..       ...  ...                ...  ...  ..  ..  ..  ..  ..  ..  ...   
435  2021-22   16            Everton   38  11   6  21  43  66 -23   39   
436  2021-22   17       Leeds United   38   9  11  18  42  79 -37   38   
437  2021-22   18            Burnley   38   7  14  17  34  53 -19   35   
438  2021-22   19            Watford   38   6   5  27  34  77 -43   23   
439  2021-22   20       Norwich City   38   5   7  26  23  84 -61   22   

                           Qualification or relegation  
0    Qualification for the Champions League first g...

In [23]:

# Convert every season label (a string such as '2000-01') to the integer year in
# which that season started. The first four characters of the label are the start
# year, so a single vectorised step replaces one .loc assignment per season and
# keeps working if further seasons are added to the data.
# astype(str) makes the cell safe to re-run once the column already holds integers.
# The column ends up with a genuine integer dtype.
PL_Seasons['Season'] = PL_Seasons['Season'].astype(str).str[:4].astype(int)
print(PL_Seasons)

    Season  Pos               Team  Pld   W   D   L  GF  GA  GD  Pts  \
0     2000    1  Manchester United   38  24   8   6  79  31  48   80   
1     2000    2            Arsenal   38  20  10   8  63  38  25   70   
2     2000    3          Liverpool   38  20   9   9  71  39  32   69   
3     2000    4       Leeds United   38  20   8  10  64  43  21   68   
4     2000    5       Ipswich Town   38  20   6  12  57  42  15   66   
..     ...  ...                ...  ...  ..  ..  ..  ..  ..  ..  ...   
435   2021   16            Everton   38  11   6  21  43  66 -23   39   
436   2021   17       Leeds United   38   9  11  18  42  79 -37   38   
437   2021   18            Burnley   38   7  14  17  34  53 -19   35   
438   2021   19            Watford   38   6   5  27  34  77 -43   23   
439   2021   20       Norwich City   38   5   7  26  23  84 -61   22   

                           Qualification or relegation  
0    Qualification for the Champions League first g...  
1    Qualification fo

In [26]:
# Now we can use the integers in 'Season' to pick and choose which years we want to view.
# Because they are integers they can be compared with > < >= <= (or a range test)
# to pull multiple years at once.
# In the below example we pull Manchester United's rows for the seasons starting
# in 2008 through 2011 (both ends included).
PL_Seasons[PL_Seasons['Season'].between(2008, 2011) & PL_Seasons['Team'].eq('Manchester United')]

Unnamed: 0,Season,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Qualification or relegation
160,2008,1,Manchester United,38,28,6,4,68,24,44,90,Qualification for the Champions League group s...
181,2009,2,Manchester United,38,27,4,7,86,28,58,85,Qualification for the Champions League group s...
200,2010,1,Manchester United,38,23,11,4,78,37,41,80,Qualification for the Champions League group s...
221,2011,2,Manchester United,38,28,5,5,89,33,56,89,Qualification for the Champions League group s...


In [63]:
# Using the .groupby() function we can get the mean of each team's statistics over
# the 22 years and present it concisely.
# Season is an identifier rather than a statistic, so it is left out of the average.
# numeric_only=True skips text columns such as 'Qualification or relegation';
# without it recent pandas versions raise a TypeError when asked to average strings.
Teams_Grouped = PL_Seasons.drop(columns="Season").groupby("Team").mean(numeric_only=True)
Teams_Grouped

Unnamed: 0_level_0,Pos,Pld,W,D,L,GF,GA,GD,Pts
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Arsenal,3.772727,38.0,21.363636,8.772727,7.863636,70.954545,39.818182,31.136364,72.863636
Aston Villa,11.947368,38.0,11.842105,11.052632,15.105263,45.578947,53.105263,-7.526316,46.578947
Birmingham City,14.142857,38.0,10.428571,11.714286,15.857143,39.0,51.428571,-12.428571,43.0
Blackburn Rovers,11.636364,38.0,12.727273,10.0,15.272727,47.090909,53.818182,-6.727273,48.181818
Blackpool,19.0,38.0,10.0,9.0,19.0,55.0,78.0,-23.0,39.0
Bolton Wanderers,12.454545,38.0,12.0,10.0,16.0,45.0,55.727273,-10.727273,46.0
Bournemouth,13.8,38.0,11.2,8.6,18.2,48.2,66.0,-17.8,42.2
Bradford City,20.0,38.0,5.0,11.0,22.0,30.0,70.0,-40.0,26.0
Brentford,13.0,38.0,13.0,7.0,18.0,48.0,56.0,-8.0,46.0
Brighton & Hove Albion,14.4,38.0,9.6,13.0,15.4,38.0,51.6,-13.6,41.8


In [66]:
# Keep only the team, season and league position columns, then sort all three in
# descending order with team taking priority, then season, then position.
PL_Seasons[["Team", "Season", "Pos"]].sort_values(
    by=["Team", "Season", "Pos"], ascending=False
)

Unnamed: 0,Team,Season,Pos
429,Wolverhampton Wanderers,2021,10
412,Wolverhampton Wanderers,2020,13
386,Wolverhampton Wanderers,2019,7
366,Wolverhampton Wanderers,2018,7
239,Wolverhampton Wanderers,2011,20
...,...,...,...
81,Arsenal,2004,2
60,Arsenal,2003,1
41,Arsenal,2002,2
20,Arsenal,2001,1


In [71]:
# Manually creating a dataframe from lists describing a Manchester United starting eleven.

# One list per column; every list has eleven entries, one per player.
MU_Player_data = {
    'Name': [
        'David De Gea',
        'Harry Maguire',
        'Victor Lindelof',
        'Luke Shaw',
        'Diogo Dalot',
        'Fred',
        'Scott McTominay',
        'Bruno Fernandes',
        'Cristiano Ronaldo',
        'Marcus Rashford',
        'Jadon Sancho',
    ],
    'Age': [31, 29, 28, 27, 23, 29, 25, 27, 37, 24, 22],
    # Every player belongs to the same club, so the name is simply repeated.
    'Team': ['Manchester United'] * 11,
}

# Turn the dictionary of lists into a DataFrame.
MU_Player = pd.DataFrame(data=MU_Player_data)

# Display the result.
MU_Player

Unnamed: 0,Name,Age,Team
0,David De Gea,31,Manchester United
1,Harry Maguire,29,Manchester United
2,Victor Lindelof,28,Manchester United
3,Luke Shaw,27,Manchester United
4,Diogo Dalot,23,Manchester United
5,Fred,29,Manchester United
6,Scott McTominay,25,Manchester United
7,Bruno Fernandes,27,Manchester United
8,Cristiano Ronaldo,37,Manchester United
9,Marcus Rashford,24,Manchester United


In [75]:
# Below is an inner join of the standings with the player table, matched on the
# 'Team' column (the only column the two dataframes share).
# In an inner join you lose rows that don't match in the other DataFrame's key column,
# so only Manchester United's rows survive.
# Each Manchester United season row is repeated once for every player listed in MU_Player.
MU_inner_merged = PL_Seasons.merge(MU_Player, how="inner", on="Team")
MU_inner_merged

Unnamed: 0,Season,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Qualification or relegation,Name,Age
0,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,David De Gea,31
1,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Harry Maguire,29
2,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Victor Lindelof,28
3,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Luke Shaw,27
4,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Diogo Dalot,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237,2021,6,Manchester United,38,16,10,12,57,57,0,58,Qualification for the Europa League group stag...,Scott McTominay,25
238,2021,6,Manchester United,38,16,10,12,57,57,0,58,Qualification for the Europa League group stag...,Bruno Fernandes,27
239,2021,6,Manchester United,38,16,10,12,57,57,0,58,Qualification for the Europa League group stag...,Cristiano Ronaldo,37
240,2021,6,Manchester United,38,16,10,12,57,57,0,58,Qualification for the Europa League group stag...,Marcus Rashford,24


In [74]:
# Below is an outer merge, requested through the how parameter, which gives a full outer join.
# In an outer join all rows from both DataFrames are present in the new DataFrame;
# no rows are lost, even when they don't have a match in the other DataFrame.
# Manchester United's season rows are paired with the listed players, while every
# other team's rows are kept with NaN (Not a Number) in the player columns.
MU_outer_merged = PL_Seasons.merge(MU_Player, how="outer", on="Team")
MU_outer_merged

Unnamed: 0,Season,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts,Qualification or relegation,Name,Age
0,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,David De Gea,31.0
1,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Harry Maguire,29.0
2,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Victor Lindelof,28.0
3,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Luke Shaw,27.0
4,2000,1,Manchester United,38,24,8,6,79,31,48,80,Qualification for the Champions League first g...,Diogo Dalot,23.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,2020,16,Brighton & Hove Albion,38,9,14,15,40,46,-6,41,Not Applicable,,
656,2021,9,Brighton & Hove Albion,38,12,15,11,42,44,-2,51,Not Applicable,,
657,2017,16,Huddersfield Town,38,9,10,19,28,58,-30,37,Not Applicable,,
658,2018,20,Huddersfield Town,38,3,7,28,22,76,-54,16,Relegation to the EFL Championship,,


In [60]:
# Below we introduce a for loop which helps minimise the amount of code needed in the analysis.
# The loop walks through the season, team and wins of every row of the dataframe.
# For each row an if / elif checks whether the team's wins that season were above or
# below the Premier League average of wins.
# This average (PL_Average_W) was computed earlier in the project as a one-element
# Series holding a whole number, so its single value is taken out once before the loop.
# If the team had above-average wins the loop prints a sentence stating the year, the
# team name and that they won more than the average; below-average wins print "less".
# A team whose wins exactly equal the whole-number average prints nothing.
# The colour class is used to emphasise the result and help the readability of the sentence.

average_wins = PL_Average_W.iloc[0]

for season, team, wins in zip(PL_Seasons['Season'], PL_Seasons['Team'], PL_Seasons['W']):
    if wins > average_wins:
        tint, word = colour.GREEN, "more"
    elif wins < average_wins:
        tint, word = colour.RED, "less"
    else:
        continue
    # The class is named `colour`; the previous `color.BOLD` referred to an undefined name.
    print("In",season,team,"won ",tint + colour.BOLD + colour.UNDERLINE + word + colour.END," games than the 22 year Premier League average.")

In 2000 Manchester United won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Arsenal won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Liverpool won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Leeds United won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Ipswich Town won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Chelsea won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Sunderland won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2000 Aston Villa won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2000 Tottenham Hotspur won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2000 Middlesbrough won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2000 West Ham United won  [91m[1m[4mles

In 2007 West Ham United won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Tottenham Hotspur won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Newcastle United won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Middlesbrough won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Wigan Athletic won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Sunderland won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Bolton Wanderers won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Fulham won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Reading won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Birmingham City won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2007 Derby County won  [91m

In 2014 Southampton won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2014 Swansea City won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2014 Stoke City won  [92m[1m[4mmore[0m  games than the 22 year Premier League average.
In 2014 Crystal Palace won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 Everton won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 West Ham United won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 West Bromwich Albion won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 Leicester City won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 Newcastle United won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 Sunderland won  [91m[1m[4mless[0m  games than the 22 year Premier League average.
In 2014 Aston Villa won  [91m

In [76]:
# Here we use Alpha Vantage in order to retrieve live data from their online API.
# In this example we pull the intraday (5 minute) stock information of the ticker symbol MANU.
# This ticker symbol is Manchester United's stock, which at the time of writing this was 11.16 USD.
# The API key is read from the ALPHAVANTAGE_API_KEY environment variable so the secret is
# never stored in the notebook. A key that was previously written into this cell should be
# treated as exposed and replaced.

import os

import requests

api_key = os.environ["ALPHAVANTAGE_API_KEY"]  # raises KeyError if the variable is not set

url = 'https://www.alphavantage.co/query'
query = {
    'function': 'TIME_SERIES_INTRADAY',
    'symbol': 'MANU',
    'interval': '5min',
    'apikey': api_key,
}
# timeout stops the cell from hanging forever if the service does not answer
r = requests.get(url, params=query, timeout=30)
r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
data = r.json()

print(data)

{'Meta Data': {'1. Information': 'Intraday (5min) open, high, low, close prices and volume', '2. Symbol': 'MANU', '3. Last Refreshed': '2022-07-19 16:05:00', '4. Interval': '5min', '5. Output Size': 'Compact', '6. Time Zone': 'US/Eastern'}, 'Time Series (5min)': {'2022-07-19 16:05:00': {'1. open': '11.1600', '2. high': '11.1600', '3. low': '11.1600', '4. close': '11.1600', '5. volume': '883'}, '2022-07-19 16:00:00': {'1. open': '11.1800', '2. high': '11.1800', '3. low': '11.1300', '4. close': '11.1600', '5. volume': '44937'}, '2022-07-19 15:55:00': {'1. open': '11.1700', '2. high': '11.1800', '3. low': '11.1600', '4. close': '11.1800', '5. volume': '6854'}, '2022-07-19 15:50:00': {'1. open': '11.1700', '2. high': '11.1750', '3. low': '11.1650', '4. close': '11.1700', '5. volume': '3645'}, '2022-07-19 15:45:00': {'1. open': '11.1800', '2. high': '11.1850', '3. low': '11.1700', '4. close': '11.1750', '5. volume': '4014'}, '2022-07-19 15:40:00': {'1. open': '11.1750', '2. high': '11.1750'

In [192]:
#This user defined function checks to see if the team typed into the function played in the premier league.
#This function uses the conditional statement if to check the dataframe for the function's argument

def print_player_names(i):
    """Print whether the team name ``i`` appears in the 'Team' column of PL_Seasons.

    The name must match an entry of the column exactly. The message is printed
    in bold green when the team is found and in bold red when it is not.
    Nothing is returned.
    """
    # The formatting class is `colour`; `color` is not defined in this notebook.
    if (i == PL_Seasons['Team']).any():
        print("This team", colour.GREEN + colour.BOLD + "did" + colour.END, "play in the premier league.")
    else:
        print("This team", colour.RED + colour.BOLD + "didn't" + colour.END, "play in the premier league.")
    
    

In [193]:
# An example of a team that appears in the Premier League data
print_player_names(i='Chelsea')

This team [92m[1mdid[0m play in the premier league.


In [194]:
# An example of a team that does not appear in the Premier League data
print_player_names(i='Barcelona')

This team [91m[1mdidn't[0m play in the premier league.


In [202]:
# Histogram with one bin per team, built from the W (wins) column of PL_Seasons
import plotly.express as px
df = PL_Seasons
fig = px.histogram(data_frame=df, x="Team", y="W")
fig.show()

In [219]:
# Using Plotly we can visualise the dataframe in a multitude of different ways.
# Firstly, below is a histogram of the total wins by each team that has participated
# in the Premier League.
# By simply hovering over each bin the user can see exactly how many wins each team had.

import plotly.express as px
df = PL
fig = px.histogram(data_frame=df, x="Team", y="W", title="Total Wins")
fig.show()

In [232]:
# Secondly, below is a bar chart of the total wins by each team that has participated
# in the Premier League.
# Hovering over a segment shows how many wins the team had in that season, starting
# with the earliest season they participated in at the bottom.
# Colouring by Pts also shows visually how each team's points tally varied by season;
# the legend to the right of the graph shows that higher tallies display brighter.
# hover_data expands the tooltip to the full W/D/L/Points data for each season.

fig = px.bar(
    data_frame=PL_Seasons,
    x='Team',
    y='W',
    color="Pts",
    hover_data=['Team','Season','Pos', 'W', 'D', 'L', 'Pts'],
    title="Breakdown of Total Wins by Season",
)
fig.show()

In [240]:
# Here scatter graphs compare league Position, Goal Difference, Goals Against and
# Goals For with one another.
# The colours correspond to the points gained, with more points giving brighter datapoints.

fig = px.scatter_matrix(
    data_frame=PL_Seasons,
    dimensions=["Pos", "GD", "GA", "GF"],
    color="Pts",
    hover_data=['Team','Season', 'GA', 'GF', 'Pts','Pos'],
)
fig.show()
fig.show()