# Manchester United Analysis

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
pd.set_option('display.max_rows', None)
import plotly.offline as py
import plotly.graph_objects as go

In [3]:
# Load the FIFA 21 player table (semicolon-delimited CSV), indexed by player_id.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
df = pd.read_csv(
    r'C:\Users\Chongkyung\Documents\github\fifa_analysis\FIFA-21 Complete.csv',
    sep=';',
    index_col='player_id',
)

In [4]:
# Team names in the FIFA 21 player database carry trailing whitespace;
# strip it before grouping by club.
df = df.assign(team=df['team'].str.rstrip())

In [58]:
# Start with the raw FIFA 21 ratings: how does Manchester United compare to
# other clubs worldwide, and to its Premier League rivals in particular?
# By mean overall rating, Manchester United ranks 15th in the world;
# Man City and Liverpool are the only PL clubs with a higher mean.
team_stat = (
    df.groupby(['team'])['overall']
    .mean()
    .sort_values(ascending=False)
)
team_stat.head(15)

team
Juventus               83.043478
FC Bayern München      81.565217
Napoli                 79.730769
Real Madrid            79.606061
Paris Saint-Germain    78.866667
Inter                  78.576923
FC Barcelona           78.090909
Bayer 04 Leverkusen    77.916667
Manchester City        76.875000
SL Benfica             76.600000
Lazio                  76.500000
Liverpool              76.454545
Atlético Madrid        76.424242
Milan                  76.040000
Manchester United      76.030303
Name: overall, dtype: float64

In [35]:
# FIFA fans commonly treat an overall rating of at least 85 as "world class".
# By that metric Manchester United has only 3 world-class players, fewer than
# Man City's 11, Liverpool's 10, and even Tottenham's 5.
best_itw = df[df['overall'].gt(84)].sort_values('overall', ascending=False)
best_itw['team'].value_counts().head(15)

FC Barcelona           12
Manchester City        11
Real Madrid            11
Liverpool              10
Paris Saint-Germain     9
Juventus                8
FC Bayern München       8
Inter                   6
Tottenham Hotspur       5
Borussia Dortmund       5
Atlético Madrid         4
Manchester United       3
Napoli                  3
Lazio                   3
Milan                   2
Name: team, dtype: int64

In [7]:
# Average Position Analysis
# One Wikipedia season page per Manchester United season, 2009-10 through
# 2019-20. The URL encodes the season as "<start year>%E2%80%93<two-digit end year>".
links = [
    "https://en.wikipedia.org/wiki/{}%E2%80%93{:02d}_Manchester_United_F.C._season".format(
        start_year, (start_year + 1) % 100
    )
    for start_year in range(2009, 2020)
]
# Accumulator filled by league_pos_extractor: one list of positions per season.
league_pos = []

In [8]:
def league_pos_extractor(link, league):
    """Scrape one season page and append its list of league positions to `league`.

    All HTML tables on `link` are parsed with ``pd.read_html``. The positions
    are read from one column of one table, chosen as follows:

    * if the table at index 4 has exactly 38 rows, its last column is used;
    * otherwise, for the 2018-19 season page, the second-to-last column of
      the table at index 3 is used;
    * otherwise the last column of the table at index 3 is used.

    Parameters
    ----------
    link : str
        URL of the season page to parse.
    league : list
        Accumulator, mutated in place: the extracted column (as a plain
        Python list) is appended as a single element.

    Returns
    -------
    None
    """
    season_2018_19 = "https://en.wikipedia.org/wiki/2018%E2%80%9319_Manchester_United_F.C._season"
    tables = pd.read_html(link)

    # Decide which table/column holds the positions, then extract once.
    if len(tables[4]) == 38:
        table, column = tables[4], -1
    elif link == season_2018_19:
        table, column = tables[3], -2
    else:
        table, column = tables[3], -1

    league.append(table.iloc[:, column].values.tolist())

In [9]:
# Scrape every season page and collect one list of positions per season.
# The accumulator is reset here so that re-running this cell does not append
# a second copy of every season (which would produce extra rows and break the
# later assignment of the season column labels).
league_pos = []
for link in links:
    league_pos_extractor(link, league_pos)

# One row per season, one column per entry in that season's position list.
agg_lg_pos = pd.DataFrame(league_pos)

In [36]:
# Transpose so that each season becomes a column, then label the columns
# "09/10" ... "19/20" in the same order as the scraped season pages.
df2 = agg_lg_pos.transpose()
df2.columns = [
    "{:02d}/{:02d}".format(start_year % 100, (start_year + 1) % 100)
    for start_year in range(2009, 2020)
]

In [37]:
# Convert every season column to numeric league positions.
# The leading run of digits is extracted uniformly instead of slicing by
# character position: slicing to the first character (as was done for 15/16)
# would turn a two-digit position into its first digit. Casting to str first
# also makes the cell safe to re-run after the columns are already numeric.
for col in df2:
    leading_digits = df2[col].astype(str).str.extract(r'^\s*(\d+)', expand=False)
    df2[col] = pd.to_numeric(leading_digits)

In [13]:
# Re-label the rows 1..38 instead of the default 0-based index.
df2.index = pd.RangeIndex(1, 39)

In [14]:
# Display the table: rows are labelled 1-38, one column per season.
df2

Unnamed: 0,09/10,10/11,11/12,12/13,13/14,14/15,15/16,16/17,17/18,18/19,19/20
1,9,4,3,16,1,16,4,1,1,7,2
2,10,3,2,7,4,13,1,1,1,9,4
3,3,3,1,5,7,14,2,2,1,13,5
4,3,3,1,2,5,9,5,3,1,10,7
5,2,3,1,2,8,12,2,7,2,8,4
6,2,2,1,3,12,7,2,6,2,7,8
7,1,3,1,2,9,4,1,6,2,10,10
8,1,3,2,2,8,6,3,7,2,8,12
9,1,3,2,2,8,8,2,7,2,10,14
10,2,3,2,1,8,9,4,8,2,8,7


In [83]:
# Print the mean league position of each season, in column order.
for season in df2.columns:
    print(df2[season].mean())

2.210526315789474
1.6578947368421053
1.6578947368421053
1.868421052631579
7.0
5.2631578947368425
4.2631578947368425
5.473684210526316
1.9210526315789473
6.7368421052631575
6.2105263157894735


In [15]:
# Route DataFrame.plot() through plotly instead of the default backend.
pd.set_option("plotting.backend", "plotly")

In [24]:
# Line chart of league position over the season, one trace per season column.
fig = df2.plot(
    labels={"index": "Gameweek", "value": "League Position", "variable": "season"}
)
# Flip the y-axis so that 1st place is drawn at the top.
fig.update_layout(yaxis_autorange="reversed")

# NOTE(review): include_plotlyjs / output_type look like plotly.offline.plot
# options; confirm that fig.show() actually honours them.
fig.show(include_plotlyjs=False, output_type='div')

In [94]:
# Summary table: per-season average league position vs. final league position.
# The averages are computed from df2 here rather than pasted in from an
# earlier cell's printed output, so they stay correct if the data changes.
season_avg_position = df2.mean().tolist()
# Final league position per season, in the same order as df2.columns.
season_final_position = [2, 1, 2, 1, 7, 4, 5, 6, 2, 6, 3]

fig2 = go.Figure(
    data=[
        go.Table(
            header=dict(values=['Season', 'Average Position', 'Final Position']),
            cells=dict(values=[df2.columns, season_avg_position, season_final_position]),
        )
    ]
)
fig2.show()

# NOTE(review): this exports `fig` (the line chart), not `fig2` (the table
# built above); confirm which figure was meant to be written.
fig.write_html(r"C:\Users\Chongkyung\Desktop\fifa_fig1.html")

In [None]:
# We can see that since the 09/10 season, the team has usually finished close to the position it held for most of the season.
# The over-performance in the 19/20 season, from an average position of 6.2 to a final position of 3, can be attributed to Bruno Fernandes,
# which we will look into further.