# Soccer Analytics

In [None]:
# import Python libraries

import pandas as pd
from pandas import read_csv
import plotly.graph_objects as go
import plotly.express as px

from IPython.display import YouTubeVideo

## Example: anonymized professional soccer event data
Data source: https://github.com/metrica-sports/sample-data

In [None]:
# Raw event data for Sample_Game_1 from the metrica-sports sample-data repository.
events_url = "https://raw.githubusercontent.com/metrica-sports/sample-data/master/data/Sample_Game_1/Sample_Game_1_RawEventsData.csv"
df_soccer = pd.read_csv(events_url)
# Bare last expression so the notebook renders the frame.
df_soccer

In [None]:
# Scatter every event at its 'Start X' / 'Start Y' position, colored by event type;
# the subtype is shown on hover.
px.scatter(
    df_soccer,
    x='Start X',
    y='Start Y',
    color='Type',
    hover_data=['Subtype'],
    title='All events during the game by on-field location',
)

In [None]:
# Number of rows per event type, as a two-column frame ('Type', 'Count').
events_per_type = df_soccer.groupby("Type").size()
Count = events_per_type.reset_index(name="Count")
# Display the tally from most to least frequent (Count itself is left unsorted).
Count.sort_values(by='Count', ascending=False)

In [None]:
# Bar chart of the per-type event tally; note that events_df holds a plotly figure.
events_df = px.bar(
    Count,
    x='Type',
    y='Count',
    color="Type",
    title='Total counts for types of events',
)
# Order the bars from the largest total to the smallest.
events_df.update_layout(xaxis_categoryorder='total descending')

In [None]:
# One marker per event: start time on the x-axis, event type on the y-axis,
# colored by team, with the subtype shown on hover.
px.scatter(
    df_soccer,
    x='Start Time [s]',
    y='Type',
    color='Team',
    hover_data=['Subtype'],
    title="Timeline of events by type for Away and Home",
)

In [None]:
# Keep only rows where the team is 'Home' and the 'From' column is 'Player10'.
is_home_player10 = (df_soccer['Team'] == 'Home') & (df_soccer['From'] == 'Player10')
px.scatter(
    df_soccer[is_home_player10],
    x='Start Time [s]',
    y='Type',
    color='Type',
    title='Timeline of events by type for player 10 on Home team',
)

In [None]:
# Home-team PASS events recorded in period 2.
home_second_half_passes = df_soccer[
    (df_soccer['Team'] == 'Home')
    & (df_soccer['Period'] == 2)
    & (df_soccer['Type'] == 'PASS')
]
# 'Period' is constant (2) after the filter, so using it for size gives every
# marker the same size, capped by size_max.
px.scatter(
    home_second_half_passes,
    x='Start X',
    y='Start Y',
    size='Period',
    size_max=6,
    title='Home team passes, second half',
)

## Accessing UEFA Champions League data to create graphs with Python

## Inputting data from a table to create a bar graph

### 🔎 Which techniques lead to goal scoring?
Data source: https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=goaltypes/index.html

In [None]:
# Parse the HTML tables on the UEFA goal-types statistics page and keep the first one.
# pandas is already imported as `pd` in the notebook's import cell, so the
# duplicate mid-notebook import has been removed.
url = 'https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=goaltypes/index.html'
df1 = pd.read_html(url)[0]
df1

In [None]:
# Bayern's goal tally per goal type, entered by hand.
bayern_goals = {'left foot': 4, 'right foot': 13, 'header': 5, 'other': 0}
labels = list(bayern_goals)
values = list(bayern_goals.values())
px.bar(x=labels, y=values, title='Bayern: Type of goal')

## Inputting data from a table to create a double bar graph

### 🔎 Comparing teams: Do similar techniques lead to goal scoring?

In [None]:
goal_type = ['left foot', 'right foot', 'header', 'other']

# Goals per club, listed in the same order as goal_type.
goals_by_club = {
    'Bayern': [4, 13, 5, 0],
    'Barcelona': [6, 10, 1, 1],
}

# One bar trace per club, all sharing the goal-type categories on the x-axis.
bar_goal_type = go.Figure(
    data=[go.Bar(name=club, x=goal_type, y=goals) for club, goals in goals_by_club.items()]
)

bar_goal_type.update_layout(title_text='Bayern versus Barcelona: Type of goal')

## Inputting data to create a circle graph

### 🔎 How do on-target attempts compare with unsuccessful goal attempts?

In [None]:
# Parse the HTML tables on the UEFA goal-attempts statistics page and keep the first one.
# pandas is already imported as `pd` in the notebook's import cell, so the
# duplicate mid-notebook import has been removed.
url = 'https://www.uefa.com/uefachampionsleague/season=2021/statistics/round=2001252/clubs/kind=attempts/index.html'
df2 = pd.read_html(url)[0]
df2

In [None]:
# Bayern's goal attempts by outcome, entered by hand.
bayern_attempts = {
    'On target': 44,
    'Off target': 42,
    'Blocked': 34,
    'Against woodwork': 4,
}
px.pie(
    names=list(bayern_attempts),
    values=list(bayern_attempts.values()),
    title='Bayern: Goal attempts',
)

### 🔎 How do ball possession and scoring relate?

### Making a csv file
Data source: https://www.uefa.com/uefachampionsleague/standings/
<br>Data were collected for the group phase (6 games per team) for the 2020-2021 season.

In [None]:
%%writefile possession.csv
Total goals,Goal difference,Average ball possession (%),Team
18,16,61,Bayern
16,11,57,Barcelona
16,7,44,Monchengladbach
15,5,50,Man. United
14,12,54,Chelsea
14,10,51,Juventus
13,12,59,Man. City
13,7,54,Paris
12,7,56,Dortmund
11,2,58,Real Madrid
11,-1,51,Leipzig
11,4,47,Lazio
10,7,53,Liverpool
10,7,41,Porto
10,-7,48,RB Salzburg
10,2,47,Atalanta
9,1,57,Sevilla
8,-2,51,Club Brugge
7,0,55,Ajax
7,-2,51,Inter Milan
7,-1,50,Atletico Madrid
7,-11,45,Istanbul Basaksehir
6,-5,40,Krasnodar
5,-12,47,Ferencvaros
5,-7,47,Shakhtar Donetsk
5,-5,42,Lokomotiv Moskva
4,-9,47,Zenit
4,-9,46,Midtjylland
4,-9,45,Dynamo Kyiv
3,-8,50,Rennes
2,-8,50,Olympiacos
2,-11,50,Marseille

In [None]:
# Load the possession.csv file created by the %%writefile cell.
possession_csv = 'possession.csv'
possession_df = pd.read_csv(possession_csv)
# Show the teams from highest to lowest average possession (possession_df keeps its file order).
possession_df.sort_values(by='Average ball possession (%)', ascending=False)

In [None]:
# Smallest value in the average-possession column.
possession_pct = possession_df['Average ball possession (%)']
possession_pct.min()

In [None]:
# Largest value in the average-possession column.
possession_pct = possession_df['Average ball possession (%)']
possession_pct.max()

In [None]:
# Spread between the highest and lowest average-possession values.
# The result is stored under a descriptive name rather than `range`, so that
# Python's built-in range() stays usable in every cell run after this one.
possession_pct = possession_df['Average ball possession (%)']
possession_range = possession_pct.max() - possession_pct.min()
print(possession_range)

In [None]:
# Median of the average-possession column.
possession_pct = possession_df['Average ball possession (%)']
possession_pct.median()

In [None]:
# Arithmetic mean of the average-possession column.
possession_pct = possession_df['Average ball possession (%)']
possession_pct.mean()

In [None]:
# Bar chart of average possession per team; note that bar_df holds a plotly figure.
bar_df = px.bar(
    possession_df,
    x='Team',
    y='Average ball possession (%)',
    title='Average ball possession (%) by team',
)
# Order the bars from the highest value to the lowest.
bar_df.update_layout(xaxis_categoryorder='total descending')

In [None]:
# Possession versus total goals, one point per team (team name shown on hover).
# trendline="ols" asks plotly express to overlay a fitted regression line.
scatter_total_df = px.scatter(
    possession_df,
    x="Average ball possession (%)",
    y="Total goals",
    hover_data=["Team"],
    trendline="ols",
    title="Relationship between average ball possession (%) and total goals",
)
scatter_total_df.show()

In [None]:
# Possession versus goal difference; marker size encodes total goals and
# each team gets its own color.
scatter_difference_df = px.scatter(
    possession_df,
    x="Average ball possession (%)",
    y="Goal difference",
    color="Team",
    size="Total goals",
    title="Relationship between average ball possession (%) and goal difference by team",
)
scatter_difference_df.show()