# Premier League Analysis
## Imports

In [1004]:
import pandas as pd
import altair as alt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import ConfusionMatrixDisplay, precision_score, recall_score
# NOTE(review): this binds the top-level matplotlib package to `plt`, not
# matplotlib.pyplot, so pyplot functions are not reachable as plt.<name> —
# confirm whether `import matplotlib.pyplot as plt` was intended.
import matplotlib as plt

## Setup

In [1005]:
# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.set_option("display.max_columns", None)

## Data Sourcing

In [1006]:
# Read the match results from the CSV file (path is relative to the working directory).
premier_league_data = pd.read_csv(filepath_or_buffer="premier_league_data.csv")

## Data Cleaning

In [1007]:
# Preview the first five rows of the loaded data.
premier_league_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,season,match_week,home_team,away_team,score
0,0,2012,1,Arsenal,Sunderland,0 - 0
1,1,2012,1,Fulham,Norwich City,5 - 0
2,2,2012,1,Queens Park Rangers,Swansea City,0 - 5
3,3,2012,1,Reading,Stoke City,1 - 1
4,4,2012,1,West Bromwich Albion,Liverpool,3 - 0


### Remove Unnamed Column

In [1008]:
# Drop the "Unnamed: 0" column. errors="ignore" keeps this cell safe to re-run:
# without it a second execution raises KeyError because the column is already gone.
premier_league_data = premier_league_data.drop(columns=["Unnamed: 0"], errors="ignore")

### Add home_score Column

In [1009]:
# Take the text before the " - " separator in "score" as the home goals (still a
# string at this point). The vectorised .str accessor replaces the per-row Python
# lambda; regex=False makes pandas treat " - " as a literal separator.
premier_league_data["home_score"] = (
    premier_league_data["score"].str.split(" - ", regex=False).str[0]
)

### Add away_score Column

In [1010]:
# Take the text after the " - " separator in "score" as the away goals (still a
# string at this point). The vectorised .str accessor replaces the per-row Python
# lambda; regex=False makes pandas treat " - " as a literal separator.
premier_league_data["away_score"] = (
    premier_league_data["score"].str.split(" - ", regex=False).str[1]
)

### Alter home_score and away_score Type

In [1011]:
# Convert the home goals from text to integers so they can be compared numerically.
premier_league_data["home_score"] = premier_league_data["home_score"].astype(int)

In [1012]:
# Convert the away goals from text to integers so they can be compared numerically.
premier_league_data["away_score"] = premier_league_data["away_score"].astype(int)

### Add winner Column

In [1013]:
def find_winner(x) -> str:
    """Name the winner of a single match.

    Args:
        x: A row-like mapping with the keys "home_score", "away_score",
            "home_team" and "away_team".

    Returns:
        The value of "home_team" when the home score is higher, the value of
        "away_team" when the away score is higher, and "Draw" otherwise.
    """
    home_goals = x["home_score"]
    away_goals = x["away_score"]

    if home_goals > away_goals:
        winner = x["home_team"]
    elif home_goals < away_goals:
        winner = x["away_team"]
    else:
        # Neither side is strictly ahead.
        winner = "Draw"
    return winner

In [1014]:
# Run find_winner on each row (axis="columns" passes one row at a time) and
# store the result in a new "winner" column.
premier_league_data["winner"] = premier_league_data.apply(find_winner, axis="columns")

## Data Exploration

In [1015]:
# Preview the first five rows, now including the derived score and winner columns.
premier_league_data.head(n=5)

Unnamed: 0,season,match_week,home_team,away_team,score,home_score,away_score,winner
0,2012,1,Arsenal,Sunderland,0 - 0,0,0,Draw
1,2012,1,Fulham,Norwich City,5 - 0,5,0,Fulham
2,2012,1,Queens Park Rangers,Swansea City,0 - 5,0,5,Swansea City
3,2012,1,Reading,Stoke City,1 - 1,1,1,Draw
4,2012,1,West Bromwich Albion,Liverpool,3 - 0,3,0,West Bromwich Albion


In [1016]:
# Count wins per team, then discard the "Draw" entries. The index and the value
# column are named explicitly so the "winner" / "count" columns do not depend on
# the automatic naming of value_counts(), which changed in pandas 2.0.
win_count = (
    premier_league_data["winner"]
    .value_counts()
    .rename_axis("winner")
    .reset_index(name="count")
    .loc[lambda counts: counts["winner"] != "Draw"]
)

In [1017]:
# Bar chart of win_count: one bar per "winner" value, bar height from "count".
alt.Chart(data=win_count).mark_bar().encode(
    alt.X("winner"),
    alt.Y("count"),
)