In [2]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt

# Load the data
# Each row is one match; the analysis below reads the HomeTeam, AwayTeam,
# FullTimeResult, FullTimeHomeTeamGoals and FullTimeAwayTeamGoals columns.
df = pd.read_csv("arsenal_data_since_2019.csv")

# Show the first few rows
# A bare `df.head()` in the middle of a cell/script is evaluated and thrown
# away (only a cell's final expression is rendered), so print it explicitly.
print(df.head())

# Separate home and away games
# Split the fixtures by the side Arsenal played on.
played_at_home = df['HomeTeam'] == 'Arsenal'
played_away = df['AwayTeam'] == 'Arsenal'
home_games = df[played_at_home]
away_games = df[played_away]

# Calculate results
# FullTimeResult is compared against 'H' (home side won), 'A' (away side won)
# and 'D' (draw), so which code counts as an Arsenal win depends on the venue.
home_results = home_games['FullTimeResult']
away_results = away_games['FullTimeResult']

home_wins = int((home_results == 'H').sum())
away_wins = int((away_results == 'A').sum())
home_draws = int((home_results == 'D').sum())
away_draws = int((away_results == 'D').sum())
home_losses = int((home_results == 'A').sum())
away_losses = int((away_results == 'H').sum())

# Report the six tallies, one per line.
result_summary = (
    ("Total Number of Home Wins:", home_wins),
    ("Total Number of Away Wins:", away_wins),
    ("Total Number of Home Draws:", home_draws),
    ("Total Number of Away Draws:", away_draws),
    ("Total Number of Home Losses:", home_losses),
    ("Total Number of Away Losses:", away_losses),
)
for message, tally in result_summary:
    print(message, tally)

# Bar chart of results
# One bar per venue/outcome combination.
labels = ['Home Wins', 'Away Wins', 'Home Draws', 'Away Draws', 'Home Losses', 'Away Losses']
values = [home_wins, away_wins, home_draws, away_draws, home_losses, away_losses]

# Open a dedicated figure (as the distribution charts further down do) so the
# bars are never drawn onto axes left over from an earlier plot.
plt.figure()
plt.bar(labels, values)
plt.xlabel('Result')
plt.ylabel('Count')
plt.title('Arsenal Wins, Draws, and Losses After December 20, 2019')
# Tilt the category names so the six labels do not run into each other.
plt.xticks(rotation=45, ha='right')
plt.show()

# Goals scored and conceded
# In home fixtures Arsenal's goals are in the home-team column and the
# opponent's in the away-team column; in away fixtures it is the reverse.
home_goals_scored = home_games['FullTimeHomeTeamGoals'].sum()
away_goals_scored = away_games['FullTimeAwayTeamGoals'].sum()
home_goals_conceded = home_games['FullTimeAwayTeamGoals'].sum()
away_goals_conceded = away_games['FullTimeHomeTeamGoals'].sum()

labels = ['Home Scored', 'Away Scored', 'Home Conceded', 'Away Conceded']
values = [home_goals_scored, away_goals_scored, home_goals_conceded, away_goals_conceded]

# Start a new figure before drawing. plt.show() does not always discard the
# previous figure (e.g. non-interactive backends or non-blocking show), and
# without this the goal bars would be added to whichever figure is still
# current instead of getting a chart of their own.
plt.figure()
plt.bar(labels, values)
plt.xlabel('Goals')
plt.ylabel('Count')
plt.title('Arsenal Goals Scored and Conceded (Home and Away)')
# Tilt the category names so they stay readable.
plt.xticks(rotation=45, ha='right')
plt.show()

print("Total number of goals scored at home:", home_goals_scored)
print("Total number of goals scored away from home:", away_goals_scored)

# Win distribution by goal advantage
# An Arsenal win is a home fixture with result 'H' or an away fixture with 'A'.
won_at_home = (df['HomeTeam'] == 'Arsenal') & (df['FullTimeResult'] == 'H')
won_away = (df['AwayTeam'] == 'Arsenal') & (df['FullTimeResult'] == 'A')
wins = df[won_at_home | won_away].copy()

# Margin of victory: absolute value makes it independent of the venue.
win_margin = wins['FullTimeHomeTeamGoals'] - wins['FullTimeAwayTeamGoals']
wins['GoalDifference'] = win_margin.abs()

# Number of wins per margin, then each margin's share of all wins in percent.
goal_advantage_counts = wins.groupby('GoalDifference').size()
total_wins = goal_advantage_counts.sum()
goal_advantage_percentages = goal_advantage_counts / total_wins * 100

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(goal_advantage_percentages.index, goal_advantage_percentages.values)
ax.set_xlabel('Goal Advantage')
ax.set_ylabel('Percentage of Wins')
ax.set_title('Arsenal Win Distribution by Goal Advantage (Since Dec 20, 2019)')
# Annotate every bar with its percentage, placed one unit above the bar top.
for margin, share in zip(goal_advantage_percentages.index, goal_advantage_percentages.values):
    ax.text(margin, share + 1, f'{share:.1f}%', ha='center')
plt.show()

# Loss distribution by goal difference
# An Arsenal loss is a home fixture with result 'A' or an away fixture with 'H'.
lost_at_home = (df['HomeTeam'] == 'Arsenal') & (df['FullTimeResult'] == 'A')
lost_away = (df['AwayTeam'] == 'Arsenal') & (df['FullTimeResult'] == 'H')
losses = df[lost_at_home | lost_away].copy()

# Margin of defeat: absolute value makes it independent of the venue.
loss_margin = losses['FullTimeHomeTeamGoals'] - losses['FullTimeAwayTeamGoals']
losses['GoalDifference'] = loss_margin.abs()

# Number of losses per margin, then each margin's share of all losses in percent.
loss_goal_diff_counts = losses.groupby('GoalDifference').size()
total_losses = loss_goal_diff_counts.sum()
loss_goal_diff_percentages = loss_goal_diff_counts / total_losses * 100

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(loss_goal_diff_percentages.index, loss_goal_diff_percentages.values)
ax.set_xlabel('Goal Difference in Losses')
ax.set_ylabel('Percentage of Losses')
ax.set_title('Arsenal Loss Distribution by Goal Difference (Since Dec 20, 2019)')
# Annotate every bar with its percentage, placed one unit above the bar top.
for margin, share in zip(loss_goal_diff_percentages.index, loss_goal_diff_percentages.values):
    ax.text(margin, share + 1, f'{share:.1f}%', ha='center')
plt.show()

# Also print the raw number of losses for each margin.
print(loss_goal_diff_counts)

ModuleNotFoundError: No module named 'pandas'