# Data Exploration

This notebook performs exploratory data analysis on player and fixture data. The goal is to visualize trends and surface insights that can inform team selection strategies.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read both input tables; the paths are resolved relative to the working directory.
players_df = pd.read_csv('../data/players.csv')
fixtures_df = pd.read_csv('../data/fixtures.csv')

# Preview the first five player records as the cell output.
players_df.head(n=5)

In [None]:
# Preview the first five fixture records as the cell output.
fixtures_df.head(n=5)

In [None]:
# Histogram (30 bins) of the `now_cost` column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
cost_ax = sns.histplot(players_df['now_cost'], bins=30, kde=True)
cost_ax.set_title('Distribution of Player Costs')
cost_ax.set_xlabel('Cost (in tenths of millions)')
cost_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Box plot of `expected_points_next_5`, one box per `element_type` value.
plt.figure(figsize=(10, 6))
position_ax = sns.boxplot(
    data=players_df,
    x='element_type',
    y='expected_points_next_5',
)
position_ax.set_title('Expected Points by Player Position')
position_ax.set_xlabel('Player Position')
position_ax.set_ylabel('Expected Points (Next 5 Matches)')
# Tick labels are assigned purely by axis position (0-3).
# NOTE(review): assumes exactly four `element_type` categories, plotted in
# GK, DEF, MID, FWD order -- confirm against the data.
position_ax.set_xticks([0, 1, 2, 3])
position_ax.set_xticklabels(['GK', 'DEF', 'MID', 'FWD'])
plt.show()

In [None]:
# Bar chart counting how many fixtures fall under each `fixture_difficulty` value.
plt.figure(figsize=(10, 6))
difficulty_ax = sns.countplot(data=fixtures_df, x='fixture_difficulty')
difficulty_ax.set_title('Count of Fixtures by Difficulty')
difficulty_ax.set_xlabel('Fixture Difficulty')
difficulty_ax.set_ylabel('Count')
plt.show()

In [None]:
# Correlation heatmap of player attributes
# Restrict to numeric/boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises a
# ValueError if the frame contains any (e.g. text columns). Selecting the
# columns explicitly works on both older and newer pandas versions.
numeric_players_df = players_df.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_players_df.corr()

# Create the figure only after the matrix is computed, so a failure above
# does not leave an empty figure behind.
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap of Player Attributes')
plt.show()