<h3><strong>Import Libraries</strong></h3>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Pandas display settings: lift the row/column truncation limits and widen
# the text output so printed frames are shown in full.
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

<h3><strong>Read and Verify CSV</strong></h3>

In [None]:
# Load the battle records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='battles.csv')

# Sanity check: show the first five rows of the loaded data
first_rows = df.head()
print(first_rows)

<h3><strong>Data Cleaning & Initial Inspection</strong></h3>

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
frame_shape = df.shape
print(frame_shape)

In [None]:
# Collect every column label of the DataFrame into a plain Python list
cols = list(df.columns)
print(cols)

In [None]:
# Labels of the columns whose dtype is numeric
numeric_frame = df.select_dtypes(include='number')
numerical_cols = numeric_frame.columns
print(numerical_cols)

In [None]:
# dtype of every column, one line per column
column_dtypes = df.dtypes
print(column_dtypes)

In [None]:
# Count of missing (null) values in each column
null_counts = df.isna().sum()
print(null_counts)

In [None]:
# Number of distinct values in each column
unique_counts = df.nunique()
print(unique_counts)

<h5>Handling Null Values & Dropping Uninfluential Columns</h5>

In [None]:
# Drop columns with not much meaning/influence for the analysis:
# 'Unnamed: 0' (presumably the index saved with the CSV -- TODO confirm)
# plus the player, clan and tournament tag/badge identifier columns.
uninfluential_cols = [
    'Unnamed: 0',
    'winner.tag',
    'winner.clan.tag',
    'winner.clan.badgeId',
    'loser.tag',
    'loser.clan.tag',
    'loser.clan.badgeId',
    'tournamentTag',
]
df = df.drop(columns=uninfluential_cols)

In [None]:
# Step 1: show WHY the null hit-point values can be resolved to 0.
# This inspection must run BEFORE the fillna below: once the nulls are filled,
# every isna() mask is all-False and the groupby can no longer separate the
# rows that were null from the rows that were not.
# NOTE: re-running this cell after the fill will therefore only show a single
# `False` group; restart and run all to reproduce the comparison.

# Expectation to confirm in the output: nulls only appear when loser crowns
# is 2, i.e. both winner princess towers were destroyed (0 hit points).
print('Winner Princess Tower Hit Points to Loser Crowns:')
print(df.groupby(df['winner.princessTowersHitPoints'].isna())['loser.crowns'].describe())

# Expectation to confirm in the output: nulls appear when winner crowns is 3,
# i.e. the loser's king tower was destroyed (0 hit points).
print('\nLoser King Tower Hit Points to Winner Crowns:')
print(df.groupby(df['loser.kingTowerHitPoints'].isna())['winner.crowns'].describe())

# Expectation to confirm in the output: nulls appear only when both loser
# princess towers were destroyed (0 hit points).
print('\nLoser Princess Tower Hit Points to Winner Crowns:')
print(df.groupby(df['loser.princessTowersHitPoints'].isna())['winner.crowns'].describe())

# Step 2: resolve the null values to 0 (a destroyed tower has 0 hit points).
tower_hp_cols = [
    'winner.princessTowersHitPoints',
    'loser.kingTowerHitPoints',
    'loser.princessTowersHitPoints',
]
df[tower_hp_cols] = df[tower_hp_cols].fillna(0)

In [None]:
# Re-count missing values per column to confirm the nulls were resolved
remaining_nulls = df.isna().sum()
print(remaining_nulls)

<h3><strong>Exploratory Data Analysis</strong></h3>

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the data
summary_stats = df.describe()
print(summary_stats)

In [None]:
# Correlation heatmap: pairwise correlation between the numeric columns,
# annotated with the coefficient rounded to two decimals.
co_mtx = df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(co_mtx, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
plt.show()

In [None]:
# Bar plot of how often each card appears across all decks (winner and loser).
# The eight card-id columns of each side: winner.card1.id ... loser.card8.id
card_cols = [
    f'{side}.card{slot}.id'
    for side in ('winner', 'loser')
    for slot in range(1, 9)
]

# Pool every card slot of every match into one flat array and count each id;
# value_counts() returns the counts sorted from most to least used.
all_cards = df[card_cols].to_numpy().ravel()
card_counts = pd.Series(all_cards).value_counts()

plt.figure(figsize=(14, 10))
# Without `order`, seaborn sorts numeric category levels by value, so the bars
# would be arranged by card id instead of by usage. Passing the value_counts
# index keeps the most-used cards first.
sns.barplot(x=card_counts.index, y=card_counts.values, order=card_counts.index)
plt.title("Frequency of Cards Used in All Matches")
plt.xlabel("Card ID")
plt.ylabel("Usage Count")
plt.xticks(rotation=90)
plt.show()


In [None]:
# Show the ten most used cards.
# card_counts comes from value_counts(), so it is already sorted by usage and
# head(10) is the ten most frequent card ids.
top10 = card_counts.head(10)

plt.figure(figsize=(14, 10))
# Pass `order` so the bars stay ranked by usage; seaborn would otherwise sort
# numeric card ids by value.
sns.barplot(x=top10.index, y=top10.values, order=top10.index)
# Title distinguishes this figure from the all-cards plot.
plt.title("Top 10 Most Used Cards in All Matches")
plt.xlabel("Card ID")
plt.ylabel("Usage Count")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Box plot of Winner Elixir Average vs Loser Elixir Average
elixir_cols = ['winner.elixir.average', 'loser.elixir.average']

fig, ax = plt.subplots(figsize=(14, 10))
sns.boxplot(data=df[elixir_cols], ax=ax)
ax.set_title("Elixir Average Distribution (Winner vs Loser)")
ax.set_ylabel("Elixir Average")
plt.show()

# Author's interpretation: elixir average alone doesn't determine match outcomes

<h3><strong>Data Prep for Model Building</strong></h3>

<h5>Creating a new dataframe with the winning and losing cards of each match in separate rows, plus whether that deck won (won = 0 or 1)</h5>

<h5>Creating a new dataframe which includes winning and losing cards from each match on the same row and which deck won (won = 0 or 1)</h5>

<h3><strong>Model Building</strong> (TODO: model choice not yet decided)</h3>

<h5>Predicting overall win rate of one deck</h5>

<h5>Predicting overall win rate of deck A vs deck B</h5>