In [1]:
import re
import sqlite3

import pandas as pd

In [2]:
# Connect to the marvel database
conn = sqlite3.connect('marvel.db')

# Release the connection even if one of the reads below raises
try:
    # Get the table names
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    tables = pd.read_sql(query, conn)
    table_names = tables["name"].tolist()

    # Load the first two tables into DataFrames, highest game_id first.
    # NOTE(review): this relies on sqlite_master listing the stats table
    # before the matches table — confirm that holds for every copy of marvel.db.
    if len(table_names) >= 2:
        # Identifiers cannot be bound as SQL parameters, so double-quote them
        # (doubling any embedded quote) to cope with unusual table names.
        stats_table, matches_table = (
            '"' + name.replace('"', '""') + '"' for name in table_names[:2]
        )
        stats_df = pd.read_sql(f"SELECT * FROM {stats_table} ORDER BY game_id DESC", conn)
        matches_df = pd.read_sql(f"SELECT * FROM {matches_table} ORDER BY game_id DESC", conn)

        # Report which table was loaded into which DataFrame
        print(f"First table: {table_names[0]}")
        print(f"\nSecond table: {table_names[1]}")
    else:
        print("Database contains less than two tables.")
finally:
    # Close the connection
    conn.close()

First table: stats

Second table: matches


In [3]:
# Preview the first five rows of the matches table
matches_df.head(n=5)

Unnamed: 0,game_id,Map,Score,Replay ID,Match Length,Result,Game Mode
0,169,YGGDRASILL PATH,2:3,10461226090,14m 52s,LOSS,Competitive
1,168,ROYAL PALACE,1:2,10572222444,14m 53s,LOSS,Competitive
2,167,SYMBIOTIC SURFACE,1:2,10506427193,8m 0s,LOSS,Competitive
3,166,MIDTOWN,3:4,10976523234,19m 13s,LOSS,Competitive
4,165,HALL OF DJALIA,0:1,10622326105,6m 14s,LOSS,Competitive


In [4]:
# Split the "kills / deaths / assists" string into three integer columns.
# The guard keeps the cell re-runnable: once KDA has been split and dropped
# there is nothing left to extract.
if 'KDA' in stats_df.columns:
    kda_parts = stats_df['KDA'].str.extract(r'(\d+)\s*/\s*(\d+)\s*/\s*(\d+)')

    # astype(int) raises if any KDA value did not match the pattern, which
    # surfaces malformed rows instead of hiding them
    stats_df[['Kills', 'Deaths', 'Assists']] = kda_parts.astype(int)

    # Drop the KDA column
    stats_df = stats_df.drop(columns=['KDA'])

# Define the new column order
new_column_order = [
    'game_id', 'Player', 'Rank', 'Kills', 'Deaths', 'Assists',
    'Damage', 'Dmg Taken', 'Healing', 'Heroes Played', 'Time Played'
]

# Reorder the DataFrame; .copy() yields an independent frame so the column
# assignments below cannot trigger SettingWithCopyWarning
stats_df = stats_df[new_column_order].copy()

# Remove thousands separators and other stray characters, then convert to floats
numeric_columns = ['Damage', 'Dmg Taken', 'Healing']

for col in numeric_columns:
    # The decimal point is kept on purpose: stripping it would turn a value
    # that is already a float such as 8495.0 into 84950.
    cleaned = stats_df[col].astype(str).str.replace(r'[^\d.]', '', regex=True)

    # Values with no digits left (e.g. missing entries) become NaN instead of
    # raising on float('')
    stats_df[col] = pd.to_numeric(cleaned, errors='coerce').astype(float)

In [5]:
# Stop here if matches_df was never loaded. Merely printing a message would let
# the cell carry on and either fail later on an undefined competitive_df or
# silently reuse a stale competitive_df left over from an earlier run.
if 'matches_df' not in globals():
    raise NameError("Error: matches_df is not available. Ensure it is loaded before running this code.")

# Merge stats_df with matches_df to get game mode
merged_df = stats_df.merge(matches_df[['game_id', 'Game Mode']], on='game_id', how='left')

# Filter for Competitive games; the helper column is no longer needed afterwards
competitive_df = merged_df[merged_df['Game Mode'] == 'Competitive'].drop(columns=['Game Mode'])

# Count how many Competitive games were played
num_games = competitive_df["game_id"].nunique()

# Display the result
print(f"Number of Competitive games: {num_games}")

Number of Competitive games: 158


In [6]:
# Players who must all appear in a game for it to be kept
target_players = {"6macb", "TravyPG", "Camprin"}

# Collect the set of players seen in each game
game_counts = competitive_df.groupby("game_id")["Player"].apply(set)

# Flag the games whose player set contains every target player
has_all_targets = game_counts.apply(target_players.issubset)
valid_game_ids = game_counts[has_all_targets].index

# Keep only the rows that belong to those games
in_valid_game = competitive_df["game_id"].isin(valid_game_ids)
filtered_df = competitive_df[in_valid_game]

# Number of distinct games that survived the filter
num_games = filtered_df["game_id"].nunique()

# Display the result
print(f"Number of games where 6macb, TravyPG, and Camprin were all present: {num_games}")

Number of games where 6macb, TravyPG, and Camprin were all present: 121


In [7]:
# Attach Map, Result and Game Mode from matches_df to every player row, then
# rename Result -> outcome and Game Mode -> game_mode in the same chain
full_df = (
    filtered_df
    .merge(matches_df[['game_id', 'Map', 'Result', 'Game Mode']], on='game_id', how='left')
    .rename(columns={'Result': 'outcome', 'Game Mode': 'game_mode'})
)

# Display the first few rows
full_df.head(n=12)

Unnamed: 0,game_id,Player,Rank,Kills,Deaths,Assists,Damage,Dmg Taken,Healing,Heroes Played,Time Played,Map,outcome,game_mode
0,168,captt,Platinum 3,11,10,9,8495.0,15625.0,17734.0,Cloak and Dagger / Luna Snow,10m / 4m,ROYAL PALACE,LOSS,Competitive
1,168,unfortunate000,Gold 2,21,8,1,17214.0,48844.0,0.0,Dr. Strange / Magneto,8m / 6m,ROYAL PALACE,LOSS,Competitive
2,168,AgentStreusel,Gold 1,21,1,12,17341.0,4117.0,21115.0,Loki,14m,ROYAL PALACE,LOSS,Competitive
3,168,SoopaWoopa,Gold 1,35,3,0,20495.0,6229.0,0.0,Star Lord,14m,ROYAL PALACE,LOSS,Competitive
4,168,Edwinfast10,Platinum 3,23,10,9,16948.0,10895.0,0.0,Bucky,14m,ROYAL PALACE,LOSS,Competitive
5,168,TheFloat,Gold 1,15,5,16,10568.0,13540.0,21231.0,Invisible Woman,14m,ROYAL PALACE,LOSS,Competitive
6,168,SR405,Gold 1,11,6,24,2925.0,5200.0,31946.0,Rocket / Cloak and Dagger,9m / 5m,ROYAL PALACE,LOSS,Competitive
7,168,6macb,Gold 1,16,7,12,12654.0,25690.0,18883.0,Cloak and Dagger / Hulk,9m / 5m,ROYAL PALACE,LOSS,Competitive
8,168,TravyPG,Gold 1,12,10,0,15921.0,5628.0,0.0,Moon Knight / Iron Man / Hela / Majic,9m / 4m / 41s / 10s,ROYAL PALACE,LOSS,Competitive
9,168,Camprin,Gold 1,13,8,13,10071.0,19562.0,15845.0,Invisible Woman,14m,ROYAL PALACE,LOSS,Competitive


In [8]:
# Matches one "<number><unit>" component of a play time, e.g. "9m", "41s",
# or both parts of "1m 30s"
_DURATION_PART = re.compile(r'(\d+)\s*([ms])')
_UNIT_SECONDS = {'m': 60, 's': 1}


def _duration_to_seconds(text):
    """Convert a play-time string such as "9m", "41s" or "1m 30s" to seconds.

    Text without any "<number>m" / "<number>s" component yields 0.
    """
    return sum(
        int(amount) * _UNIT_SECONDS[unit]
        for amount, unit in _DURATION_PART.findall(text)
    )


# Function to filter heroes based on Time Played
def filter_heroes_by_time(row, min_seconds=60):
    """Drop heroes that were played for less than ``min_seconds``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "Heroes Played" and "Time Played", each a " / "-separated
        string whose entries line up position by position.
    min_seconds : int, default 60
        Minimum play time, in seconds, for a hero to be kept.

    Returns
    -------
    pandas.Series
        Two elements: the filtered hero string and the filtered time string.
        Both are empty strings when no hero reaches the threshold. If either
        input value is not a string (e.g. missing), both values are returned
        unchanged.
    """
    heroes_raw = row["Heroes Played"]
    times_raw = row["Time Played"]

    # Missing values cannot be split; pass them through untouched
    if not isinstance(heroes_raw, str) or not isinstance(times_raw, str):
        return pd.Series([heroes_raw, times_raw])

    heroes = heroes_raw.split(" / ")
    times = times_raw.split(" / ")

    # zip pairs each hero with its own time; if the two lists differ in length
    # the unpaired tail is dropped rather than raising IndexError
    kept = [
        (hero, played)
        for hero, played in zip(heroes, times)
        if _duration_to_seconds(played) >= min_seconds
    ]
    kept_heroes = [hero for hero, _ in kept]
    kept_times = [played for _, played in kept]

    # Return updated strings
    return pd.Series([" / ".join(kept_heroes), " / ".join(kept_times)])

# Run the per-row filter; each row yields a filtered hero string and a
# filtered time string
heroes_and_times = full_df.apply(filter_heroes_by_time, axis=1)

# Overwrite the two original columns with their filtered versions
full_df[['Heroes Played', 'Time Played']] = heroes_and_times

# Display the updated DataFrame
full_df.head(n=12)

Unnamed: 0,game_id,Player,Rank,Kills,Deaths,Assists,Damage,Dmg Taken,Healing,Heroes Played,Time Played,Map,outcome,game_mode
0,168,captt,Platinum 3,11,10,9,8495.0,15625.0,17734.0,Cloak and Dagger / Luna Snow,10m / 4m,ROYAL PALACE,LOSS,Competitive
1,168,unfortunate000,Gold 2,21,8,1,17214.0,48844.0,0.0,Dr. Strange / Magneto,8m / 6m,ROYAL PALACE,LOSS,Competitive
2,168,AgentStreusel,Gold 1,21,1,12,17341.0,4117.0,21115.0,Loki,14m,ROYAL PALACE,LOSS,Competitive
3,168,SoopaWoopa,Gold 1,35,3,0,20495.0,6229.0,0.0,Star Lord,14m,ROYAL PALACE,LOSS,Competitive
4,168,Edwinfast10,Platinum 3,23,10,9,16948.0,10895.0,0.0,Bucky,14m,ROYAL PALACE,LOSS,Competitive
5,168,TheFloat,Gold 1,15,5,16,10568.0,13540.0,21231.0,Invisible Woman,14m,ROYAL PALACE,LOSS,Competitive
6,168,SR405,Gold 1,11,6,24,2925.0,5200.0,31946.0,Rocket / Cloak and Dagger,9m / 5m,ROYAL PALACE,LOSS,Competitive
7,168,6macb,Gold 1,16,7,12,12654.0,25690.0,18883.0,Cloak and Dagger / Hulk,9m / 5m,ROYAL PALACE,LOSS,Competitive
8,168,TravyPG,Gold 1,12,10,0,15921.0,5628.0,0.0,Moon Knight / Iron Man,9m / 4m,ROYAL PALACE,LOSS,Competitive
9,168,Camprin,Gold 1,13,8,13,10071.0,19562.0,15845.0,Invisible Woman,14m,ROYAL PALACE,LOSS,Competitive


In [9]:
# Role labels, defined once so a typo cannot create a fourth role by accident
ROLE_TANK = "Tank"
ROLE_DPS = "DPS"
ROLE_HEALER = "Healer"

# Hero name -> role lookup.
# NOTE(review): the keys presumably have to match the spellings used in the
# "Heroes Played" column (which contains e.g. "Majic" and "Bucky") — verify
# against the data before "correcting" any of them.
hero_roles = {
    "Hulk": ROLE_TANK,
    "Punisher": ROLE_DPS,
    "Storm": ROLE_DPS,
    "Loki": ROLE_HEALER,
    "Dr. Strange": ROLE_TANK,
    "Mantis": ROLE_HEALER,
    "Hawkeye": ROLE_DPS,
    "Captain America": ROLE_TANK,
    "Rocket": ROLE_HEALER,
    "Hela": ROLE_DPS,
    "Cloak and Dagger": ROLE_HEALER,
    "Black Panther": ROLE_DPS,
    "Groot": ROLE_TANK,
    "Majic": ROLE_DPS,
    "Moon Knight": ROLE_DPS,
    "Luna Snow": ROLE_HEALER,
    "Squirrel Girl": ROLE_DPS,
    "Black Widow": ROLE_DPS,
    "Iron Man": ROLE_DPS,
    "Venom": ROLE_TANK,
    "Spiderman": ROLE_DPS,
    "Magneto": ROLE_TANK,
    "Scarlet Witch": ROLE_DPS,
    "Thor": ROLE_TANK,
    "Mr. Fantastic": ROLE_DPS,
    "Bucky": ROLE_DPS,
    "Penny": ROLE_TANK,
    "Star Lord": ROLE_DPS,
    "Namor": ROLE_DPS,
    "Adam Warlocke": ROLE_HEALER,
    "Jeff": ROLE_HEALER,
    "Psylocke": ROLE_DPS,
    "Wolverine": ROLE_DPS,
    "Invisible Woman": ROLE_HEALER,
    "Iron Fist": ROLE_DPS,
}