# Ahmad Saleem Mirza

## Research question/interests

- What play styles (ground, aerial) have higher chances to win a game?
- Do players from specific regions play more aggressively or passively than those from other regions (movement_total_distance, core_shot & demos)?
- How many 'interesting' games were there? (Interesting here is games with a higher than avg save count)
- Which teams (and which regions) have more teamwork involved (looking at team assists here)
- Notable statistical differences from local tournaments upto the world championships

In [9]:
import pandas as pd
import numpy as np
#import project_functions1 as pf

In [11]:
df = pd.read_csv('../data/raw/matches_by_teams.csv')

#Removing missing data
df = df.dropna()

#Removing unwanted columns
df = df.drop(['team_slug', 'demo_taken', 'movement_count_powerslide', 'core_score', 'core_shooting_percentage', 'color', 'team_id'], axis=1)
df = df[df.columns.drop(list(df.filter(regex='positioning*')))]
df = df.rename(columns={'movement_time_high_air': 'air_time'})
df = df[df.columns.drop(list(df.filter(regex='movement_time*')))]
df = df[df.columns.drop(list(df.filter(regex='boost*')))]

#df = pf.load_clean('../data/raw/matches_by_teams.csv')
df

Unnamed: 0,match_id,team_name,team_region,core_shots,core_goals,core_saves,core_assists,movement_total_distance,air_time,demo_inflicted,score,winner
0,6159ad3d143c37878b2384a9,GROUND ZERO GAMING,Oceania,34.0,9.0,8.0,7.0,4694698.0,210.83,6.0,3,True
1,6159ad3d143c37878b2384a9,RANGA ROUNDUP,Oceania,15.0,5.0,21.0,3.0,4774768.0,175.53,7.0,0,False
2,6159ad3d143c37878b2384aa,RENEGADES,Oceania,46.0,21.0,10.0,21.0,5098012.0,238.28,11.0,3,True
3,6159ad3d143c37878b2384aa,TRIDENT ESPORTS,Oceania,16.0,4.0,16.0,3.0,4893312.0,174.47,7.0,0,False
4,6159ad3d143c37878b2384ab,DIRE WOLVES,Oceania,46.0,12.0,13.0,9.0,6680885.0,290.85,13.0,3,True
...,...,...,...,...,...,...,...,...,...,...,...,...
10589,62e02371da9d7ca1c7bb2f61,TEAM BDS,Europe,61.0,18.0,29.0,14.0,10010525.0,523.30,19.0,4,True
10590,62e02371da9d7ca1c7bb2f62,G2 ESPORTS,North America,46.0,14.0,38.0,12.0,9022216.0,485.48,22.0,4,True
10591,62e02371da9d7ca1c7bb2f62,FAZE CLAN,North America,50.0,5.0,26.0,4.0,8961362.0,459.89,15.0,2,False
10592,62e02371da9d7ca1c7bb2f63,TEAM BDS,Europe,36.0,10.0,19.0,8.0,7387374.0,355.54,11.0,4,True


In [3]:
# Data for Q1
mean_air_time = df["air_time"].mean()

# Data for Q2
mean_demos = df["demo_inflicted"].mean()
mean_shots = df["core_shots"].mean()
mean_total_distance = df["movement_total_distance"].mean()

# Data for Q3
mean_save_count = df["core_saves"].mean()

# Data for Q4
mean_assists = df["core_assists"].mean()
print(mean_air_time)

261.1549642114985


In [4]:
#Importing Regional and Splits
df2 = pd.read_csv('../data/raw/main.csv')[['match_id', 'event', 'event_split']].drop_duplicates()
result = pd.merge(df , df2 , on='match_id')

result

Unnamed: 0,match_id,team_name,team_region,core_shots,core_goals,core_saves,core_assists,movement_total_distance,air_time,demo_inflicted,score,winner,event,event_split
0,6159ad3d143c37878b2384a9,GROUND ZERO GAMING,Oceania,34.0,9.0,8.0,7.0,4694698.0,210.83,6.0,3,True,Regional 1,Fall
1,6159ad3d143c37878b2384a9,RANGA ROUNDUP,Oceania,15.0,5.0,21.0,3.0,4774768.0,175.53,7.0,0,False,Regional 1,Fall
2,6159ad3d143c37878b2384aa,RENEGADES,Oceania,46.0,21.0,10.0,21.0,5098012.0,238.28,11.0,3,True,Regional 1,Fall
3,6159ad3d143c37878b2384aa,TRIDENT ESPORTS,Oceania,16.0,4.0,16.0,3.0,4893312.0,174.47,7.0,0,False,Regional 1,Fall
4,6159ad3d143c37878b2384ab,DIRE WOLVES,Oceania,46.0,12.0,13.0,9.0,6680885.0,290.85,13.0,3,True,Regional 1,Fall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8657,62e02371da9d7ca1c7bb2f61,TEAM BDS,Europe,61.0,18.0,29.0,14.0,10010525.0,523.30,19.0,4,True,World Championship,Summer
8658,62e02371da9d7ca1c7bb2f62,G2 ESPORTS,North America,46.0,14.0,38.0,12.0,9022216.0,485.48,22.0,4,True,World Championship,Summer
8659,62e02371da9d7ca1c7bb2f62,FAZE CLAN,North America,50.0,5.0,26.0,4.0,8961362.0,459.89,15.0,2,False,World Championship,Summer
8660,62e02371da9d7ca1c7bb2f63,TEAM BDS,Europe,36.0,10.0,19.0,8.0,7387374.0,355.54,11.0,4,True,World Championship,Summer


In [5]:
# QUESTION 1: What play styles (ground, aerial) have higher chances to win a game?
# ANSWER: Staying on the ground is the more successful strategy overall, but at the higher levels of competition 
# (majors & world championship) aerial based plays are more successful. This could be because of the greater skill of players.

# Creating columns
q1 = result
q1["playstyle"] = np.where(df['air_time'] >= mean_air_time, "air", "ground")

print("Ground, Air (Total)")
print(q1.query("playstyle == 'ground' and winner and event_split == 'Fall'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Fall'").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Winter'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Winter'").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Spring'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Spring' ").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Summer'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Summer'").shape[0])

print("Ground, Air (Majors)")
print(q1.query("playstyle == 'ground' and winner and event_split == 'Fall' and event == 'Major'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Fall' and event == 'Major'").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Winter' and event == 'Major'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Winter' and event == 'Major'").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Spring' and event == 'Major'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Spring' and event == 'Major'").shape[0])
print(q1.query("playstyle == 'ground' and winner and event_split == 'Summer'and event == 'World Championship'").shape[0],
     q1.query("playstyle == 'air' and winner and event_split == 'Summer'and event == 'World Championship'").shape[0])

Ground, Air (Total)
840 665
838 646
658 624
1 59
Ground, Air (Majors)
13 28
11 27
15 16
1 59


In [6]:
# QUESTION 2: Do players from specific regions play more aggressively or passively than those from other regions 
# (movement_total_distance, core_shot & demos)?
# ANSWER: Yes. NA and EU played significantly more aggressively than the other regions by moving around more, shoooting at the net and demoing.

# Creating columns
q2 = result
q2["agg_pass"] = np.where((df['movement_total_distance'] >= mean_air_time) & 
                                    (df['core_shots'] >= mean_shots) & 
                                    (df['demo_inflicted'] >= mean_demos), "aggressive", "passive")

agg_count = {
    "Oceania": 0,
    "North America": 0,
    "South America": 0,
    "Europe": 0,
    "Middle East & North Africa": 0,
    "Sub-Saharan Africa": 0,
    "Asia-Pacific North": 0,
    "Asia-Pacific South": 0
}

agg_count["Oceania"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Oceania'").shape[0]
agg_count["North America"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'North America'").shape[0]
agg_count["South America"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'South America'").shape[0]
agg_count["Europe"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Europe'").shape[0]
agg_count["Middle East & North Africa"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Middle East & North Africa'").shape[0]
agg_count["Sub-Saharan Africa"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Sub-Saharan Africa'").shape[0]
agg_count["Asia-Pacific North"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Asia-Pacific North'").shape[0]
agg_count["Asia-Pacific South"] = q2.query("agg_pass == 'aggressive' and winner and team_region == 'Asia-Pacific South'").shape[0]

print(agg_count)

#print("Aggressive, Passive (Total)")
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Winter'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Winter'").shape[0])
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Spring'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Spring' ").shape[0])
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Summer'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Summer'").shape[0])

#print("Aggressive, Passive (Majors)")
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Fall' and event == 'Major'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Fall' and event == 'Major'").shape[0])
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Winter' and event == 'Major'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Winter' and event == 'Major'").shape[0])
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Spring' and event == 'Major'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Spring' and event == 'Major'").shape[0])
#print(q2.query("agg_pass == 'aggressive' and winner and event_split == 'Summer'and event == 'World Championship'").shape[0],
#     q2.query("agg_pass == 'passive' and winner and event_split == 'Summer'and event == 'World Championship'").shape[0])
#print("//////////////////////////////")



{'Oceania': 198, 'North America': 335, 'South America': 228, 'Europe': 310, 'Middle East & North Africa': 192, 'Sub-Saharan Africa': 110, 'Asia-Pacific North': 150, 'Asia-Pacific South': 103}


In [7]:
# QUESTION 3: What percentage of 'interesting' games were there? (Interesting here is games with a higher than avg save count)
# ANSWER: Signifcant increase in interseting games across the season. Rapid spike in world championships

q3 = result
q3["interesting"] = np.where(df['core_saves'] > mean_save_count, True, False)
print(q3.query("interesting and event_split == 'Winter'").shape[0]/q3.query("event_split == 'Winter'").shape[0])
print(q3.query("interesting and event_split == 'Fall'").shape[0]/q3.query("event_split == 'Fall'").shape[0])
print(q3.query("interesting and event_split == 'Spring'").shape[0]/q3.query("event_split == 'Spring'").shape[0])
print(q3.query("interesting and event_split == 'Summer'").shape[0]/q3.query("event_split == 'Summer'").shape[0])

0.45148247978436656
0.4621262458471761
0.4641185647425897
0.7166666666666667


In [8]:
# QUESTION 4: Which teams (and which regions) have more teamwork involved (looking at team assists here, not wins)
# ANSWER: G2 Esports. One of the most famous e-sports organizations

q4 = result
q4["teamwork"] = np.where(df['core_assists'] > mean_assists, True, False)
q4_2 = q4.loc[q4['teamwork']]
top_teamwork_teams = q4_2['team_name'].value_counts().head()#.idxmax()
top_teamwork_teams

G2 ESPORTS             57
FURIA ESPORTS          55
TOKYO VERDY ESPORTS    55
DETONATOR              53
PIRATES EXDEE          50
Name: team_name, dtype: int64