# FUMBBL data report

A few analyses of the FUMBBL match data: types of matches, star player usage, etc.



## Read the FUMBBL data

In [None]:
import pandas as pd
import numpy as np
import plotnine as p9

from mizani.formatters import date_format

# Directory that holds the CSV datasets read below
path_to_datasets = '../datasets/current/'

# FUMBBL matches
target = 'df_matches.csv'
df_matches = pd.read_csv(f"{path_to_datasets}{target}")

# FUMBBL matches by team
target = 'df_mbt.csv'
df_mbt = pd.read_csv(f"{path_to_datasets}{target}")

# FUMBBL inducements
target = 'inducements.csv'
inducements = pd.read_csv(f"{path_to_datasets}{target}")

# Let pandas display up to 500 rows / 500 columns before truncating
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)


# Fix and enrich data

In [None]:
# FUMBBL quarterly volumes (semicolon-separated code list)
target = 'fumbbl_match_counts.csv'
fumbbl_volumes = pd.read_csv('../codelists/' + target, sep=';')

# Build a "<year>-Q<quarter>" label per row and parse it into a datetime column
quarter_label = (
    fumbbl_volumes['year'].astype(str)
    .str.cat(fumbbl_volumes['quarter'].astype(str), sep='-Q')
)
fumbbl_volumes['quarter_date'] = pd.to_datetime(quarter_label)

In [None]:
# Parse the date columns into proper datetimes
df_matches['match_date'] = pd.to_datetime(df_matches['match_date'])
df_matches['week_date'] = pd.to_datetime(df_matches['week_date'])

# Calendar quarter of each match (as a Period) ...
df_matches['quarter'] = df_matches['match_date'].dt.to_period('Q')

# ... and the same quarter as a timestamp, for use on a datetime plot axis
df_matches['quarter_date'] = pd.PeriodIndex(df_matches['quarter'], freq='Q').to_timestamp()

# Matches whose scheduler mentions "Blackbox" get 'Blackbox' as division name.
# na=False: a missing scheduler counts as "no match" and the row is left
# untouched; without it the mask contains NaN, which .loc refuses to index with.
is_blackbox = df_matches['scheduler'].str.contains("Blackbox", na=False)
df_matches.loc[is_blackbox, 'division_name'] = 'Blackbox'

In [None]:
# Peek at the first five rows
df_matches.head(5)

# What data do we have? Weekly game volumes


Let's see what we've got! The pandas DataFrame `df_matches` contains records for all matches played on FUMBBL from August 2020 onwards.

Since we have a proper `datetime` type variable for each week (`week_date`), we can use `pandas` and `plotnine` to plot the weekly game volume as a time series.

The introduction of the new **Competitive division** with BB2020 rules is marked by a vertical red line. The World Cup in Alicante is marked as well.

In [None]:
# Count the matches in df_matches per quarter, restricted to the analysis window
in_window = (df_matches['week_date'] >= '2020-08-01') & (df_matches['week_date'] < '2025-03-25')
api_counts = (
    df_matches[in_window]
    .groupby('quarter_date', as_index=False)  # keep quarter_date as a regular column
    .agg(n_games_api=('match_id', 'count'))
)

# Attach those counts to the quarterly volume code list (all of its rows are kept)
res = pd.merge(fumbbl_volumes, api_counts, how='left', on='quarter_date')

# Black points + line: n_games from the code list; red points: counts from df_matches
(
    p9.ggplot(data=res, mapping=p9.aes(x='quarter_date', y='n_games'))
    + p9.geom_point(size=3)
    + p9.geom_point(mapping=p9.aes(y='n_games_api'), color="red")
    + p9.geom_line()
    + p9.expand_limits(y=[0, 2000])
    + p9.geom_vline(xintercept='2021-09-01', color="red")
    + p9.geom_vline(xintercept='2023-09-07', color="red")
    + p9.theme(figure_size=(10, 5))
    + p9.ggtitle("quartely game volume on FUMBBL august 2020 - august 2024")
)

Next, go to divisions and to weekly resolution: Competitive, Blackbox, "Other".
Ranked changed to Competitive. Blackbox (BBT 1-5) was off during July 2021/July 2022.
Season 6 started in August 2022.

In [None]:
# Count the matches in df_matches per quarter and division within the analysis window
in_window = (df_matches['week_date'] >= '2020-08-01') & (df_matches['week_date'] < '2025-03-25')
res = (
    df_matches[in_window]
    .groupby(['quarter_date', 'division_name'], as_index=False)  # keys stay regular columns
    .agg(n_games_api=('match_id', 'count'))
)

# One coloured series per division
(
    p9.ggplot(
        data=res,
        mapping=p9.aes(x='quarter_date', y='n_games_api', color='division_name'),
    )
    + p9.geom_point(size=3)
    + p9.geom_line()
    + p9.expand_limits(y=[0, 2000])
    + p9.geom_vline(xintercept='2021-09-01', color="red")
    + p9.geom_vline(xintercept='2023-09-07', color="black")
    + p9.theme(figure_size=(10, 5))
    + p9.ggtitle("quartely game volume on FUMBBL august 2020 - august 2024")
)

# Star player usage

The blackbox trophy is a lengthy competitive division meta event. Coaches select a squad of 4 unique teams with a budget of 7 points.


In [None]:
divisions = ['Blackbox', 'Competitive', 'Ranked', 'League']

# Per division and week: number of matches and the mean of has_sp
res = (
    df_matches
    .query("division_name in @divisions")
    .groupby(['division_name', 'week_date'], as_index=False)
    .agg(
        n_games=('match_id', 'count'),
        perc_sp=('has_sp', 'mean'),
    )
    .sort_values("n_games", ascending=False)
)

# Only weeks with more than 30 matches are plotted; point size follows n_games
(
    p9.ggplot(
        data=res.query("n_games > 30"),
        mapping=p9.aes(
            x='week_date',
            y='perc_sp*100',
            group='factor(division_name)',
            color='factor(division_name)',
        ),
    )
    + p9.geom_point(p9.aes(size='n_games'))
    + p9.expand_limits(y=[0, 1])
    + p9.scale_size_area()
    + p9.geom_vline(xintercept='2021-09-01', color="red")
    + p9.geom_vline(xintercept='2022-08-01', color="black")  # BBT6
    + p9.geom_vline(xintercept='2023-01-01', color="black")
    + p9.geom_vline(xintercept='2023-05-01', color="black")
    + p9.geom_vline(xintercept='2023-09-01', color="black")
    + p9.geom_vline(xintercept='2024-01-01', color="black")
    + p9.geom_vline(xintercept='2024-05-01', color="black")  # BBT10
    + p9.ggtitle("Star player usage over time, by division/league")
    + p9.theme(figure_size=(10, 6))
    + p9.ylab("% matches with at least one Star Player")
)

# Star Players

In [None]:
# Number of star-player rows per inducement name, most frequent first
(
    inducements
    .query('star_player == 1')
    .groupby('inducements', as_index=False)
    .agg(n_teams=('star_player', 'count'))
    .sort_values("n_teams", ascending=False)
)

# Which races score the most touchdowns, and which have the most scored against them

In [None]:
divisions = ['Competitive']
tv_bins = ['1.1M', '1.4M', '1.7M']

# Pre-select division and TV bin with a single boolean mask. The query below
# repeats both conditions and additionally requires mirror_match == 0 and has_sp == 0.
preselect = df_mbt['division_name'].isin(divisions) & df_mbt['tv_bin'].isin(tv_bins)

res = (
    df_mbt[preselect]
    .query("mirror_match == 0 & has_sp == 0 & tv_bin in @tv_bins & division_name in @divisions")
    .groupby(['race_name', 'tv_bin'])
    .agg(
        avg_td=('team_score', 'mean'),
        avg_away_td=('away_team_score', 'mean'),
        n_games=('race_name', 'count'),
    )
    .sort_values('n_games', ascending=False)
    .reset_index()  # bring race_name / tv_bin back as columns
    .dropna()
)

# Races ordered by average TD against; only race/TV-bin groups with more than 100 games
(
    p9.ggplot(
        data=res.query('n_games > 100'),
        mapping=p9.aes(
            y='reorder(race_name, avg_away_td)',
            x='avg_away_td',
            size='n_games',
            group='factor(tv_bin)',
            color='factor(tv_bin)',
        ),
    )
    + p9.geom_point()
    + p9.scale_size_area()
    + p9.ggtitle("Competitive Division: average TD against per game")
)

In [None]:
# Same layout as the TD-against plot, now with the races ordered by average TD scored
td_scored_aes = p9.aes(
    y='reorder(race_name, avg_td)',
    x='avg_td',
    size='n_games',
    group='factor(tv_bin)',
    color='factor(tv_bin)',
)

(
    p9.ggplot(data=res.query('n_games > 100'), mapping=td_scored_aes)
    + p9.geom_point()
    + p9.scale_size_area()
    + p9.ggtitle("Competitive Division: average TD per game")
)

# star players over time

In [None]:
# top 10 star players in BB2016 league
league_star_rows = (
    inducements
    # add division (and week) of the match to each inducement row
    .merge(df_matches[['match_id', 'division_name', 'week_date']], how='left', on='match_id')
    .query("star_player == 1 and division_name == 'League'")
)

top10 = (
    league_star_rows
    .groupby('inducements', as_index=False)
    .agg(n_games=('match_id', 'count'))
    .sort_values('n_games', ascending=False)
    .head(10)
)

top10

In [None]:
# top 10 star players in BB2020
competitive_star_rows = (
    inducements
    # add division (and week) of the match to each inducement row
    .merge(df_matches[['match_id', 'division_name', 'week_date']], how='left', on='match_id')
    .query("star_player == 1 and division_name == 'Competitive'")
)

# Only the names are kept (a Series), so later cells can filter with `in @top10`
top10 = (
    competitive_star_rows
    .groupby('inducements', as_index=False)
    .agg(n_games=('match_id', 'count'))
    .sort_values('n_games', ascending=False)
    .head(10)['inducements']
)

top10

In [None]:
# Inducement rows with the division and week of their match attached (merged once, used twice)
induced = inducements.merge(
    df_matches[['match_id', 'division_name', 'week_date']],
    how='left',
    on='match_id',
)

# Weekly counts for each of the names in top10
res = (
    induced
    .query("star_player == 1 and division_name == 'Competitive' and inducements in @top10")
    .groupby(['inducements', 'week_date'], as_index=False)
    .agg(n_games=('match_id', 'count'))
)

# Weekly counts over all star players, stored under the pseudo-name 'total'
res2 = (
    induced
    .assign(inducements='total')
    .query("star_player == 1 and division_name == 'Competitive'")
    .groupby(['inducements', 'week_date'], as_index=False)
    .agg(n_games=('match_id', 'count'))
)

# Stack both so 'total' can be plotted as one more series
resx = pd.concat([res, res2], axis=0)

In [None]:
# Week used by the (currently disabled) geom_text labels at the bottom of the plot
label_week = '2022-05-30'

(
    p9.ggplot(
        data=resx.query("week_date < '2025-06-01'"),
        mapping=p9.aes(
            x='week_date',
            y='n_games',
            group='factor(inducements)',
            color='factor(inducements)',
        ),
    )
    + p9.geom_point()
    + p9.geom_line()
    + p9.expand_limits(y=[0, 1])
    #+ p9.scale_size_area()
    + p9.geom_vline(xintercept='2021-09-01', color="red")
    + p9.geom_vline(xintercept='2022-11-01', color="red")
    + p9.geom_vline(xintercept='2023-04-25', color="blue")  # mega stars no longer available for hire
    + p9.geom_vline(xintercept='2023-05-28', color="red")
    + p9.geom_vline(xintercept='2024-05-28', color="red")
    + p9.ggtitle("Star player usage over time BB2020 FUMBBL")
    + p9.theme(figure_size=(10, 6))
    + p9.ylab("matches")
    #+ p9.geom_text(data = resx.query("week_date == @label_week"), mapping = p9.aes(label = 'inducements'), nudge_x = 7)
    #+ p9.guides(color = False)
)

Now we see where the big increases come from: the introduction of Akhorne in November 2021, and the introduction of Bomber Dribblesnot and Fungus the Loon in March 2022.

# BB2020 game volume by race

In [None]:
# Most-played races in the Competitive (BB2020) division; head(30) keeps at most 30.
# NOTE: this rebinds `top10`, the name the star-player cells use for star player names.
top10 = (df_mbt
.query("division_name == 'Competitive'") # filter
.groupby(['race_name'])
.agg(
    n_games = ('match_id', 'count')
)
.reset_index()
.sort_values('n_games',ascending = False)
.head(30)['race_name'])

divisions = [ 'Competitive']

# Weekly number of matches for each selected race
res = (df_mbt
.query("division_name in @divisions and race_name in @top10")
.groupby(['division_name', 'race_name', 'week_date'])
.agg(
    n_games = ('match_id', 'count')
)
.reset_index()
.sort_values("n_games", ascending=False)
)

# Weekly number of rows over all races, stored under the pseudo-race 'total'
res2 = (df_mbt
.assign(race_name = 'total')
.query("division_name in @divisions")
.groupby(['division_name', 'race_name', 'week_date'])
.agg(
    n_games = ('match_id', 'count')
)
.reset_index()
.sort_values("n_games", ascending=False)
)

resx = pd.concat([res, res2], axis = 0)

# df_mbt comes straight from read_csv and its week_date is not parsed anywhere
# in this notebook, so it is still text here. The datetime x-scale below needs
# real datetimes; the conversion is a no-op if the column is already datetime.
resx['week_date'] = pd.to_datetime(resx['week_date'])

# One panel per race, each with its own y-axis
(p9.ggplot(data = resx.query("week_date < '2024-06-01'"), mapping = p9.aes(x = 'week_date', y = 'n_games', 
group = 'factor(race_name)', color = 'factor(race_name)'))
    + p9.geom_point() 
    + p9.geom_line() 
    + p9.expand_limits(y=[0,1])
    + p9.facet_wrap('race_name', scales = 'free_y', ncol = 3)
    + p9.scale_x_datetime(labels = date_format('%b'))   
    + p9.geom_vline(xintercept = '2021-09-01', color = "red")
    + p9.ggtitle("BB2020 Matches by race")
    + p9.theme(figure_size = (10, 12))
    + p9.ylab("Number of matches") 
    + p9.guides(color = False)
    + p9.theme(subplots_adjust={'wspace': 0.25}))
    

# inducements for a given race, as a function of CTV difference

In [None]:
# Print a summary of df_mbt (columns, dtypes, non-null counts) to see what is available
df_mbt.info()

In [None]:


# Gnome rows after 2023 with a tv_diff below 40, for manual inspection
gnome_rows = (
    (df_mbt['tv_diff'] < 40)
    & (df_mbt['race_name'] == 'Gnome')
    & (df_mbt['year'] > 2023)
)
res = df_mbt[gnome_rows]

res

In [None]:
# Inducement rows of one hand-picked match
inducements.loc[inducements['match_id'] == 4562639]

In [None]:
# Matches-by-team rows of the same hand-picked match
df_mbt.loc[df_mbt['match_id'] == 4562639]

In [None]:
# PM (reminder): FIX TV diff
# Kept as a comment: the bare note in a code cell was a SyntaxError that stopped "Run All".