In [None]:
import csv
import pandas as pd
from collections import defaultdict
from prettytable import PrettyTable
import trueskill
import altair as alt
from getpass import getpass
from apiclient.discovery import build

### Read in CSV Data

In [2]:
# Read the raw match log. The csv module expects files to be opened with
# newline='' so that newlines embedded in quoted fields are parsed correctly.
with open('Game Grumps VS data.csv', newline='') as csv_file:
    csv_data = list(csv.reader(csv_file))

# The first row is the header; every remaining row is one match. All values stay
# as strings (empty cells are '' rather than NaN).
csv_df = pd.DataFrame(csv_data[1:], columns=csv_data[0])

In [3]:
def _split_names(cell):
    """Split a ';'-separated cell into a list of whitespace-trimmed names.

    An empty cell yields [''] because ''.split(';') == [''].
    """
    return [name.strip() for name in cell.split(';')]


# Work on a copy so the raw CSV frame stays untouched.
vs_df = csv_df.copy()
vs_df['Loser(s)'] = vs_df['Loser(s)'].map(_split_names)

# Competitors = losers plus the winner, sorted alphabetically per match.
winner_lists = vs_df['Winner'].map(lambda name: [name])
vs_df['Competitors'] = (vs_df['Loser(s)'] + winner_lists).map(sorted)

#### Add YouTube Data

In [4]:
# Read the API key interactively instead of hard-coding it in the notebook.
api_key = getpass('Youtube API Key')

# Service identifiers for the API client built below.
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

youtube_client = build(
    serviceName=YOUTUBE_API_SERVICE_NAME,
    version=YOUTUBE_API_VERSION,
    developerKey=api_key,
)

Youtube API Key ·······································


In [5]:
# Look up the publish timestamp of every episode and order the matches by it.
#
# To retrieve almost all relevant data on a given video id, use:
# youtube_client.videos().list(part='snippet,contentDetails,statistics', id='_MIMm3OCZXA').execute()

# Many rows share an episode, so query each distinct video id only once.
publish_date_by_episode = {
    episode_id: youtube_client.videos().list(part='snippet', id=episode_id).execute()['items'][0]['snippet']['publishedAt']
    for episode_id in vs_df['Episode-Id'].unique()
}
vs_df['Publish Date'] = vs_df['Episode-Id'].map(publish_date_by_episode)

# All matches of one episode share the same timestamp, so a stable sort is needed
# to keep them in the order they were recorded in the CSV.
vs_df = vs_df.sort_values(by='Publish Date', kind='mergesort').reset_index(drop=True)

#### Add Match Numbers

In [6]:
# Number the matches 1..N following the current row index.
vs_df = vs_df.assign(**{'Overall Match Number': vs_df.index + 1})

In [7]:
# Number the matches 1..k inside each (game, episode) pair, in row order.
# cumcount() numbers the rows of each group directly, so the result stays correct
# even when the rows of a group are not adjacent, and no helper column is needed.
vs_df['Match Number'] = vs_df.groupby(['Game', 'Episode-Id']).cumcount() + 1

In [8]:
# One row per (match, participant): unpack the 'Competitors' lists into separate rows.
competitor_df = (
    vs_df
    .explode('Competitors')
    .rename(columns={'Competitors': 'Competitor'})
)

In [9]:
# Display the per-match table (one row per match).
vs_df

Unnamed: 0,Winner,Loser(s),Game,Episode-Id,Notes,Competitors,Publish Date,Overall Match Number,Match Number
0,Jon,[Arin],Super Smash Bros. Brawl,uiofcRg6XEo,Arin is red,"[Arin, Jon]",2012-08-23T01:44:05Z,1,1
1,Arin,[Jon],Super Smash Bros. Brawl,uiofcRg6XEo,,"[Arin, Jon]",2012-08-23T01:44:05Z,2,2
2,Jon,[Arin],Super Smash Bros. Brawl,uiofcRg6XEo,,"[Arin, Jon]",2012-08-23T01:44:05Z,3,3
3,Arin,[Jon],Super Smash Bros. Brawl,uiofcRg6XEo,,"[Arin, Jon]",2012-08-23T01:44:05Z,4,4
4,Arin,[Jon],Super Smash Bros. Brawl,uiofcRg6XEo,,"[Arin, Jon]",2012-08-23T01:44:05Z,5,5
...,...,...,...,...,...,...,...,...,...
152,Tie (Arin; Dan),[],Mario Golf: Toadstool Tour,wRPKjKG4KKo,,"[, Tie (Arin; Dan)]",2015-06-01T21:00:01Z,153,1
153,Danny,[Arin],Millionaire,cFurtZ7Gvb4,,"[Arin, Danny]",2016-02-05T22:00:00Z,154,1
154,Arin,"[Danny, Jacob Anderson]",Wheel of Fortune,90lejF5TzGM,,"[Arin, Danny, Jacob Anderson]",2016-04-25T17:00:00Z,155,1
155,Arin,"[Danny, Finn Wolfhard]",Wheel of Fortune,fg_QS2yAsfI,,"[Arin, Danny, Finn Wolfhard]",2017-01-15T22:00:01Z,156,1


In [10]:
# Display the exploded table (one row per match participant).
competitor_df

Unnamed: 0,Winner,Loser(s),Game,Episode-Id,Notes,Competitor,Publish Date,Overall Match Number,Match Number
0,Jon,[Arin],Super Smash Bros. Brawl,uiofcRg6XEo,Arin is red,Arin,2012-08-23T01:44:05Z,1,1
0,Jon,[Arin],Super Smash Bros. Brawl,uiofcRg6XEo,Arin is red,Jon,2012-08-23T01:44:05Z,1,1
1,Arin,[Jon],Super Smash Bros. Brawl,uiofcRg6XEo,,Arin,2012-08-23T01:44:05Z,2,2
1,Arin,[Jon],Super Smash Bros. Brawl,uiofcRg6XEo,,Jon,2012-08-23T01:44:05Z,2,2
2,Jon,[Arin],Super Smash Bros. Brawl,uiofcRg6XEo,,Arin,2012-08-23T01:44:05Z,3,3
...,...,...,...,...,...,...,...,...,...
155,Arin,"[Danny, Finn Wolfhard]",Wheel of Fortune,fg_QS2yAsfI,,Arin,2017-01-15T22:00:01Z,156,1
155,Arin,"[Danny, Finn Wolfhard]",Wheel of Fortune,fg_QS2yAsfI,,Danny,2017-01-15T22:00:01Z,156,1
155,Arin,"[Danny, Finn Wolfhard]",Wheel of Fortune,fg_QS2yAsfI,,Finn Wolfhard,2017-01-15T22:00:01Z,156,1
156,Arin,[Danny],Mario Golf: Toadstool Tour,1JHX8B8m5p8,,Arin,2019-02-12T18:00:00Z,157,1


### Overview Stats

In [11]:
# Collect every distinct name that appears as a winner or as a loser, in order of
# first appearance. pd.concat replaces Series.append, which was removed in pandas 2.0.
participant_names = pd.concat(
    [vs_df['Winner'], vs_df['Loser(s)'].explode()],
    ignore_index=True,
).unique()

# Drop blanks and composite labels containing '(' (e.g. 'Tie (...)'), and skip
# any non-string value such as NaN.
all_competitors = [
    name
    for name in participant_names
    if isinstance(name, str) and name and '(' not in name
]
print(all_competitors)

['Jon', 'Arin', 'Barry', 'Danny', 'AI', 'Ross', 'Susy', 'Jacob Anderson', 'Finn Wolfhard']


In [12]:
# Per-competitor tallies: total wins, total games, and wins against each opponent.
competitor_data = {
    name: {'wins': 0, 'games': 0, 'won_vs': defaultdict(int)}
    for name in all_competitors
}

for raw_winner, losers in zip(vs_df['Winner'], vs_df['Loser(s)']):
    # TODO: add in ignoring AI in favor of top place human
    if 'AI' in raw_winner:
        # Any winner label mentioning AI is counted under the single 'AI' entry.
        victor = 'AI'
    # TODO: Don't skip ties
    elif 'Tie' in raw_winner:
        continue
    else:
        victor = raw_winner

    victor_record = competitor_data[victor]
    victor_record['wins'] += 1
    victor_record['games'] += 1
    for defeated in losers:
        competitor_data[defeated]['games'] += 1
        victor_record['won_vs'][defeated] += 1

In [13]:
# Summary table: wins, games played and win percentage for each competitor.
table = PrettyTable(['Competitor','Wins','Games Played', 'Win Rate'])
for competitor, data in competitor_data.items():
    games_played = data['games']
    # A competitor who only appears in skipped matches (e.g. ties) has zero
    # counted games; report 0.0 instead of dividing by zero.
    win_rate = round(data['wins'] / games_played * 100, 2) if games_played else 0.0
    table.add_row([competitor, data['wins'], games_played, win_rate])
print(table)

+----------------+------+--------------+----------+
|   Competitor   | Wins | Games Played | Win Rate |
+----------------+------+--------------+----------+
|      Jon       |  70  |     146      |  47.95   |
|      Arin      |  69  |     156      |  44.23   |
|     Barry      |  1   |      1       |  100.0   |
|     Danny      |  3   |      9       |  33.33   |
|       AI       |  13  |      26      |   50.0   |
|      Ross      |  0   |      1       |   0.0    |
|      Susy      |  0   |      1       |   0.0    |
| Jacob Anderson |  0   |      1       |   0.0    |
| Finn Wolfhard  |  0   |      1       |   0.0    |
+----------------+------+--------------+----------+


In [14]:
# Head-to-head matrix: each row is a winner, each column a loser, each cell the
# number of wins of the row competitor over the column competitor.
ordered_competitors = sorted(competitor_data)

# The backslash is escaped: a bare '\ ' is an invalid escape sequence and triggers
# a warning on current Python versions. The resulting text is unchanged.
table = PrettyTable(['vv Winner \\ Loser ->'] + ordered_competitors)
for competitor in ordered_competitors:
    row = [
        'X' if competitor == competitor_2 else competitor_data[competitor]['won_vs'][competitor_2]
        for competitor_2 in ordered_competitors
    ]
    table.add_row([competitor] + row)
print(table)

+----------------------+----+------+-------+-------+---------------+----------------+-----+------+------+
| vv Winner \ Loser -> | AI | Arin | Barry | Danny | Finn Wolfhard | Jacob Anderson | Jon | Ross | Susy |
+----------------------+----+------+-------+-------+---------------+----------------+-----+------+------+
|          AI          | X  |  13  |   0   |   1   |       0       |       0        |  12 |  0   |  0   |
|         Arin         | 5  |  X   |   0   |   5   |       1       |       1        |  64 |  0   |  0   |
|        Barry         | 0  |  1   |   X   |   0   |       0       |       0        |  0  |  1   |  1   |
|        Danny         | 0  |  3   |   0   |   X   |       0       |       0        |  0  |  0   |  0   |
|    Finn Wolfhard     | 0  |  0   |   0   |   0   |       X       |       0        |  0  |  0   |  0   |
|    Jacob Anderson    | 0  |  0   |   0   |   0   |       0       |       X        |  0  |  0   |  0   |
|         Jon          | 8  |  70  |   0   |  

### TrueSkill Analysis

In [15]:
# A competitor gets a fresh default trueskill.Rating the first time they are looked up.
competitor_rankings = defaultdict(trueskill.Rating)
# One record per (match, participant) with the participant's rating after that match.
competitor_ranking_over_time = []

for _, match in vs_df.iterrows():
    winner_name = match['Winner']
    if 'AI' in winner_name:
        # TODO: implement AI matches with ranks AI: 0, Top Player: 1, All other players: 2
        continue
    if 'Tie' in winner_name:
        # TODO: implement winner ties with ranks Winners: 0, All other players: 2
        continue

    # Regular Game Grumps VS case.
    # Because of how the data was collected, the winner is ranked 0 and every other
    # competitor is ranked 1; all teams are single-player. This also sidesteps
    # matches where several AI finished in different places.
    loser_names = match['Loser(s)']
    participants = [winner_name] + list(loser_names)
    teams = [[competitor_rankings[name]] for name in participants]
    ranks = [0] + [1] * len(loser_names)
    rated_teams = trueskill.rate(teams, ranks)

    # Results come back in the order the teams were passed in: winner first, then
    # the losers in 'Loser(s)' order. Each team holds exactly one rating.
    for name, rated_team in zip(participants, rated_teams):
        new_rating = rated_team[0]
        competitor_rankings[name] = new_rating
        competitor_ranking_over_time.append({
            'Overall Match Number': match['Overall Match Number'],
            'Competitor': name,
            'Mu': new_rating.mu,
            'Sigma': new_rating.sigma
        })

print(dict(competitor_rankings))
competitor_ranking_over_time_df = pd.DataFrame(competitor_ranking_over_time)
        
            

{'Jon': trueskill.Rating(mu=25.106, sigma=0.815), 'Arin': trueskill.Rating(mu=25.345, sigma=0.809), 'AI': trueskill.Rating(mu=22.626, sigma=1.512), 'Barry': trueskill.Rating(mu=30.913, sigma=6.147), 'Ross': trueskill.Rating(mu=24.181, sigma=4.628), 'Susy': trueskill.Rating(mu=24.183, sigma=4.638), 'Danny': trueskill.Rating(mu=23.336, sigma=2.460), 'Jacob Anderson': trueskill.Rating(mu=23.183, sigma=4.725), 'Finn Wolfhard': trueskill.Rating(mu=22.804, sigma=4.715)}


In [16]:
# Line chart of each competitor's mean skill estimate (Mu) across the match sequence.
match_axis = alt.X('Overall Match Number')
skill_axis = alt.Y('Mu', axis=alt.Axis(title='Skill Level'))
competitor_color = alt.Color('Competitor')

skill_chart = (
    alt.Chart(competitor_ranking_over_time_df, title='Skill Level over Time')
    .mark_line()
    .encode(x=match_axis, y=skill_axis, color=competitor_color)
)
skill_chart