## Importing The Data

In [1]:
import pandas as pd
import random

In [12]:
betting_data = pd.read_csv("../data/anonymous_betting_data.csv")

In [13]:
print("There are {0:,} rows in the base dataset"\
      .format(len(betting_data.index)))

There are 129,271 rows in the base dataset


## Match Selection

Remove duplicate matches and remove "cancelled" matches.

In [18]:
all_matches = betting_data[~betting_data["is_cancelled_or_walkover"]]\
    .drop_duplicates(subset="match_uid")\
    .copy()

In [19]:
print("There are {0:,} distinct matches of interest the range from {1:.0f} to {2:.0f}"\
      .format(len(all_matches.index), all_matches["year"].min(), all_matches["year"].max()))

There are 26,006 distinct matches of interest the range from 2009 to 2015


## Player Statistics

In this section we calculate various player statistics.

In [154]:
winners = all_matches\
    .drop_duplicates(subset="winner")\
    .ix[:,"winner"].tolist()
    
losers = all_matches\
    .drop_duplicates(subset="loser")\
    .ix[:,"loser"].tolist()
    
players = winners + list(set(losers) - set(winners))
    
print("There is a total of {0:,} players".format(len(players)))

There is a total of 1,523 players


In [162]:
class Player(object):
    def __init__(self, player_name):
        self.name = player_name
        self.matches = self.get_matches()
        self.wins = len(self.matches[self.matches["winner"] == self.name])
        self.losses = len(self.matches[self.matches["loser"] == self.name])
        self.win_loss_by_year = self.get_win_loss_by_year()
        self.win_loss_ratio = 0 if self.losses <= 0 else float(self.wins) / float(self.losses)

    def get_matches(self):
        player_matches = all_matches.loc[
            (all_matches["winner"] == self.name) |
            (all_matches["loser"] == self.name)]\
            .copy()
        return player_matches
    
    def get_win_loss_by_year(self):
        wins_by_year = self.matches.groupby("year")\
            .apply(lambda x: x[x["winner"] == self.name]["winner"].count())
        losses_by_year = self.matches.groupby("year")\
            .apply(lambda x: x[x["loser"] == self.name]["loser"].count())
        win_loss_by_year = pd.concat([wins_by_year, losses_by_year], axis=1, keys=["wins", "losses"])
        return win_loss_by_year
    
    def __str__(self):
        return "Player {0} has an overall win/loss ratio of {1}/{2} ({3:.2f}) \n{4}"\
             .format(self.name, self.wins, self.losses, self.win_loss_ratio, self.win_loss_by_year)

def print_player(player_name):
    player = Player(player_name)
    if player.wins > 40:
        print player

Import suspicious players

In [163]:
report_players = [
     'f5cecec5a7714e86cf761e7cda278f144d82eac78d15c7f67aecf9ba186e7830',
     'e39d12f03f441a3e8eb207fb12eced70fdf2c06cbaf27e123d457d1780447baf',
     'fa4319726a465ed7c72f125332082b1e1afdef2d8164c4dfff237d78aed2e39e',
     '0ffe23c8b80916f6b2c23a52e08018374d68d12f49b261ccb36fecd52927cc0a',
     'b5c0e84eda074671d6a3d7edf59e65242d080e26d35fa158b11f74c9891355e4',
     '11411268e0ea9e1527a49193485d117e35b0645a17f4b0b40da262300e8d4430',
     '02a755e7afd8581feadcfd369d8a62fc7fec476ce4e0c55de5fc03c0da0f3c81',
     '47f8d9fb7d7156217c15e7aea9127cf8a7ffcabdd3e97fc16c533dc807430308',
     '2ed14b47b1c58532b757d76404dcf1a114b712e50193f0b0a5a05f52e3067134',
     '6840fadf79442f1fa10569f210305a669242159fd31abc2eaa94d158a7e3b301',
     '91066973c924f6a41cef067cb3ebdb8f6d6c6a0cdd85933bb84965c25d377c18',
     'd489880f3981ace1f6c03616fe169a0b5e513ccd5da3547ce971dde26b3bde43',
     '30b4b70b6ed9adb822559be9d7f74747e73af99a33c0649d87dd21cadedb9681',
     '5b94678362f659bd7058eba695e963a2039567f3830d502665808303c27771c4',
     'c06ec5c640acfd2a94350a468185475f73e1d614f497540cf4e05f2a905a8fac',
     '7a46553d6c2a135edb7d6a4e3408be7eb5f41953f442fb108a7b6e587ecee038',
     'dd83d749567ad7c7f4e89656b08d4791acefd60724cc848697903d2aa13731c7',
     'aa2bd77955c425c8da69a09584beaccf24a2dc15b903beecc7e9069d4c520c21',
     '55c14ebb1ec4efa5c6e3dd272c747896d2647c883ca6861ebc6f83d382075c69',
     '694668c73710b80adb51764ae06a1413fb93e7d10e0d329a63c83a14b77c3fd2',
     'dcb744cbd79602f5ad05227acabb3be17729b2b5bda60595f5b62c0f0145843f',
     '51c4b3f11032d72af378075926b7ed628360fd3ec605a9298a00e076ef797f4a',
     'd5e122c7e9bd24d1295d3bbcf29455c21676e09ff8f69255dd387c0240544d20',
     '614c2049880f015352fb695961ec2763194439ce9fbb11ece98e2264eb1942df',
     '061a49265f4f3b6970b8943181aa93431bbfcc6cc96f5a6b23590c2785fddc5a',
     '73f6d26367e4793ebd7dfe1e1ef17cb64455e41c9e30cc78fb7ef7277268b546',
     'cd4a092bde2eba04a8adcb2f241c638b560ee56b9c537f78bd4808937f1b73e2',
     'c9d4889baca9908d2ca2f8515d02f164fcd84642bee5e73cbf3544b26a8315a6'
]

map(print_player, report_players)


Player 0ffe23c8b80916f6b2c23a52e08018374d68d12f49b261ccb36fecd52927cc0a has an overall win/loss ratio of 71/70 (1.01) 
      wins  losses
year              
2009    10      10
2010    28      24
2011    28      24
2012     5      12
Player b5c0e84eda074671d6a3d7edf59e65242d080e26d35fa158b11f74c9891355e4 has an overall win/loss ratio of 158/117 (1.35) 
      wins  losses
year              
2009    54      17
2010    28      20
2011    25      25
2012    23      23
2013    22      22
2014     6      10
Player 2ed14b47b1c58532b757d76404dcf1a114b712e50193f0b0a5a05f52e3067134 has an overall win/loss ratio of 252/133 (1.89) 
      wins  losses
year              
2009    21      15
2010    38      22
2011    34      19
2012    45      23
2013    49      23
2014    31      19
2015    34      12
Player 6840fadf79442f1fa10569f210305a669242159fd31abc2eaa94d158a7e3b301 has an overall win/loss ratio of 42/63 (0.67) 
      wins  losses
year              
2009     9      12
2010     3      11
2011   

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]