In [1]:
import pickle
import sqlite3
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

# Load

In [2]:
conn = sqlite3.connect('../loteca/data/raw/betexplorer/db.sqlite3')
betexp = pd.read_sql_query('SELECT id, date, teamH, teamA, score, scoremod FROM matches', conn)
conn.close()

betexp.date = pd.to_datetime(betexp.date, dayfirst=True)
betexp.score = betexp.score.str.strip()
betexp['goalsH'] = [int(score.split(':')[0]) if score else np.nan for score in betexp.score]
betexp['goalsA'] = [int(score.split(':')[1]) if score else np.nan for score in betexp.score]

betexp.head()

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
0,GCjoqXpn,2018-01-22,Israel W,Finland W,0:0,,0.0,0.0
1,xMispDVu,2017-11-28,Spain W,Austria W,4:0,,4.0,0.0
2,xnyYIzXt,2017-11-28,Portugal W,Italy W,0:1,,0.0,1.0
3,pjD4z32E,2017-11-28,Romania W,Moldova W,3:1,,3.0,1.0
4,nZiE6CQJ,2017-11-28,Netherlands W,Ireland W,0:0,,0.0,0.0


In [3]:
loteca = pd.read_pickle('../data/pre/lotecas_matches.pkl')
loteca = loteca[loteca.roundno >= 366]  # exclude old rounds
loteca.head()

Unnamed: 0,roundno,gameno,date,teamH,goalsH,teamA,goalsA,happened
5110,366,1,2009-06-07,CRUZEIRO/MG,1,INTERNACIONAL/RS,1,True
5111,366,2,2009-06-07,ATLÉTICO/PR,0,ATLÉTICO/MG,4,True
5112,366,3,2009-06-07,AVAÍ/SC,0,SÃO PAULO/SP,0,True
5113,366,4,2009-06-06,AMÉRICA/RN,2,BRASILIENSE/DF,1,True
5114,366,5,2009-06-06,PONTE PRETA/SP,5,PORTUGUESA DESPORTOS/SP,2,True


In [4]:
with open('../data/interim/teams_ltb.pkl', mode='rb') as fp:
    teamsd = pickle.load(fp)
    
list(teamsd.items())[:5]

[('A B C/RN', {'ABC'}),
 ('ABAETÉ/PA', set()),
 ('ABC/RN', {'ABC'}),
 ('ALBÂNIA', {'Albania'}),
 ('ALECRIM SUB 20/RN', {'Alecrim U20'})]

# Algorithm

In [5]:
loteca_teams = set(loteca.teamH) | set(loteca.teamA)
betexp_teams = set()
for team in loteca_teams:
    if teamsd[team]:
        betexp_teams.add(next(iter(teamsd[team])))
        
betexp_teams = sorted(betexp_teams)
betexp_teams[:10]

['ABC',
 'AS Roma',
 'ASA',
 'Albania',
 'Alecrim',
 'Alecrim U20',
 'Algeria',
 'America MG',
 'America MG U20',
 'America PE']

In [6]:
dates = set()
for date in loteca.date:
    dates.add(date)

dates = sorted(dates)
dates[:10]

[NaT,
 Timestamp('2009-06-06 00:00:00'),
 Timestamp('2009-06-07 00:00:00'),
 Timestamp('2009-06-13 00:00:00'),
 Timestamp('2009-06-14 00:00:00'),
 Timestamp('2009-06-20 00:00:00'),
 Timestamp('2009-06-21 00:00:00'),
 Timestamp('2009-06-27 00:00:00'),
 Timestamp('2009-06-28 00:00:00'),
 Timestamp('2009-07-04 00:00:00')]

In [7]:
betexp.shape

(148101, 8)

In [8]:
betexp_min = betexp[(betexp.teamH.isin(betexp_teams)) | (betexp.teamA.isin(betexp_teams))]
betexp_min = betexp_min[betexp_min.date.isin(dates)]
betexp_min.shape

(16379, 8)

In [9]:
def find_betexplorer_equivalent(loteca_match):    
    row = loteca_match     
    
    # date
    choices = betexp_min    
    choices = choices[choices.date == row.date]
    
    # score
    choices = choices[choices.goalsH == row.goalsH]
    choices = choices[choices.goalsA == row.goalsA]
    
    # teams
    teamsH = teamsd[row.teamH]
    teamsA = teamsd[row.teamA]    
    if teamsH:
        choices = choices[choices.teamH.isin(teamsH)]
    if teamsA:
        choices = choices[choices.teamA.isin(teamsA)]
        
    # return
    if teamsH or teamsA:
        if choices.shape[0] == 0:
            return None
        if choices.shape[0] == 1:
            if add_team(row.teamH, choices.iloc[0].teamH) or add_team(row.teamA, choices.iloc[0].teamA):
                display(row)
                display(choices)
#             add_team(row.teamH, choices.iloc[0].teamH)
#             add_team(row.teamA, choices.iloc[0].teamA)
            return choices.index[0]
        else:
            raise ValueError("Found %s BetExplorer matches for Loteca #%s" % \
                             (choices.shape[0], row.name))
            
def add_team(loteca_string, betexp_string):
    if not teamsd[loteca_string]:
        print("{: >30} > {}".format(loteca_string, betexp_string))
        teamsd[loteca_string].add(betexp_string)
        return True
    else:
        return False
    

dict = OrderedDict()
            
first = True
newteamcnt = False
roundno = 1
while first or newteamcnt:    
    print("Round number %s" % roundno)
    teamcnt = len([1 for v in teamsd.values() if v])
    
    for id, row in loteca.iterrows():
        if id in dict.keys(): continue
        if not row.happened: continue  # deal with these cases later
        betexp_id = find_betexplorer_equivalent(row)
        if betexp_id:
            dict[id] = betexp_id
            
    first = False
    newteamcnt = len([1 for v in teamsd.values() if v]) - teamcnt
    roundno += 1
    
    print("New teams found: %s" % newteamcnt)
    print("Matches found till now: %s" % len(dict))
    
all([len(v) in (0, 1) for v in teamsd.values()])  # check teamsd dictionary integrity

Round number 1
                      SPORT/PE > Sport Recife


roundno                     366
gameno                       13
date        2009-06-07 00:00:00
teamH                  SPORT/PE
goalsH                        4
teamA               FLAMENGO/RJ
goalsA                        2
happened                   True
Name: 5122, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
22747,Io8dfmf7,2009-06-07,Sport Recife,Flamengo RJ,4:2,,4.0,2.0


              VASCO DA GAMA/RJ > Vasco


roundno                     367
gameno                        4
date        2009-06-13 00:00:00
teamH                GUARANI/SP
goalsH                        0
teamA          VASCO DA GAMA/RJ
goalsA                        0
happened                   True
Name: 5127, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
23115,fqVtfr0m,2009-06-13,Guarani,Vasco,0:0,,0.0,0.0


                  VILA NOVA/GO > Vila Nova FC


roundno                     367
gameno                        7
date        2009-06-13 00:00:00
teamH                  A B C/RN
goalsH                        1
teamA              VILA NOVA/GO
goalsA                        0
happened                   True
Name: 5130, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
23111,pAu70pNJ,2009-06-13,ABC,Vila Nova FC,1:0,,1.0,0.0


               SÃO  CAETANO/SP > Sao Caetano


roundno                     372
gameno                        8
date        2009-07-18 00:00:00
teamH           SÃO  CAETANO/SP
goalsH                        4
teamA        DUQUE DE CAXIAS/RJ
goalsA                        0
happened                   True
Name: 5201, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
23058,2P0ky8GD,2009-07-18,Sao Caetano,Duque de Caxias,4:0,,4.0,0.0


                 BRAGANTINO/SP > Bragantino


roundno                     372
gameno                        9
date        2009-07-18 00:00:00
teamH             BRAGANTINO/SP
goalsH                        1
teamA            PONTE PRETA/SP
goalsA                        1
happened                   True
Name: 5202, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
23053,tACtwUo1,2009-07-18,Bragantino,Ponte Preta,1:1,,1.0,1.0


       PORTUGUESA DESPORTOS/SP > Portuguesa


roundno                         373
gameno                            6
date            2009-07-25 00:00:00
teamH       PORTUGUESA DESPORTOS/SP
goalsH                            2
teamA                    AMÉRICA/RN
goalsA                            1
happened                       True
Name: 5213, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
23046,6VEhgT0f,2009-07-25,Portuguesa,America RN,2:1,,2.0,1.0


                   IPATINGA/MG > Betim


roundno                     390
gameno                        6
date        2009-11-21 00:00:00
teamH                AMÉRICA/RN
goalsH                        1
teamA               IPATINGA/MG
goalsA                        0
happened                   True
Name: 5451, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
22800,zuc6o4ea,2009-11-21,America RN,Betim,1:0,,1.0,0.0


                  AMERICANO/RJ > Americano FC


roundno                     396
gameno                       12
date        2010-01-17 00:00:00
teamH              AMERICANO/RJ
goalsH                        0
teamA             FLUMINENSE/RJ
goalsA                        3
happened                   True
Name: 5541, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
18846,U7F6hYE0,2010-01-17,Americano FC,Fluminense,0:3,,0.0,3.0


            GRÊMIO PRUDENTE/SP > Barueri


roundno                     409
gameno                       13
date        2010-04-18 00:00:00
teamH            SANTO ANDRÉ/SP
goalsH                        1
teamA        GRÊMIO PRUDENTE/SP
goalsA                        2
happened                   True
Name: 5724, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
18934,d4IyAGi2,2010-04-18,Santo Andre,Barueri,1:2,,1.0,2.0


                      VASCO/RJ > Vasco


roundno                     423
gameno                        9
date        2010-07-17 00:00:00
teamH                  VASCO/RJ
goalsH                        3
teamA               ATLÉTICO/PR
goalsA                        1
happened                   True
Name: 5916, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
18332,EDMuy35J,2010-07-17,Vasco,Atletico-PR,3:1,,3.0,1.0


             GRÊMIO BARUERI/SP > Barueri


roundno                         466
gameno                            8
date            2011-06-11 00:00:00
teamH             GRÊMIO BARUERI/SP
goalsH                            1
teamA       PORTUGUESA DESPORTOS/SP
goalsA                            1
happened                       True
Name: 6517, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
32436,4rHNNkEH,2011-06-11,Barueri,Portuguesa,1:1,,1.0,1.0


              XV PIRACICABA/SP > Piracicaba


roundno                     500
gameno                        8
date        2012-03-04 00:00:00
teamH          XV PIRACICABA/SP
goalsH                        0
teamA              SÃO PAULO/SP
goalsA                        1
happened                   True
Name: 6993, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
41098,xIERnwlJ,2012-03-04,Piracicaba,Sao Paulo,0:1,,0.0,1.0


                BOA ESPORTE/MG > Boa


roundno                     513
gameno                       12
date        2012-06-02 00:00:00
teamH                    ASA/AL
goalsH                        3
teamA            BOA ESPORTE/MG
goalsA                        2
happened                   True
Name: 7179, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
40848,21MWd2UF,2012-06-02,ASA,Boa,3:2,,3.0,2.0


                      PORTO/PE > CA Porto


roundno                     544
gameno                        4
date        2013-02-02 00:00:00
teamH                  PORTO/PE
goalsH                        0
teamA                NÁUTICO/PE
goalsA                        3
happened                   True
Name: 7605, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
50521,vmkJlBZ1,2013-02-02,CA Porto,Nautico,0:3,,0.0,3.0


                     CAXIAS/RS > SER Caxias


roundno                     544
gameno                        7
date        2013-02-03 00:00:00
teamH              JUVENTUDE/RS
goalsH                        1
teamA                 CAXIAS/RS
goalsA                        1
happened                   True
Name: 7608, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
49804,faVv12j6,2013-02-03,Juventude,SER Caxias,1:1,,1.0,1.0


                   SOROCABA/SP > Atl. Sorocaba


roundno                     546
gameno                        7
date        2013-02-17 00:00:00
teamH               SOROCABA/SP
goalsH                        2
teamA                LINENSE/SP
goalsA                        3
happened                   True
Name: 7636, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
50318,OG6pnQq7,2013-02-17,Atl. Sorocaba,Linense,2:3,,2.0,3.0


                 FLUMINENSE/BA > Fluminense de Feira


roundno                     547
gameno                       13
date        2013-02-24 00:00:00
teamH         BAHIA DE FEIRA/BA
goalsH                        2
teamA             FLUMINENSE/BA
goalsA                        0
happened                   True
Name: 7656, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
49381,hCfE4ata,2013-02-24,Bahia De Feira,Fluminense de Feira,2:0,,2.0,0.0


                   ATLÉTICO/BA > Alagoinhas


roundno                     549
gameno                       11
date        2013-03-10 00:00:00
teamH         BAHIA DE FEIRA/BA
goalsH                        2
teamA               ATLÉTICO/BA
goalsA                        1
happened                   True
Name: 7682, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
49370,hQZnJbeh,2013-03-10,Bahia De Feira,Alagoinhas,2:1,,2.0,1.0


                    CENTRAL/PE > Central SC


roundno                     569
gameno                        7
date        2013-08-04 00:00:00
teamH                CENTRAL/PE
goalsH                        1
teamA                GUARANY/CE
goalsA                        0
happened                   True
Name: 7958, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
49224,G8411H4r,2013-08-04,Central SC,Guarany,1:0,,1.0,0.0


          BOA ESPORTE CLUBE/MG > Boa


roundno                      584
gameno                         6
date         2013-11-16 00:00:00
teamH               PALMEIRAS/SP
goalsH                         3
teamA       BOA ESPORTE CLUBE/MG
goalsA                         0
happened                    True
Name: 8167, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
48568,6eYYBinE,2013-11-16,Palmeiras,Boa,3:0,,3.0,0.0


                    GUARANI/CE > Guarani de Juazeiro


roundno                     593
gameno                        8
date        2014-02-09 00:00:00
teamH              ITAPIPOCA/CE
goalsH                        0
teamA                GUARANI/CE
goalsA                        1
happened                   True
Name: 8295, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
59500,IsIFyJKs,2014-02-09,Itapipoca,Guarani de Juazeiro,0:1,,0.0,1.0


                   JUVENTUS/SC > Gremio Juventus


roundno                     595
gameno                        9
date        2014-02-23 00:00:00
teamH               JUVENTUS/SC
goalsH                        1
teamA               CRICIÚMA/SC
goalsA                        1
happened                   True
Name: 8324, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
59439,SdVDI7YJ,2014-02-23,Gremio Juventus,Criciuma,1:1,,1.0,1.0


                   SÃO JOSÉ/RS > EC Sao Jose


roundno                     597
gameno                        5
date        2014-03-09 00:00:00
teamH            VERANÓPOLIS/RS
goalsH                        1
teamA               SÃO JOSÉ/RS
goalsA                        1
happened                   True
Name: 8348, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
59613,67n8C93d,2014-03-09,Veranopolis,EC Sao Jose,1:1,,1.0,1.0


                    GUARANI/MG > Guarani EC


roundno                     597
gameno                       12
date        2014-03-09 00:00:00
teamH                GUARANI/MG
goalsH                        0
teamA               ATLÉTICO/MG
goalsA                        1
happened                   True
Name: 8355, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
59805,ATfX6I5Q,2014-03-09,Guarani EC,Atletico-MG,0:1,,0.0,1.0


                      ÁGUIA/PA > Aguia De Maraba


roundno                     615
gameno                        5
date        2014-08-03 00:00:00
teamH               BOTAFOGO/PB
goalsH                        2
teamA                  ÁGUIA/PA
goalsA                        0
happened                   True
Name: 8600, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
58903,GS8PE64J,2014-08-03,Botafogo PB,Aguia De Maraba,2:0,,2.0,0.0


                 MOTO CLUBE/MA > Moto Club


roundno                     619
gameno                        4
date        2014-08-31 00:00:00
teamH             MOTO CLUBE/MA
goalsH                        2
teamA                  RIVER/PI
goalsA                        2
happened                   True
Name: 8655, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
59075,QD8QgQuK,2014-08-31,Moto Club,River-PI,2:2,,2.0,2.0


             SÃO JOSÉ (PA) /RS > EC Sao Jose


roundno                     640
gameno                       10
date        2015-02-15 00:00:00
teamH         SÃO JOSÉ (PA) /RS
goalsH                        0
teamA              JUVENTUDE/RS
goalsA                        1
happened                   True
Name: 8955, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
68696,j7CeYnCO,2015-02-15,EC Sao Jose,Juventude,0:1,,0.0,1.0


                     BRASIL/RS > Brasil de Pelotas


roundno                     640
gameno                       11
date        2015-02-14 00:00:00
teamH          NOVO HAMBURGO/RS
goalsH                        0
teamA                 BRASIL/RS
goalsA                        0
happened                   True
Name: 8956, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
68700,CGDiZSdI,2015-02-14,Novo Hamburgo,Brasil de Pelotas,0:0,,0.0,0.0


                 RIO BRANCO/AC > Rio Branco


roundno                     641
gameno                        9
date        2015-02-22 00:00:00
teamH             RIO BRANCO/AC
goalsH                        0
teamA                   REMO/PA
goalsA                        2
happened                   True
Name: 8968, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
69589,063ecp89,2015-02-22,Rio Branco,Remo,0:2,,0.0,2.0


                   PALMEIRA/RN > Palmeira de Una


roundno                     685
gameno                        9
date        2016-01-23 00:00:00
teamH                    ABC/RN
goalsH                        2
teamA               PALMEIRA/RN
goalsA                        1
happened                   True
Name: 9584, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
74487,xWzqPb4c,2016-01-23,ABC,Palmeira de Una,2:1,,2.0,1.0


                BOCA JÚNIOR/SE > Boca Junior


roundno                     685
gameno                       11
date        2016-01-24 00:00:00
teamH              ITABAIANA/SE
goalsH                        0
teamA            BOCA JÚNIOR/SE
goalsA                        0
happened                   True
Name: 9586, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
74547,niZl3m7c,2016-01-24,Itabaiana,Boca Junior,0:0,,0.0,0.0


                    VITORIA/PE > Academica Vitoria


roundno                     685
gameno                       12
date        2016-01-24 00:00:00
teamH                VITORIA/PE
goalsH                        0
teamA              PESQUEIRA/PE
goalsA                        1
happened                   True
Name: 9587, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
74351,tz2jMaah,2016-01-24,Academica Vitoria,Pesqueira,0:1,,0.0,1.0


                    GUARANI/SC > Guarani de Palhoca


roundno                     687
gameno                        7
date        2016-02-06 00:00:00
teamH            CHAPECOENSE/SC
goalsH                        1
teamA                GUARANI/SC
goalsA                        1
happened                   True
Name: 9610, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
73577,YZmZVX0D,2016-02-06,Chapecoense-SC,Guarani de Palhoca,1:1,,1.0,1.0


                   CAMBURIÚ/SC > Camboriu


roundno                     687
gameno                        8
date        2016-02-07 00:00:00
teamH               CAMBURIÚ/SC
goalsH                        1
teamA                   AVAÍ/SC
goalsA                        1
happened                   True
Name: 9611, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
73575,IcMilFGs,2016-02-07,Camboriu,Avai,1:1,,1.0,1.0


                    FORMOSA/GO > Bosque Formosa


roundno                     687
gameno                       11
date        2016-02-07 00:00:00
teamH                FORMOSA/GO
goalsH                        0
teamA                   GAMA/DF
goalsA                        2
happened                   True
Name: 9614, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
73404,OvN6AE7f,2016-02-07,Bosque Formosa,Gama,0:2,,0.0,2.0


                  COLO COLO/BA > Colo C.


roundno                     689
gameno                        7
date        2016-02-21 00:00:00
teamH              COLO COLO/BA
goalsH                        2
teamA                  BAHIA/BA
goalsA                        3
happened                   True
Name: 9638, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
73319,pldDFp1s,2016-02-21,Colo C.,Bahia,2:3,,2.0,3.0


                 ESTANCIANO/SE > Estanciano EC


roundno                     697
gameno                        7
date        2016-04-17 00:00:00
teamH             ESTANCIANO/SE
goalsH                        0
teamA              ITABAIANA/SE
goalsA                        2
happened                   True
Name: 9750, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
74560,dteEqHSc,2016-04-17,Estanciano EC,Itabaiana,0:2,,0.0,2.0


                   YPIRANGA/RS > Ypiranga FC


roundno                         713
gameno                            8
date            2016-08-07 00:00:00
teamH                   YPIRANGA/RS
goalsH                            1
teamA       PORTUGUESA DESPORTOS/SP
goalsA                            0
happened                       True
Name: 9975, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
72793,SbEx4xY1,2016-08-07,Ypiranga FC,Portuguesa,1:0,,1.0,0.0


           TIGRES DO BRASIL/RJ > Tigres Brasil


roundno                     732
gameno                        3
date        2017-01-14 00:00:00
teamH            NOVA IGUAÇU/RJ
goalsH                        2
teamA       TIGRES DO BRASIL/RJ
goalsA                        0
happened                   True
Name: 10236, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
78905,4QfzTVKs,2017-01-14,Nova Iguacu,Tigres Brasil,2:0,,2.0,0.0


                     CUIABÁ/MT > Cuiaba Esporte


roundno                     755
gameno                        5
date        2017-06-25 00:00:00
teamH                 CUIABÁ/MT
goalsH                        1
teamA                    ASA/AL
goalsA                        1
happened                   True
Name: 10560, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
78213,rqvdbQ0o,2017-06-25,Cuiaba Esporte,ASA,1:1,,1.0,1.0


          RIO BRANCO/AC SUB 20 > Rio Branco AC U20


roundno                      781
gameno                         3
date         2018-01-06 00:00:00
teamH       RIO BRANCO/AC SUB 20
goalsH                         0
teamA            CEARÁ/CE SUB 20
goalsA                         6
happened                    True
Name: 10922, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
83286,rebmGMEk,2018-01-06,Rio Branco AC U20,Ceara U20,0:6,,0.0,6.0


       FERNANDÓPOLIS/SP SUB 20 > Fernandopolis U20


roundno                         781
gameno                            5
date            2018-01-06 00:00:00
teamH       FERNANDÓPOLIS/SP SUB 20
goalsH                            2
teamA             GUARANI/SP SUB 20
goalsA                            4
happened                       True
Name: 10924, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
83272,OvimZR1o,2018-01-06,Fernandopolis U20,Guarani U20,2:4,,2.0,4.0


                  VILA NOVA/MG > Villa Nova MG


roundno                     787
gameno                        7
date        2018-02-17 00:00:00
teamH               CRUZEIRO/MG
goalsH                        1
teamA              VILA NOVA/MG
goalsA                        0
happened                   True
Name: 11010, dtype: object

Unnamed: 0,id,date,teamH,teamA,score,scoremod,goalsH,goalsA
84076,bwTio95S,2018-02-17,Cruzeiro,Villa Nova MG,1:0,,1.0,0.0


New teams found: 43
Matches found till now: 4227
Round number 2
New teams found: 0
Matches found till now: 4227


True

In [19]:
# http://www.betexplorer.com/soccer/brazil/serie-b-2009/america-rn-betim/zuc6o4ea/
# it's Ipatinga, not Betim
betexp.loc[22800]

id                     zuc6o4ea
date        2009-11-21 00:00:00
teamH                America RN
teamA                     Betim
score                       1:0
scoremod                       
goalsH                        1
goalsA                        0
Name: 22800, dtype: object

In [20]:
# http://www.betexplorer.com/soccer/brazil/campeonato-paulista-2010/santo-andre-barueri/d4IyAGi2/
# It's Grêmio Prudente, not Barueri
betexp.loc[18934]

id                     d4IyAGi2
date        2010-04-18 00:00:00
teamH               Santo Andre
teamA                   Barueri
score                       1:2
scoremod                       
goalsH                        1
goalsA                        2
Name: 18934, dtype: object

In [21]:
# http://www.betexplorer.com/soccer/brazil/serie-b-2011/barueri-portuguesa/4rHNNkEH/
# This one is right, but it contradicts with the first one
betexp.loc[32436]

id                     4rHNNkEH
date        2011-06-11 00:00:00
teamH                   Barueri
teamA                Portuguesa
score                       1:1
scoremod                       
goalsH                        1
goalsA                        1
Name: 32436, dtype: object

In [23]:
# we should have caught Fernadopolis
# OH, it is because of the state
betexp.loc[83272]

id                     OvimZR1o
date        2018-01-06 00:00:00
teamH         Fernandopolis U20
teamA               Guarani U20
score                       2:4
scoremod                       
goalsH                        2
goalsA                        4
Name: 83272, dtype: object

In [10]:
print(len(dict))
print(loteca.shape[0])
print(len(dict) / loteca.shape[0])

# todo: one day before and one day after

4227
5908
0.715470548408937
