In [14]:
import logging
from typing import List, Set, Dict, Tuple, Optional

from unittest.mock import patch
from itertools import zip_longest
import functools

import os
import tarfile
import zipfile
import bz2
import glob

import numpy as np
import pandas as pd
import betfairlightweight
from betfairlightweight import StreamListener
from tqdm.notebook import tqdm



def cric_betfair_wrangle(filenames: List[str], filename_to_save: str = 'betfair_data') -> pd.DataFrame:
    """Turn Betfair historic archives into a one-row-per-market DataFrame.

    Every name in ``filenames`` is resolved to ``./Cricket Data/<name>.tar``.
    Each market file found there is replayed through a betfairlightweight
    historical generator stream.  For every market book that has exactly two
    runners one row is recorded: market id, publish time, both runner names,
    both last traded prices and the status of the first runner.  Rows with a
    missing value are dropped and the remaining rows are written to
    ``./Cricket Data/<filename_to_save>.csv``.

    Parameters
    ----------
    filenames : list of str
        Archive names without directory or ``.tar`` extension.
    filename_to_save : str, default 'betfair_data'
        Base name (no extension) of the CSV written into ``./Cricket Data/``.

    Returns
    -------
    pandas.DataFrame
        One row per market (the recorded row with the latest publish time),
        without the ``market_id`` column and with ``date`` reduced to a
        calendar date.  Empty when no rows were recorded.

    Notes
    -----
    Side effects: calls ``logging.basicConfig(level=logging.INFO)`` and
    writes the CSV described above.
    """
    market_paths = ['./Cricket Data/' + filename + '.tar' for filename in filenames]

#---------------------------------------loading from tar and extracting bz2 files------------------------------

    def load_markets(file_paths: List[str]):
        """Yield one bz2-decompressing file object per market file.

        Accepts directories containing ``.bz2`` files, ``.tar`` archives and
        ``.zip`` archives; any other path is ignored.  Each yielded object is
        closed as soon as the consumer asks for the next one.
        """
        for file_path in file_paths:
            if os.path.isdir(file_path):
                # os.path.join supplies the separator, so the search stays
                # inside `file_path` whether or not it ends with a slash.
                pattern = os.path.join(file_path, '**', '*.bz2')
                for path in glob.iglob(pattern, recursive=True):
                    with bz2.BZ2File(path, 'rb') as market_file:
                        yield market_file
            elif os.path.isfile(file_path):
                ext = os.path.splitext(file_path)[1]
                # iterate through a tar archive
                if ext == '.tar':
                    with tarfile.TarFile(file_path) as archive:
                        for member in archive:
                            raw = archive.extractfile(member)
                            if raw is None:
                                # directory or other non-file member
                                continue
                            # bz2 does not close the wrapped stream, so both
                            # objects are managed explicitly.
                            with raw, bz2.open(raw) as market_file:
                                yield market_file
                # or a zip archive
                elif ext == '.zip':
                    with zipfile.ZipFile(file_path) as archive:
                        for info in archive.infolist():
                            if info.is_dir():
                                continue
                            with archive.open(info) as raw, bz2.open(raw) as market_file:
                                yield market_file

#------------------------------------------betfairlightweight json interpretation---------------------------

    # setup logging
    logging.basicConfig(level=logging.INFO)

    # create trading instance (don't need username/password)
    trading = betfairlightweight.APIClient("username", "password","app_key")

    # create listener
    listener = StreamListener(max_latency=None)

    columns = ['market_id', 'date', 'team_a', 'team_b',
               'team_a_sp', 'team_b_sp', 'team_a_win']
    rows = []

    # First pass only counts the market files so the progress bar has a total.
    n_files = sum(1 for _ in load_markets(market_paths))

    for file_obj in tqdm(load_markets(market_paths), total=n_files):

        stream = trading.streaming.create_historical_generator_stream(
            file_path=file_obj,
            listener=listener,
        )

        # While the stream is consumed, `open` is replaced by a function that
        # returns its first argument, so the already-open file object passed
        # as `file_path` is used as-is.
        with patch("builtins.open", lambda f, _: f):
            gen = stream.get_generator()

            for market_books in gen():
                for market_book in market_books:
                    # NOTE(review): `inplay` looks like a boolean flag, in
                    # which case this test never skips anything and in-play /
                    # settled books are recorded too -- confirm the intent.
                    if market_book.inplay == 3:
                        continue

                    runners = market_book.runners
                    if len(runners) != 2:
                        # only two-runner markets are recorded
                        continue

                    runner_defs = market_book.market_definition.runners
                    names = []
                    for runner in runners:
                        names.append(next(
                            rd.name for rd in runner_defs
                            if rd.selection_id == runner.selection_id
                        ))

                    first, second = runners
                    rows.append((
                        market_book.market_id,
                        market_book.publish_time,
                        names[0],
                        names[1],
                        # a missing (or zero) last traded price becomes NaN
                        # and the row is removed by dropna() below
                        first.last_price_traded or np.nan,
                        second.last_price_traded or np.nan,
                        first.status,
                    ))

    ticks = pd.DataFrame(rows, columns=columns).dropna()

    ticks.to_csv('./Cricket Data/'+filename_to_save+'.csv',index=False)

#------------------------------------------backtest wrangling-----------------------------------------------

    if ticks.empty:
        # Nothing was recorded: `date` is not datetime-typed here, so the
        # `.dt` accessor below would raise.  Return the empty result instead.
        return ticks.drop('market_id', axis=1).reset_index(drop=True)

    # Index label of the latest-published row of every market (first one on ties).
    last_rows = ticks.groupby('market_id', sort=False)['date'].idxmax()

    latest = (
        ticks.loc[ticks.index.isin(last_rows)]
        .drop('market_id', axis=1)
        .reset_index(drop=True)
    )
    latest['date'] = latest['date'].dt.date

    return latest
    
#---------------------------------------------------------test----------------------------------------------------------

# Run the wrangler on the 'data_post_apr20' archive; the returned frame is the cell's displayed value.
cric_betfair_wrangle(filenames=['data_post_apr20'], filename_to_save='betfair_data')

  0%|          | 0/1714 [00:00<?, ?it/s]

  0%|          | 0/1429 [00:00<?, ?it/s]

Unnamed: 0,date,team_a,team_b,team_a_sp,team_b_sp,team_a_win
0,2021-07-09,West Indies Women,Pakistan Women,1.01,1000.00,WINNER
1,2021-07-09,Sussex,Essex,1.01,1000.00,WINNER
2,2021-07-09,Hampshire,Somerset,1.01,1000.00,WINNER
3,2021-07-09,Nottinghamshire,Yorkshire,1.01,1000.00,WINNER
4,2021-07-09,Surrey,Kent,260.00,1.01,LOSER
...,...,...,...,...,...,...
1424,2021-06-18,Essex,Gloucestershire,2.04,1.96,REMOVED
1425,2021-06-18,Multan Sultans,Lahore Qalandars,1.01,1000.00,WINNER
1426,2021-06-18,Birmingham Bears,Lancashire,2.16,1.87,REMOVED
1427,2021-06-18,Yorkshire,Durham,1.78,1.70,REMOVED


In [8]:
import logging

import betfairlightweight
from betfairlightweight import StreamListener


"""
Data needs to be downloaded from:
    https://historicdata.betfair.com
"""

# setup logging
logging.basicConfig(level=logging.FATAL)

# create trading instance (don't need username/password)
trading = betfairlightweight.APIClient("username", "password","app_key")

# create listener
listener = StreamListener(max_latency=None)

# create historical stream (update file_path to your file location)
stream = trading.streaming.create_historical_generator_stream(
    file_path="1.185122736",
    listener=listener,
)

# create generator
gen = stream.get_generator()

# Record prices to a file.  The file is opened once and kept open for the
# whole replay instead of being reopened in append mode per market book.
with open("output.csv", "w") as output:
    # NOTE(review): the fifth column is labelled SelectionId but each row
    # below writes the runner *name* there -- confirm which one is wanted.
    output.write("Time,MarketId,Market_Status,Inplay,SelectionId,LastPriceTraded,Runner_Status\n")

    for market_books in gen():
        for market_book in market_books:
            # runner definitions (which carry the names) live on the market definition
            runner_defs = market_book.market_definition.runners

            for runner in market_book.runners:
                # a missing (or zero) last traded price is written as an empty field
                last_price = runner.last_price_traded or ""

                # echo every price to the cell output
                print(last_price)

                # name of the runner definition sharing this selection id
                runner_name = next(
                    rd.name for rd in runner_defs
                    if rd.selection_id == runner.selection_id
                )

                output.write(
                    "%s,%s,%s,%s,%s,%s,%s\n"
                    % (
                        market_book.publish_time,
                        market_book.market_id,
                        market_book.status,
                        market_book.inplay,
                        runner_name,
                        last_price,
                        runner.status,
                    )
                )



2.54
1.65
2.54
1.64
2.56
1.65
2.56
1.61
2.54
1.65
2.54
1.63
2.56
1.63
2.54
1.63
2.54
1.67
2.54
1.65
2.56
1.65
2.48
1.65
2.46
1.69
2.46
1.68
2.48
1.66
2.52
1.66
2.52
1.65
2.52
1.64
2.52
1.65
2.52
1.64
2.52
1.65
2.5
1.66
2.5
1.69
2.5
1.67
2.5
1.69
2.44
1.69
2.44
1.7
2.44
1.69
2.44
1.65
2.44
1.69
2.44
1.7
2.42
1.7
2.42
1.73
2.38
1.7
2.34
1.74
2.34
1.75
2.32
1.74
2.34
1.73
2.36
1.73
2.36
1.7
2.34
1.68
2.32
1.72
2.28
1.78
2.28
1.75
2.3
1.76
1.56
2.5
1.9
1.78
2.3
1.79
2.18
1.84
2.24
1.7
2.74
1.56
2.8
1.51
2.84
1.55
2.82
1.57
2.78
1.56
3.7
1.5
2.78
1.52
2.84
1.54
2.78
1.56
3.1
1.47
3.25
1.45
3.7
1.3
3.35
1.33
4.3
1.3
4.2
1.31
4.3
1.3
4.5
1.19
4.5
1.32
3.85
1.35
4.1
1.35
3.75
1.36
3.3
1.41
3.05
1.48
3.2
1.46
3.4
1.49
3.2
1.45
2.4
1.66
2.5
1.67
2.46
1.69
2.5
1.68
2.42
1.69
3.0
1.5
2.8
1.53
2.96
1.45
3.4
1.42
3.0
1.44
3.4
1.39
3.0
1.45
3.3
1.46
2.54
1.64
2.7
1.59
2.8
1.65
2.44
1.61
2.6
1.66
2.42
1.71
2.6
1.64
2.26
1.8
2.38
1.73
2.1
1.91
2.0
2.0
1.94
2.0
1.96
2.04
1.96
2.0
1.82
2.2
1.68
2.46
1.