# Knihovny

In [9]:
import json
import pandas as pd
import os
import numpy as np

# Funkce

In [2]:
def create_detail_dict(bookmaker_names):
    """Merge the per-bookmaker detail dumps into one lookup keyed by event URL.

    For every name in ``bookmaker_names`` the file
    ``../data/data_<name>_detail.json`` is loaded (the relative path is
    resolved against the current working directory) and each entry's
    ``bet_dict`` is stored under that entry's ``event_url``.  If the same URL
    occurs more than once, the entry read last wins.

    Args:
        bookmaker_names: Iterable of bookmaker identifiers used to build the
            file names.

    Returns:
        dict mapping ``event_url`` to the corresponding ``bet_dict``.

    Raises:
        OSError: If a bookmaker's detail file cannot be opened.
        json.JSONDecodeError: If a detail file does not contain valid JSON.
    """
    detail_dict = {}
    for bookmaker_name in bookmaker_names:
        file_path = f"../data/data_{bookmaker_name}_detail.json"
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        for item in data:
            try:
                detail_dict[item['event_url']] = item['bet_dict']
            except (KeyError, TypeError):
                # Best effort: entries that lack one of the two fields, or
                # that are not mappings at all, are skipped.
                continue
    return detail_dict

def create_all_bets_df(events, detail_dict):
    """Expand every event into one row per (bet, group) of its sport template.

    The template file ``betscraper/files/detail_dicts/template.json`` (path
    relative to the current working directory) is read as
    ``{sport_name: {bet_name: {group_name: {option_name: default_value}}}}``.
    Events whose ``sport_name`` has no template use the ``'other'`` entry.

    Args:
        events: DataFrame with the columns ``events_id``, ``event_url_dict``
            (bookmaker name -> event URL) and ``sport_name``.
        detail_dict: Mapping ``event_url -> bet_name -> group_name ->
            {option_name: value}``, as produced by ``create_detail_dict``.

    Returns:
        DataFrame with the columns ``events_id``, ``event_url_dict``,
        ``bet_name``, ``group_name``, ``option_names`` (tuple of the template's
        option names) and ``bet_values_dict`` (bookmaker name -> tuple of
        values).  A bookmaker with no scraped data for a group gets the
        template's default values.  All six columns are present even when no
        rows are produced.

    Raises:
        OSError: If the template file cannot be opened.
        KeyError: If a required column is missing or the template has no
            ``'other'`` entry when the fallback is needed.
    """
    template_path = 'betscraper/files/detail_dicts/template.json'
    with open(template_path, 'r', encoding='utf-8') as file:
        full_template = json.load(file)

    columns = [
        'events_id',
        'event_url_dict',
        'bet_name',
        'group_name',
        'option_names',
        'bet_values_dict',
    ]

    # Rows are collected in a list and turned into a DataFrame once;
    # concatenating a one-row frame per iteration copies the whole table
    # every time and is quadratic in the number of rows.
    rows = []
    for _, row in events.iterrows():
        events_id = row['events_id']
        event_url_dict = row['event_url_dict']
        try:
            template = full_template[row['sport_name']]
        except (KeyError, TypeError):
            # Unknown (or unhashable / missing) sport: use the generic template.
            template = full_template['other']

        for bet_name, group_dict in template.items():
            for group_name, option_dict in group_dict.items():
                default_values = tuple(option_dict.values())
                bet_values_dict = {}
                for bookmaker_name, event_url in event_url_dict.items():
                    try:
                        scraped = detail_dict[event_url][bet_name][group_name]
                        # NOTE(review): values are taken in the scraped dict's
                        # own key order; this assumes it matches the template's
                        # option order - confirm against the spiders.
                        bet_values_dict[bookmaker_name] = tuple(scraped.values())
                    except (KeyError, TypeError, AttributeError):
                        # Nothing usable scraped for this bookmaker/bet/group.
                        bet_values_dict[bookmaker_name] = default_values
                rows.append({
                    'events_id': events_id,
                    'event_url_dict': event_url_dict,
                    'bet_name': bet_name,
                    'group_name': group_name,
                    'option_names': tuple(option_dict.keys()),
                    'bet_values_dict': bet_values_dict,
                })

    return pd.DataFrame(rows, columns=columns)

def create_sure_bets_df(all_bets_df):
    """Find bookmaker combinations whose odds form a sure bet.

    For every bet with two or three options, each way of assigning one
    bookmaker to each option (the same bookmaker may be used for several
    options) is checked.  A combination is reported when the sum of the
    reciprocals of the chosen odds is below 1.

    Args:
        all_bets_df: DataFrame with the columns ``events_id``, ``bet_name``,
            ``group_name``, ``option_names``, ``event_url_dict``,
            ``bet_values_dict`` (bookmaker name -> tuple of odds, one per
            option) and ``bet_values_max`` (per-option maximum of those odds).

    Returns:
        DataFrame with one row per sure-bet combination and the columns
        ``events_id``, ``bet_name``, ``group_name``, ``option_names``,
        ``event_url_dict`` (URLs of the bookmakers involved),
        ``bet_values_dict`` (``"<bookmaker>_<option>"`` -> odds),
        ``bet_amounts_100_dict`` (same keys -> ``100 / odds``) and
        ``sure_bet_result`` (the reciprocal sum).  Rows appear in input order;
        within a row the first option's bookmaker varies slowest.
    """
    from itertools import product

    columns = [
        'events_id',
        'bet_name',
        'group_name',
        'option_names',
        'event_url_dict',
        'bet_values_dict',
        'bet_amounts_100_dict',
        'sure_bet_result',
    ]

    def reciprocal_sum(values):
        # Odds below 1 (presumably "not offered" placeholders such as 0 or -1)
        # are counted as 1, so a combination containing one can never sum to
        # less than 1 and a 0 never causes a division by zero.
        return sum(1 / (1 if value < 1 else value) for value in values)

    rows = []
    for _, row in all_bets_df.iterrows():
        # Cheap pre-check: if even the best odds per option do not give a
        # sure bet, no combination of individual bookmakers can.
        if not reciprocal_sum(row['bet_values_max']) < 1:
            continue

        option_names = row['option_names']
        if len(option_names) not in (2, 3):
            continue

        # Candidates per option position.  A bookmaker whose tuple is empty or
        # too short to cover a position is left out there instead of raising
        # IndexError.
        candidates = [
            [
                (bookmaker_name, values[position])
                for bookmaker_name, values in row['bet_values_dict'].items()
                if len(values) > position
            ]
            for position in range(len(option_names))
        ]

        for combination in product(*candidates):
            sure_bet_result = reciprocal_sum(value for _, value in combination)
            if sure_bet_result < 1:
                labelled = [
                    (f"{bookmaker_name}_{option_name}", value)
                    for (bookmaker_name, value), option_name in zip(combination, option_names)
                ]
                rows.append({
                    'events_id': row['events_id'],
                    'bet_name': row['bet_name'],
                    'group_name': row['group_name'],
                    'option_names': option_names,
                    'event_url_dict': {
                        bookmaker_name: row['event_url_dict'][bookmaker_name]
                        for bookmaker_name, _ in combination
                    },
                    'bet_values_dict': dict(labelled),
                    'bet_amounts_100_dict': {label: 100 / value for label, value in labelled},
                    'sure_bet_result': sure_bet_result,
                })

    # Build the frame once; growing it with pd.concat per hit is quadratic.
    return pd.DataFrame(rows, columns=columns)

def create_value_bets_df(all_bets_df):
    """List every bookmaker offer whose odds exceed the fair odds.

    Args:
        all_bets_df: DataFrame with the columns ``events_id``, ``bet_name``,
            ``group_name``, ``option_names``, ``event_url_dict``,
            ``bet_values_dict`` (bookmaker name -> tuple of odds, one per
            option), ``bet_values_max`` and ``bet_values_fair`` (tuples with
            one entry per option).

    Returns:
        DataFrame with one row per (bookmaker, option) whose odds are strictly
        greater than the fair odds of that option.  Columns: ``events_id``,
        ``bet_name``, ``group_name``, ``option_names``, ``bookmaker_name``,
        ``event_url``, ``option_name``, ``bet_value``, ``bet_value_fair`` and
        ``value_bet_result``, computed as
        ``(bet_value - bet_value_fair) / (bet_value_fair - 1)``.
    """
    columns = [
        'events_id',
        'bet_name',
        'group_name',
        'option_names',
        'bookmaker_name',
        'event_url',
        'option_name',
        'bet_value',
        'bet_value_fair',
        'value_bet_result',
    ]

    rows = []
    for _, row in all_bets_df.iterrows():
        fair_values = row['bet_values_fair']
        # Cheap pre-check on the per-option maxima: skip bets where not even
        # the best odds beat the fair odds.
        if not any(best > fair for best, fair in zip(row['bet_values_max'], fair_values)):
            continue

        for bookmaker_name, bet_values in row['bet_values_dict'].items():
            event_url = row['event_url_dict'][bookmaker_name]
            # zip() stops at the shorter tuple, so a bookmaker with fewer
            # values than options is only checked for the options it covers.
            for option_index, (bet_value, bet_value_fair) in enumerate(zip(bet_values, fair_values)):
                if bet_value > bet_value_fair:
                    rows.append({
                        'events_id': row['events_id'],
                        'bet_name': row['bet_name'],
                        'group_name': row['group_name'],
                        'option_names': row['option_names'],
                        'bookmaker_name': bookmaker_name,
                        'event_url': event_url,
                        'option_name': row['option_names'][option_index],
                        'bet_value': bet_value,
                        'bet_value_fair': bet_value_fair,
                        # NOTE(review): divides by zero when the fair odds are
                        # exactly 1 (plain Python floats raise
                        # ZeroDivisionError) - confirm this cannot occur.
                        'value_bet_result': (bet_value - bet_value_fair) / (bet_value_fair - 1),
                    })

    # A fresh dict per hit and a single DataFrame construction replace the
    # original per-hit pd.concat, which re-copied the whole table each time.
    return pd.DataFrame(rows, columns=columns)


# Main

In [3]:
# Load the detail data of every bookmaker into one shared detail_dict.

bookmaker_names = ['betano', 'betx', 'forbet', 'fortuna', 'kingsbet', 'merkur', 'sazka', 'synottip', 'tipsport']
detail_dict = create_detail_dict(bookmaker_names)

In [4]:
# Load the events table.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.

events = pd.read_pickle('data/events.pkl')

In [27]:
# Build a DataFrame of all bets that could be extracted with the detail spider.

all_bets_df = create_all_bets_df(events, detail_dict)

In [28]:
# Drop unusable bets and add the maximum, mean and fair odds per option.

# Per-option maximum across bookmakers; bookmakers with an empty tuple are ignored.
all_bets_df['bet_values_max'] = all_bets_df['bet_values_dict'].apply(lambda x: tuple(max(values) for values in zip(*[t for t in x.values() if t])))
# Keep only bets where the best odds of every option are above 1.  default=0
# makes a bet with no values at all (empty tuple) fail the filter instead of
# raising ValueError from min() on an empty sequence.
all_bets_df = all_bets_df[all_bets_df["bet_values_max"].apply(lambda x: min(x, default=0) > 1)].reset_index(drop=True)
# Per-option mean across bookmakers, counting only odds >= 1.
all_bets_df['bet_values_mean'] = all_bets_df['bet_values_dict'].apply(lambda x: tuple(float(np.mean([y for y in values if y >= 1])) for values in zip(*[t for t in x.values() if t])))
# Scale each mean by the sum of reciprocals of all means, so that the
# reciprocals of the resulting "fair" odds add up to 1.
all_bets_df['bet_values_fair'] = all_bets_df['bet_values_mean'].apply(lambda x: tuple(b*sum(1/y for y in x) for b in x))

# all_bets_df['bet_result'] = all_bets_df['bet_values_max'].apply(lambda t: sum(1/x for x in [1 if x in {0, -1} else x for x in t]))

# all_bets_df['bet_result_bool'] = all_bets_df['bet_values_max'].apply(lambda t: sum(1/x for x in [1 if x in {0, -1} else x for x in t]) < 1)

In [3]:
# Build the table of sure bets.

sure_bets_df = create_sure_bets_df(all_bets_df)

In [7]:
# Build the table of value bets.

value_bets_df = create_value_bets_df(all_bets_df)

In [1]:
import pandas as pd


# Load the all_bets_df, sure_bets_df and value_bets_df tables.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.

all_bets_df = pd.read_pickle('data/all_bets_df.pkl')
sure_bets_df = pd.read_pickle('data/sure_bets_df.pkl')
value_bets_df = pd.read_pickle('data/value_bets_df.pkl')