<a href="https://colab.research.google.com/github/crerarc/DataScience/blob/main/emill.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Best stab at picking Lottery numbers

C. Christie, 01/02/2022



In [None]:
#@title Module Imports

# Import libraries
import random
import datetime as dt
import itertools as itr
import os
from numpy import select as npselect
import pandas as pd
from google.colab import drive # Google file access

In [None]:
#@title MAIN SEQUENCE
def main() -> int:
    """Score one fixed EuroMillions guess against the historic draws.

    Loads the draw history through ``get_data``, counts per prize rank how
    often the hard-coded guess would have won through ``get_guess``, then
    prints the per-rank counts and the overall win frequency as a percentage
    of the number of draws.

    Returns:
        int: 0 on completion.
    """

    # Repository directory (relative to the working directory).
    # Named data_dir so it does not shadow the google.colab `drive` module.
    data_dir = "gdrive/My Drive/Colab Notebooks/Data/"

    # Get data
    df5, df2 = get_data(data_dir)

    # Guess to evaluate: 5 main balls and 2 star balls
    try5 = [10, 11, 13, 35, 41]
    try2 = [6, 8]

    # Check guess
    wins = get_guess(df5, df2, try5, try2)
    print(wins)

    # Guard against an empty history so the percentage cannot divide by zero
    n_draws = len(df5)
    if n_draws:
        print(f"Win frequency: {round(100* sum(wins.values())/ n_draws, 2)}%")
    else:
        print("Win frequency: n/a (no draws loaded)")

    return 0 # Return successful completion = zero errors, or errors = False

In [None]:
#@title Function: "get_data" - Reads draw results and updates the repository as required
def get_data(rt_dir: str) -> tuple:
    """Load the EuroMillions draw history, refreshing the stored copy if stale.

    Reads the repository file ``euroLotData.csv`` from ``rt_dir`` and the
    draw-history CSV published by the National Lottery. If the first row of
    the published file is dated later than the first row of the repository,
    the missing draws are stacked on top of the repository data and the
    repository file is rewritten. Both files are assumed to list the newest
    draw first.

    Args:
        rt_dir (str): Directory holding ``euroLotData.csv``.

    Returns:
        tuple: ``(df1, df2)`` - integer dataframes for the 5 main balls and
        the 2 star balls respectively, each with an added ``Sum`` column
        holding the row total.
    """
    # Local import: only needed when the repository file has to be replaced
    import shutil

    # COLAB: make sure Google Drive is mounted before reading the repository
    if not os.path.ismount("/content/gdrive"):
        drive.mount("/content/gdrive")

    # Published draw history
    rfile = 'https://www.national-lottery.co.uk/results/euromillions/draw-history/csv'

    # Repository file; join copes with rt_dir given with or without a trailing slash
    src_file = os.path.join(rt_dir, "euroLotData.csv")

    # Read both tables with the parsed draw date as index
    df_draws_repo = _read_draws(src_file)
    df_draws_ltst = _read_draws(rfile)

    # Compare the first entry of each table
    repo_last = df_draws_repo.index[0]
    ltst_last = df_draws_ltst.index[0]
    if ltst_last > repo_last:
        print("! Repo out of date !")
        print(f"Last Repo: {repo_last.day}, {repo_last.month}, {repo_last.year}")
        print(f"Last Draw: {ltst_last.day}, {ltst_last.month}, {ltst_last.year}")
        print("! Appending !")
        # Draws newer than the repository, restricted to the ball columns
        # and renamed to the repository's column names
        df_2mrg = df_draws_ltst.loc[df_draws_ltst.index > repo_last,
                                    :'Lucky Star 2'].copy()
        df_2mrg.columns = ['B1', 'B2', 'B3', 'B4', 'B5', 'S1', 'S2']
        # Concatenate with the missing draws first so they end up on top
        df_drws = pd.concat([df_2mrg, df_draws_repo])
        # Write to a scratch file first so a failed write cannot clobber the
        # repository, then move it over the file that was actually read
        tst_fil = "test_out.csv"
        df_drws.to_csv(tst_fil, date_format="%d-%b-%Y")
        shutil.move(tst_fil, src_file)
    else:
        print("No change to Repository Required")
        df_drws = df_draws_repo.copy()

    # Split into the 5-ball and 2-star parts and add the row totals
    df1 = df_drws.iloc[:, :-2].astype('int')
    df1["Sum"] = df1.sum(axis=1).astype('int')
    df2 = df_drws.iloc[:, -2:].astype('int')
    df2["Sum"] = df2.sum(axis=1).astype('int')

    return df1, df2


def _read_draws(source: str) -> pd.DataFrame:
    """Read a draw CSV with its first column parsed as a dd-Mon-yyyy date index.

    The index is converted after reading rather than through ``read_csv``'s
    ``date_parser`` argument, which pandas has deprecated. Missing values are
    filled with 0.
    """
    frame = pd.read_csv(source, index_col=0)
    frame.index = pd.to_datetime(frame.index, format="%d-%b-%Y")
    return frame.fillna(value=0)


In [None]:
#@title Function: "get_guess" - Counts how often a guess would have won
def get_guess(data5: pd.DataFrame, data2: pd.DataFrame,
              gues5: list, gues2: list) -> dict:
    """ Find how successful the chosen guess is
        Method: Set intersection to count how many numbers of each draw
        appear in the guess.
        13 chances to win (any other combination wins nothing):
        5 & 2 Match   Rank
          2 + 0       13
          2 + 1       12
          1 + 2       11
          3 + 0       10
          3 + 1       9
          2 + 2       8
          4 + 0       7
          3 + 2       6
          4 + 1       5
          4 + 2       4
          5 + 0       3
          5 + 1       2
          5 + 2       1

    Args:
        data5 (pd.DataFrame): One row per draw; the first 5 columns are read
            as the main balls, any further columns are ignored
        data2 (pd.DataFrame): One row per draw; the first 2 columns are read
            as the star balls, any further columns are ignored. Rows are
            paired with data5 by position
        gues5 (list): Guess for 5 Ball draw
        gues2 (list): Guess for 2 Ball draw

    Returns:
        dict: Rank (1 to 13) mapped to the number of draws in which the guess
        would have achieved that rank

    Raises:
        IndexError: If data2 has fewer rows than data5
    """

    # (main matches, star matches) -> prize rank
    ranks = {(5, 2): 1, (5, 1): 2, (5, 0): 3, (4, 2): 4, (4, 1): 5,
             (3, 2): 6, (4, 0): 7, (2, 2): 8, (3, 1): 9, (3, 0): 10,
             (1, 2): 11, (2, 1): 12, (2, 0): 13}

    # Create dictionary distribution of wins
    win_distrib = {rank: 0 for rank in range(1, 14)}

    # Every main-ball row needs a star-ball row at the same position
    if len(data2) < len(data5):
        raise IndexError("data2 has fewer rows than data5")

    # The guess never changes, so build its sets once outside the loop
    pick5 = set(gues5)
    pick2 = set(gues2)

    # Pull the ball columns out as arrays to avoid a per-row iloc lookup
    balls = data5.iloc[:, [0, 1, 2, 3, 4]].to_numpy()
    stars = data2.iloc[:, [0, 1]].to_numpy()

    for row5, row2 in zip(balls, stars):
        # Count the distinct drawn numbers that appear in the guess
        matched = (len(pick5.intersection(row5)), len(pick2.intersection(row2)))
        rank = ranks.get(matched)
        if rank is not None:
            win_distrib[rank] += 1

    return win_distrib


In [None]:
#@title Execution Starting Point
# Run the main sequence only when this code is executed directly, not when imported
if __name__ == "__main__":
    main()


No change to Repository Required
{1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 3, 9: 2, 10: 2, 11: 5, 12: 34, 13: 76}
Win frequency: 8.1%
