### Find the most popular numbers from lottery history

##### inspired by: https://www.youtube.com/watch?v=hPucbi0H1EI

#### Powerball

In [68]:
from collections import Counter
from pathlib import Path

import pandas as pd


In [69]:
# CSV exports of past winning numbers hosted on texaslottery.com, one URL per game.
power_url = r'https://www.texaslottery.com/export/sites/lottery/Games/Powerball/Winning_Numbers/powerball.csv'
mega_url = r'https://www.texaslottery.com/export/sites/lottery/Games/Mega_Millions/Winning_Numbers/megamillions.csv'


In [70]:
# read and save data from url
def get_data(url):
    """Download a winning-numbers CSV, tidy it, save a dated copy and return it.

    Parameters
    ----------
    url : str
        Location of a header-less CSV (anything ``pd.read_csv`` accepts).
        After the last column is dropped, exactly ten columns must remain:
        game type, month, day, year and six drawn numbers.

    Returns
    -------
    pandas.DataFrame
        Columns ``type``, ``date``, ``num1`` .. ``num6`` and ``download_time``
        (a formatted America/Chicago timestamp string, identical on every row).

    Side effects
    ------------
    Writes the frame to ``./data/<type>_<YYYYMMDD>.csv``; the ``./data``
    directory is created when it does not exist yet.
    """
    df = pd.read_csv(url, header=None)
    # The trailing column is discarded.
    # NOTE(review): presumably an extra per-draw field not needed here -- confirm.
    df = df.iloc[:, :-1]
    df.columns = ['type', 'month', 'day', 'year', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6']

    # Take the timestamp once so the download_time column and the date in the
    # filename can never disagree (e.g. when the call straddles midnight).
    now = pd.Timestamp.today(tz='America/Chicago')
    df['download_time'] = now.strftime('%Y-%m-%d, %I:%M:%S %p %Z')

    # Collapse the separate month/day/year columns into a single datetime column.
    df['date'] = pd.to_datetime(dict(year=df.year, month=df.month, day=df.day))
    df = df.reindex(columns=['type', 'date', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'download_time'])

    # The file is named after the first game type found in the data.
    filename = df['type'].unique()[0] + '_' + now.strftime('%Y%m%d')

    # to_csv fails on a missing directory, so make sure it is there first.
    out_dir = Path('./data')
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / (filename + '.csv'), index=False)
    return df


In [71]:
# get data: download the Powerball history (get_data also saves a dated CSV copy under ./data/)
df = get_data(power_url)


In [72]:
# number of rows in the loaded frame
len(df)


1713

In [73]:
# preview the first rows of the cleaned frame
df.head()


Unnamed: 0,type,date,num1,num2,num3,num4,num5,num6,download_time
0,Powerball,2010-02-03,37,52,22,36,17,24,"2024-11-16, 11:54:38 PM CST"
1,Powerball,2010-02-06,22,54,52,14,59,4,"2024-11-16, 11:54:38 PM CST"
2,Powerball,2010-02-10,29,8,37,38,5,34,"2024-11-16, 11:54:38 PM CST"
3,Powerball,2010-02-13,14,10,40,51,30,1,"2024-11-16, 11:54:38 PM CST"
4,Powerball,2010-02-17,36,7,26,8,19,15,"2024-11-16, 11:54:38 PM CST"


In [74]:
# names of the first five number columns: num1 .. num5
cols = [f'num{position}' for position in range(1, 6)]
# one list of five numbers per row of the frame
allrecords = df.loc[:, cols].to_numpy().tolist()
# the sixth number of every row, kept separately
num6 = list(df['num6'])


In [75]:
# Tally how often each number appears. The first five numbers of every row
# (`allrecords`) and the sixth number (`num6`) are counted separately.
single_occurence = Counter(num6)
regular_occurence = Counter(regnum for record in allrecords for regnum in record)

# (number, count) pairs, most frequent first. most_common() keeps numbers with
# equal counts in first-seen order, matching a stable descending sort.
regular_occurence_sorted = regular_occurence.most_common()
single_occurence_sorted = single_occurence.most_common()

# create a list of numbers sorted based on occurence
regular_by_occurence = [number for number, _count in regular_occurence_sorted]
single_by_occurence = [number for number, _count in single_occurence_sorted]

# five most frequent regular numbers followed by the most frequent sixth number
winningTicket = ' '.join(map(str, regular_by_occurence[:5] + single_by_occurence[:1]))
print(f'Powerball most frequent numbers: {winningTicket}')


Powerball most frequent nubers: 39 36 32 21 23 24


#### Mega Millions

In [76]:
# get data: download the Mega Millions history.
# Note: this rebinds `df`, replacing the Powerball frame loaded earlier.
df = get_data(mega_url)


In [77]:
# row count and first rows, shown together as one tuple
len(df), df.head()


(2187,
             type       date  num1  num2  num3  num4  num5  num6  \
 0  Mega Millions 2003-12-05    12    44    15    18     1    42   
 1  Mega Millions 2003-12-09    14    15    48     4    24    41   
 2  Mega Millions 2003-12-12    16    32    46     9    45    26   
 3  Mega Millions 2003-12-16    47    16    31    24    46    47   
 4  Mega Millions 2003-12-19     5    10    39    17    35    38   
 
                  download_time  
 0  2024-11-16, 11:54:38 PM CST  
 1  2024-11-16, 11:54:38 PM CST  
 2  2024-11-16, 11:54:38 PM CST  
 3  2024-11-16, 11:54:38 PM CST  
 4  2024-11-16, 11:54:38 PM CST  )

In [78]:
# names of the first five number columns: num1 .. num5
cols = [f'num{position}' for position in range(1, 6)]
# one list of five numbers per row of the frame
allrecords = df.loc[:, cols].to_numpy().tolist()
# the sixth number of every row, kept separately
num6 = list(df['num6'])


In [79]:
# Tally how often each number appears. The first five numbers of every row
# (`allrecords`) and the sixth number (`num6`) are counted separately.
single_occurence = Counter(num6)
regular_occurence = Counter(regnum for record in allrecords for regnum in record)

# (number, count) pairs, most frequent first. most_common() keeps numbers with
# equal counts in first-seen order, matching a stable descending sort.
regular_occurence_sorted = regular_occurence.most_common()
single_occurence_sorted = single_occurence.most_common()

# create a list of numbers sorted based on occurence
regular_by_occurence = [number for number, _count in regular_occurence_sorted]
single_by_occurence = [number for number, _count in single_occurence_sorted]

# five most frequent regular numbers followed by the most frequent sixth number
winningTicket = ' '.join(map(str, regular_by_occurence[:5] + single_by_occurence[:1]))

print(f'Megamillion most frequent numbers: {winningTicket}')


Megamillion most frequent nubers: 17 10 31 20 14 9
