In [74]:
from datetime import datetime as dt
import pandas as pd

In [75]:
# Load the raw lotto draw results from disk and show the first few rows
LOTTO_CSV_PATH = "lotto_data.csv"
lotto_df = pd.read_csv(LOTTO_CSV_PATH)
lotto_df.head()

Unnamed: 0.1,Unnamed: 0,LOTTO GAME,COMBINATIONS,DRAW DATE,JACKPOT (PHP),WINNERS
0,0,Grand Lotto 6/55,40-17-12-29-11-48,8/11/2025,66935255.0,0
1,1,Megalotto 6/45,40-10-16-28-07-09,8/11/2025,38521162.8,2
2,2,4D Lotto,7-0-7-5,8/11/2025,42039.0,20
3,3,3D Lotto 2PM,8-1-2,8/11/2025,4500.0,594
4,4,3D Lotto 5PM,4-2-2,8/11/2025,4500.0,329


In [76]:
# Summarise column dtypes and non-null counts before any cleaning is applied
lotto_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32871 entries, 0 to 32870
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Unnamed: 0     32871 non-null  int64 
 1   LOTTO GAME     32871 non-null  object
 2   COMBINATIONS   32871 non-null  object
 3   DRAW DATE      32871 non-null  object
 4   JACKPOT (PHP)  32871 non-null  object
 5   WINNERS        32871 non-null  int64 
dtypes: int64(2), object(4)
memory usage: 1.5+ MB


In [77]:
# Drop the redundant 'Unnamed: 0' column. In the preview its values mirror the
# row index — presumably an index that was saved into the CSV; verify at the source.
# errors="ignore" keeps this cell re-runnable: lotto_df is reassigned here, so a
# second run would otherwise raise KeyError because the column is already gone.
lotto_df = lotto_df.drop(columns="Unnamed: 0", errors="ignore")
lotto_df.head()

Unnamed: 0,LOTTO GAME,COMBINATIONS,DRAW DATE,JACKPOT (PHP),WINNERS
0,Grand Lotto 6/55,40-17-12-29-11-48,8/11/2025,66935255.0,0
1,Megalotto 6/45,40-10-16-28-07-09,8/11/2025,38521162.8,2
2,4D Lotto,7-0-7-5,8/11/2025,42039.0,20
3,3D Lotto 2PM,8-1-2,8/11/2025,4500.0,594
4,3D Lotto 5PM,4-2-2,8/11/2025,4500.0,329


In [78]:
# Several games (2D, 3D, Suertres and EZ2 Lotto) are listed once per draw timeslot.
# Collapse every timeslot variant into its base game name so that each game is
# represented by a single label, like the rest of the lotto games.
print("old games: ", lotto_df["LOTTO GAME"].unique(),'\n')

# base game name -> the timeslot-specific names that should be folded into it
replace_dict = {
    "2D Lotto": ['2D Lotto 2PM', '2D Lotto 5PM', '2D Lotto 9PM'],
    "3D Lotto": ['3D Lotto 2PM', '3D Lotto 5PM', '3D Lotto 9PM'],
    "Suertres Lotto": ['Suertres Lotto 11:30AM', 'Suertres Lotto 12:30PM', 'Suertres Lotto 2PM'],
    "EZ2 Lotto": ['EZ2 Lotto 11:30AM', 'EZ2 Lotto 12:30PM', 'EZ2 Lotto 2PM'],
}

# Flatten to {timeslot name: base game name} so one replace() call handles them all
base_game_by_timeslot = {
    timeslot_name: base_game
    for base_game, timeslot_names in replace_dict.items()
    for timeslot_name in timeslot_names
}
lotto_df["LOTTO GAME"] = lotto_df["LOTTO GAME"].replace(base_game_by_timeslot)

print("updated games: ", lotto_df["LOTTO GAME"].unique())

old games:  ['Grand Lotto 6/55' 'Megalotto 6/45' '4D Lotto' '3D Lotto 2PM'
 '3D Lotto 5PM' '3D Lotto 9PM' '2D Lotto 2PM' '2D Lotto 5PM'
 '2D Lotto 9PM' 'Ultra Lotto 6/58' 'Superlotto 6/49' 'Lotto 6/42'
 '6D Lotto' 'Suertres Lotto 11:30AM' 'Suertres Lotto 12:30PM'
 'Suertres Lotto 2PM' 'EZ2 Lotto 2PM' 'EZ2 Lotto 11:30AM'
 'EZ2 Lotto 12:30PM'] 

updated games:  ['Grand Lotto 6/55' 'Megalotto 6/45' '4D Lotto' '3D Lotto' '2D Lotto'
 'Ultra Lotto 6/58' 'Superlotto 6/49' 'Lotto 6/42' '6D Lotto'
 'Suertres Lotto' 'EZ2 Lotto']


In [79]:
# Convert 'DRAW DATE' to datetime.
# The format is spelled out because the raw values are month/day/year in the
# preview (8/11/2025 -> 2025-08-11); leaving pandas to guess can silently swap
# day and month. NOTE(review): assumes every row uses m/d/YYYY — an unexpected
# value now raises instead of being mis-parsed.
lotto_df['DRAW DATE'] = pd.to_datetime(lotto_df['DRAW DATE'], format="%m/%d/%Y")

# Convert 'JACKPOT (PHP)' to float by stripping the thousands separators first.
# astype(str) makes this safe to re-run (the .str accessor fails on a column
# that is already float) and stops non-string entries from becoming NaN;
# regex=False treats the comma as a literal on every pandas version.
lotto_df['JACKPOT (PHP)'] = pd.to_numeric(
    lotto_df['JACKPOT (PHP)'].astype(str).str.replace(",", "", regex=False)
)

# Confirm the resulting dtype of every column
for column in lotto_df.columns:
    print(f"Column {column} type: ", lotto_df[column].dtype)

lotto_df.head()

Column LOTTO GAME type:  object
Column COMBINATIONS type:  object
Column DRAW DATE type:  datetime64[ns]
Column JACKPOT (PHP) type:  float64
Column WINNERS type:  int64


Unnamed: 0,LOTTO GAME,COMBINATIONS,DRAW DATE,JACKPOT (PHP),WINNERS
0,Grand Lotto 6/55,40-17-12-29-11-48,2025-08-11,66935255.0,0
1,Megalotto 6/45,40-10-16-28-07-09,2025-08-11,38521162.8,2
2,4D Lotto,7-0-7-5,2025-08-11,42039.0,20
3,3D Lotto,8-1-2,2025-08-11,4500.0,594
4,3D Lotto,4-2-2,2025-08-11,4500.0,329


In [81]:
# Total number of winners recorded for each lotto game across all draws
winners_by_game = lotto_df.groupby("LOTTO GAME")["WINNERS"]
number_of_winners = winners_by_game.sum()
number_of_winners

LOTTO GAME
2D Lotto            2963624
3D Lotto            4901887
4D Lotto              45178
6D Lotto                283
EZ2 Lotto               826
Grand Lotto 6/55        509
Lotto 6/42              230
Megalotto 6/45          184
Suertres Lotto         3533
Superlotto 6/49         113
Ultra Lotto 6/58         48
Name: WINNERS, dtype: int64

Yearly line chart of the average total prize
Most common number in each lotto game
