In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from datetime import datetime, timedelta
import warnings
warnings.simplefilter("ignore", category=FutureWarning)

# Load the draw-history CSV and turn the day-first 'DrawDate' strings
# (dd/mm/YYYY) into real timestamps so they can be compared and plotted.
dfBase = pd.read_csv('winning_history_4D_Magnum.csv').assign(
    DrawDate=lambda raw: pd.to_datetime(raw['DrawDate'], format='%d/%m/%Y')
)

# Plot of all winning numbers, split by Prize Type

In [None]:
df = dfBase

# One marker per winning entry: draw date on the x axis, winning number on
# the y axis, coloured by prize category.
fig = px.scatter(
    df,
    x='DrawDate',
    y='Number',
    color='PrizeDesc',
    title='Winning Numbers Distribution Over the Years',
    labels={'DrawDate': 'Date', 'Number': 'Number'},
    # The '|...' entry is a date format applied to DrawDate in the hover box.
    hover_data={'DrawDate': '|%B %d, %Y', 'Number': True, 'PrizeDesc': True},
)
# Very small markers, since every row of the history is drawn.
fig.update_traces(marker={'size': 1})
fig.show()

# Plot of all winning numbers that took 1st, 2nd or 3rd Prize

In [None]:
df = dfBase

# Keep only the rows whose prize description is one of the three top tiers.
is_top_prize = df['PrizeDesc'].isin(['1ST PRIZE', '2ND PRIZE', '3RD PRIZE'])
df_filtered = df.loc[is_top_prize]

# Same date-vs-number scatter as the full history, restricted to those rows.
fig = px.scatter(
    df_filtered,
    x='DrawDate',
    y='Number',
    color='PrizeDesc',
    title='Winning Numbers Distribution Over the Years (1st, 2nd, 3rd Prize)',
    labels={'DrawDate': 'Date', 'Number': 'Number'},
    hover_data={'DrawDate': '|%B %d, %Y', 'Number': True, 'PrizeDesc': True},
)
fig.update_traces(marker={'size': 1})
fig.show()

# Plot Frequency of winnings for each number, split by type of prize

In [None]:
df = dfBase

# Count how many times each (number, prize type) pair occurs in the history.
winning_frequency = (
    df.groupby(['Number', 'PrizeDesc'])
      .size()
      .reset_index(name='Frequency')
)

# One marker per (number, prize type) pair; height is the number of wins.
fig = px.scatter(
    winning_frequency,
    x='Number',
    y='Frequency',
    color='PrizeDesc',
    title='Frequency of Winnings for Each Number by Prize Desc',
    labels={'Number': 'Number', 'Frequency': 'Frequency', 'PrizeDesc': 'Prize Description'},
    hover_data={'Number': True, 'Frequency': True, 'PrizeDesc': True},
)
fig.update_traces(marker=dict(size=2))
fig.show()


# Plot Frequency of winnings for each number, split by type of prize - for last 2 years

In [None]:
df = dfBase

# Window start: midnight, exactly two calendar years before today.
# DateOffset(years=2) follows the calendar (so leap days do not shift the
# cutoff), and normalising to midnight keeps draws held on the cutoff date
# itself, since DrawDate values carry no time-of-day.
window_start = pd.Timestamp.now().normalize() - pd.DateOffset(years=2)
df_last_2_years = df[df['DrawDate'] >= window_start]

# Count wins per (number, prize type) pair inside the window.
winning_frequency = (
    df_last_2_years.groupby(['Number', 'PrizeDesc'])
                   .size()
                   .reset_index(name='Frequency')
)

fig = px.scatter(
    winning_frequency,
    x='Number',
    y='Frequency',
    color='PrizeDesc',
    title='Frequency of Winnings for Each Number (Last 2 Years)',
    labels={'Number': 'Number', 'Frequency': 'Frequency', 'PrizeDesc': 'Prize Description'},
    hover_data={'Number': True, 'Frequency': True, 'PrizeDesc': True},
)
fig.update_traces(marker=dict(size=2))
fig.show()

In [None]:
# Show the per-prize win counts for number 2114 from the current
# winning_frequency table.
winning_frequency.loc[winning_frequency['Number'].eq(2114)]

# Find winnings for specific numbers

In [None]:
# Build a derived frame instead of aliasing dfBase: assigning a column through
# an alias would add 'DaysTillNow' to dfBase itself. .assign() returns a new
# frame, so dfBase keeps the columns it was loaded with and re-running this
# cell always starts from the same input.
df = dfBase.assign(
    # Whole days elapsed between each draw and the moment this cell runs.
    DaysTillNow=lambda history: (datetime.now() - history['DrawDate']).dt.days
)

def NumberWinHistory(numb, history=None):
    """Print every recorded win of one number, oldest draw first.

    Parameters
    ----------
    numb
        Winning number to look up; compared with ``==`` against the
        'Number' column.
    history : pandas.DataFrame, optional
        Frame with 'Number', 'DrawDate' (datetime) and 'PrizeDesc' columns.
        When omitted, the notebook-level ``df`` is used.

    Returns
    -------
    None
        The table is printed (without the index), not returned.
    """
    source = df if history is None else history

    # Rows for the requested number, ordered by draw date.
    wins = source.loc[source['Number'] == numb].sort_values(by='DrawDate')

    # Compute the age of each win at call time on the filtered copy, so the
    # function neither needs a pre-built 'DaysTillNow' column nor modifies
    # the frame it reads from.
    wins = wins.assign(DaysTillNow=(datetime.now() - wins['DrawDate']).dt.days)

    print(wins[['Number', 'DrawDate', 'PrizeDesc', 'DaysTillNow']].to_string(index=False))

In [None]:
# Print the win history for number 5515.
NumberWinHistory(numb=5515)

## Days since a number won a specific Prize Type

In [None]:
df = dfBase

# Most recent draw date on which each number won each prize type.
last_draw_date = df.groupby(['Number', 'PrizeDesc'])['DrawDate'].max().reset_index()

specific_prize_types = ['1ST PRIZE', '2ND PRIZE', '3RD PRIZE', 'CONSOLATION PRIZE', 'SPECIAL PRIZE']

# Filter to the listed prize types and add the age column in one expression.
# .assign() returns a new frame, so the column is never written into a
# boolean-mask slice of last_draw_date (which raises SettingWithCopyWarning).
df_filtered = (
    last_draw_date[last_draw_date['PrizeDesc'].isin(specific_prize_types)]
    .assign(DaysSinceLastWin=lambda latest: (datetime.now() - latest['DrawDate']).dt.days)
)

fig = px.scatter(
    df_filtered,
    x='Number',
    y='DaysSinceLastWin',
    color='PrizeDesc',
    title='Days Since Last Win for Each Number',
    labels={'Number': 'Number', 'DaysSinceLastWin': 'Days Since Last Win', 'PrizeDesc': 'Prize Description'},
    hover_data={'Number': True, 'DaysSinceLastWin': True, 'PrizeDesc': True},
)
fig.update_traces(marker=dict(size=2))
fig.show()


# Days since number won any prize

In [None]:
df = dfBase

# Latest draw date per number across every prize type, plus the number of
# whole days between that draw and now.
last_draw_date = df.groupby('Number')['DrawDate'].max().reset_index()
last_draw_date = last_draw_date.assign(
    DaysSinceLastWin=(datetime.now() - last_draw_date['DrawDate']).dt.days
)

# Interactive Plotly scatter: one marker per number, labelled with the number.
fig = px.scatter(
    last_draw_date,
    x='Number',
    y='DaysSinceLastWin',
    title='Days Since Last Win for Each Number',
    labels={'Number': 'Number', 'DaysSinceLastWin': 'Days Since Last Win'},
)
fig.update_traces(
    marker={'size': 2},
    mode='markers+text',
    text=last_draw_date['Number'],
    textposition='top center',
)
fig.update_layout(hovermode='closest')
fig.show()

In [None]:
# Numbers ordered from the longest gap since their last win to the shortest.
last_draw_date.sort_values('DaysSinceLastWin', ascending=False)