In [14]:
import sqlite3
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Connect to the SQLite database for training data
train_conn = sqlite3.connect('MegaMillions_Train.db')

# Query data from the database for training
query_train = "SELECT * FROM Interval_Data"
df_train = pd.read_sql_query(query_train, train_conn)
df_train

# Connect to the SQLite database for test data
test_conn = sqlite3.connect('MegaMillions_Test.db')

# Query data from the database for test
query_test = "SELECT * FROM Interval_Data"
df_test = pd.read_sql_query(query_test, test_conn)
df_test

Unnamed: 0,Draw Date,Winning Numbers,Mega Ball,Multiplier
0,10/19/2018,15 23 53 65 70,7,2
1,10/19/2018,15 23 53 65 70,7,2
2,10/19/2018,15 23 53 65 70,7,2
3,10/19/2021,03 12 13 19 52,1,3
4,10/19/2021,03 12 13 19 52,1,3
...,...,...,...,...
628,12/31/2019,30 44 49 53 56,11,3
629,12/31/2019,30 44 49 53 56,11,3
630,12/31/2021,02 05 30 46 61,8,3
631,12/31/2021,02 05 30 46 61,8,3


In [15]:
# Convert 'Draw Date' to datetime
df_train['Draw Date'] = pd.to_datetime(df_train['Draw Date'])

# Splitting the 'Winning Numbers' into separate columns
winning_numbers = df_train['Winning Numbers'].str.split(' ', expand=True)
winning_numbers.columns = [f'Ball_{i+1}' for i in winning_numbers.columns]

# Combine the separated columns with the original DataFrame
df_train = pd.concat([df_train, winning_numbers], axis=1)

# Convert the 'Ball' columns to integers (if they are strings)
ball_columns = [f'Ball_{i+1}' for i in range(5)]  # Assuming 5 white balls
for col in ball_columns:
    df_train[col] = pd.to_numeric(df_train[col])

# Prepare DataFrame for predicting Mega Ball
mega_ball_df = df_train[['Draw Date', 'Mega Ball']].copy()
mega_ball_df.columns = ['ds', 'y']  # Rename columns as required by Prophet

# Create a Prophet model for Mega Ball prediction
model_mega_ball = Prophet()
model_mega_ball.fit(mega_ball_df)

# Make future predictions for Mega Ball
future_mega = model_mega_ball.make_future_dataframe(periods=1) 
forecast_mega = model_mega_ball.predict(future_mega)

# Prepare DataFrames for predicting white balls
white_ball_dfs = []
for i in range(5):  # Assuming 5 white balls
    ball_df = df_train[['Draw Date', f'Ball_{i+1}']].copy()
    ball_df.columns = ['ds', 'y']  # Rename columns as required by Prophet
    white_ball_dfs.append(ball_df)

# Create Prophet models for each white ball and make predictions
models_white_balls = []
forecasts_white_balls = []

for ball_df in white_ball_dfs:
    model = Prophet()
    model.fit(ball_df)
    models_white_balls.append(model)

    future = model.make_future_dataframe(periods=1)
    forecast = model.predict(future)
    forecasts_white_balls.append(forecast)

# Assuming you have the actual values for Mega Ball from your dataset and the corresponding predicted values from the Prophet forecast
actual_values_mega_ball = df_train['Mega Ball']  # Actual values for Mega Ball
predicted_values_mega_ball = forecast_mega['yhat'][-len(actual_values_mega_ball):]  # Predicted values for Mega Ball

# Ensure the lengths match before computing metrics
min_len_mega_ball = min(len(actual_values_mega_ball), len(predicted_values_mega_ball))
actual_values_mega_ball = actual_values_mega_ball[:min_len_mega_ball]
predicted_values_mega_ball = predicted_values_mega_ball[:min_len_mega_ball]

# Calculate MAE, MSE, RMSE for the Mega Ball
mae_mega_ball = mean_absolute_error(actual_values_mega_ball, predicted_values_mega_ball)
mse_mega_ball = mean_squared_error(actual_values_mega_ball, predicted_values_mega_ball)
rmse_mega_ball = np.sqrt(mse_mega_ball)

print(f"MAE for Mega Ball: {mae_mega_ball}")
print(f"MSE for Mega Ball: {mse_mega_ball}")
print(f"RMSE for Mega Ball: {rmse_mega_ball}")

19:08:02 - cmdstanpy - INFO - Chain [1] start processing
19:08:02 - cmdstanpy - INFO - Chain [1] done processing
19:08:05 - cmdstanpy - INFO - Chain [1] start processing
19:08:05 - cmdstanpy - INFO - Chain [1] done processing
19:08:07 - cmdstanpy - INFO - Chain [1] start processing
19:08:07 - cmdstanpy - INFO - Chain [1] done processing
19:08:10 - cmdstanpy - INFO - Chain [1] start processing
19:08:10 - cmdstanpy - INFO - Chain [1] done processing
19:08:13 - cmdstanpy - INFO - Chain [1] start processing
19:08:14 - cmdstanpy - INFO - Chain [1] done processing
19:08:16 - cmdstanpy - INFO - Chain [1] start processing
19:08:17 - cmdstanpy - INFO - Chain [1] done processing


MAE for Mega Ball: 13.822860562918688
MSE for Mega Ball: 253.87203473647287
RMSE for Mega Ball: 15.933362317366441


In [4]:
df_test['Draw Date'] = pd.to_datetime(df_test['Draw Date'])

# Splitting the 'Winning Numbers' into separate columns
test_winning_numbers = df_test['Winning Numbers'].str.split(' ', expand=True)
test_winning_numbers.columns = [f'Ball_{i+1}' for i in test_winning_numbers.columns]

# Combine the separated columns with the original DataFrame
df_test = pd.concat([df_test, test_winning_numbers], axis=1)

# Convert the 'Ball' columns to integers (if they are strings)
test_ball_columns = [f'Ball_{i+1}' for i in range(5)]  # Assuming 5 white balls
for col in test_ball_columns:
    df_test[col] = pd.to_numeric(df_test[col])

# Prepare DataFrame for testing Mega Ball
test_mega_ball_df = df_test[['Draw Date', 'Mega Ball']].copy()
test_mega_ball_df.columns = ['ds', 'y']  # Rename columns as required by Prophet

# Make predictions for Mega Ball using the trained model
test_forecast_mega = model_mega_ball.predict(test_mega_ball_df)

# Assuming you have the actual values for Mega Ball in your test dataset
test_actual_values_mega_ball = df_test['Mega Ball']  # Actual values for Mega Ball in test data
test_predicted_values_mega_ball = test_forecast_mega['yhat'][-len(test_actual_values_mega_ball):]  # Predicted values for Mega Ball in test data

# Ensure lengths match before computing metrics
min_len_test_mega_ball = min(len(test_actual_values_mega_ball), len(test_predicted_values_mega_ball))
test_actual_values_mega_ball = test_actual_values_mega_ball[:min_len_test_mega_ball]
test_predicted_values_mega_ball = test_predicted_values_mega_ball[:min_len_test_mega_ball]

# Calculate MAE, MSE, RMSE for the Mega Ball in the test dataset
test_mae_mega_ball = mean_absolute_error(test_actual_values_mega_ball, test_predicted_values_mega_ball)
test_mse_mega_ball = mean_squared_error(test_actual_values_mega_ball, test_predicted_values_mega_ball)
test_rmse_mega_ball = np.sqrt(test_mse_mega_ball)

print(f"Test MAE for Mega Ball: {test_mae_mega_ball}")
print(f"Test MSE for Mega Ball: {test_mse_mega_ball}")
print(f"Test RMSE for Mega Ball: {test_rmse_mega_ball}")

Test MAE for Mega Ball: 7.7536026107884215
Test MSE for Mega Ball: 91.98328097198008
Test RMSE for Mega Ball: 9.590791467443136


In [40]:
import pandas as pd
from prophet import Prophet
import random
import numpy as nps

# Ensure 'Draw Date' column is in datetime format
df_test['Draw Date'] = pd.to_datetime(df_test['Draw Date'])

# Prepare DataFrames for predicting white balls and Mega Ball
white_ball_dfs = []
for i in range(5):  # Assuming 5 white balls
    ball_df = df_test[['Draw Date', f'Ball_{i+1}']].copy()
    ball_df.columns = ['ds', 'y']  # Rename columns as required by Prophet
    white_ball_dfs.append(ball_df)

mega_ball_df = df_test[['Draw Date', 'Mega Ball']].copy()
mega_ball_df.columns = ['ds', 'y']  # Rename columns as required by Prophet

# Create Prophet models for each white ball and Mega Ball
models_white_balls = []
for ball_df in white_ball_dfs:
    model = Prophet()
    model.fit(ball_df)
    models_white_balls.append(model)

model_mega_ball = Prophet()
model_mega_ball.fit(mega_ball_df)

def add_variation(prediction, std_dev=10):
    # Introduce variation based on a normal distribution with a specified standard deviation
    variation = np.random.normal(loc=0, scale=std_dev)
    return prediction + int(variation)

def generate_sets_for_date(input_date, num_sets):
    all_predictions = []
    while len(all_predictions) < num_sets:
        # Create DataFrames containing the input date
        input_date_df = pd.DataFrame({'ds': [pd.to_datetime(input_date)]})

        # Predict white ball numbers for the input date with variation
        predicted_white_balls = []
        for model in models_white_balls:
            forecast = model.predict(input_date_df)
            predicted_ball = int(round(forecast['yhat'].values[0]))
            predicted_ball = add_variation(predicted_ball)
            predicted_white_balls.append(predicted_ball)

        # Predict Mega Ball number for the input date with variation
        forecast_mega = model_mega_ball.predict(input_date_df)
        predicted_mega_ball = int(round(forecast_mega['yhat'].values[0]))
        predicted_mega_ball = add_variation(predicted_mega_ball)
        
        # Store the predicted white balls and Mega Ball as a set
        predicted_set = (predicted_white_balls, predicted_mega_ball)
        all_predictions.append(predicted_set)

    return all_predictions

def filter_matching_numbers(predictions, correct_white_balls, correct_mega_ball):
    filtered_predictions = []
    for predicted_set in predictions:
        for white_ball in predicted_set[0]:
            if white_ball in correct_white_balls:
                filtered_predictions.append(predicted_set)
                break
        if predicted_set[1] == correct_mega_ball:
            filtered_predictions.append(predicted_set)

    return filtered_predictions

def display_matching_numbers(filtered_predictions, correct_white_balls, correct_mega_ball):
    for i, prediction_set in enumerate(filtered_predictions, start=1):
        matching_white_balls = [white_ball for white_ball in prediction_set[0] if white_ball in correct_white_balls]
        matching_mega_ball = prediction_set[1] if prediction_set[1] == correct_mega_ball else None

        if matching_white_balls or matching_mega_ball:
            print(f"Set {i}: ", end="")

            if matching_white_balls:
                print(f"Matching White Balls - {matching_white_balls}", end="")

            if matching_mega_ball:
                print(f", Matching Mega Ball - {matching_mega_ball}", end="")

            print()

input_date = '2023-11-07'  # Input the date in 'YYYY-MM-DD' format
correct_white_balls = [3, 11, 33, 42, 52]  # Replace with correct white balls for the date
correct_mega_ball = 20  # Replace with correct Mega Ball for the date

predicted_sets = generate_sets_for_date(input_date, num_sets=100)
filtered_predictions = filter_matching_numbers(predicted_sets, correct_white_balls, correct_mega_ball)

# Display the filtered sets of predictions with at least one matching number
display_matching_numbers(filtered_predictions, correct_white_balls, correct_mega_ball)


19:52:34 - cmdstanpy - INFO - Chain [1] start processing
19:52:34 - cmdstanpy - INFO - Chain [1] done processing
19:52:34 - cmdstanpy - INFO - Chain [1] start processing
19:52:35 - cmdstanpy - INFO - Chain [1] done processing
19:52:35 - cmdstanpy - INFO - Chain [1] start processing
19:52:35 - cmdstanpy - INFO - Chain [1] done processing
19:52:35 - cmdstanpy - INFO - Chain [1] start processing
19:52:35 - cmdstanpy - INFO - Chain [1] done processing
19:52:35 - cmdstanpy - INFO - Chain [1] start processing
19:52:35 - cmdstanpy - INFO - Chain [1] done processing
19:52:35 - cmdstanpy - INFO - Chain [1] start processing
19:52:35 - cmdstanpy - INFO - Chain [1] done processing


Set 1: Matching White Balls - [11]
Set 2: Matching White Balls - [33]
Set 3: Matching White Balls - [11, 11]
Set 4: Matching White Balls - [11, 42]
Set 5: Matching White Balls - [42]
Set 6: Matching White Balls - [33]
Set 7: Matching White Balls - [33]
Set 8: Matching White Balls - [33, 42], Matching Mega Ball - 20
Set 9: Matching White Balls - [33, 42], Matching Mega Ball - 20
Set 10: Matching White Balls - [33]
Set 11: Matching White Balls - [52]
Set 12: Matching White Balls - [33]
Set 13: Matching White Balls - [52]
Set 14: Matching White Balls - [11]
Set 15: Matching White Balls - [3]
Set 16: Matching White Balls - [3]
Set 17: Matching White Balls - [52]
Set 18: Matching White Balls - [52]
Set 19: Matching White Balls - [33]
Set 20: Matching White Balls - [11, 52]
Set 21: Matching White Balls - [33], Matching Mega Ball - 20
Set 22: Matching White Balls - [33], Matching Mega Ball - 20
Set 23: , Matching Mega Ball - 20
Set 24: Matching White Balls - [42]
Set 25: Matching White Balls 