# In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import XGBRegressor
import streamlit as st
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the semicolon-separated draw history and parse its ISO-formatted
# 'Date' column into real timestamps so it can be sorted and compared.
df = pd.read_csv('EuroMillions_numbers.csv', sep=';')
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# Second data source: an Excel workbook. Its 'Date' column is normalised
# further down with clean_date.
df1 = pd.read_excel('lot_21.xlsx')

from datetime import datetime
import re
def clean_date(i):
    """Convert a long-form date such as "Friday 1st January 2021" to ISO.

    The value is stringified, English ordinal suffixes that directly follow
    the day number (st/nd/rd/th) are removed, and the remainder is parsed
    with the pattern "%A %d %B %Y".

    Returns the date as a "YYYY-MM-DD" string.
    Raises ValueError (from strptime) when the text does not match the
    expected pattern.
    """
    # Only suffixes glued to digits are stripped, so month/day names that
    # contain "st" or "th" (August, Thursday) are left intact.
    without_ordinal = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', str(i))
    parsed = datetime.strptime(without_ordinal, "%A %d %B %Y")
    return parsed.strftime("%Y-%m-%d")


# Normalise the Excel dates: stringify every cell, then convert the long-form
# text to an ISO "YYYY-MM-DD" string with clean_date.
df1['Date'] = df1['Date'].apply(str).apply(clean_date)

# The 'Money' column is referred to as 'Gain' from here on.
df1 = df1.rename(columns={'Money': 'Gain'})

# Merge the two sources into one chronological table (dfx) and derive the
# model features from it.

# Columns holding the drawn numbers; every one of them gets lag/rolling features.
draw_columns = ['N1', 'N2', 'N3', 'N4', 'N5', 'E1', 'E2']

# Dates already covered by the Excel sheet, as real timestamps so they are
# comparable with df['Date'].  (Passing a whole DataFrame to Series.isin
# compares against its column labels, which never match a date, so nothing
# would be excluded and overlapping draws would be duplicated.)
df_ex = pd.to_datetime(df1['Date'], format='%Y-%m-%d')
df_all = df[~df['Date'].isin(df_ex)]

# Give the Excel rows datetime dates before concatenating so the combined
# 'Date' column has a single dtype.  The dates must come from the combined
# rows themselves: re-deriving them from df['Date'] would align on the fresh
# 0..n-1 index, attach CSV dates to the wrong rows and leave NaT on the tail.
dfx = pd.concat([df1.assign(Date=df_ex), df_all], ignore_index=True)
dfx = dfx.drop(columns=['Gain', 'Ticket', 'Winner'])
dfx = dfx.sort_values(by='Date')

dfx['dayofWeek'] = dfx['Date'].dt.dayofweek
# Position of each draw in chronological order (dfx is already sorted).
dfx['TimeIndex'] = np.arange(len(dfx))

for col in draw_columns:
    previous_draw = dfx[col].shift(1)
    dfx[f'{col}_lag1'] = previous_draw
    dfx[f'{col}_lag2'] = dfx[col].shift(2)
    # Rolling means are taken over *previous* draws only; a window that
    # includes the current row would leak the target into its own features.
    dfx[f'{col}_rolling3'] = previous_draw.rolling(window=3).mean()
    dfx[f'{col}_rolling5'] = previous_draw.rolling(window=5).mean()

# The first rows have no complete lag/rolling history; drop them (along with
# any other row containing a missing value).
dfx = dfx.dropna()

# Train one XGBoost regressor per drawn number, evaluate it on a held-out
# chronological tail, and store its predictions for every row in dfx.

X = dfx.drop(columns=['Date', 'N1', 'N2', 'N3', 'N4', 'N5', 'E1', 'E2'])
y = dfx[['N1', 'N2', 'N3', 'N4', 'N5', 'E1', 'E2']]  # Target

# shuffle=False keeps row order, so the test set is the last 20% of rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

models = {}
test_predictions = {}
predicted_columns = ['Predicted_N1', 'Predicted_N2', 'Predicted_N3', 'Predicted_N4', 'Predicted_N5', 'Predicted_E1', 'Predicted_E2']

for column, predicted_column in zip(y.columns, predicted_columns):
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=1000, learning_rate=0.05, max_depth=4)
    # Fit on the training split only; fitting on all of X would make the
    # RMSE below an in-sample score rather than a held-out evaluation.
    model.fit(X_train, y_train[column])
    models[column] = model

    # Held-out error for this target.
    test_predictions[column] = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test[column], test_predictions[column]))
    print(f"RMSE for {column}: {rmse}")

    # Predictions for every row, used by the comparison table in the UI.
    dfx[predicted_column] = model.predict(X)

# Streamlit page: pick a draw date and compare actual vs predicted numbers.
st.title("Lottery Results: Actual vs Predicted")

# Calendar date of every row in dfx; used both as the selectable options and
# as the filter key.
draw_dates = dfx["Date"].dt.date
date_selected = st.selectbox("Select a date to view results:", draw_dates)

# Rows whose date equals the chosen one.
selected_row = dfx[draw_dates == date_selected]

if selected_row.empty:
    st.warning("No data available for the selected date.")
else:
    # Only the first matching row is shown.
    first_match = selected_row.iloc[0]

    # Two-row table: actual values on top, predicted values underneath.
    table_columns = {"Category": ["Actual", "Predicted"]}
    for name in ("N1", "N2", "N3", "N4", "N5", "E1", "E2"):
        table_columns[name] = [first_match[name], first_match[f"Predicted_{name}"]]
    display_data = pd.DataFrame(table_columns)

    st.subheader(f"Results for {date_selected}:")
    st.table(display_data)