In [32]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
import joblib

# Load the dataset
df = pd.read_csv('lightning_roulette_results.csv')

# Convert Unix timestamp ('when') to datetime
df['Datetime'] = pd.to_datetime(df['when'], unit='s')

# Extract time-based features from the 'Datetime' column
df['Hour'] = df['Datetime'].dt.hour
df['Minute'] = df['Datetime'].dt.minute
df['Second'] = df['Datetime'].dt.second
df['Weekday'] = df['Datetime'].dt.weekday  # 0=Monday, 6=Sunday
df['Month'] = df['Datetime'].dt.month

# Define helper function to get roulette column (1st, 2nd, or 3rd column)
def get_column_from_spin(spin):
    if (spin - 1) % 3 == 0:
        return 1  # 1st column
    elif (spin - 2) % 3 == 0:
        return 2  # 2nd column
    else:
        return 3  # 3rd column

# Apply the column extractor to create a column feature
df['Column'] = df['result'].apply(get_column_from_spin)

# Create target column: the 11th spin's column
df['Column 11th Spin'] = df['Column'].shift(-10)

# Drop rows where target label (Column 11th Spin) is NaN
df = df.dropna(subset=['Column 11th Spin'])

# Function to extract categorical features
def extract_categorical_features(number):
    features = {}
    features['even_odd'] = 1 if number % 2 == 0 else 0  # 1 = Even, 0 = Odd
    features['high_low'] = 1 if 19 <= number <= 36 else 0  # 1 = High, 0 = Low (1-18)
    features['dozen'] = (number - 1) // 12 + 1  # 1 = 1-12, 2 = 13-24, 3 = 25-36
    return features

# Function to extract previous spins features
def extract_previous_spins_features(data, sequence_length=10):
    features = {}
    if len(data) >= sequence_length:
        last_n_spins = data[-sequence_length:]
        features['rolling_mean'] = np.mean(last_n_spins)
        features['rolling_std'] = np.std(last_n_spins)
        features['rolling_min'] = np.min(last_n_spins)
        features['rolling_max'] = np.max(last_n_spins)
    return features

# Create new features based on the result column
def create_features(df, sequence_length=10):
    categorical_features = df['result'].apply(extract_categorical_features).apply(pd.Series)
    
    # Create rolling features manually
    rolling_features = []
    for i in range(sequence_length, len(df)):
        # Extract the last sequence_length spins
        spins_sequence = df['result'].iloc[i-sequence_length:i]
        features = extract_previous_spins_features(spins_sequence, sequence_length)
        rolling_features.append(features)
    
    rolling_features_df = pd.DataFrame(rolling_features)
    
    # Merge categorical and rolling features
    df_features = pd.concat([df.iloc[sequence_length:], categorical_features, rolling_features_df], axis=1)
    
    # Drop NaN values (shouldn't be any after the rolling features extraction)
    df_features = df_features.dropna()
    
    return df_features

# Apply feature engineering
df_features = create_features(df)

# Features (we will use result, Hour, Minute, Second, etc.)
X = df_features[['result', 'Hour', 'Minute', 'Second', 'Weekday', 'Month', 
                 'even_odd', 'high_low', 'dozen', 
                 'rolling_mean', 'rolling_std', 'rolling_min', 'rolling_max']]

# Target column: Column for 11th spin (1st, 2nd, or 3rd column)
y = df_features['Column 11th Spin']

# Adjust labels to zero-indexed (0, 1, 2) for use with sparse categorical crossentropy
y = y - 1  # Now it's 0-indexed (0 = 1st column, 1 = 2nd column, 2 = 3rd column)

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build the deep learning model
input_layer = Input(shape=(X_train.shape[1],))
x = Dense(128, activation='relu')(input_layer)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)

# Output layer (for 11th spin's column)
output = Dense(3, activation='softmax')(x)  # Three classes: 0, 1, 2

# Define the model
deep_model = Model(inputs=input_layer, outputs=output)

# Compile the model with sparse categorical crossentropy loss
deep_model.compile(optimizer=Adam(), 
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Train the deep learning model
deep_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Train additional models (Random Forest, XGBoost, Logistic Regression) for the 11th column
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
xgb_model = XGBClassifier(n_estimators=100, random_state=42)
lr_model = LogisticRegression(max_iter=1000)

# Fit these models on the training data
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)

# Get soft voting predictions (probabilities)
dl_probs = deep_model.predict(X_test)
rf_probs = rf_model.predict_proba(X_test)
xgb_probs = xgb_model.predict_proba(X_test)
lr_probs = lr_model.predict_proba(X_test)

# Combine all models' probabilities using soft voting (average)
avg_probs = (dl_probs + rf_probs + xgb_probs + lr_probs) / 4

# For each test sample, find the top 2 most likely columns
top_2_predictions = np.argsort(avg_probs, axis=1)[:, -2:]  # Get indices of top 2 predictions for each sample

# Convert to 1-indexed labels
top_2_predictions = top_2_predictions + 1

# Calculate accuracy: Check if the true column is in the top 2 predicted columns
correct_predictions = 0
for i in range(len(y_test)):
    if (y_test.iloc[i] + 1) in top_2_predictions[i]:
        correct_predictions += 1

accuracy = correct_predictions / len(y_test)

# Print accuracy
print(f"Accuracy for top 2 predictions: {accuracy * 100:.2f}%")

# Save models
deep_model.save('deep_model.h5')  # Save the deep learning model
joblib.dump(rf_model, 'rf_model.pkl')
joblib.dump(xgb_model, 'xgb_model.pkl')
joblib.dump(lr_model, 'lr_model.pkl')
joblib.dump(scaler, 'scaler.pkl')  # Save the scaler for future use

print("Models and scaler have been saved.")


Epoch 1/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.9256 - loss: 0.1789 - val_accuracy: 0.9945 - val_loss: 0.0167
Epoch 2/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1ms/step - accuracy: 0.9909 - loss: 0.0254 - val_accuracy: 0.9948 - val_loss: 0.0164
Epoch 3/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step - accuracy: 0.9928 - loss: 0.0210 - val_accuracy: 0.9948 - val_loss: 0.0160
Epoch 4/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1ms/step - accuracy: 0.9936 - loss: 0.0190 - val_accuracy: 0.9921 - val_loss: 0.0163
Epoch 5/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1ms/step - accuracy: 0.9933 - loss: 0.0196 - val_accuracy: 0.9948 - val_loss: 0.0160
Epoch 6/100
[1m12500/12500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1ms/step - accuracy: 0.9939 - loss: 0.0181 - val_accuracy: 0.9949 - val_loss:



Accuracy for top 2 predictions: 100.00%
Models and scaler have been saved.


In [33]:
X

Unnamed: 0,result,Hour,Minute,Second,Weekday,Month,even_odd,high_low,dozen,rolling_mean,rolling_std,rolling_min,rolling_max
10,19.0,16.0,1.0,9.0,6.0,1.0,0,1,2,22.3,7.457211,8.0,35.0
11,8.0,16.0,0.0,25.0,6.0,1.0,1,0,1,22.5,7.392564,8.0,35.0
12,16.0,15.0,59.0,41.0,6.0,1.0,1,0,2,24.8,5.963221,16.0,35.0
13,28.0,15.0,58.0,53.0,6.0,1.0,1,1,3,24.9,5.821512,17.0,35.0
14,17.0,15.0,58.0,5.0,6.0,1.0,0,0,2,23.3,6.856384,12.0,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
499975,16.0,15.0,18.0,31.0,6.0,1.0,1,0,2,19.9,8.607555,8.0,33.0
499976,31.0,15.0,17.0,41.0,6.0,1.0,0,1,3,18.6,9.971961,3.0,33.0
499977,21.0,15.0,16.0,57.0,6.0,1.0,0,1,2,17.7,9.187491,3.0,33.0
499978,22.0,15.0,16.0,12.0,6.0,1.0,1,1,2,18.7,10.000500,3.0,33.0


In [34]:
y

10        2.0
11        0.0
12        1.0
13        2.0
14        0.0
         ... 
499975    2.0
499976    0.0
499977    0.0
499978    2.0
499979    2.0
Name: Column 11th Spin, Length: 499970, dtype: float64