In [1]:
import json

import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Read the raw round records from the JSON file on disk.
with open('dataset/baccarat_dataset.json') as dataset_file:
    data = json.loads(dataset_file.read())

# Flatten the records into a table; nested keys become dotted column
# names (e.g. `scores.Banker`, `card_sequence.Player`).
df = pd.json_normalize(data)

# Preview the top five rows to check the resulting structure.
df.head(5)

Unnamed: 0,round_id,previous_winners,current_winner,card_sequence.Banker,card_sequence.Player,scores.Banker,scores.Player
0,1,[],Player,"[4C, 7D]","[QD, 2H]",1,2
1,2,[Player],Banker,"[2C, 6C]","[QS, AC]",8,1
2,3,"[Player, Banker]",Banker,"[2S, 3D]","[QH, JH]",5,0
3,4,"[Player, Banker, Banker]",Banker,"[AC, 7H]","[8C, 9D]",8,7
4,5,"[Player, Banker, Banker, Banker]",Player,"[7D, 4C]","[AS, 4D]",1,5


In [2]:
# Number of past outcomes used as features; every row has exactly this many
# feature columns.
HISTORY_LENGTH = 10

# Build one training row per round that has some history.
# Encoding: 'Player' -> 1, 'Banker' -> 0, anything else (e.g. 'Tie') -> 2.
records = []
for game_round in data:  # not named `round`, which would shadow the builtin
    history = game_round['previous_winners']
    if not history:
        # Rounds without any previous winners provide no features; skip them.
        continue

    # Keep only the most recent outcomes so that a long history can never
    # produce more values than there are feature columns (ragged rows make
    # the DataFrame construction below fail).
    recent_winners = history[-HISTORY_LENGTH:]
    features = [1 if winner == 'Player' else 0 if winner == 'Banker' else 2 for winner in recent_winners]

    # Right-pad short histories to the fixed width.
    # NOTE(review): the pad value 0 is also the code for 'Banker', so the
    # model cannot tell "no data" from a Banker win. Changing it alters the
    # trained model and must be mirrored wherever features are encoded for
    # prediction, so it is kept as-is here.
    features += [0] * (HISTORY_LENGTH - len(features))

    label = 1 if game_round['current_winner'] == 'Player' else 0 if game_round['current_winner'] == 'Banker' else 2
    records.append(features + [label])

# Convert to DataFrame: HISTORY_LENGTH feature columns followed by the label.
columns = [f'previous_winner_{i+1}' for i in range(HISTORY_LENGTH)] + ['current_winner']
df = pd.DataFrame(records, columns=columns)

# Split the data into features and labels
X = df.drop('current_winner', axis=1)
y = df['current_winner']

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model.
# `multi_class='multinomial'` is intentionally not passed: the parameter is
# deprecated in recent scikit-learn releases, and with the default solver a
# target with three or more classes is already fitted as a multinomial model.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate the model on the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f'Model Accuracy: {accuracy * 100:.2f}%')

Model Accuracy: 47.50%




In [4]:
# Example previous winners sequence
previous_winners = ['Player', 'Banker', 'Player', 'Tie', 'Banker', 'Player', 'Player', 'Player', 'Banker', 'Player']

# Encode exactly as the training rows were encoded:
# 'Player' -> 1, 'Banker' -> 0, anything else (e.g. 'Tie') -> 2.
# Only the last 10 outcomes are used, so a longer history cannot yield more
# values than the 10 feature columns the model was trained on.
features = [1 if winner == 'Player' else 0 if winner == 'Banker' else 2 for winner in previous_winners[-10:]]
features += [0] * (10 - len(features))  # Padding to ensure fixed length (same pad value as in training)

# Convert to a single-row DataFrame with the training column names
new_data = pd.DataFrame([features], columns=[f'previous_winner_{i+1}' for i in range(10)])

# Predict probabilities
prediction_probabilities = model.predict_proba(new_data)[0]

# predict_proba orders its columns like model.classes_, so look each outcome
# up by its class code, not by a hard-coded position. A class that never
# appeared in the training data has no column and is reported as 0.0.
probability_by_code = {
    int(code): probability
    for code, probability in zip(model.classes_, prediction_probabilities)
}
winner_probabilities = {
    'Player': probability_by_code.get(1, 0.0),
    'Banker': probability_by_code.get(0, 0.0),
    'Tie': probability_by_code.get(2, 0.0)
}

print('Predicted Winner Probabilities:')
for outcome in ('Player', 'Banker', 'Tie'):
    print(f"{outcome}: {winner_probabilities[outcome] * 100:.2f}%")

Predicted Winner Probabilities:
Player: 39.27%
Banker: 41.19%
Tie: 19.54%


In [5]:
# Save the fitted model to disk so it can be reloaded without retraining.
# (joblib is imported with the other dependencies in the first cell.)
joblib.dump(model, 'baccarat_model.pkl')



['baccarat_model.pkl']

In [6]:
# Load the model back from disk and repeat the prediction to confirm that the
# saved file reproduces the in-memory model's output.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model = joblib.load('baccarat_model.pkl')
prediction_probabilities = model.predict_proba(new_data)[0]

# predict_proba orders its columns like model.classes_, so look each outcome
# up by its class code (Player = 1, Banker = 0, Tie = 2), not by a hard-coded
# position. A class absent from the training data is reported as 0.0.
probability_by_code = {
    int(code): probability
    for code, probability in zip(model.classes_, prediction_probabilities)
}
winner_probabilities = {
    'Player': probability_by_code.get(1, 0.0),
    'Banker': probability_by_code.get(0, 0.0),
    'Tie': probability_by_code.get(2, 0.0)
}

print('Predicted Winner Probabilities:')
for outcome in ('Player', 'Banker', 'Tie'):
    print(f"{outcome}: {winner_probabilities[outcome] * 100:.2f}%")


Predicted Winner Probabilities:
Player: 39.27%
Banker: 41.19%
Tie: 19.54%
