# Dataset Generation

In [None]:
import csv
import random

import pandas as pd

def generate_missing_words_player_stats():
    """Draw one synthetic "missing words" play session and label it with a level.

    Returns:
      A dictionary containing the following keys:
        * completion_time: A float drawn uniformly between 0.1 and 1.5.
        * hints: The number of hints the player used (0 or 1).
        * mistakes: The number of mistakes the player made (0 to 3).
        * is_completed: 1 if the player completed the game, otherwise 0.
        * previous_level: The player's level before the session (1 to 4).
        * level: The player's level after the session (1 to 4).
    """
    # The order of these draws fixes how a seeded ``random`` stream maps to
    # the individual fields, so it must not be rearranged.
    elapsed = random.uniform(0.1, 1.5)
    hint_count = random.randint(0, 1)
    mistake_count = random.randint(0, 3)
    finished = random.randint(0, 1)
    prior_level = random.randint(1, 4)

    if finished == 0:
        # An unfinished game costs one level, whatever the time was.
        new_level = prior_level - 1
    else:
        # A finished game is graded by its speed band alone (the previous
        # level is ignored) ...
        if elapsed <= 0.5:
            band_best = 4
        elif elapsed <= 1:
            band_best = 3
        else:
            band_best = 2
        # ... and any hint or mistake lowers that grade by one.
        flawless = hint_count == 0 and mistake_count == 0
        new_level = band_best if flawless else band_best - 1

    return {
        "completion_time": elapsed,
        "hints": hint_count,
        "mistakes": mistake_count,
        "is_completed": finished,
        "previous_level": prior_level,
        # Levels never drop below 1.
        "level": max(new_level, 1),
    }

def generate_missing_words_players_stats_dataset(num_samples):
    """Build a list of independently generated player stats samples.

    Args:
      num_samples: How many samples to generate.

    Returns:
      A list of ``num_samples`` dictionaries, each produced by one call to
      ``generate_missing_words_player_stats``.
    """
    return [generate_missing_words_player_stats() for _ in range(num_samples)]

# Generate a dataset of 1,000,000 missing words players stats samples.
missing_words_players_stats_dataset = generate_missing_words_players_stats_dataset(1_000_000)

# Tabulate the samples: one row per sample, one column per dictionary key.
df = pd.DataFrame(missing_words_players_stats_dataset)

# Save the DataFrame to a CSV file; index=False keeps the row index out of the
# file so it contains only the stat columns.
df.to_csv("C:\\Users\\faisa\\Documents\\Games_Section\\chess\\CSVs\\missing_words_players_stats.csv", index=False)


# Model Training

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Load the generated player stats dataset
data = pd.read_csv("C:\\Users\\faisa\\Documents\\Games_Section\\chess\\CSVs\\missing_words_players_stats.csv")


# Split the data into features (X) and the target variable (y)
X = data[["completion_time","hints","mistakes","is_completed","previous_level"]]
y = data["level"]
# Split the data into training, validation, and test sets:
# 20% is held out first, then halved, giving an 80/10/10 split.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Create a LabelEncoder instance; it maps the level values to consecutive
# class indices, which is what the sparse categorical loss below expects.
label_encoder = LabelEncoder()

# Fit on the training labels only, then reuse the same mapping for the
# validation and test labels.
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# Persist the index -> level mapping so predicted class indices can be
# turned back into levels at inference time.
np.save("missing_words_encoded_labels.npy", label_encoder.classes_)


# NOTE(review): l2 comes from the standalone `keras` package while the layers
# come from `tf.keras` -- confirm both resolve to the same Keras install.
from keras.regularizers import l2

# Three L2-regularised hidden layers, each followed by batch normalisation and
# dropout, feeding a 4-way softmax over the encoded levels. The 5 inputs are
# the feature columns selected into X above.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(5,)),
    tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(4,activation='softmax')
])


# Learning rate scheduling: start at 0.001 and multiply by 0.9 after every
# 10000 optimizer steps (staircase=True makes the decay happen in discrete jumps).
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=10000, decay_rate=0.9, staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping: stop once val_loss has not improved for 5 consecutive epochs
# and roll the model back to the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Train the model
history = model.fit(
    X_train, y_train_encoded,
    epochs=100,  # upper bound; early stopping may end training sooner
    batch_size=64,  # samples per gradient update
    validation_data=(X_val, y_val_encoded),
    callbacks=[early_stopping],
    verbose=1  # progress bar per epoch; verbose=2 prints one summary line per epoch instead
)

# Save the trained weights only (not the architecture), so whoever loads them
# must rebuild the same layer stack first.
# NOTE(review): Keras 3 requires weight files to end in ".weights.h5" --
# confirm the installed Keras version accepts this file name.
model.save_weights("missing_words_weights.keras")

# Evaluate the model on the held-out test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print(f"Test Accuracy: {test_accuracy}")



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Test Accuracy: 0.9992499947547913


# Model Usage

In [5]:
import tensorflow as tf
from keras.regularizers import l2
import numpy as np

# Saved weight file for each supported game, keyed by game name.
weights_path = {
    "sudoku": "sudoku_weights.keras",
    "missing_words": "missing_words_weights.keras",
    "puzzle": "puzzle_weights.keras",
}

# Saved label-encoder classes file for each supported game, keyed by the same
# game names as ``weights_path``.
classes_path = {
    "sudoku": "sudoku_encoded_labels.npy",
    "missing_words": "missing_words_encoded_labels.npy",
    "puzzle": "puzzle_encoded_labels.npy",
}


# Per-game cache of (model, class_labels), so repeated predictions do not
# rebuild the network and re-read the weight and label files on every call.
_PREDICTORS = {}


def _build_level_model():
    """Return an untrained network with the same layer stack used in training."""
    return tf.keras.Sequential([
        tf.keras.layers.Input(shape=(5,)),
        tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(4, activation='softmax'),
    ])


def _get_predictor(game):
    """Return the ``(model, class_labels)`` pair for ``game``, loading it on first use."""
    if game not in _PREDICTORS:
        model = _build_level_model()
        model.load_weights(weights_path[game])
        class_labels = np.load(classes_path[game])
        # Only cache once both files have loaded, so a failed load is retried
        # on the next call instead of leaving a half-initialised entry behind.
        _PREDICTORS[game] = (model, class_labels)
    return _PREDICTORS[game]


def predict(features, game):
    """Predict a player's new level for ``game`` from one session's stats.

    The model and label array for each game are loaded from disk on the first
    call and reused afterwards; files changed on disk later are not re-read.

    Args:
      features: Five values, in this order:
        features[0] = completion_time
        features[1] = hints
        features[2] = mistakes
        features[3] = is_completed
        features[4] = previous_level
      game: A key of ``weights_path`` / ``classes_path``.

    Returns:
      The entry of the game's saved class-label array whose index has the
      highest predicted probability.

    Raises:
      KeyError: If ``game`` is not a key of ``weights_path`` / ``classes_path``.
      ValueError: If ``features`` does not hold exactly five values.
    """
    model, class_labels = _get_predictor(game)

    # The network expects a batch, so wrap the single sample in an outer axis.
    batch = np.array([features])
    if batch.shape != (1, 5):
        raise ValueError(
            f"features must contain exactly 5 values, got shape {batch.shape[1:]}"
        )

    predicted_probs = model.predict(batch)
    # One sample in the batch, so the flat argmax is the class index.
    predicted_class = np.argmax(predicted_probs)
    return class_labels[predicted_class]


# Example: a completed run with completion_time 0.4, no hints, no mistakes,
# and previous level 3.
print(predict([0.4, 0, 0, 1, 3], game='missing_words'))


4
