In [2]:

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Activation, BatchNormalization, GlobalAveragePooling2D, Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
# import keras_tuner as kt
import gzip
import json
import math
import numpy as np
import matplotlib.pyplot as plt

def load_puzzle_data(file_path):
    """Refresh the local data checkout, then load a gzipped JSON puzzle file.

    A ``git pull`` is attempted inside ``buggle-training-data`` before reading.
    The pull is best-effort: if the directory or the ``git`` executable is
    missing, or the pull itself fails, a message is printed and loading
    continues with whatever is on disk.

    Args:
        file_path: Path to a gzip-compressed JSON file.

    Returns:
        The decoded JSON document.
    """
    import subprocess

    # Plain-Python replacement for the notebook-only `!cd ... && git pull`
    # shell escape: argv list + cwd, so no shell is involved and the function
    # also works outside IPython.
    try:
        pull = subprocess.run(
            ["git", "pull"],
            cwd="buggle-training-data",
            capture_output=True,
            text=True,
            errors="replace",
            check=False,
        )
    except OSError as exc:
        # Missing directory or missing git binary; keep going like `!` would.
        print(f"git pull skipped: {exc}")
    else:
        # Echo git's output so it stays visible in the notebook cell.
        print(pull.stdout + pull.stderr, end="")

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with gzip.open(file_path, 'rt', encoding='utf-8') as f:
        return json.load(f)

# Lowercase letters a-z in order, and the reverse lookup from letter to its
# position in that list.
alphabet = list("abcdefghijklmnopqrstuvwxyz")
char_to_int = {letter: position for position, letter in enumerate(alphabet)}

def one_hot_encode(matrix):
    """One-hot encode a grid of letters.

    Args:
        matrix: Sequence of rows, each a sequence of single letters that are
            keys of ``char_to_int``.

    Returns:
        Array of shape (len(matrix), len(matrix[0]), len(alphabet)) holding 1
        at each cell's letter index and 0 elsewhere.

    Raises:
        KeyError: If a cell is not a key of ``char_to_int``.
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0])  # grid width is taken from the first row
    encoded = np.zeros((n_rows, n_cols, len(alphabet)))
    for r, letters in enumerate(matrix):
        for c, letter in enumerate(letters):
            channel = char_to_int[letter]
            encoded[r, c, channel] = 1
    return encoded

def extract_features(puzzles, grid_shape=(4, 4)):
    """Turn puzzle records into model inputs and regression targets.

    Args:
        puzzles: Sequence of dicts, each with a 'matrix' entry (a letter grid
            accepted by ``one_hot_encode``) and a 'totalWords' entry.
        grid_shape: (rows, cols) of every puzzle grid. Defaults to (4, 4).

    Returns:
        Tuple ``(matrix_features, outcomes)``: an array of shape
        (len(puzzles), rows, cols, 26) with the one-hot encoded grids, and an
        array of shape (len(puzzles),) with each puzzle's 'totalWords'.
    """
    num_puzzles = len(puzzles)
    # 26 channels: one per letter, matching one_hot_encode's last axis.
    matrix_features = np.zeros((num_puzzles, *grid_shape, 26))
    outcomes = np.zeros(num_puzzles)

    for idx, puzzle in enumerate(puzzles):
        matrix_features[idx] = one_hot_encode(puzzle['matrix'])
        outcomes[idx] = puzzle['totalWords']

    # Both buffers are already ndarrays; wrapping them in np.array() again
    # would duplicate the (potentially multi-GB) feature tensor in memory.
    return matrix_features, outcomes

def preprocess_data(matrix_features, outcomes):
    """Split features and targets into train, validation and test sets.

    The data is split twice with the same fixed seed: 20% is held out for
    testing, then 25% of the remainder (roughly 20% of the total) becomes the
    validation set.

    Returns:
        Tuple (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    seed = 42

    # First cut: hold out the test set.
    X_rest, X_test, y_rest, y_test = train_test_split(
        matrix_features,
        outcomes,
        test_size=0.2,
        random_state=seed,
    )

    # Second cut: carve the validation set out of what is left.
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest,
        y_rest,
        test_size=0.25,
        random_state=seed,
    )

    return X_train, X_val, X_test, y_train, y_val, y_test

def augment_data(matrix_features, outcomes):
    """Augment each grid with its 90-degree rotation, skipping duplicates.

    For every grid the original and ``np.rot90`` of it are considered; a
    variant is kept only if it differs from the variants already kept for
    that grid. Each kept variant carries the grid's original outcome.

    Only the 90-degree rotation is enabled here. Other symmetries (180/270
    degree rotations, horizontal/vertical flips, both diagonal transposes)
    are not applied.

    Args:
        matrix_features: Iterable of grid arrays.
        outcomes: Indexable of targets, aligned with ``matrix_features``.

    Returns:
        Tuple ``(augmented_matrices, augmented_outcomes)`` as numpy arrays.
    """
    boards = []
    labels = []

    for position, board in enumerate(matrix_features):
        candidates = (board, np.rot90(board))

        # Keep the first occurrence of each distinct variant, in order.
        distinct = []
        for candidate in candidates:
            already_kept = False
            for kept in distinct:
                if np.array_equal(candidate, kept):
                    already_kept = True
                    break
            if not already_kept:
                distinct.append(candidate)

        label = outcomes[position]
        for variant in distinct:
            boards.append(np.array(variant))
            labels.append(label)

    return np.array(boards), np.array(labels)

def build_model(input_shape):
    """Build, compile and summarise the CNN regressor.

    The network is three 3x3 'same'-padded ReLU convolutions (32, 64, 128
    filters), each followed by batch normalisation, then global average
    pooling, a 256-unit ReLU dense layer, dropout of 0.4 and a single linear
    output unit. Conv and hidden dense kernels use L2 regularisation (0.001).

    Args:
        input_shape: Shape of one input sample (without the batch axis).

    Returns:
        The compiled model (Adam at 0.001, MSE loss, MAE and RMSE metrics).
        The model summary is printed as a side effect.
    """
    weight_decay = 0.001

    stack = [Input(shape=input_shape)]
    for filters in (32, 64, 128):
        stack.append(
            Conv2D(
                filters,
                (3, 3),
                activation='relu',
                padding='same',
                kernel_regularizer=l2(weight_decay),
            )
        )
        stack.append(BatchNormalization())
    stack.append(GlobalAveragePooling2D())
    stack.append(Dense(256, activation='relu', kernel_regularizer=l2(weight_decay)))
    stack.append(Dropout(0.4))
    stack.append(Dense(1))

    model = Sequential(stack)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mean_squared_error',
        metrics=['mean_absolute_error', RootMeanSquaredError()],
    )
    model.summary()
    return model

def main():
    """Train, evaluate, save and plot the puzzle word-count regressor.

    Steps: load the puzzle file, one-hot encode it, split it into
    train/validation/test sets, fit the CNN with early stopping and
    learning-rate reduction on plateau, evaluate on the test set, save the
    model, then plot training versus validation MAE.
    """
    import os

    puzzle_data = load_puzzle_data('buggle-training-data/training_data/training_data-1250000.gz')
    print(f"Got {len(puzzle_data)} puzzles.")
    matrix_features, outcomes = extract_features(puzzle_data)
    print(f"Loaded {len(matrix_features)} matrix features.")
    print(f"Loaded {len(outcomes)} outcomes.")

    # NOTE: augment_data() is not applied here. If it is enabled, consider
    # augmenting only the training split; augmenting before the split would
    # put rotated copies of one puzzle on both sides of the split.
    X_train, X_val, X_test, y_train, y_val, y_test = preprocess_data(matrix_features, outcomes)
    print(f"Preprocessed {len(X_train)} training examples, {len(X_val)} validation examples, and {len(X_test)} test examples.")
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

    model = build_model(X_train.shape[1:])
    print("Model built.")

    # Stop after 5 epochs without val_loss improvement and roll back to the
    # best weights; halve the learning rate after 2 stagnant epochs.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0001, verbose=1)

    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=64,
        validation_data=(X_val, y_val),
        verbose=1,
        callbacks=[early_stopping, reduce_lr]
    )

    # evaluate() returns [loss, MAE, RMSE] in the order given to compile().
    test_results = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Loss: {test_results[0]}, Test MAE: {test_results[1]}")

    # Save before plotting: plt.show() can block or fail, and that must not
    # cost the trained weights. Create the target directory if it is missing.
    model_path = 'buggle-training-data/models/prediction_model-15.keras'
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model.save(model_path)
    print('Model saved.')

    plt.figure(figsize=(10, 6))
    plt.plot(history.history['mean_absolute_error'], label='MAE (training data)')
    plt.plot(history.history['val_mean_absolute_error'], label='MAE (validation data)')
    plt.title('MAE for Puzzle Prediction')
    plt.ylabel('MAE value')
    plt.xlabel('No. epoch')
    plt.legend(loc="upper right")
    plt.show()

# Run the pipeline only when this code is the entry point (script or
# notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

ModuleNotFoundError: No module named 'tensorflow'