In [284]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, DMatrix
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from datasets import load_dataset

In [3]:
# Load the "train" split of the en-AU configuration of PolyAI/minds14.
# trust_remote_code=True lets the dataset's own loading script run locally.
minds = load_dataset(
    "PolyAI/minds14",
    name="en-AU",
    split="train",
    trust_remote_code=True,
)

Downloading data:   0%|          | 0.00/471M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [281]:
def get_img_1darray_from_text(text):
    """Rasterize ``text`` with OpenCV and return it as a flat 0/1 array.

    The text is drawn in white on a black canvas that is twice as tall as the
    measured text height and exactly as wide as the measured text width.
    Every pixel touched by the glyphs (anti-aliased edge pixels included)
    becomes 1; untouched background pixels become 0.

    Args:
        text: The string to render.

    Returns:
        A 1-D integer NumPy array with one entry per canvas pixel, in
        row-major order. Its length depends on the rendered size of ``text``.
    """
    face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 5
    stroke = 15
    white = (255, 255, 255)

    # Measure the text so the canvas can be sized to fit it.
    (width, height), _ = cv2.getTextSize(text, face, scale, stroke)

    canvas = np.zeros((2 * height, width, 3), dtype=np.uint8)
    origin = (0, int(1.5 * height))  # bottom-left corner of the drawn text
    cv2.putText(canvas, text, origin, face, scale, white, stroke, cv2.LINE_AA)

    # A pixel counts as "ink" if any of its three channels is non-zero.
    inked = canvas.any(axis=-1)
    return np.where(inked, 1, 0).flatten()
    


In [294]:
# One-row feature list: the flattened binary image of the first transcription.
# The column is selected before the row so only the text column is read.
data = [
    get_img_1darray_from_text(minds["transcription"][0]),
]

# Wrap the features in XGBoost's DMatrix container.
dmatrix = DMatrix(data)

In [None]:
# Load dataset
# NOTE(review): in the preceding cell `data` is bound to a plain Python list
# holding one flattened image array. A list has no `.data` / `.target`
# attributes, so the two lines below raise AttributeError as written. They
# presumably expect an object exposing both (e.g. a scikit-learn dataset
# Bunch) -- TODO confirm the intended feature matrix and regression targets.
X = data.data
y = data.target

# Split the dataset
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Q-learning settings ---------------------------------------------------
num_episodes = 100           # number of search iterations
learning_rate = 0.1          # step size of each Q-value update
discount_factor = 0.95       # weight of the bootstrapped max-Q term
exploration_prob = 1.0       # initial probability of picking a random action
exploration_decay = 0.99     # multiplicative decay applied after each episode
min_exploration_prob = 0.01  # floor for the exploration probability

# --- Action space: (number of trees, tree depth) pairs ----------------------
# Tree counts step by 10 starting at 1 (1, 11, ..., 91); depths run 1..max_depth.
max_trees = 100
max_depth = 10
actions = [
    (n_trees, depth)
    for n_trees in range(1, max_trees + 1, 10)
    for depth in range(1, max_depth + 1)
]

# --- Q-table: one value per action, all starting at zero --------------------
Q_table = np.zeros(len(actions))

def train_model(n_trees, max_depth):
    """Train a multi-output XGBoost regressor and return its negated RMSE.

    The model is fitted on the module-level ``X_train`` / ``y_train`` and
    scored on ``X_test`` / ``y_test``.

    Args:
        n_trees: Number of boosting rounds (passed as ``n_estimators``).
        max_depth: Maximum depth of each tree.

    Returns:
        The negative RMSE, so that a larger value means a better model. The
        RMSE is computed per output column and then averaged uniformly.
    """
    # Dict keys must be strings; the dict is unpacked into keyword arguments
    # below (passing it positionally would not set these hyperparameters).
    params = {
        "n_estimators": n_trees,
        "max_depth": max_depth,
        "random_state": 42,
    }
    model = MultiOutputRegressor(XGBRegressor(**params))
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # `squared=False` is deprecated/removed in recent scikit-learn releases,
    # so take the square root of the per-output MSE explicitly.
    per_output_mse = mean_squared_error(y_test, predictions, multioutput="raw_values")
    rmse = np.sqrt(per_output_mse).mean()
    return -rmse  # We want to minimize RMSE

# Q-learning loop
# Seeded generator so the explore/exploit sequence is reproducible.
rng = np.random.default_rng(42)

# train_model uses a fixed random_state, so the reward of an action does not
# change between episodes; cache it instead of retraining on every revisit.
# The cache keys also record which actions were actually evaluated.
reward_cache = {}

for episode in range(num_episodes):
    if rng.random() < exploration_prob:
        action_index = int(rng.integers(len(actions)))  # Explore
    else:
        action_index = int(np.argmax(Q_table))  # Exploit

    # Use `depth` rather than rebinding the module-level `max_depth` constant.
    n_trees, depth = actions[action_index]

    # Train the model (once per distinct action) and get the reward
    if action_index not in reward_cache:
        reward_cache[action_index] = train_model(n_trees, depth)
    reward = reward_cache[action_index]

    # Update Q-value
    # NOTE(review): there is only a single state here, so the bootstrapped
    # max-Q term acts on the same table the action was drawn from -- confirm
    # that this is the intended update rule.
    best_next_action = np.argmax(Q_table)
    Q_table[action_index] += learning_rate * (reward + discount_factor * Q_table[best_next_action] - Q_table[action_index])

    # Decay exploration probability
    exploration_prob = max(min_exploration_prob, exploration_prob * exploration_decay)

    # Print progress
    if episode % 10 == 0:
        print(f"Episode {episode}: Trees={n_trees}, Depth={depth}, Reward={reward:.4f}")

# Find the best hyperparameters
# Rewards are negative RMSEs while the Q-table starts at zero, so a plain
# argmax over Q_table would prefer actions that were never (or rarely)
# evaluated. Pick the evaluated action with the highest observed reward.
if reward_cache:
    best_action_index = max(reward_cache, key=reward_cache.get)
else:
    # No episode ran; fall back to the (all-zero) Q-table.
    best_action_index = int(np.argmax(Q_table))
best_n_trees, best_max_depth = actions[best_action_index]
print(f"Best hyperparameters: Trees={best_n_trees}, Depth={best_max_depth}")