### This notebook contains code for our implementation of the Deep Q Network (DQN) and Genetic Algorithm (GA) methods, as well as video clips of our best trained models.

Note: Our models take too long to train and run full games so we are just including our functions but not calling them.

We did not include our Proximal Policy Optimization (PPO) model because it didn't work but its code in /src

# Deep Q Network

### Dependencies

In [10]:
from random import random, randint, sample
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import cv2
from collections import deque

current_folder = os.getcwd()
tetris_folder = os.path.join(current_folder, 'src','dqn')
sys.path.append(tetris_folder)
from modified_tetris import Tetris

### Neural Network

In [11]:
class DQN(nn.Module):
    def __init__(self):
        super(DQN, self).__init__()

        self.conv1 = nn.Sequential(nn.Linear(4, 64), nn.ReLU(inplace=True))
        self.conv2 = nn.Sequential(nn.Linear(64, 64), nn.ReLU(inplace=True))
        self.conv3 = nn.Sequential(nn.Linear(64, 1))

        self._create_weights()

    def _create_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        return x

### Training (not called)

In [12]:

WIDTH = 10  # Width of board
HEIGHT = 20  # Height of board
BLOCK_SIZE = 30  # Block size when rendering
BATCH_SIZE = 512  # High batch size
LEARNING_RATE = 1e-3
GAMMA = 0.99
INITIAL_EPSILON = 1.0
FINAL_EPSILON = 1e-3
NUM_DECAY_EPOCHS = 1800
NUM_EPOCHS = 3000
SAVE_INTERVAL = 50
REPLAY_MEMORY_SIZE = 28000

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

import matplotlib.pyplot as plt

def train():
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)

    env = Tetris(width=WIDTH, height=HEIGHT, block_size=BLOCK_SIZE)
    model = DQN().to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.MSELoss()

    state = env.reset().to(DEVICE)
    replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
    epoch = 0

    # Data for plotting
    epoch_scores = []

    while epoch < NUM_EPOCHS:
        next_steps = env.get_next_states()
        # Epsilon Greedy
        epsilon = FINAL_EPSILON + (max(NUM_DECAY_EPOCHS - epoch, 0) * 
                                   (INITIAL_EPSILON - FINAL_EPSILON) / NUM_DECAY_EPOCHS)
        next_actions, next_states = zip(*next_steps.items())
        next_states = torch.stack(next_states).to(DEVICE)

        model.eval()
        with torch.no_grad():
            predictions = model(next_states)[:, 0]
        model.train()

        if random() <= epsilon:
            index = randint(0, len(next_steps) - 1)
        else:
            index = torch.argmax(predictions).item()

        next_state = next_states[index, :]
        action = next_actions[index]

        reward, done = env.step(action, render=False)
        next_state = next_state.to(DEVICE)
        replay_memory.append([state, reward, next_state, done])

        if done:
            final_score = env.score
            final_tetrominoes = env.tetrominoes
            final_cleared_lines = env.cleared_lines
            state = env.reset().to(DEVICE)

            # Store score for plotting
            epoch_scores.append(final_score)
        else:
            state = next_state
            continue

        if len(replay_memory) < REPLAY_MEMORY_SIZE / 10:
            continue

        epoch += 1
        batch = sample(replay_memory, min(len(replay_memory), BATCH_SIZE))
        state_batch, reward_batch, next_state_batch, done_batch = zip(*batch)
        state_batch = torch.stack(tuple(state for state in state_batch)).to(DEVICE)
        reward_batch = torch.from_numpy(np.array(reward_batch, dtype=np.float32)[:, None]).to(DEVICE)
        next_state_batch = torch.stack(tuple(state for state in next_state_batch)).to(DEVICE)

        q_values = model(state_batch)
        model.eval()
        with torch.no_grad():
            next_prediction_batch = model(next_state_batch)
        model.train()

        # Compute the target Q-values for each transition
        y_values = [
            reward if done else reward + GAMMA * prediction
            for reward, done, prediction in zip(reward_batch, done_batch, next_prediction_batch)
        ]
        y_tensor = torch.tensor(y_values, dtype=torch.float32, device=DEVICE)
        y_batch = y_tensor[:, None]

        optimizer.zero_grad()
        loss = criterion(q_values, y_batch)
        loss.backward()
        optimizer.step()

        print("Epoch: {}/{}, Action: {}, Score: {}, Tetrominoes {}, Cleared lines: {}".format(
            epoch,
            NUM_EPOCHS,
            action,
            final_score,
            final_tetrominoes,
            final_cleared_lines))

        if epoch > 0 and epoch % SAVE_INTERVAL == 0:
            torch.save(model.state_dict(), "saved_model.pth")

    torch.save(model.state_dict(), "saved_model.pth")

    # Plotting the scores
    plt.figure(figsize=(10, 6))
    plt.plot(range(len(epoch_scores)), epoch_scores, label="Score")
    plt.xlabel("Epochs")
    plt.ylabel("Score")
    plt.title("Score vs. Epochs")
    plt.legend()
    plt.grid()
    plt.show()

    return model

### Evaluation (not called)

In [13]:
FPS = 300
def evaluate_model(model, num_games=10):
    model.eval()
    env = Tetris(width=10, height=20, block_size=30)

    total_score = 0
    total_tetrominoes = 0
    total_lines_cleared = 0

    for game in range(num_games):
        _ = env.reset().to(DEVICE)
        game_score = 0
        game_tetrominoes = 0
        game_lines_cleared = 0

        while True:
            next_steps = env.get_next_states()
            next_actions, next_states = zip(*next_steps.items())
            next_states = torch.stack(next_states).to(DEVICE)

            with torch.no_grad():
                predictions = model(next_states)[:, 0]
            best_action_index = torch.argmax(predictions).item()
            action = next_actions[best_action_index]

            _, done = env.step(action,render=False)

            game_score = env.score
            game_tetrominoes = env.tetrominoes
            game_lines_cleared = env.cleared_lines

            if done:
                break

        # Accumulate totals
        total_score += game_score
        total_tetrominoes += game_tetrominoes
        total_lines_cleared += game_lines_cleared

        print(f"Game {game + 1}/{num_games} - Score: {game_score}, Tetrominoes: {game_tetrominoes}, Lines Cleared: {game_lines_cleared}")

    # Calculate averages
    avg_score = total_score / num_games
    avg_tetrominoes = total_tetrominoes / num_games
    avg_lines_cleared = total_lines_cleared / num_games

    print(f"\nEvaluation Results:")
    print(f"Average Score: {avg_score}")
    print(f"Average Tetrominoes: {avg_tetrominoes}")
    print(f"Average Lines Cleared: {avg_lines_cleared}")

    return avg_score, avg_tetrominoes, avg_lines_cleared


##### If you want to evaluate our best DQN model, uncomment the last line in the following cell. It will take more than one minute though.

In [20]:
current_folder = os.getcwd()
saved_model = os.path.join(current_folder, 'src', 'dqn', 'Trained_Models','adaptation2.pth')

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent_tetris = DQN().to(DEVICE)
agent_tetris.load_state_dict(torch.load(saved_model))
#avg_score, avg_tetrominoes, avg_lines_cleared = evaluate_model(agent_tetris)

  agent_tetris.load_state_dict(torch.load(saved_model))


Game 1/10 - Score: 419008, Tetrominoes: 83830, Lines Cleared: 33518


KeyboardInterrupt: 

In [16]:
current_folder = os.getcwd()
from IPython.display import Video

video_path = os.path.join(current_folder, 'src', 'dqn', 'dqn_model.mp4')
Video(video_path, embed=True, width=600)

# Genetic Algorithm

### Dependencies