## Training GPT-2 Model on Path Sequences to Perform Navigation Tasks
This script trains/fine-tunes a GPT-2 model on path sequences in artificial grid environments with randomly generated nouns representing locations on a grid.

This script has the following parts:

* Generation of path sequences in artificial environments
Optimal and suboptimal paths are generated separately for the training data. For optimal paths, every shortest path between each pair of distinct start and end locations is generated, so the training data includes all possible optimal paths. For suboptimal paths, only a random subset is selected, because the possible suboptimal paths greatly outnumber the optimal paths as the grid size increases.

* Train/fine-tune GPT-2 model on the generated path sequences




#### Installation / imports:

In [1]:
# Mount Google Drive in the Colab runtime, then change the working directory
# to the project folder so the relative paths used by later cells
# (e.g. spatial_model/, ./run_clm.py) resolve inside it.
from google.colab import drive
drive.mount('/content/drive')
%cd "/content/drive/My Drive/modelling_spatial_navigation/gpt2small_dual_mode_v12_6000_4by4grids"

Mounted at /content/drive
/content/drive/My Drive/modelling_spatial_navigation/gpt2small_dual_mode_v12_6000_4by4grids


In [None]:
# Environment check: print which python and pip executables the notebook
# shell resolves to, and the Python version.
! which python
! which pip
! python --version

In [None]:
# Install transformers directly from its GitHub repository, then upgrade the
# remaining packages from the package index.
! pip install git+https://github.com/huggingface/transformers --upgrade
! pip install accelerate evaluate wonderwords simpletransformers --upgrade
! pip install huggingface_hub --upgrade

### Generate training and test data.

In [None]:
import random
import pandas as pd
import networkx as nx
import logging
from random import shuffle
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import random
import string
import os
import re
import glob
import torch
from wonderwords import RandomWord
import os
import gc
import pickle
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
from itertools import permutations
import logging
from random import shuffle
from matplotlib import pyplot as plt
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import math

# Set the WANDB_DISABLED environment variable for this process.
# Presumably this turns off Weights & Biases logging in the training
# script -- confirm against the transformers version in use.
os.environ["WANDB_DISABLED"] = "true"

# Module-level word generator.
# NOTE(review): generate_nouns() creates its own RandomWord instance, so this
# object looks unused in the visible code -- confirm before removing.
Random_Word = RandomWord()

# Function to generate a random grid of nouns
def create_unique_random_grid(nouns, size):
    """Lay out a random sample of nouns as a ``size`` x ``size`` grid.

    Args:
        nouns: Sequence of candidate nouns; must hold at least
            ``size * size`` items (``random.sample`` raises ``ValueError``
            otherwise).
        size: Number of rows and columns of the grid.

    Returns:
        A list of ``size`` rows, each a list of ``size`` nouns. No noun is
        used twice because the sample is drawn without replacement.
    """
    picked = random.sample(nouns, size * size)
    rows = []
    for row_index in range(size):
        row_start = row_index * size
        rows.append(picked[row_start:row_start + size])
    return rows

# Function to generate all start-end combinations for a given grid
def generate_start_end_permutations(size):
    """List every ordered (start, end) pair of distinct cells on the grid.

    Args:
        size: Number of rows and columns of the grid.

    Returns:
        A list of ``((start_row, start_col), (end_row, end_col))`` tuples in
        row-major order of the start cell, then row-major order of the end
        cell. Pairs whose start equals their end are left out.
    """
    cells = [(row, col) for row in range(size) for col in range(size)]
    return [
        (origin, target)
        for origin in cells
        for target in cells
        if origin != target
    ]

# Function to generate all possible optimal paths for given start-end combinations
def generate_path_permutations(combinations):
    """Enumerate every shortest (Manhattan) path for each start/end pair.

    A shortest path is fully determined by which of its steps are horizontal,
    so the paths are produced by choosing the positions of the horizontal
    steps. This yields each distinct path exactly once, in a deterministic
    order, without first generating and de-duplicating all step permutations.

    Args:
        combinations: Iterable of ``(start, end)`` pairs, where each cell is
            a ``(row, col)`` pair.

    Returns:
        A list of dicts ``{'start': start, 'end': end, 'path': path}`` where
        ``path`` begins with ``start`` and is followed by the visited cells
        as ``(row, col)`` tuples. A pair whose start equals its end yields
        the single path ``[start]``.
    """
    # Imported under an alias because the parameter name ``combinations``
    # shadows itertools.combinations inside this function.
    from itertools import combinations as _choose

    paths = []
    for start, end in combinations:
        horizontal_steps = abs(end[1] - start[1])  # Difference in columns
        vertical_steps = abs(end[0] - start[0])    # Difference in rows
        total_steps = horizontal_steps + vertical_steps
        col_delta = 1 if end[1] > start[1] else -1
        row_delta = 1 if end[0] > start[0] else -1

        for horizontal_slots in _choose(range(total_steps), horizontal_steps):
            slots = set(horizontal_slots)
            row, col = start[0], start[1]
            path = [start]
            for step_index in range(total_steps):
                if step_index in slots:
                    col += col_delta
                else:
                    row += row_delta
                path.append((row, col))
            paths.append({'start': start, 'end': end, 'path': path})

    return paths

# Function to generate nouns
def generate_nouns(size):
    """Collect ``size * size`` distinct random nouns for one grid.

    Args:
        size: Number of rows and columns of the grid the nouns will fill.

    Returns:
        A list of ``size * size`` unique nouns, with any spaces replaced by
        underscores. The list order is that of the underlying set and is
        therefore arbitrary.
    """
    # Local name chosen so it does not shadow the module-level Random_Word.
    word_source = RandomWord()
    needed = size * size
    nouns = set()
    while len(nouns) < needed:
        word = word_source.word(include_parts_of_speech=["nouns"])
        # Check for a missing/empty word BEFORE calling .replace(); doing it
        # afterwards (as before) could never guard against None.
        if not word:
            continue
        # Replace spaces with underscores so each noun stays one token-like unit
        nouns.add(word.replace(" ", "_"))
    return list(nouns)

def shuffle_stimuli(stimuli):
    """Shuffle ``stimuli`` in place and return that same list object."""
    shuffle(stimuli)
    return stimuli

def get_direction(prev_coord, coord):
    """Return the direction letter of the move from ``prev_coord`` to ``coord``.

    Coordinates are ``(row, col)``. When the row is unchanged the move is
    "R" if the column increased and "L" otherwise; when the row changed the
    move is "D" if the row increased and "U" otherwise.
    """
    row_unchanged = coord[0] == prev_coord[0]
    if row_unchanged:
        return "R" if coord[1] > prev_coord[1] else "L"
    return "D" if coord[0] > prev_coord[0] else "U"

def generate_all_suboptimal_paths(size):
    """Generate all suboptimal paths for a given grid size.

    For every ordered pair of distinct cells, a breadth-first search extends
    paths one adjacent cell at a time without revisiting a cell, and keeps
    each path that reaches the end cell while visiting more cells than the
    shortest (Manhattan) route would.

    The result depends only on ``size``, and the number of paths grows very
    quickly with the grid size, so callers should compute it once and reuse
    it rather than calling this inside a loop.

    Args:
        size: Number of rows and columns of the grid.

    Returns:
        A list of dicts ``{'start': start, 'end': end, 'path': path}`` where
        ``path`` is a list of ``(row, col)`` tuples. Empty for ``size < 2``.
    """
    # Local import: deque gives O(1) pops from the left, unlike list.pop(0).
    from collections import deque

    moves = ((0, 1), (1, 0), (0, -1), (-1, 0))  # Right, Down, Left, Up
    coords = [(x, y) for x in range(size) for y in range(size)]

    suboptimal_paths = []
    for start in coords:
        for end in coords:
            if start == end:
                continue
            # A shortest path visits (Manhattan distance + 1) cells.
            optimal_cells = abs(end[0] - start[0]) + abs(end[1] - start[1]) + 1
            queue = deque([[start]])
            while queue:
                path = queue.popleft()
                current_pos = path[-1]
                if current_pos == end:
                    # Paths stop at the end cell; keep only the longer ones.
                    if len(path) > optimal_cells:
                        suboptimal_paths.append(
                            {'start': start, 'end': end, 'path': path}
                        )
                    continue
                for d_row, d_col in moves:
                    next_pos = (current_pos[0] + d_row, current_pos[1] + d_col)
                    # Unit moves are always adjacent, so only bounds and
                    # "not already visited" need checking.
                    if (0 <= next_pos[0] < size and 0 <= next_pos[1] < size
                            and next_pos not in path):
                        queue.append(path + [next_pos])
    return suboptimal_paths

def get_movement_options(coord, grid_size, prev_coord):
    """Describe which of the four moves are available from ``coord``.

    Args:
        coord: Current ``(row, col)`` cell.
        grid_size: Number of rows and columns of the grid.
        prev_coord: Cell that may not be stepped back onto, or ``None``.

    Returns:
        A string such as ``"[NA D NA R]"``: the letters U, D, L, R in that
        fixed order, each replaced by ``NA`` when the move would leave the
        grid or land on ``prev_coord``.
    """
    row, col = coord[0], coord[1]
    candidates = (
        ("U", (row - 1, col)),
        ("D", (row + 1, col)),
        ("L", (row, col - 1)),
        ("R", (row, col + 1)),
    )

    labels = []
    for letter, target in candidates:
        inside_grid = 0 <= target[0] < grid_size and 0 <= target[1] < grid_size
        if inside_grid and target != prev_coord:
            labels.append(letter)
        else:
            labels.append("NA")
    return "[" + " ".join(labels) + "]"

def _generate_path_string(mode, grid, path_coords, probability):
    """Render one path as a training string for the given MODE label.

    The string has the form
    ``MODE: <mode>, START: <noun>, END: <noun>, PATH: <start noun>`` followed,
    for every step, by the direction letter and then either the noun of the
    cell reached or the literal ``FORGOT``. The first step is additionally
    preceded by the movement options available at the start cell.

    Start and end are identified by their position in ``path_coords`` rather
    than by coordinate equality, so a path that passes through its own start
    or end cell again is still rendered step by step.
    """
    if not path_coords:
        return ""

    def name_at(cell):
        return grid[cell[0]][cell[1]]

    size = len(grid)
    start, end = path_coords[0], path_coords[-1]
    last_index = len(path_coords) - 1
    pieces = [
        f"MODE: {mode}, START: {name_at(start)}, END: {name_at(end)}, PATH: ",
        name_at(start),
    ]

    for index in range(1, len(path_coords)):
        prev_coord, coord = path_coords[index - 1], path_coords[index]
        direction = get_direction(prev_coord, coord)
        if index == 1:
            # Only the first step lists the movement options; there is no
            # earlier cell to exclude, hence the None.
            movement_options = get_movement_options(prev_coord, size, None)
            pieces.append(f" {movement_options} {direction}")
        else:
            pieces.append(f" {direction}")

        # The final cell is always named. Intermediate cells are named with
        # the given probability; random.random() is drawn only for them.
        if index == last_index or random.random() < probability:
            pieces.append(f" {name_at(coord)}")
        else:
            pieces.append(" FORGOT")

    return "".join(pieces)


def generate_shortest_path_string(grid, path_coords, probability):
    """Render a path as a ``MODE: Shortest`` training string.

    Args:
        grid: Square list-of-lists of nouns, indexed ``grid[row][col]``.
        path_coords: Sequence of ``(row, col)`` cells from start to end.
        probability: Chance that an intermediate cell is written as its noun;
            otherwise it is written as ``FORGOT``. The start and end cells
            are always named.

    Returns:
        The formatted path string, or ``""`` when ``path_coords`` is empty.
    """
    return _generate_path_string("Shortest", grid, path_coords, probability)


def generate_foraging_path_string(grid, path_coords, probability):
    """Render a path as a ``MODE: Foraging`` training string.

    Args:
        grid: Square list-of-lists of nouns, indexed ``grid[row][col]``.
        path_coords: Sequence of ``(row, col)`` cells from start to end.
        probability: Chance that an intermediate cell is written as its noun;
            otherwise it is written as ``FORGOT``. The start and end cells
            are always named.

    Returns:
        The formatted path string, or ``""`` when ``path_coords`` is empty.
    """
    return _generate_path_string("Foraging", grid, path_coords, probability)

size = 4  # Grid size
shortest_paths_iterations = 1 # How many times each optimal path is included in the training data
num_suboptimal = 744 # How many suboptimal paths to be randomly selected for a single grid environment
probability = 0.5 # The probability that an intermediate location is named (otherwise it is written as FORGOT)
num_training_grids = 3000 # Number of grid environments in training data
num_testing_grids = 30 # Number of grid environments in testing data

# The coordinate paths depend only on the grid size, not on the nouns placed
# on the grid, so they are enumerated once and reused for every environment
# instead of being recomputed inside the loops.
start_end_combinations = generate_start_end_permutations(size)
shortest_paths = generate_path_permutations(start_end_combinations)
suboptimal_paths = generate_all_suboptimal_paths(size)

# Fail early with a clear message; random.sample would raise the same
# exception type later, once per environment.
if num_suboptimal > len(suboptimal_paths):
    raise ValueError(
        f"num_suboptimal={num_suboptimal} exceeds the {len(suboptimal_paths)} "
        f"suboptimal paths available on a {size}x{size} grid."
    )


def _generate_environment_strings(grid_size, optimal, suboptimal,
                                  n_suboptimal, optimal_repeats, keep_probability):
    """Build the shuffled path strings for one freshly generated noun grid.

    Draws new nouns, arranges them on a grid, renders every optimal path
    ``optimal_repeats`` times, renders ``n_suboptimal`` randomly sampled
    suboptimal paths, and returns all strings in shuffled order.
    """
    nouns_list = generate_nouns(grid_size)
    grid = create_unique_random_grid(nouns_list, grid_size)

    generated_path_strings = []

    # Add all shortest paths the requested number of times
    for _ in range(optimal_repeats):
        for path in optimal:
            generated_path_strings.append(
                generate_shortest_path_string(grid, path['path'], keep_probability)
            )

    # From all possible suboptimal paths, randomly select the requested number
    for path in random.sample(suboptimal, n_suboptimal):
        generated_path_strings.append(
            generate_foraging_path_string(grid, path['path'], keep_probability)
        )

    # Shuffle so optimal and suboptimal paths are interleaved
    return shuffle_stimuli(generated_path_strings)


training_strs = []
for i in range(num_training_grids):
    training_strs += _generate_environment_strings(
        size, shortest_paths, suboptimal_paths,
        num_suboptimal, shortest_paths_iterations, probability,
    )

    # Print the iteration number
    if i % 100 == 0:
        print(f"Iteration {i} completed.")

testing_strs = []
for i in range(num_testing_grids):
    testing_strs += _generate_environment_strings(
        size, shortest_paths, suboptimal_paths,
        num_suboptimal, shortest_paths_iterations, probability,
    )

# Print the total number of training and testing path strings
print(f"\n{len(training_strs)} paths generated for pre-training.")
print(f"{len(testing_strs)} paths generated for testing.")

Write training and testing data into .txt files, and count number of tokens:

In [7]:
!rm -rf spatial_model
!mkdir spatial_model

text_file = open("spatial_model/train.txt", "w")
n = text_file.write('\n'.join(training_strs))
text_file.close()

text_file = open("spatial_model/test.txt", "w")
n = text_file.write('\n'.join(testing_strs))
text_file.close()


### Train/Fine-Tune GPT-2 Model

The function below runs the `run_clm.py` script to fine-tune a GPT-2 model on the generated path sequences.

The `name_or_path` argument specifies which model to fine-tune from. In the pre-training stage, it is set to 'gpt2'.

In [None]:
import random
import pandas as pd
import networkx as nx
import logging
from random import shuffle
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import random
import string
import os
import re
import glob
import torch
from wonderwords import RandomWord
import os
import gc
import pickle
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
from itertools import permutations
import logging
from random import shuffle
from matplotlib import pyplot as plt
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import math

os.environ["WANDB_DISABLED"] = "true"

# Pick the compute device in order of preference (CUDA, then MPS, then CPU)
# and report the choice.
if torch.cuda.is_available():
    _device_name, _device_message = "cuda", "CUDA available. Using CUDA."
elif torch.backends.mps.is_available():
    _device_name, _device_message = "mps", "MPS available. Using MPS."
else:
    _device_name, _device_message = "cpu", "CUDA and MPS not available. Using CPU."

print(_device_message)
device = torch.device(_device_name)

In [None]:
# Free memory before training: ask PyTorch to release its cached CUDA memory
# and run a Python garbage-collection pass.
torch.cuda.empty_cache()
gc.collect()

In [4]:
def train_model_script(name_or_path='spatial_model',
                       num_epochs=1,
                       output_dir='./clm_script',
                       save_steps=10000,
                       lr=5e-05,
                       train_batch_size=1,
                       eval_batch_size=1,
                       block_size=1024):
    """Run ``./run_clm.py`` with training and evaluation enabled.

    The arguments are interpolated into a ``! python ./run_clm.py`` shell
    command, so this function only works inside IPython/Jupyter.

    Args:
        name_or_path: Passed as ``--model_name_or_path`` (the model to start from).
        num_epochs: Passed as ``--num_train_epochs``.
        output_dir: Passed as ``--output_dir``. The training and validation
            files are also read from here, as ``<output_dir>/train.txt`` and
            ``<output_dir>/test.txt``.
        save_steps: Passed as ``--save_steps`` (with ``--save_strategy 'steps'``).
        lr: Passed as ``--learning_rate``.
        train_batch_size: Passed as ``--per_device_train_batch_size``.
        eval_batch_size: Passed as ``--per_device_eval_batch_size``.
        block_size: Passed as ``--block_size``.
    """
    # Free cached GPU memory and collect garbage before launching the script.
    torch.cuda.empty_cache()
    gc.collect()
    # --overwrite_output_dir is always passed, together with --do_train and --do_eval.
    ! python ./run_clm.py \
        --model_name_or_path {name_or_path} \
        --train_file {os.path.join(output_dir, 'train.txt')} \
        --validation_file {os.path.join(output_dir, 'test.txt')} \
        --per_device_train_batch_size {train_batch_size} \
        --per_device_eval_batch_size {eval_batch_size} \
        --do_train \
        --do_eval \
        --output_dir {output_dir} \
        --overwrite_output_dir \
        --num_train_epochs {num_epochs} \
        --save_strategy 'steps' \
        --save_steps {save_steps} \
        --learning_rate {lr} \
        --block_size {block_size}


In [None]:
# Train the GPT-2 model for one epoch, starting from 'gpt2'.
# output_dir is ./spatial_model: the function reads train.txt and test.txt
# from that directory and passes the same directory as --output_dir.
train_model_script(name_or_path='gpt2',
                   num_epochs=1,
                   output_dir='./spatial_model',
                   save_steps=10000,
                   lr=5e-05,
                   train_batch_size=1,
                   eval_batch_size=1,
                   block_size=1024)

If training needs to be resumed:

In [None]:
def resume_training_script(name_or_path='./spatial_model',
                           num_epochs=4,  # forwarded unchanged to --num_train_epochs
                           output_dir='./clm_script',
                           lr=5e-05,
                           train_batch_size=1,
                           eval_batch_size=1,
                           block_size=1024):
    """Run ``./run_clm.py`` again, resuming from a previous run.

    The arguments are interpolated into a ``! python ./run_clm.py`` shell
    command, so this function only works inside IPython/Jupyter.

    Args:
        name_or_path: Passed both as ``--model_name_or_path`` and as
            ``--resume_from_checkpoint``.
        num_epochs: Passed as ``--num_train_epochs``.
        output_dir: Passed as ``--output_dir``. The training and validation
            files are also read from here, as ``<output_dir>/train.txt`` and
            ``<output_dir>/test.txt``.
        lr: Passed as ``--learning_rate``.
        train_batch_size: Passed as ``--per_device_train_batch_size``.
        eval_batch_size: Passed as ``--per_device_eval_batch_size``.
        block_size: Passed as ``--block_size``.
    """
    # NOTE(review): presumably --num_train_epochs is the TOTAL epoch count
    # when resuming, not the number of additional epochs -- confirm against
    # the Trainer documentation before relying on it.
    # NOTE(review): the data files are looked up in output_dir, so a new
    # output_dir must already contain train.txt and test.txt -- verify.
    torch.cuda.empty_cache()
    gc.collect()
    # Checkpoints are saved once per epoch (--save_strategy 'epoch').
    ! python ./run_clm.py \
        --model_name_or_path {name_or_path} \
        --train_file {os.path.join(output_dir, 'train.txt')} \
        --validation_file {os.path.join(output_dir, 'test.txt')} \
        --per_device_train_batch_size {train_batch_size} \
        --per_device_eval_batch_size {eval_batch_size} \
        --do_train \
        --do_eval \
        --output_dir {output_dir} \
        --overwrite_output_dir \
        --num_train_epochs {num_epochs} \
        --save_strategy 'epoch' \
        --learning_rate {lr} \
        --block_size {block_size} \
        --resume_from_checkpoint {name_or_path}


In [None]:
# Resume training the GPT-2 model from ./spatial_model, writing to ./spatial_model_2.
# NOTE(review): resume_training_script reads train.txt and test.txt from
# output_dir; the visible cells only write them to spatial_model/, so they
# would have to be copied into ./spatial_model_2 first -- confirm.
resume_training_script(name_or_path='./spatial_model',
                       num_epochs=1,  # value passed to --num_train_epochs
                       output_dir='./spatial_model_2',
                       lr=5e-05,
                       train_batch_size=1,
                       eval_batch_size=1,
                       block_size=1024)


#### Save Training Log File

In [1]:
# Name of the log file; it is a relative path, so the file is created in the
# current working directory.
file_path = 'training_output.log'

# Open the file in write mode and write the copied training output.
# Paste the output between the triple quotes; as written, the literal
# contains only blank lines.
with open(file_path, 'w') as file:
    file.write("""



""")
