# RWKV CodeParrot + Memory tune
This model is a custom model containing
- 24 layers
- 2048 embedding size

It follows up on the memory-tuned (Tune 4) model, and applies code training on top of it.

# Basic Setup

In [None]:
# First, let's set up the model / datapath / checkpoint directories and fetch the model we need.
# All paths are relative to the directory the notebook kernel is running in.
!mkdir -p ../../../../model/
!mkdir -p ../../../../datapath/
!mkdir -p ../../../../checkpoint/
# `wget -nc` (no-clobber) skips the download when the file already exists, so re-running this cell is cheap
!cd ../../../../model/ && wget -nc https://huggingface.co/picocreator/memory-size-experiment-for-rwkv/resolve/main/TokenShift-C-Tune4.pth
# List the downloaded file to confirm it is present and to show its size
!ls -alh ../../../../model/TokenShift-C-Tune4.pth

# Shared settings for the remaining stages (run this cell even if you want to skip some of them)

In [None]:
import os

# Settings a reader may want to tune before running the later cells.
# GPU_DEVICES is kept as a string because it is interpolated verbatim into
# the `--trainer.devices` command-line argument of the training cell.
DEEPSPEED_STRAT = "deepspeed_stage_1"
GPU_DEVICES = "[0,1,2,3]"
ENABLE_WANDB = True
WANDB_PREFIX = "CodeShift-C"

print("DEEPSPEED_STRAT:", DEEPSPEED_STRAT)
print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# Value exported as the WANDB_MODE environment variable by the training cell
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# Resolve the notebook, project and trainer locations.
# NOTE: "__file__" is deliberately a string literal here, so abspath() resolves
# it against the current working directory and dirname() yields that directory;
# NOTEBOOK_DIR is therefore the directory the kernel was started in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../../"))
# The trainer and inference code both resolve to the same RWKV-v4neo directory
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v4neo/"))
INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v4neo/"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("INFERENCE_DIR:", INFERENCE_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

## CodeParrot training

In [None]:
# Let's preload the required dataset, passing this notebook's YAML config to preload_dataset.py
# (the command runs from TRAINER_DIR, so the config is referenced via NOTEBOOK_DIR)
!cd "{TRAINER_DIR}" && \
    python3 preload_dataset.py "{NOTEBOOK_DIR}/CodeShift-C-Memory-Parrot.yaml"

In [None]:
# Start the CodeParrot training run from TRAINER_DIR, using the YAML config next to this notebook.
# - WANDB_MODE is exported so the ENABLE_WANDB setting from the config cell reaches the run.
# - The run name, strategy and devices are passed on the command line
#   (presumably overriding the YAML values - confirm against new_train.py).
# - `python3` is invoked explicitly, matching the preload step, so the cell does not
#   depend on a bare `python` alias being present on the host.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 new_train.py fit \
        -c "{NOTEBOOK_DIR}/CodeShift-C-Memory-Parrot.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Memory-Parrot (ctx=4096, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}"

In [None]:
# Let's export the model from the last training checkpoint into a .pth file,
# then list the exported file to confirm it exists and to show its size.
# The relative paths resolve from TRAINER_DIR because of the leading `cd`.
# `python3` is invoked explicitly, matching the preload step, so the cell does not
# depend on a bare `python` alias being present on the host.
!cd "{TRAINER_DIR}" && \
    python3 export_checkpoint.py "../checkpoint/CodeShift-C-Memory-Parrot/last.ckpt" "../model/CodeShift-C-Memory-Parrot.pth"
!cd "{TRAINER_DIR}" && ls -alh "../model/CodeShift-C-Memory-Parrot.pth"

In [None]:
# Let's do a quick dragon prompt validation on the exported model.
# "cuda fp32" is passed to dragon_test.py as its second argument.
!cd "{INFERENCE_DIR}" && python3 dragon_test.py ../model/CodeShift-C-Memory-Parrot.pth "cuda fp32"

In [None]:
# Let's do a quick memory test on the exported model (to see whether the memory behaviour has been removed).
# NOTE: unlike the cells above, there is no `cd` here, so the script path is relative to
# the kernel's working directory; the model path is absolute via PROJECT_DIR.
!python3 ../memory_script/eval_model_memory_guided.py "{PROJECT_DIR}/model/CodeShift-C-Memory-Parrot.pth"