----
## Setup

**Configure Colab File System**

Make sure you add a shortcut to the base folder in MyDrive!
> `your_folder` -> `Organize` -> `Add shortcut`



![](https://drive.google.com/uc?export=view&id=1sxFMcaAAWTGfFYyTlmvFZmEgfNdeU0Ks)

<br></br>


----
If you want to let your Colab keep running while you're gone, copy-paste this into your browser's developer console:
> - open the console with `option` + `command` + `i` (macOS)
> - `copy-paste` the snippet into the *console*, then hit `enter`
> - check for printouts at the 60s mark, then run your job

```javascript
// Logs a message and clicks Colab's "connect" toolbar button, if it is present.
function ClickConnect(){
    console.log("Click #1");
    const connectButton = document.querySelector("colab-toolbar-button#connect");
    // querySelector returns null when nothing matches; skip the click instead of
    // throwing a TypeError on every tick.
    if (connectButton !== null) {
        connectButton.click();
    }
}
// Repeat every 60000 ms (60 s).
setInterval(ClickConnect,60000);
```

<br></br>


In [8]:
# @title Colab Setup
# Project root on the mounted Google Drive; appended to sys.path below and used
# later to locate the .env file.
ROOT='/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/'
import importlib

# add root to system path
# (guarded so that re-running this cell does not append duplicate entries)
import sys
if ROOT not in sys.path:
    sys.path.append(ROOT)

# pytorch env vars
import os
# CUDA caching-allocator option; per the PyTorch docs it limits block splitting
# above the given size (in MB) to reduce fragmentation.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
# Make CUDA kernel launches synchronous so errors surface at the failing call
# (debugging aid; slows execution).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# Request device-side assertions.
# NOTE(review): this may only take effect in torch builds compiled with the flag - confirm.
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# filter out warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

def _is_installed(package_name: str) -> bool:
    """Return True if *package_name* is already available in this environment."""
    import importlib.metadata
    import importlib.util

    # pip names and import names often differ (e.g. "python-dotenv" is imported
    # as "dotenv"), so look the name up as an installed distribution first.
    try:
        importlib.metadata.version(package_name)
        return True
    except (importlib.metadata.PackageNotFoundError, ValueError):
        pass

    # Fall back to treating the name as an importable module name.
    try:
        return importlib.util.find_spec(package_name) is not None
    except (ImportError, ValueError):
        return False


def install_if_needed(package_names: list[str]):
    """
    Install packages using pip if they are not already installed.

    Args:
        package_names: pip package names to check. A single string is also
            accepted and is treated as a one-element list.

    Returns:
        None. Progress and a summary are printed to stdout.

    Packages are installed with ``<current interpreter> -m pip install --quiet
    --upgrade`` and pip's own output is discarded. A package whose install
    command exits with a non-zero status is reported as failed instead of
    being listed as installed.
    """
    import subprocess
    import sys

    print('installing packages')
    if isinstance(package_names, str):
        package_names = [package_names]

    newly_installed = []
    already_present = []
    for package_name in package_names:
        if _is_installed(package_name):
            print(f"- {package_name} is already installed.")
            already_present.append(package_name)
            continue

        # Use the running interpreter's pip so the package lands in the
        # environment this kernel actually imports from.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--quiet", "--upgrade", package_name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        if result.returncode == 0:
            newly_installed.append(package_name)
        else:
            print(f"- FAILED to install {package_name}")

    print(f"- installed {', '.join(newly_installed)}")
    # Label kept from the original output format; it lists the packages that
    # were found to be present already.
    print(f"- imported {', '.join(already_present)}")


def running_in_colab():
    """Environment check: return True when `google.colab` can be imported, else False."""
    try:
        import google.colab
    except ImportError:
        # the module is not importable here, so this is not a Colab runtime
        return False
    else:
        return True


def mount_to_drive():
    """Mount Google Drive at /content/drive when running in Colab; otherwise do nothing."""
    if not running_in_colab():
        return

    print('\nmounting to drive')
    # imported here because the module is only importable inside Colab
    from google.colab import drive
    drive.mount("/content/drive", force_remount=True)


'''
unpack_elasticsearch():
    - Get elasticsearch jar file if necessary
'''
def unpack_elasticsearch():
  """
  """
  ![[ -d /content/elasticsearch-8.11.1 ]] && echo "- elasticsearch is already installed" \
                                           || wget "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.11.1-linux-x86_64.tar.gz" \
                                                && tar -xzf "elasticsearch-8.11.1-linux-x86_64.tar.gz" 1> /dev/null && echo "- elasticsearch has been installed"


'''
force_install_torch():
    - reinstall torch to avoid weird error https://stackoverflow.com/a/77199918
'''
def force_install_torch():
  """Force-reinstall torch from the PyTorch nightly cu117 wheel index via the %pip magic."""
  %pip install torch --force-reinstall --index-url https://download.pytorch.org/whl/nightly/cu117



In [11]:
# @title Install Dependencies
%%capture
# rip
mount_to_drive()
install_if_needed([
# "faiss",
# "dill",
"accelerate",
"apache-beam",
"datasets",
# "elasticsearch",
# "faiss_gpu",
"evaluate",
"keybert"
# "nlp",
"pynvml",
"python-dotenv",
"rouge_score",
"sentencepiece",
"sentence-transformers",
"transformers",
"yake",
# "git+https://github.com/huggingface/transformers", # source install for checkpointing
# "pretty_midi",
# "pypianoroll",
])

In [10]:
# @title Load Modules
%%capture
# project modules from the `src` package
# (presumably located under ROOT, which the setup cell appends to sys.path - verify)
from src.config import get_model_configs
from src.train import run_training_pipeline
from src.evaluate import generate_and_evaluate, print_eval_summary
from src.gpu import clear_gpu, print_gpu_utilization
# enable IPython's autoreload extension; mode 2 reloads all modules before each
# cell execution so edits to the source files are picked up without a restart
%reload_ext autoreload
%autoreload 2

# load our .env file as a dict
from dotenv import dotenv_values
dotenv = dotenv_values(f"{ROOT}/.env")

ModuleNotFoundError: No module named 'keybert'

---
# Run Experiments

In [5]:
# @title Runner Flags
# gates the "Run Training" cell
run_experiments = True
# gates the "Generate & Evaluate" and "Show Eval Stats" cells
run_evaluation = False
# not referenced by any cell visible in this notebook section
run_eda = False

In [6]:
# @title Set Model Configs
# Settings for this run, handed to get_model_configs as keyword arguments.
cfg = get_model_configs(
    user='adamjweintraut',
    model_type='bart',
    pretrain='facebook/bart-large',  # 'adamjweintraut/bart-finetuned-lyrlen-64-lines', # len_{syllables} encodings
    variant='8line',
    dataset='kwsylchunk',
    max_length=64,
    epochs=3,
    tensor_type='pt',
    dotenv=dotenv,
    resume_from_checkpoint=False,
)


NameError: name 'get_model_configs' is not defined

In [None]:
# @title Run Training
# Runs only when the run_experiments flag is set; unpacks the four values
# returned by run_training_pipeline for the current cfg.
if run_experiments:
  model, trainer, tokenizer, genconfig = run_training_pipeline(cfg)



---





---





---





---





---





---



In [None]:
# @markdown ## check current config
# show the config object built in the "Set Model Configs" cell
print(cfg)

In [None]:
# @title Generate & Evaluate
if run_evaluation:
  eval = generate_and_evaluate(cfg, n_examples=15)

In [None]:
# @title Show Eval Stats
if run_evaluation:
  print_eval_summary(eval)

<br>

<br>

<br>

<br>

<br>