<a href="https://colab.research.google.com/github/blainemartin/ml_training_prep/blob/main/MythoMax_Lora_Trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MythoMax LLM Trainer

This tool configures a text-generation-webui environment with a training dataset
prepopulated from Google Drive (produced by a previous notebook run). It then runs the Web UI with the MythoMax model loaded to facilitate LoRA creation.

Requires an A100 GPU.


In [None]:
#@title 1.0 Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
#@title 1.1 GPU Check { display-mode: "form" }


gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
#@title 1.2 Memory Check { display-mode: "form" }


from psutil import virtual_memory

# Total system memory reported by psutil, converted from bytes to decimal GB.
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# Runtimes reporting under 20 GB are treated as standard (not high-RAM).
print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

In [None]:
# @title 2.0 Prep File Structure

%cd /content
!git clone https://github.com/oobabooga/text-generation-webui
#!git clone https://github.com/camenduru/text-generation-webui

from google.colab import drive
import zipfile

#Mount Google Drive
drive.mount('/content/drive')

# specify the zip file name
file_name = "/content/drive/MyDrive/ST_ML_Training_Set.zip" # @param {type:"string"}

# opening the zip file in READ mode
with zipfile.ZipFile(file_name, 'r') as zip_ref:
    # extracting all the files
    print('Extracting all the files now...')
    zip_ref.extractall('/content/text-generation-webui/training/datasets')
    print('Done!')

In [None]:
#@title 3.0 Install & Run Text Generation WebUI

%cd /content
#@markdown If unsure about the branch, write "main" or leave it blank.
import torch
from pathlib import Path
if Path.cwd().name != 'text-generation-webui':
  print("Installing the webui...")
  %cd text-generation-webui

  torver = torch.__version__
  print(f"TORCH: {torver}")
  is_cuda118 = '+cu118' in torver  # 2.1.0+cu118

  textgen_requirements = open('requirements.txt').read().splitlines()
  if is_cuda118:
      textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]
  with open('temp_requirements.txt', 'w') as file:
      file.write('\n'.join(textgen_requirements))

  !pip install -r extensions/openai/requirements.txt --upgrade
  !pip install -r temp_requirements.txt --upgrade
  !pip install chromadb

  print("\033[1;32;1m\n --> If you see a warning about \"previously imported packages\", just ignore it.\033[0;37;0m")
  print("\033[1;32;1m\n --> There is no need to restart the runtime.\n\033[0;37;0m")

  try:
    import flash_attn
  except:
    !pip uninstall -y flash_attn

# Parameters
model_url = "https://huggingface.co/Gryphe/MythoMax-L2-13b" #@param {type:"string"}
branch = "main" #@param {type:"string"}
command_line_flags = "--use_double_quant --extension superboogav2 Training_PRO --gpu-memory 39" #@param {type:"string"}
api = False #@param {type:"boolean"}

if api:
  for param in ['--api', '--public-api']:
    if param not in command_line_flags:
      command_line_flags += f" {param}"

model_url = model_url.strip()
if model_url != "":
    if not model_url.startswith('http'):
        model_url = 'https://huggingface.co/' + model_url

    # Download the model
    url_parts = model_url.strip('/').strip().split('/')
    output_folder = f"{url_parts[-2]}_{url_parts[-1]}"
    branch = branch.strip('"\' ')
    if branch.strip() not in ['', 'main']:
        output_folder += f"_{branch}"
        !python download-model.py {model_url} --branch {branch}
    else:
        !python download-model.py {model_url}
else:
    output_folder = ""

# Start the web UI
cmd = f"python server.py --share"
if output_folder != "":
    cmd += f" --model {output_folder}"
cmd += f" {command_line_flags}"
print(cmd)
!$cmd

"""
%cd /content
!apt-get -y install -qq aria2
%cd /content/text-generation-webui
!pip install -q -r requirements.txt

!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/added_tokens.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o added_tokens.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/config.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o config.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/generation_config.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o generation_config.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/resolve/main/model.safetensors -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o model.safetensors
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/quantize_config.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o quantize_config.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/special_tokens_map.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o special_tokens_map.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/tokenizer.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o tokenizer.json
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/resolve/main/tokenizer.model -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o tokenizer.model
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/4bit/MythoMax-L2-13B-GPTQ/raw/main/tokenizer_config.json -d /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ -o tokenizer_config.json

!echo "dark_theme: true" > /content/settings.yaml
!echo "chat_style: wpp" >> /content/settings.yaml
!echo "mode: chat" >> /content/settings.yaml
!echo "preset: Midnight Enigma" >> /content/settings.yaml

%cd /content/text-generation-webui
!python server.py --share --settings /content/settings.yaml --model /content/text-generation-webui/models/MythoMax-L2-13B-GPTQ
"""