# Tiny LLama Voice in C

### Links
* [karpathy github](https://github.com/karpathy/llama2.c)
* [TinyLLamas](https://huggingface.co/karpathy/tinyllamas)
* [TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories)
* [ESP32-S3](https://www.amazon.com/gp/product/B0CHYHGYRH?th=1)
* [SAM tts](https://github.com/s-macke/SAM/tree/master)
  * [jake's fork](https://github.com/jake-g/SAM-colab)


In [None]:

from IPython.display import Audio
import io
import sys
import os
import textwrap

import re


In [None]:
# List the working directory to see which checkouts, models and wav files already exist.
%ls

[0m[01;34mllama2.c[0m/  out.wav  [01;34mSAM-colab[0m/  [01;34msample_data[0m/  sing.wav  sing_word.wav  stories260K.bin  tok512.bin


In [None]:
#@title TinyLlama Clone Project and Build
%%time
# NOTE(review): `%%time` is a cell magic and sits below the `#@title` line; IPython
# normally expects cell magics on the first line of a cell — confirm it takes effect here.
# Directory name that `git clone` creates for the llama2.c repository.
llama_root = 'llama2.c'
# Clone only when the checkout is not already present, so re-running the cell is cheap.
if not os.path.exists(llama_root):
  !git clone https://github.com/karpathy/llama2.c.git
# Build inside the checkout, then step back to the parent directory so that
# later cells keep using paths relative to it.
%cd {llama_root}
!make runfast
%cd ..
# Path of the compiled inference binary, relative to the parent directory.
LLM_BIN=f'./{llama_root}/run'

/content/llama2.c
gcc -Ofast -o run run.c -lm
gcc -Ofast -o runq runq.c -lm
/content


In [None]:

#@title Speech Synth Clone and Build
%%time
# 1980s edition, slightly adjusted to work in colab without SDL
sam_root = 'SAM-colab'
if not os.path.exists(sam_root):
  !git clone https://github.com/jake-g/SAM
%cd SAM-colab
!make
%cd ..
SPEECH_BIN = f'./{sam_root}/sam'

/content/SAM-colab
make: 'sam' is up to date.
/content


In [None]:
# List the working directory again now that both projects have been cloned and built.
%ls

[0m[01;34mllama2.c[0m/  out.wav  [01;34mSAM-colab[0m/  [01;34msample_data[0m/  sing.wav  sing_word.wav  stories260K.bin  tok512.bin


In [None]:
#@title Download Model

def remove_quotes_and_newlines(text):
    """Return `text` with quote characters deleted and newlines turned into spaces.

    Double quotes, single quotes and backticks are dropped entirely; each
    newline character is replaced by one space.
    """
    without_quotes = re.sub(r'[\"\'`]', '', text)
    return re.sub(r'\n', ' ', without_quotes)

def download_file(url, save_path, overwrite=False):
  if os.path.exists(save_path) and not overwrite:
    print(f" Skipping: {save_path} already exists")
  else:
    print(f" Fetch: {url}")
    !wget -q $url -O $save_path


def get_model_files(model, overwrite=True):
  """Fetch the checkpoint (and tokenizer, when one is listed) for `model`.

  Args:
    model: Key of a known model, e.g. "stories260K".
    overwrite: Passed through to `download_file` for every file fetched.

  Returns:
    (model_file, tokenizer_file): local file names. `tokenizer_file` is an
    empty string when the model entry lists no tokenizer.

  Raises:
    ValueError: If `model` is not one of the configured models.
  """
  hf_base = "https://huggingface.co/karpathy/tinyllamas/resolve/main/"
  gh_base = "https://github.com/mc9625/esp32-llm/raw/refs/heads/main/data/"

  # Models that come with their own tokenizer file.
  catalog = {
      "stories260K": {
          "model_path": hf_base + "stories260K/stories260K.bin",
          "tokenizer_path": hf_base + "stories260K/tok512.bin",
      },
      "aidreams260K": {
          "model_path": gh_base + "aidreams260K.bin",
          "tokenizer_path": gh_base + "tok512.bin",
      },
  }
  # Models that list a checkpoint only.
  for plain_name in ("stories15M", "stories42M", "stories110M"):
    catalog[plain_name] = {"model_path": hf_base + plain_name + ".bin"}

  print(f'Downloading {model}...')
  entry = catalog.get(model)
  if not entry:
    raise ValueError(f"Error: Model '{model}' not found in the configuration.")

  # The checkpoint is always stored under the model's own name.
  local_model = f"{model}.bin"
  download_file(entry["model_path"], local_model, overwrite)

  # The tokenizer keeps the file name it has in its URL.
  local_tokenizer = ""
  remote_tokenizer = entry.get("tokenizer_path")
  if remote_tokenizer is not None:
    local_tokenizer = os.path.basename(remote_tokenizer)
    download_file(remote_tokenizer, local_tokenizer, overwrite)

  return local_model, local_tokenizer



# Pick the checkpoint from the Colab form dropdown and download it (plus its tokenizer, if any).
model = "stories260K"  #@param ["stories260K", "stories15M", "stories42M", "stories110M", "aidreams260K"]
model_file, tokenizer_file = get_model_files(model, overwrite=True)

# Show every .bin file currently in the working directory.
print("Existing model files:")
%ls -la *.bin

Downloading stories260K...
 Skipping: stories260K.bin already exists
 Skipping: tok512.bin already exists


In [None]:
#@title Run Model, Generate Output
%%time
def run_model(model_file, tokenizer_file, prompt, max_token=256, temperature=0.8, top_p=0.9, width=80, verbose=True):
  """Runs the story generation and returns output and metadata."""
  cmd = f'{LLM_BIN} {model_file} -t {temperature} -p {top_p} -n {max_token} -i "{prompt}"'
  if tokenizer_file:
    cmd += f" -z {tokenizer_file}"
  if verbose:
    print(f"Params:\nModel: {model_file}, Max Tokens: {max_token}, Temperature: {temperature}, Top p: {top_p}, Prompt: {prompt}, Tokenizer: {tokenizer_file}\n")

    print(f'Run Command:\n{cmd}\n')
    output = !{cmd}
    wrapped_output = textwrap.fill("\n".join(output), width=width)
    print(f'Run Output:\n{wrapped_output}')
  tok_p_s = output.pop()
  tok_p_s = float(tok_p_s.split(': ')[-1])
  return "\n".join(output), tok_p_s


# Generation settings, exposed as Colab form fields.
max_token = 96 #@param {type:"slider", min:32, max:1024, step:32}
temperature = 0.8 #@param {type:"slider", min:0.0, max:1, step:0.05}
top_p = 0.9 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
prompt = "Donald Trump is" #@param {type:"string"}

# Run the model; `output` (generated text) and `tps` (tokens per second) are reused by later cells.
output, tps = run_model(model_file, tokenizer_file, prompt, max_token, temperature, top_p, width=80)



Params:
Model: stories260K.bin, Max Tokens: 96, Temperature: 0.8, Top p: 0.9, Prompt: Donald Trump is, Tokenizer: tok512.bin

Run Command:
./llama2.c/run stories260K.bin -t 0.8 -p 0.9 -n 96 -i "Donald Trump is" -z tok512.bin

Run Output:
Donald Trump is hard. He sees a strong string in the woods. He likes to climb
the string and pretend she was pretty. He liked to sing and jump in the string.
One day, Dad found a big stru achieved tok/s: 6785.714286


In [None]:
#@title Speak Generated Output

# Named voice settings for the SAM synthesizer (speed, pitch, throat, mouth).
# Keep the keys of every entry in this same order so that code which unpacks
# an entry's values positionally puts each number in the right slot.
PRESETS = {
    "Elf": {"speed": 72, "pitch": 64, "throat": 110, "mouth": 160},
    "Little Robot": {"speed": 92, "pitch": 60, "throat": 190, "mouth": 190},
    "Stuffy Guy": {"speed": 82, "pitch": 72, "throat": 110, "mouth": 105},
    "Little Old Lady": {"speed": 82, "pitch": 32, "throat": 145, "mouth": 145},
    "Extra-Terrestrial": {"speed": 100, "pitch": 64, "throat": 150, "mouth": 200},
    "SAM": {"speed": 72, "pitch": 64, "throat": 128, "mouth": 128},
    "robo-jake": {"speed": 126, "pitch": 212, "throat": 116, "mouth": 80},
}

def speak(text, pitch=64, speed=100, throat=140, mouth=140, wav_out='speak.wav', preset=None, autoplay=True):
    if preset:
      print(f'Using speaker preset params: {preset}')
      params = PRESETS.get(preset)
      speed, pitch, throat, mouth = params.values()
    else:
      params = {'pitch': pitch, 'speed': speed, 'throat': throat, 'mouth': mouth}
      print(f'Using custom speaker params: {params}')
    text = remove_quotes_and_newlines(text)
    print('Script:\n'+textwrap.fill(text))
    cmd = f'{SPEECH_BIN} -pitch {pitch} -speed {speed} -throat {throat} -mouth {mouth} -wav {wav_out} "{text}"'
    !{cmd}
    display(Audio(wav_out, autoplay=autoplay))

# Speak the whole generated text using the custom voice settings below (Colab form sliders).
speed=96 #@param {type:"slider", min:0, max:256, step:2}
pitch=88  #@param {type:"slider", min:0, max:256, step:2}
throat=116 #@param {type:"slider", min:0, max:256, step:2}
mouth=124 #@param {type:"slider", min:0, max:256, step:2}
speak(output, pitch, speed, throat, mouth)


Using custom speaker params: {'pitch': 88, 'speed': 96, 'throat': 116, 'mouth': 124}
Script:
Donald Trump is hard. He sees a strong string in the woods. He likes
to climb the string and pretend she was pretty. He liked to sing and
jump in the string. One day, Dad found a big stru


In [None]:
# @title Speaker Presets so far...
first_sentence = 0
num_sentences = 2
# Shorten the output to a few sentences. This is done once, before the loop,
# because the excerpt is the same for every speaker. Pieces are stripped so
# that re-joining with '. ' does not produce double spaces, and the slice
# takes exactly `num_sentences` pieces.
sentence_pieces = [piece.strip() for piece in output.split('.')]
sentences = '. '.join(sentence_pieces[first_sentence:first_sentence + num_sentences])
for speaker in PRESETS:
  print('\n' + 80*'*')
  speak(sentences, preset=speaker, autoplay=False)



********************************************************************************
Using speaker preset: Elf
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty



********************************************************************************
Using speaker preset: Little Robot
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty



********************************************************************************
Using speaker preset: Stuffy Guy
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty



********************************************************************************
Using speaker preset: Little Old Lady
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty



********************************************************************************
Using speaker preset: Extra-Terrestrial
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty



********************************************************************************
Using speaker preset: SAM
Script:
Donald Trump is hard.  He sees a strong string in the woods.  He likes
to climb the string and pretend she was pretty


In [None]:
# @title yep
# Can we make an LLM output phonetically spelled words, like the ones below?


def sing_word(pitch, word, wav_out='sing_word.wav'):
  print(word)
  cmd = f'{SPEECH_BIN} -wav {wav_out} -sing -pitch {pitch} -phonetic {word}'
  !{cmd}
  display(Audio(wav_out, autoplay=True))
  input("(press enter) ")

def oh_say_can_you_c():
  """Sing the hard-coded phonetic melody one word at a time via `sing_word`."""
  # oh say can you C program
  # Each entry is (pitch value, phonetic spelling), sung in this order.
  melody = (
      (64, "ohohoh"),
      (76, "ohohoh"),
      (96, "sehehehehehehehehehey"),
      (76, "kaeaeaeaeaeaeaeaeaen"),
      (64, "yuxuxuxuxuxuxw"),
      (48, "siyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiy"),
      (38, "baaaaay"),
      (42, "dhaaaxaxaxax"),
      (48, "daoaoaoaoaoaoaonz"),
      (76, "erererererererer"),
      (68, "liyiyiyiyiyiyiyiyiy"),
      (64, "laaaaaaaaaaaaaaaaaaaaaaaaayt"),
      (64, "whahahaht"),
      (64, "sohohuw"),
      (38, "praaaaaaaaaaaaaaaauwd"),
      (42, "liyiyiy"),
      (48, "wiyiyiyiyiyiyiyiyiy"),
      (51, "/heheheheheheheheheheheheheheheheheheyld"),
  )
  for note_pitch, phonemes in melody:
    sing_word(note_pitch, phonemes)


# Sing the melody; every word waits for Enter before the next one is produced.
oh_say_can_you_c()

ohohoh


(press enter) 
ohohoh


(press enter) 
sehehehehehehehehehey


(press enter) 
kaeaeaeaeaeaeaeaeaen


(press enter) 
yuxuxuxuxuxuxw


(press enter) 
siyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiyiy


(press enter) 
baaaaay


(press enter) 
dhaaaxaxaxax


(press enter) 
daoaoaoaoaoaoaonz


(press enter) 
erererererererer


(press enter) 
liyiyiyiyiyiyiyiyiy


(press enter) 
laaaaaaaaaaaaaaaaaaaaaaaaayt


(press enter) 
whahahaht


(press enter) 
sohohuw


(press enter) 
praaaaaaaaaaaaaaaauwd


(press enter) 
liyiyiy


(press enter) 
wiyiyiyiyiyiyiyiyiy


(press enter) 
/heheheheheheheheheheheheheheheheheheyld


(press enter) 


In [None]:
#@title Speech Synthesis Binary Help

# Print the synthesizer's usage text: command-line options and the phoneme table.
!{SPEECH_BIN} -h

usage: sam [options] Word1 Word2 ....
options
	-phonetic 		enters phonetic mode. (see below)
	-pitch number		set pitch value (default=64)
	-speed number		set speed value (default=72)
	-throat number		set throat value (default=128)
	-mouth number		set mouth value (default=128)
	-wav filename		output to wav instead of libsdl
	-sing			special treatment of pitch
	-debug			print additional debug messages

     VOWELS                            VOICED CONSONANTS	
IY           f(ee)t                    R        red		
IH           p(i)n                     L        allow		
EH           beg                       W        away		
AE           Sam                       W        whale		
AA           pot                       Y        you		
AH           b(u)dget                  M        Sam		
AO           t(al)k                    N        man		
OH           cone                      NX       so(ng)		
UH           book                      B        bad		
UX           l(oo)t                    D    

# Other

In [None]:
# @title Google tts service

# Install (or upgrade) the gtts package, then import its gTTS class used by `gspeak`.
!pip install --upgrade gtts
from gtts import gTTS
def gspeak(text, language='en', output='speak.mp3'):
    """Synthesize `text` with gTTS, save it to `output`, and show an audio player.

    Args:
      text: Text to be spoken.
      language: Language code handed to gTTS as `lang`.
      output: Path of the audio file that is written and then displayed.
    """
    speech = gTTS(text=text, lang=language)
    speech.save(output)
    player = Audio(output)
    display(player)



In [None]:
#@title Run Meta's Llama 2 models

#@markdown input your huggingface [access token](https://huggingface.co/settings/tokens) to download Meta's Llama 2 models.

from huggingface_hub import snapshot_download

token = "replace your huggingface access token" #@param {type:"string"}
path = snapshot_download(repo_id="meta-llama/Llama-2-7b",cache_dir="Llama-2-7b", use_auth_token=token)

!python export.py llama2_7b.bin --meta-llama $path

print("./run llama2_7b.bin\n")
!./run llama2_7b.bin

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

LICENSE.txt:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/22.3k [00:00<?, ?B/s]

GatedRepoError: 401 Client Error. (Request ID: Root=1-672d5178-33198dcd7a7abaa86d76d41d;675476fe-0165-4070-b504-9d283478ee9d)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b/resolve/69656aac4cb47911a639f5890ff35b41ceb82e98/.gitattributes.
Access to model meta-llama/Llama-2-7b is restricted. You must have access to it and be authenticated to access it. Please log in.