In [3]:
from llama_cpp import Llama
import numpy as np
from matplotlib import pyplot as plt
import ctypes
import torch
from torch.nn import functional as F

In [4]:
model_bin = "/media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/vicuna-13B-1.1-GPTQ-4bit-128g-GGML/vicuna-13B-1.1-GPTQ-4bit-32g.GGML.bin"

LLM = Llama(model_path=model_bin, n_ctx=2048, n_threads=7, use_mmap=False, n_batch=512)

llama.cpp: loading model from /media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/vicuna-13B-1.1-GPTQ-4bit-128g-GGML/vicuna-13B-1.1-GPTQ-4bit-32g.GGML.bin
llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 4 (mostly Q4_1, some F16)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 9934493.73 KB
llama_model_load_internal: mem required  = 11749.65 MB (+ 1608.00 MB per state)
....................................................................................................
llama_init_from_file: kv s

In [5]:
def get_logits_and_probs(prompt):
    tokens = LLM.tokenize(prompt.encode('utf-8'))
    LLM.reset()
    LLM.eval(tokens)
    logits = LLM._logits()
    logits = np.array(logits[0])
    probs=F.softmax(torch.from_numpy(logits)).numpy()
    return logits, probs

In [6]:
def plot_logits(logits, bins=50):
    counts, bins = np.histogram(logits, bins=bins)
    plt.stairs(counts, bins)
    plt.title('Histogram of logits')

In [7]:
def plot_probs(probs, bins=1000, bins_to_skip=2):
    counts, bins = np.histogram(probs, bins=bins)
    # Observing that the vast majority of tokens are in the first bin, i.e. have incredibly small probability.
    print('There are {} tokens outside the {} smallest bins.'.format(len(np.where(probs>bins[bins_to_skip])[0]), bins_to_skip))
    # Don't plot the smallest bins since they are not interesting and dwarf all other bins
    plt.stairs(counts[bins_to_skip:], bins[bins_to_skip:])
    plt.title('Histogram of Token Probabilities')

In [8]:
def plot_most_likely_tokens(probs, num_tokens_to_plot=10):
    ind = np.argpartition(probs, -num_tokens_to_plot)[-num_tokens_to_plot:]

    likely_tokens = []
    token_probs = []

    for token in ind:
        c_token = [(ctypes.c_int)(*[token])]
        print('Token: {}, Prob: {}'.format(LLM.detokenize(c_token), probs[token]))
        # print('Token: {} Detokenized: {}'.format(token, LLM.detokenize(c_token)))
        likely_tokens.append(LLM.detokenize(c_token))
        token_probs.append(probs[token])

    plt.bar(likely_tokens, token_probs)
    plt.title('Bar chart of 10 most likely tokens')

In [37]:
prompt = """HUMAN: You a creative scifi worldbuilding assistant. You are tasked with randomly generating a galaxy. First generate an astronomical description of the galaxy, noting its morphology and a few distinguishing characteristics. Then output the name of the galaxy. Format your answer as JSON with the following fields: ["galaxy_description", "galaxy_name"].

ASSISTANT:"""
print(prompt)

tokens = LLM.tokenize(prompt.encode('utf-8'))
count = 0
for token in LLM.generate(tokens, top_k=40, top_p=0.8, temp=1.2, repeat_penalty=1.1):
    print(LLM.detokenize([token]).decode("utf-8") , end='')
    count += 1
    if count >= 200:
        break

HUMAN: You a creative scifi worldbuilding assistant. You are tasked with randomly generating a galaxy. First generate an astronomical description of the galaxy, noting its morphology and a few distinguishing characteristics. Then output the name of the galaxy. Format your answer as JSON with the following fields: ["galaxy_description", "galaxy_name"].

ASSISTANT:
 {
"galaxy\_description": "The galaxy is a barred spiral galaxy with a prominent central bulge and two major arms. It has a diameter of approximately 100,000 light-years and contains over 400 billion stars. The galaxy is surrounded by a dense halo of dark matter and a thin disk of gas and dust. The disk is divided into four main regions: the bulge, the spiral arms, the inter-arm region, and the halo. The galaxy is home to a variety of celestial objects, including stars, planets, nebulas, black holes, and satellite galaxies.",
"galaxy\_name": "The Milky Way"
} package com.example.coolweather.app;

import android.RssDataSource;


In [25]:
prompt = """HUMAN: You a creative scifi worldbuilding assistant.
You are tasked with randomly generating a galaxy. Your output should be in a JSON format. The fields of the JSON should be: ["galaxy_name", "galaxy_morphology", "description", "notable_features"].  Fill in each of the fields creatively, using scientific and astronomical terms. Be creative. Make the galaxy unique and unusual.

ASSISTANT:{"""
print(prompt)

tokens = LLM.tokenize(prompt.encode('utf-8'))
count = 0
for token in LLM.generate(tokens, top_k=40, top_p=0.95, temp=1.2, repeat_penalty=1.1):
    print(LLM.detokenize([token]).decode("utf-8") , end='')
    count += 1
    if count >= 100:
        break

HUMAN: You a creative scifi worldbuilding assistant.
You are tasked with randomly generating a galaxy. Your output should be in a JSON format. The fields of the JSON should be: ["galaxy_name", "galaxy_morphology", "description", "notable_features"].  Fill in each of the fields creatively, using scientific and astronomical terms. Be creative. Make the galaxy unique and unusual.

ASSISTANT:{

"galaxy\_name": "Nebulae Ophiuchi",
"galaxy\_morphology": "Barred Spiral Galaxy",
"description": "Nebulae O

KeyboardInterrupt: 