In [3]:
import json
import os
os.environ["LLAMA_CPP_LIB"] = "/home/brian/github/llama.cpp/libllama.so"
from llama_cpp import Llama, Completion, LlamaTokenizer

In [4]:
# Load the first chapter's JSON so its text can be translated.
# The file holds Chinese text, so decode it explicitly as UTF-8 instead of
# relying on the platform's default encoding.
with open("../data/books/three_body/chapters/1.json", "r", encoding="utf-8") as f:
    chapter_1_json = json.load(f)


In [5]:
# Paragraphs of chapter 1, taken from the "paragraphs" key of the chapter JSON.
paragraphs = chapter_1_json["paragraphs"]

In [6]:
# Load the Chinese-Alpaca-2 7B (q4_0) model for use by the rest of the notebook.
# n_ctx requests a 4096-token context window; n_gpu_layers=30 asks llama.cpp to
# offload 30 layers to the GPU (the load log below reports n_layer = 32).
llm = Llama(
    model_path="/home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin",
    n_ctx=4096,
    n_gpu_layers=30
)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 5.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 3773.79 MB (+ 2048.00 MB per state)
llama_new_context_with_mo

In [5]:
def translate_paragraph(text, llm, use_example=None, method=None):
    """
    Translate one Chinese paragraph into English with a llama.cpp model.

    Supports both Completion and ChatCompletion. The translated text is also
    printed as a side effect.

    Args:
        text: Chinese source text to translate.
        llm: An already-loaded ``Llama`` instance. It is reused as-is so the
            model is not reloaded from disk on every call. If ``None``, the
            default Chinese-Alpaca-2 model is loaded as a fallback.
        use_example: If truthy, a worked Chinese/English example pair is
            inserted before the user message (chat mode only).
        method: ``"completion"`` selects plain completion with a one-shot
            prompt; any other value (including ``None``) selects chat
            completion.

    Returns:
        The raw response returned by the model call. The translation is at
        ``response["choices"][0]["text"]`` for completion and at
        ``response["choices"][0]["message"]["content"]`` for chat completion.
    """
    default_model_path = "/home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin"

    # Only load a model when the caller did not supply one; loading on every
    # call costs a full model load per paragraph and discards the caller's
    # settings (e.g. GPU offload).
    if llm is None:
        llm = Llama(model_path=default_model_path, n_ctx=4096)

    # Label echoed back in the response; prefer the path of the model in use.
    model_name = getattr(llm, "model_path", default_model_path)

    # NOTE(review): max_tokens=5000 is larger than the 4096-token context the
    # model is loaded with; confirm how llama-cpp-python handles that. In
    # completion mode generation normally ends at a stop sequence first.
    max_tokens = 5000

    # Completion (simple completion)
    if method == "completion":
        # One-shot prompt. Note: plain `{text}` -- a JS-style `${text}` would
        # leave a literal "$" in front of the source text.
        completion_prompt = f"### Chinese:\n你好\n\n### English:\nHello\n\n### Chinese:\n{text}\n\n### English:\n"
        # Stop at the first newline or at the start of the next "###" section
        # so only a single translated line is returned.
        stop = ["\n", "###"]
        response = llm.create_completion(
            max_tokens=max_tokens,
            model=model_name,
            prompt=completion_prompt,
            stop=stop,
        )
        print(response["choices"][0]["text"])
        return response

    # ChatCompletion
    system_prompt = "Translate from Chinese to English, only reply in English 把中文翻译成英文，只用英文回答问题"
    messages = [{"role": "system", "content": system_prompt}]

    # examples to help the completion get a better sense of how to translate
    if use_example:
        messages.append({"role": "user", "content": "在那个已被忘却的日子里，它的世界颠覆了。泥土飞走，出现了一条又深又宽的峡谷，然后泥土又轰隆隆地飞回来，峡谷消失了，在原来峡谷的尽头出现了一座黑色的孤峰。其实，在这片广阔的疆域上，这种事常常发生，泥土飞走又飞回，峡谷出现又消失，然后是孤峰降临，好像是给每次灾变打上一个醒目的标记。褐蚁和几百个同族带着幸存的蚁后向太阳落下的方向走了一段路，建立了新的帝国。"})
        messages.append({"role": "assistant", "content": "In that forgotten day, its world was turned upside down. The soil flew away and a deep and wide canyon appeared, then the soil flew back and the canyon disappeared, at the end of which appeared a black peak. In fact, such things often happened in this vast domain, where the soil flew away and flew back, then there was a canyon, then a peak, as if to mark each disaster. The brown ants and hundreds of their kind with surviving queens walked towards the direction where the sun set and established a new empire."})

    messages.append({"role": "user", "content": text})

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        model=model_name,
    )
    print(response["choices"][0]["message"]["content"])
    return response

In [6]:
# Build a tokenizer on top of the loaded model so we can inspect how text is split into tokens.
tokenizer = LlamaTokenizer(llama=llm)

In [7]:
# Sample Chinese passage, encoded to the model's token ids for inspection below.
TEXT="在那个已被忘却的日子里，它的世界颠覆了。泥土飞走，出现了一条又深又宽的峡谷，然后泥土又轰隆隆地飞回来，峡谷消失了，在原来峡谷的尽头出现了一座黑色的孤峰。其实，在这片广阔的疆域上，这种事常常发生，泥土飞走又飞回，峡谷出现又消失，然后是孤峰降临，好像是给每次灾变打上一个醒目的标记。褐蚁和几百个同族带着幸存的蚁后向太阳落下的方向走了一段路，建立了新的帝国。"
tokens = tokenizer.encode(TEXT)

In [8]:
# Preview only the first four token ids rather than dumping the whole list.
print(f"{tokens[:4]} ...")

[1, 30505, 32380, 36812] ...


In [9]:
# Decode each token id on its own and print the pieces separated by spaces,
# which makes the tokenizer's segmentation of the passage visible.
for token in tokens:
    text = tokenizer.decode([token])
    print(text, end=" ")

 在 那个 已被 忘 却 的日子 里 ， 它的 世界 颠覆 了 。 泥 土 飞 走 ， 出现了 一条 又 深 又 宽 的 峡谷 ， 然后 泥 土 又 轰 隆 隆 地 飞 回来 ， 峡谷 消失 了 ， 在 原来 峡谷 的 尽头 出现了 一座 黑色 的 孤 峰 。 其实 ， 在这 片 广阔 的 疆 域 上 ， 这种事 常常 发生 ， 泥 土 飞 走 又 飞 回 ， 峡谷 出现 又 消失 ， 然后 是 孤 峰 降临 ， 好像是 给 每次 灾 变 打 上 一个 醒目 的 标记 。 褐 蚁 和 几百 个 同 族 带着 幸 存 的 蚁 后 向 太阳 落 下的 方向 走了 一段 路 ， 建立了 新的 帝国 。 

In [10]:
# Translate the first two paragraphs and pair each Chinese source with its
# English translation. "CN" holds the original text, "EN" the model output.
translations = []
for paragraph in paragraphs[:2]:
    translation = translate_paragraph(paragraph, llm=llm, method="completion")
    en_text = translation["choices"][0]["text"]
    translations.append({"CN": paragraph, "EN": en_text})

print(translations)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The word "basement" is a plain one, which can't be very fashionable or exciting, but it conveys our emotion and confidence in constructing the futuristic fantastic skyscraper in China accurately. Therefore, we use it as the name of this original collection.



llama_print_timings:        load time =  2690.92 ms
llama_print_timings:      sample time =    28.63 ms /    65 runs   (    0.44 ms per token,  2270.35 tokens per second)
llama_print_timings: prompt eval time =  2690.61 ms /    75 tokens (   35.87 ms per token,    27.87 tokens per second)
llama_print_timings:        eval time =  5577.53 ms /    64 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  8392.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the past ten years, science fiction has developed rapidly. Wang Jiankang, Liu Xiangxin, He Hongwei, and Han Song are just a few of the many science fiction writers who have published many works loved by readers and filled with innovative and exploratory value. The leading journal of science fiction has also gone from one traditional publication, Science Fiction World, to encompassing various levels of readers. At the same time, the market environment for science fiction has improved as major bookstores in provincial capitals have finally created a domain specifically for science fiction literature.



llama_print_timings:        load time =   737.99 ms
llama_print_timings:      sample time =    56.39 ms /   125 runs   (    0.45 ms per token,  2216.67 tokens per second)
llama_print_timings: prompt eval time =   737.95 ms /   129 tokens (    5.72 ms per token,   174.81 tokens per second)
llama_print_timings:        eval time = 10717.76 ms /   124 runs   (   86.43 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11704.82 ms


[{'EN': '“基石”是个平实的词，不够“炫”，却能够准确传达我们对构建中的中国科幻繁华巨厦的情感与信心，因此，我们用它来作为这套原创丛书的名字。', 'CN': 'The word "basement" is a plain one, which can\'t be very fashionable or exciting, but it conveys our emotion and confidence in constructing the futuristic fantastic skyscraper in China accurately. Therefore, we use it as the name of this original collection.'}, {'EN': '最近十年，是科幻创作飞速发展的十年。王晋康、刘慈欣、何宏伟、韩松等一大批科幻作家发表了大量深受读者喜爱、极具开拓与探索价值的科幻佳作。科幻文学的龙头期刊更是从一本传统的《科幻世界》，发展壮大成为涵盖各个读者层的系列刊物。与此同时，科幻文学的市场环境也有了改善，省会级城市的大型书店里终于有了属于科幻的领地。', 'CN': 'In the past ten years, science fiction has developed rapidly. Wang Jiankang, Liu Xiangxin, He Hongwei, and Han Song are just a few of the many science fiction writers who have published many works loved by readers and filled with innovative and exploratory value. The leading journal of science fiction has also gone from one traditional publication, Science Fiction World, to encompassing various levels of readers. At the same time, the market environment for science fiction has 

In [12]:
# let's write a function that can add English translations to our file for a given chapter in a book
import json

def translate_chapter(llm, book_path, chapter_number):
    """
    Add English translations to a chapter's JSON file, in place.

    Reads ``../data/books/{book_path}/chapters/{chapter_number}.json``,
    translates every entry of its ``"paragraphs"`` list with
    ``translate_paragraph`` (completion mode), stores the results under the
    ``"translated_paragraphs"`` key and writes the file back.

    Args:
        llm: Model handle forwarded to ``translate_paragraph``.
        book_path: Book directory name under ``../data/books``.
        chapter_number: Chapter file name without the ``.json`` suffix.

    Returns:
        None. Prints how many paragraphs were translated.
    """
    chapter_file = f"../data/books/{book_path}/chapters/{chapter_number}.json"

    # Read and close the file before translating: translation is slow and
    # there is no reason to keep the handle open meanwhile. The content is
    # Chinese text, so decode explicitly as UTF-8.
    with open(chapter_file, "r", encoding="utf-8") as f:
        chapter = json.load(f)

    translated_paragraphs = []
    for paragraph in chapter["paragraphs"]:
        translated_paragraph = translate_paragraph(paragraph, llm=llm, method="completion")
        translated_paragraphs.append(translated_paragraph["choices"][0]["text"])

    chapter["translated_paragraphs"] = translated_paragraphs

    # ensure_ascii=False writes raw non-ASCII characters, so the output
    # encoding must be UTF-8 regardless of the platform default.
    with open(chapter_file, "w", encoding="utf-8") as f:
        json.dump(chapter, f, ensure_ascii=False)

    print(f"translated {len(translated_paragraphs)}.")

In [48]:
# Translate chapter 1 of "three_body" and write the translations back into its JSON file.
translate_chapter(llm, "three_body", 1)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The word "basement" is a plain one, which can't be very fashionable or exciting, but it conveys our emotion and confidence in constructing the futuristic fantastic skyscraper in China accurately. Therefore, we use it as the name of this original collection.



llama_print_timings:        load time =   657.11 ms
llama_print_timings:      sample time =    30.00 ms /    65 runs   (    0.46 ms per token,  2166.31 tokens per second)
llama_print_timings: prompt eval time =   657.08 ms /    75 tokens (    8.76 ms per token,   114.14 tokens per second)
llama_print_timings:        eval time =  5626.07 ms /    64 runs   (   87.91 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  6419.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the past ten years, science fiction has developed rapidly. Wang Jiankang, Liu Xiangxin, He Hongwei, and Han Song are just a few of the many science fiction writers who have published many works loved by readers and filled with innovative and exploratory value. The leading journal of science fiction has also gone from one traditional publication, Science Fiction World, to encompassing various levels of readers. At the same time, the market environment for science fiction has improved as major bookstores in provincial capitals have finally created a domain specifically for science fiction literature.



llama_print_timings:        load time =   724.70 ms
llama_print_timings:      sample time =    55.76 ms /   125 runs   (    0.45 ms per token,  2241.67 tokens per second)
llama_print_timings: prompt eval time =   724.66 ms /   129 tokens (    5.62 ms per token,   178.02 tokens per second)
llama_print_timings:        eval time = 10869.87 ms /   124 runs   (   87.66 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 11854.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Even though people still ask about the gap between Chinese and American science fiction, but now the answer is quite different than ten years ago. In many works (which are no longer those silly stories with few techniques and limitations of color, imagination), this comparison has become like steak for someone's potato dish. The gap is obvious -- but more accurately it should be called "difference" -- but can no longer be ranked. There's a sense of taste to it, which is the sign of maturity in Chinese science fiction.



llama_print_timings:        load time =   712.19 ms
llama_print_timings:      sample time =    50.75 ms /   114 runs   (    0.45 ms per token,  2246.39 tokens per second)
llama_print_timings: prompt eval time =   712.15 ms /   123 tokens (    5.79 ms per token,   172.72 tokens per second)
llama_print_timings:        eval time =  9639.38 ms /   113 runs   (   85.30 ms per token,    11.72 tokens per second)
llama_print_timings:       total time = 10583.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The gap between Chinese and American science fiction is actually the gap in marketization. U.S. science fiction has formed a complete industrial chain from magazine to book, to movie and games, as well as toys. Our publishing industry, on the other hand, still remains in this kind of state: reader's reading demand cannot be met while publishers complain about the low volume of sales of science fiction books. The result is that we have very few science fiction writers who make money from their works, but many who write for love. This is not a situation that responsible publishers would like to see.



llama_print_timings:        load time =   737.86 ms
llama_print_timings:      sample time =    56.78 ms /   125 runs   (    0.45 ms per token,  2201.36 tokens per second)
llama_print_timings: prompt eval time =   737.83 ms /   133 tokens (    5.55 ms per token,   180.26 tokens per second)
llama_print_timings:        eval time = 10715.34 ms /   124 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11714.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the most influential professional science fiction publishing institution in China, Science Fiction World has been dedicated to promoting science fiction comprehensively. Publishing science fiction books is one of our priorities. For China's science fiction industry to take a long-term outlook, a pragmatic spirit and more market-oriented means are required. So we keep an eye on the future while taking actions in specific "blocks".



llama_print_timings:        load time =   674.61 ms
llama_print_timings:      sample time =    41.50 ms /    90 runs   (    0.46 ms per token,  2168.78 tokens per second)
llama_print_timings: prompt eval time =   674.56 ms /    99 tokens (    6.81 ms per token,   146.76 tokens per second)
llama_print_timings:        eval time =  7465.91 ms /    89 runs   (   83.89 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  8331.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's important to note, because there are no limits on the type of stone you can use for a foundation. Because building a building requires various kinds of stone.



llama_print_timings:        load time =   604.99 ms
llama_print_timings:      sample time =    15.60 ms /    35 runs   (    0.45 ms per token,  2244.17 tokens per second)
llama_print_timings: prompt eval time =   604.95 ms /    56 tokens (   10.80 ms per token,    92.57 tokens per second)
llama_print_timings:        eval time =  2781.95 ms /    34 runs   (   81.82 ms per token,    12.22 tokens per second)
llama_print_timings:       total time =  3458.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We are full of expectation for such a building.
translated 7.



llama_print_timings:        load time =   595.52 ms
llama_print_timings:      sample time =     4.90 ms /    11 runs   (    0.45 ms per token,  2244.44 tokens per second)
llama_print_timings: prompt eval time =   595.48 ms /    50 tokens (   11.91 ms per token,    83.97 tokens per second)
llama_print_timings:        eval time =   835.16 ms /    10 runs   (   83.52 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  1452.44 ms


In [13]:
# Translate chapter 2 of "three_body" and write the translations back into its JSON file.
translate_chapter(llm, "three_body", 2)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

终于,《三体》可以和科幻朋友们见面了.我们都没有想到,连载的形式出现.之前,就题材问题与编辑们仔细商讨过.看来没有什么大的问题,但没想到今年是文化大革命三十周年纪念,单行本一时无法出版，只能这样。



llama_print_timings:        load time =   721.45 ms
llama_print_timings:      sample time =    41.77 ms /    60 runs   (    0.70 ms per token,  1436.37 tokens per second)
llama_print_timings: prompt eval time =   721.41 ms /    90 tokens (    8.02 ms per token,   124.76 tokens per second)
llama_print_timings:        eval time =  5558.33 ms /    59 runs   (   94.21 ms per token,    10.61 tokens per second)
llama_print_timings:       total time =  6454.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The book is not actually about the Cultural Revolution, but it has a lingering spiritual ghost from the period that only makes up a fraction of its content.



llama_print_timings:        load time =   613.51 ms
llama_print_timings:      sample time =    14.50 ms /    33 runs   (    0.44 ms per token,  2276.18 tokens per second)
llama_print_timings: prompt eval time =   613.47 ms /    59 tokens (   10.40 ms per token,    96.17 tokens per second)
llama_print_timings:        eval time =  2744.67 ms /    32 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3419.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Although this book is not a sequel to "Ball Lightning", it can be considered the continuation of the world in which the events of that story took place. The physicist in the story appears, but no longer has much significance. Others disappear forever; Lin Yun died, although sometimes I wonder if she could have lived and become the protagonist in the end?



llama_print_timings:        load time =   679.55 ms
llama_print_timings:      sample time =    36.61 ms /    81 runs   (    0.45 ms per token,  2212.75 tokens per second)
llama_print_timings: prompt eval time =   679.51 ms /    94 tokens (    7.23 ms per token,   138.34 tokens per second)
llama_print_timings:        eval time =  6830.67 ms /    80 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  7671.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the first volume of a series called "The Farthest Stars" and can be thought of as the beginning of a longer story.



llama_print_timings:        load time =   607.63 ms
llama_print_timings:      sample time =    13.35 ms /    30 runs   (    0.44 ms per token,  2247.70 tokens per second)
llama_print_timings: prompt eval time =   607.59 ms /    50 tokens (   12.15 ms per token,    82.29 tokens per second)
llama_print_timings:        eval time =  2515.36 ms /    29 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  3181.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a story about betrayal, but also a story about survival and death. Sometimes, it may be more of an issue to choose between loyalty and betrayal than life or death itself.



llama_print_timings:        load time =   630.68 ms
llama_print_timings:      sample time =    19.85 ms /    44 runs   (    0.45 ms per token,  2216.18 tokens per second)
llama_print_timings: prompt eval time =   630.65 ms /    55 tokens (   11.47 ms per token,    87.21 tokens per second)
llama_print_timings:        eval time =  3687.27 ms /    43 runs   (   85.75 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4406.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Insanity and paranoia will eventually exert what kind of power within human civilization? The cold, starry sky will how question the morality in people's hearts?



llama_print_timings:        load time =   637.66 ms
llama_print_timings:      sample time =    17.14 ms /    39 runs   (    0.44 ms per token,  2275.38 tokens per second)
llama_print_timings: prompt eval time =   637.61 ms /    57 tokens (   11.19 ms per token,    89.40 tokens per second)
llama_print_timings:        eval time =  3230.57 ms /    38 runs   (   85.01 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  3943.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The author attempts to re-tell the legend of a civilization that has been destroyed and rebirthed 200 times on the scale of light years.



llama_print_timings:        load time =   609.74 ms
llama_print_timings:      sample time =    15.62 ms /    35 runs   (    0.45 ms per token,  2240.57 tokens per second)
llama_print_timings: prompt eval time =   609.72 ms /    57 tokens (   10.70 ms per token,    93.49 tokens per second)
llama_print_timings:        eval time =  2924.50 ms /    34 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3602.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

My friends will see that this first installment is not really a science fiction novel, but this book is not showing the same as it does in this period. It is less science-fiction and more ethereal than Ball Lightning, I hope you can be patient enough to keep going, because the following stories will change greatly.



llama_print_timings:        load time =   655.42 ms
llama_print_timings:      sample time =    31.25 ms /    69 runs   (    0.45 ms per token,  2207.79 tokens per second)
llama_print_timings: prompt eval time =   655.38 ms /    81 tokens (    8.09 ms per token,   123.59 tokens per second)
llama_print_timings:        eval time =  5941.80 ms /    68 runs   (   87.38 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  6736.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the coming months, readers will travel through my mental journey of the past year, frankly speaking, I don't know what you will see on this dark and mysterious path. But being able to walk together with sci-fi lovers for so long, I am also grateful.



llama_print_timings:        load time =   659.11 ms
llama_print_timings:      sample time =    27.61 ms /    61 runs   (    0.45 ms per token,  2209.02 tokens per second)
llama_print_timings: prompt eval time =   659.05 ms /    86 tokens (    7.66 ms per token,   130.49 tokens per second)
llama_print_timings:        eval time =  5232.48 ms /    60 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  6012.74 ms


translated 9.


In [14]:
# Translate chapter 3 of "three_body" and write the translations back into its JSON file.
translate_chapter(llm, "three_body", 3)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

China, 1967.



llama_print_timings:        load time =   611.88 ms
llama_print_timings:      sample time =     4.40 ms /    10 runs   (    0.44 ms per token,  2273.24 tokens per second)
llama_print_timings: prompt eval time =   611.85 ms /    37 tokens (   16.54 ms per token,    60.47 tokens per second)
llama_print_timings:        eval time =   775.21 ms /     9 runs   (   86.13 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  1408.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The attacks on the headquarters of “Four-Twenty-Eighth Army” by the “Red Unification” lasted for two days. Their flags fluttered around the building like the seeds seeking the fire. The commander of the “Red Unification” was anxious and uneasy, fearing not their defenders, who were only about two hundred and fifteen soldiers of “Four-Twenty-Eighth Army”, but those more than ten large dynamite mines inside the building. He did not see them, but could feel their presence like a magnet, and he knew that if those switches were turned on, they would cause great damage with all their might. However, those young red guards were fierce and crazy, just like wolves gathered around fire ashes. They were very different from the first-generation Red Guards who had matured in the rain and wind.



llama_print_timings:        load time =   926.57 ms
llama_print_timings:      sample time =    88.25 ms /   187 runs   (    0.47 ms per token,  2118.96 tokens per second)
llama_print_timings: prompt eval time =   926.52 ms /   231 tokens (    4.01 ms per token,   249.32 tokens per second)
llama_print_timings:        eval time = 16692.00 ms /   186 runs   (   89.74 ms per token,    11.14 tokens per second)
llama_print_timings:       total time = 18006.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The top of the building appeared a tiny figure, that beautiful girl waving a "Four·Twenty-Eight" flag. Her appearance immediately attracted a chaotic sound of shooting guns, the weapons were varied from ancient US carbines to Czech machine guns and Thrifty 80s, as well as new models of standard rifles and assault guns—the latter was stolen from the military after "The Eighth Issue Editorial Opinion" published by "Red Flag" magazine in August 1967. Along with those slug shotgun and big swords, they constituted a concentrate history of modern times… The people playing this game on the top of the building had done it many times before, and the girl who came out this time believed that she was lucky as well. She was waving her flag, sometimes shouting slogans through her喇叭筒 or throwing fliers down to the street. They always could escape bullets without any losses. Today, the girl with flaming youth believes a magnificent ideal world would be born in her blood… Until then, she fell from the


llama_print_timings:        load time =  1319.01 ms
llama_print_timings:      sample time =   126.96 ms /   272 runs   (    0.47 ms per token,  2142.42 tokens per second)
llama_print_timings: prompt eval time =  1318.95 ms /   397 tokens (    3.32 ms per token,   301.00 tokens per second)
llama_print_timings:        eval time = 24804.82 ms /   271 runs   (   91.53 ms per token,    10.93 tokens per second)
llama_print_timings:       total time = 26692.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The soldiers of the Red Union cheered and several people ran down to the ground, hoisting up the tiny body of a four.28 flag bearer. They flung it toward the gate of the main building, where the only remaining pieces of iron bar were hung like swords from the door. The young body hung there for a while before it was shot off with guns fired by red guards practicing shooting on her body. The bullets rained down like soft rain on her fragile body, but her beautiful eyes still looked at the blue sky of nineteen sixty-seven, without any pain in them. She waved her sleeves as if to wipe away the rain droplets that had landed on her. Finally, one half of her head was shot off, leaving behind only her beautiful eyes that still gazed at the blue sky of nineteen sixty-seven with passion and longing.



llama_print_timings:        load time =   939.39 ms
llama_print_timings:      sample time =    88.03 ms /   196 runs   (    0.45 ms per token,  2226.46 tokens per second)
llama_print_timings: prompt eval time =   939.35 ms /   234 tokens (    4.01 ms per token,   249.11 tokens per second)
llama_print_timings:        eval time = 17057.32 ms /   195 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 18388.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She was still luckier than many others, at least she died for her ideals in the passionate struggle.



llama_print_timings:        load time =   612.15 ms
llama_print_timings:      sample time =    10.84 ms /    24 runs   (    0.45 ms per token,  2213.41 tokens per second)
llama_print_timings: prompt eval time =   612.12 ms /    54 tokens (   11.34 ms per token,    88.22 tokens per second)
llama_print_timings:        eval time =  1923.55 ms /    23 runs   (   83.63 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  2583.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The hotspots are like thousands of parallel CPUs, connecting the Cultural Revolution to one another and flooding the city with an invisible tsunami that permeates into every tiny corner and crevice.



llama_print_timings:        load time =   643.93 ms
llama_print_timings:      sample time =    19.01 ms /    43 runs   (    0.44 ms per token,  2261.49 tokens per second)
llama_print_timings: prompt eval time =   643.89 ms /    76 tokens (    8.47 ms per token,   118.03 tokens per second)
llama_print_timings:        eval time =  3539.50 ms /    42 runs   (   84.27 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  4267.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the sprawling campus of a famous university on the outskirts of the city, a huge gathering of thousands of people had been participating in a purging rally that lasted nearly two hours. In this period of contradictions and rivalries, every place had its complex opposition factions struggling with each other, but the target of the purging rally today was an academic authority without any objections from anyone; they could only simultaneously suffer from the harsh attacks from all sides.



llama_print_timings:        load time =   806.84 ms
llama_print_timings:      sample time =    46.87 ms /   105 runs   (    0.45 ms per token,  2240.33 tokens per second)
llama_print_timings: prompt eval time =   806.80 ms /   166 tokens (    4.86 ms per token,   205.75 tokens per second)
llama_print_timings:        eval time =  8970.51 ms /   104 runs   (   86.25 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  9983.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The reactionaries have their own characteristics compared to the other demons and ghosts: when the first attack comes, they often display a high and stubborn arrogance; and in 40 days at the capital, more than 1700 victims were killed, and many opted for an even faster way of escaping madness. Including Lao She, Wu Yan, Gui Bo Zan, Fu Lei, Qu Jia, Yi Huan, Hai Muer among others, they ended their lives with the life that once impressed people.



llama_print_timings:        load time =   762.97 ms
llama_print_timings:      sample time =    54.93 ms /   121 runs   (    0.45 ms per token,  2202.68 tokens per second)
llama_print_timings: prompt eval time =   762.92 ms /   141 tokens (    5.41 ms per token,   184.82 tokens per second)
llama_print_timings:        eval time = 10381.34 ms /   120 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 11390.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The survivors of this stage gradually numb themselves with the continuous hardships and suffering, forming a protective psychological shell that prevents them from collapsing under constant torment. During interrogations they often slip into a half-sleep state and can only be woken up by an ominous threat; then, a significant number of people enter the third stage in which long-lasting critique reinforces sharp political images like molten metal, completely destroys their well-established ideological edifices, and they truly believe themselves guilty while shedding tears for the damage they have caused on the great cause. However, this stage is also the least exciting to the Red Guards because only those in the first stage can excite their overexcited nerves like a red cloth held in the hand of a bullfighter.



llama_print_timings:        load time =   996.46 ms
llama_print_timings:      sample time =    87.11 ms /   181 runs   (    0.48 ms per token,  2077.86 tokens per second)
llama_print_timings: prompt eval time =   996.42 ms /   258 tokens (    3.86 ms per token,   258.93 tokens per second)
llama_print_timings:        eval time = 16047.29 ms /   180 runs   (   89.15 ms per token,    11.22 tokens per second)
llama_print_timings:       total time = 17439.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   995.64 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   995.62 ms /    29 tokens (   34.33 ms per token,    29.13 tokens per second)
llama_print_timings:        eval time =    82.79 ms /     1 runs   (   82.79 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =  1082.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Zhetai had lived through the Cultural Revolution and continued to be in the first stage. He did not confess, he didn't commit suicide, nor was he numb. When this physicist professor was on the stage of criticism, his face expressed a crystal-clear message: let me bear even heavier burdens! The Red Guards made him bear heavy things but they weren't crosses. Other critics wore high straw hats that were held up by bamboo frames, while he wore a steel hat as thick as a finger and had a black sign of his name on it with an arrow pointing to the corner.



llama_print_timings:        load time =   838.65 ms
llama_print_timings:      sample time =    62.58 ms /   140 runs   (    0.45 ms per token,  2237.10 tokens per second)
llama_print_timings: prompt eval time =   838.61 ms /   182 tokens (    4.61 ms per token,   217.03 tokens per second)
llama_print_timings:        eval time = 12258.15 ms /   139 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 13381.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the Red Guards escorting Yelutai to the stage were leading out, there were six of them - two male and four female. They both walked with confident stride and had a mature appearance like their teacher in the Department of Physics' Theoretical Physics at Beijing University. Those four female students, who are in Grade 2 of High School, looked even younger than their seniority as they all stood there with an armed belt around their waist, surrounded by a flame of green fire. Yelutai's appearance excited the lower level crowd, which had been feeling a little weak and listless just before. The chanting voice rang out again and again like a new tide.



llama_print_timings:        load time =   801.63 ms
llama_print_timings:      sample time =    69.18 ms /   152 runs   (    0.46 ms per token,  2197.26 tokens per second)
llama_print_timings: prompt eval time =   801.57 ms /   168 tokens (    4.77 ms per token,   209.59 tokens per second)
llama_print_timings:        eval time = 13247.61 ms /   151 runs   (   87.73 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 14365.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the chanting stopped, one of the male Red Guards turned to the target of his criticism, “Lei Zhetai, you are a master in various forms of mechanics. You should see how great this unstoppable force is and how stubborn it is. There will be no more nonsense from me today. Let me ask you these questions now: Did you add unnecessary relativistic content to the foundational courses at the Siwei-Siwo period?”



llama_print_timings:        load time =   702.98 ms
llama_print_timings:      sample time =    44.73 ms /   100 runs   (    0.45 ms per token,  2235.49 tokens per second)
llama_print_timings: prompt eval time =   702.95 ms /   121 tokens (    5.81 ms per token,   172.13 tokens per second)
llama_print_timings:        eval time =  8436.79 ms /    99 runs   (   85.22 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  9339.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei said, "Relativity has become a classical theory of physics. It is hard to understand it without learning about it."



llama_print_timings:        load time =   601.37 ms
llama_print_timings:      sample time =    12.46 ms /    27 runs   (    0.46 ms per token,  2166.24 tokens per second)
llama_print_timings: prompt eval time =   601.33 ms /    53 tokens (   11.35 ms per token,    88.14 tokens per second)
llama_print_timings:        eval time =  2234.62 ms /    26 runs   (   85.95 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2893.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The woman red guard next to you yelled, "You're nonsense! Albert Einstein is a reactionary academic authority. He has milk on his mind and goes to make atom bombs for imperialism ! If we want to build up the revolutionary science, we have to fight against relativity theory that represents the bourgeoisie black flag!"



llama_print_timings:        load time =   692.71 ms
llama_print_timings:      sample time =    34.28 ms /    76 runs   (    0.45 ms per token,  2217.29 tokens per second)
llama_print_timings: prompt eval time =   692.66 ms /    95 tokens (    7.29 ms per token,   137.15 tokens per second)
llama_print_timings:        eval time =  6507.19 ms /    75 runs   (   86.76 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  7353.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaf Chetai was silent, enduring the pain of his high-back hat and iron plate on his chest. He would not respond to unnecessary questions, which was what was going on in his mind as he stared at the girl who spoke boldly in front of teachers and students alike. Behind him, one of his students also slightly frowned. The girl speaking had obviously come prepared with her criticism and it would be too simple just by speaking some slogans to defeat Leaf Chetai. They decided to use the new weapon they had prepared today, which led another student to wave his hand in front of the class.



llama_print_timings:        load time =   764.39 ms
llama_print_timings:      sample time =    61.53 ms /   135 runs   (    0.46 ms per token,  2194.16 tokens per second)
llama_print_timings: prompt eval time =   764.35 ms /   142 tokens (    5.38 ms per token,   185.78 tokens per second)
llama_print_timings:        eval time = 11698.85 ms /   134 runs   (   87.30 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 12742.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Le Shaolin, the wife of Professor Ye Zhetai and a physicist in the same clan as him, stood up from her seat in the front row and walked to the stage. She was wearing an unflattering grass green outfit that seemed like she was trying to be fashionable like Mao's Red Guards, but people familiar with Le Shaolin saw it as a disturbing change from her former elaborate qipao attire when giving lectures.



llama_print_timings:        load time =   689.44 ms
llama_print_timings:      sample time =    54.98 ms /   103 runs   (    0.53 ms per token,  1873.48 tokens per second)
llama_print_timings: prompt eval time =   689.41 ms /    92 tokens (    7.49 ms per token,   133.45 tokens per second)
llama_print_timings:        eval time =  9156.19 ms /   102 runs   (   89.77 ms per token,    11.14 tokens per second)
llama_print_timings:       total time = 10086.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Xiaolin, ” Yi Ze Te says, his voice strong and determined. “I never thought you would stand up against me like this.”



llama_print_timings:        load time =   908.08 ms
llama_print_timings:      sample time =    14.17 ms /    32 runs   (    0.44 ms per token,  2258.45 tokens per second)
llama_print_timings: prompt eval time =   908.03 ms /   219 tokens (    4.15 ms per token,   241.18 tokens per second)
llama_print_timings:        eval time =  2726.07 ms /    31 runs   (   87.94 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  3697.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he listened to his wife's speech, Ye Chetai smiled bitterly. Lin, have I deceived you? In fact, you were always a mystery to me. Once, when I complimented your extraordinary talent--the old man escaped the disaster early and was lucky to praise your father for this--he shook his head and said that my daughter wouldn't be able to achieve anything in academia; then he added another important word: Linlin is so smart, but she won't make it without a basic understanding of theory.



llama_print_timings:        load time =   746.47 ms
llama_print_timings:      sample time =    52.39 ms /   117 runs   (    0.45 ms per token,  2233.38 tokens per second)
llama_print_timings: prompt eval time =   746.42 ms /   134 tokens (    5.57 ms per token,   179.52 tokens per second)
llama_print_timings:        eval time = 10041.47 ms /   116 runs   (   86.56 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 11026.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Years later, I realized the profound meaning of these words. Lin, you are really intelligent. You smelt out the political trend in the field of knowledge even earlier than years ago and took some forward-thinking steps such as changing the names of most laws and constants in physics teaching to avoid using “the same old jargon”. For example, Ohm's Law is changed to Resistance Law, and Maxwell's Equation to Electromagnetic Equation. You explained to students that all scientific achievements are the reflections of the wisdom of the masses, but despite this, you were not accepted by “the revolutionary mainstream”. Look at where you are now; you have no right to hold a red sash and wear it on your sleeve as a scholar as well as having no right to hold a copy of a selected collection of Mao Zedong's speeches.



llama_print_timings:        load time =   895.84 ms
llama_print_timings:      sample time =    93.26 ms /   191 runs   (    0.49 ms per token,  2048.04 tokens per second)
llama_print_timings: prompt eval time =   895.82 ms /   219 tokens (    4.09 ms per token,   244.47 tokens per second)
llama_print_timings:        eval time = 17174.03 ms /   190 runs   (   90.39 ms per token,    11.06 tokens per second)
llama_print_timings:       total time = 18496.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When I talk about Einstein, you have more to say. In December 1922, Einstein visited Shanghai and your father was assigned as one of the hosts for his visit. You told me many times that my father learned science from Einstein directly, and you chose physics as a specialty due to the influence of your father, so Einste



llama_print_timings:        load time =   727.06 ms
llama_print_timings:      sample time =    33.46 ms /    75 runs   (    0.45 ms per token,  2241.42 tokens per second)
llama_print_timings: prompt eval time =   727.03 ms /   119 tokens (    6.11 ms per token,   163.68 tokens per second)
llama_print_timings:        eval time =  6454.67 ms /    74 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  7329.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I later learned that your father had told you a kind lie, he only had one short interaction with Einstein. That was on November l3th, 1922 morning. Your father accompanied him for a walk in Nanjing Road and it seems like there were also other people with him, such as Shanghai University president Yu Ri Zhang, manager Cao Ganyi of the DaGongBao newspaper. They passed by an ongoing pit construction site and Einstein stopped at a young stone-cutting worker who stood with his hands in front of his face and was shrinking from the cold wind. Einstein asked your father how much money he made a day, and after listening to your father's answer, Einstein stood still for a while and watched the boy doing his work without taking a puff on his pipe. According to your father's account, Einstein remained silent after learning about it, looking at the young stone-cutting worker who stood numbly in the cold wind and ignored his pipe even though it was already put out. He said this with a deep sigh: In 


llama_print_timings:        load time =   990.50 ms
llama_print_timings:      sample time =   118.62 ms /   258 runs   (    0.46 ms per token,  2175.05 tokens per second)
llama_print_timings: prompt eval time =   990.45 ms /   257 tokens (    3.85 ms per token,   259.48 tokens per second)
llama_print_timings:        eval time = 22844.91 ms /   257 runs   (   88.89 ms per token,    11.25 tokens per second)
llama_print_timings:       total time = 24380.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Bow your head!” a male Red Guards shouted. It might be an ounce of pity for his former student that the teacher still refused to bend over, and it is reasonable to lower one's head when accused by a Red Guard. But Ye Chetai had to maintain his posture with his thin neck and heavy steel cap as if he could hold them up on his own. In fact, however, his shoulders were already trembling.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Bow your head! You rebel! ” Next to him, a female Red Guardsman removes her belt and swings it towards Ye Chetai, the metal buckle of which strikes his head with precision. But soon the blackish purplish shape becomes blurred by blood, then disappears entirely. He shakes himself and stands still again.



llama_print_timings:        load time =   676.84 ms
llama_print_timings:      sample time =    36.29 ms /    80 runs   (    0.45 ms per token,  2204.40 tokens per second)
llama_print_timings: prompt eval time =   676.79 ms /   104 tokens (    6.51 ms per token,   153.67 tokens per second)
llama_print_timings:        eval time =  6812.48 ms /    79 runs   (   86.23 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  7651.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The male Red Guards interrogated Ye Chetai, "In the teaching of quantum mechanics, you have spread many reactionary views as well!" After that he nodded to Sholun for her to go on.



llama_print_timings:        load time =   651.00 ms
llama_print_timings:      sample time =    22.05 ms /    48 runs   (    0.46 ms per token,  2177.27 tokens per second)
llama_print_timings: prompt eval time =   650.95 ms /    71 tokens (    9.17 ms per token,   109.07 tokens per second)
llama_print_timings:        eval time =  4032.16 ms /    47 runs   (   85.79 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4783.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shaolin was eager to continue. She had to keep talking, or her crumbling spirit would collapse entirely. “Ye Chetai, you cannot deny this! You have propagandized reactionary views to students multiple times!”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

"This is the most widely accepted explanation for these results, " said Ye Zezaitai, speaking calmly after such a blow.



llama_print_timings:        load time =   645.78 ms
llama_print_timings:      sample time =    13.20 ms /    29 runs   (    0.46 ms per token,  2196.30 tokens per second)
llama_print_timings: prompt eval time =   645.75 ms /    73 tokens (    8.85 ms per token,   113.05 tokens per second)
llama_print_timings:        eval time =  2373.04 ms /    28 runs   (   84.75 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  3076.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This interpretation argues that collapse is caused by external observation, which is another form of agnosticism and a very extreme one!



llama_print_timings:        load time =   626.98 ms
llama_print_timings:      sample time =    12.27 ms /    28 runs   (    0.44 ms per token,  2281.62 tokens per second)
llama_print_timings: prompt eval time =   626.94 ms /    66 tokens (    9.50 ms per token,   105.27 tokens per second)
llama_print_timings:        eval time =  2315.43 ms /    27 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  2996.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Jietai asked, "Is philosophy guiding experiments or are experiments guiding philosophy?" This sudden counterattack made the critics of his ideas stunned.



llama_print_timings:        load time =   620.56 ms
llama_print_timings:      sample time =    16.44 ms /    36 runs   (    0.46 ms per token,  2189.78 tokens per second)
llama_print_timings: prompt eval time =   620.52 ms /    60 tokens (   10.34 ms per token,    96.69 tokens per second)
llama_print_timings:        eval time =  3084.98 ms /    35 runs   (   88.14 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  3779.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, it is correct Marxist philosophy that guides scientific experimentation!



llama_print_timings:        load time =   592.32 ms
llama_print_timings:      sample time =     7.48 ms /    17 runs   (    0.44 ms per token,  2273.94 tokens per second)
llama_print_timings: prompt eval time =   592.28 ms /    46 tokens (   12.88 ms per token,    77.67 tokens per second)
llama_print_timings:        eval time =  1363.81 ms /    16 runs   (   85.24 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1989.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This equals to say that the correct philosophy comes from the sky, which contradicts Marxist principles of understanding nature.



llama_print_timings:        load time =   623.90 ms
llama_print_timings:      sample time =    11.26 ms /    24 runs   (    0.47 ms per token,  2132.39 tokens per second)
llama_print_timings: prompt eval time =   623.86 ms /    61 tokens (   10.23 ms per token,    97.78 tokens per second)
llama_print_timings:        eval time =  1995.03 ms /    23 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2669.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sholing and the two university red guards were speechless, unlike college and middle school red guards who are unable to reason. But the four female junior commanders of Affiliated Middle School had their own "impossible to defeat" revolutionary style. Just now that girl knocked Le Ye Ze Tai a belt strap, while the other three girls also抡 up their belt straps and revolted as well. When they participated in a revolutionary action, they must at least be as revolutionary as each other. The two male red guards did not interfere, if now they intervened, they would also have been suspected of being unrevolutionary.



llama_print_timings:        load time =   760.94 ms
llama_print_timings:      sample time =    64.16 ms /   143 runs   (    0.45 ms per token,  2228.70 tokens per second)
llama_print_timings: prompt eval time =   760.90 ms /   144 tokens (    5.28 ms per token,   189.25 tokens per second)
llama_print_timings:        eval time = 12344.26 ms /   142 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 13393.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You are still spreading the Big Bang Theory, which is one of the most reactionary theories among all scientific theories! " A male Red Guards attempted to change the topic.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Maybe in the future this theory will be overthrown, but two cosmological discoveries made in the 20th century - Hubble redshift and the cosmic microwave background radiation - have made the big bang theory so far the most credible theoretical model for the origin of the universe.



llama_print_timings:        load time =   640.79 ms
llama_print_timings:      sample time =    28.05 ms /    64 runs   (    0.44 ms per token,  2281.97 tokens per second)
llama_print_timings: prompt eval time =   640.76 ms /    73 tokens (    8.78 ms per token,   113.93 tokens per second)
llama_print_timings:        eval time =  5348.67 ms /    63 runs   (   84.90 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6114.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Huh!?绍琳 shouted and then began to talk about the big bang, naturally forgetting to analyze its reactionary nature. But the theory of super new is so attractive that even the cleverest girl asked, "Even time started from that singularity?"


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Nothing, ” said Ye Zhetai, speaking like a little girl. He looked kindly at her and struggled to keep his hat on and not move because of the crippling injury to his leg.



llama_print_timings:        load time =   631.41 ms
llama_print_timings:      sample time =    21.16 ms /    44 runs   (    0.48 ms per token,  2079.69 tokens per second)
llama_print_timings: prompt eval time =   631.37 ms /    72 tokens (    8.77 ms per token,   114.04 tokens per second)
llama_print_timings:        eval time =  3757.97 ms /    43 runs   (   87.39 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  4483.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What... nothing?! Anti-Communist scum! Anti-Communist to the highest degree!” The girl was horrified and cried out for help, which she got promptly.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

This leaves a place for God.



llama_print_timings:        load time =   598.49 ms
llama_print_timings:      sample time =     3.82 ms /     8 runs   (    0.48 ms per token,  2095.34 tokens per second)
llama_print_timings: prompt eval time =   598.45 ms /    49 tokens (   12.21 ms per token,    81.88 tokens per second)
llama_print_timings:        eval time =   600.57 ms /     7 runs   (   85.80 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1216.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving aside the question of whether or not there is a God, I find that it is very interesting to see how different cultures and people perceive faith and morality differently. The discussion on whether religion is beneficial or harmful to society also varies across different societies. Some argue that religious values and beliefs are beneficial because they promote moral behavior, while others maintain that religious convictions are destructive because of the conflicts they create within societies.



llama_print_timings:        load time =   649.18 ms
llama_print_timings:      sample time =    44.07 ms /    97 runs   (    0.45 ms per token,  2201.04 tokens per second)
llama_print_timings: prompt eval time =   649.14 ms /    68 tokens (    9.55 ms per token,   104.75 tokens per second)
llama_print_timings:        eval time =  8269.81 ms /    96 runs   (   86.14 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  9115.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know.



llama_print_timings:        load time =   586.31 ms
llama_print_timings:      sample time =     3.37 ms /     7 runs   (    0.48 ms per token,  2079.00 tokens per second)
llama_print_timings: prompt eval time =   586.27 ms /    33 tokens (   17.77 ms per token,    56.29 tokens per second)
llama_print_timings:        eval time =   521.16 ms /     6 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1122.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You said what!



llama_print_timings:        load time =   598.11 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   598.06 ms /    34 tokens (   17.59 ms per token,    56.85 tokens per second)
llama_print_timings:        eval time =   333.99 ms /     4 runs   (   83.50 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =   941.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't know, if God is the ultimate consciousness outside of our universe, I don't know whether it exists or not. And then from a positive viewpoint, there's no solid evidence to support its existence according to science either.” 但实际上，在那个噩梦般的时刻，叶哲泰已经倾向于不相信上帝的存在了。



llama_print_timings:        load time =   651.28 ms
llama_print_timings:      sample time =    33.33 ms /    74 runs   (    0.45 ms per token,  2220.42 tokens per second)
llama_print_timings: prompt eval time =   651.24 ms /    79 tokens (    8.24 ms per token,   121.31 tokens per second)
llama_print_timings:        eval time =  6217.51 ms /    73 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  7018.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This outrageous remark caused a commotion in the entire hall and, under the leadership of one of the Red Guards on stage, there were more rounds of chants.



llama_print_timings:        load time =   624.87 ms
llama_print_timings:      sample time =    18.18 ms /    38 runs   (    0.48 ms per token,  2090.32 tokens per second)
llama_print_timings: prompt eval time =   624.84 ms /    61 tokens (   10.24 ms per token,    97.62 tokens per second)
llama_print_timings:        eval time =  3246.67 ms /    37 runs   (   87.75 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  3954.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Knock down the reactionary academic authority, Ye Chetai!"



llama_print_timings:        load time =   593.05 ms
llama_print_timings:      sample time =     7.88 ms /    17 runs   (    0.46 ms per token,  2157.36 tokens per second)
llama_print_timings: prompt eval time =   593.01 ms /    42 tokens (   14.12 ms per token,    70.83 tokens per second)
llama_print_timings:        eval time =  1384.09 ms /    16 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2013.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fight all reactionary academic authorities!



llama_print_timings:        load time =   590.55 ms
llama_print_timings:      sample time =     3.97 ms /     9 runs   (    0.44 ms per token,  2268.15 tokens per second)
llama_print_timings: prompt eval time =   590.50 ms /    40 tokens (   14.76 ms per token,    67.74 tokens per second)
llama_print_timings:        eval time =   679.32 ms /     8 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1286.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fight all reactionary theories!



llama_print_timings:        load time =   590.21 ms
llama_print_timings:      sample time =     3.55 ms /     8 runs   (    0.44 ms per token,  2251.62 tokens per second)
llama_print_timings: prompt eval time =   590.19 ms /    40 tokens (   14.75 ms per token,    67.78 tokens per second)
llama_print_timings:        eval time =   601.02 ms /     7 runs   (   85.86 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1206.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……



llama_print_timings:        load time =  1064.04 ms
llama_print_timings:      sample time =     1.43 ms /     3 runs   (    0.48 ms per token,  2094.97 tokens per second)
llama_print_timings: prompt eval time =  1063.99 ms /    31 tokens (   34.32 ms per token,    29.14 tokens per second)
llama_print_timings:        eval time =   170.57 ms /     2 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1241.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The girl yelled after the slogan "God does not exist, all religions are mental tools for pacifying people by the ruling class!" was silenced.



llama_print_timings:        load time =   639.12 ms
llama_print_timings:      sample time =    17.04 ms /    37 runs   (    0.46 ms per token,  2170.85 tokens per second)
llama_print_timings: prompt eval time =   639.08 ms /    65 tokens (    9.83 ms per token,   101.71 tokens per second)
llama_print_timings:        eval time =  3090.83 ms /    36 runs   (   85.86 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3805.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This view is one-sided,” said Ye Chetai calmly.



llama_print_timings:        load time =   593.16 ms
llama_print_timings:      sample time =     7.95 ms /    18 runs   (    0.44 ms per token,  2263.30 tokens per second)
llama_print_timings: prompt eval time =   593.12 ms /    44 tokens (   13.48 ms per token,    74.18 tokens per second)
llama_print_timings:        eval time =  1438.12 ms /    17 runs   (   84.60 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  2066.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the Red Guards got angry, they immediately made a judgment on this dangerous enemy. All language was useless in front of him. She seized the strap and her three comrades followed her, Yeqe Tai was tall, so these 14-year-old girls could only swing their tapes upwards to hit him on his head, but after a few strikes, he had dropped off his thick cap that offered some protection. Then they started to beat him with the tape and his body - he finally fell down in which encouraged them even more and they continued fighting for their beliefs, their ideals, and their glorious historical mission as well as being proud of themselves…



llama_print_timings:        load time =   865.29 ms
llama_print_timings:      sample time =    68.27 ms /   145 runs   (    0.47 ms per token,  2123.95 tokens per second)
llama_print_timings: prompt eval time =   865.25 ms /   200 tokens (    4.33 ms per token,   231.15 tokens per second)
llama_print_timings:        eval time = 12585.37 ms /   144 runs   (   87.40 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 13756.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The two students finally made up their minds and shouted these words, then ran forward together to separate the four girls who had gone insane.



llama_print_timings:        load time =   641.20 ms
llama_print_timings:      sample time =    13.48 ms /    30 runs   (    0.45 ms per token,  2226.18 tokens per second)
llama_print_timings: prompt eval time =   641.15 ms /    74 tokens (    8.66 ms per token,   115.42 tokens per second)
llama_print_timings:        eval time =  2498.05 ms /    29 runs   (   86.14 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3199.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But it was too late. The physicist lay still on the ground, his eyelids half-opened as a red snake slowly slithered down from his head and slowly trailed on to a nearby empty box, leaving behind a rhythmic "ta ta ta" sound as it dripped onto its destination with each step.



llama_print_timings:        load time =   725.99 ms
llama_print_timings:      sample time =    32.49 ms /    73 runs   (    0.45 ms per token,  2247.05 tokens per second)
llama_print_timings: prompt eval time =   725.96 ms /   124 tokens (    5.85 ms per token,   170.81 tokens per second)
llama_print_timings:        eval time =  6210.24 ms /    72 runs   (   86.25 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  7082.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A sudden laughter broke the silence, and this sound was so terrifying that everyone fled in panic. Soon the hall emptied out, leaving just a young girl still standing on the stage.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

She is the daughter of Yi Chetai, named Ye Wenjie.



llama_print_timings:        load time =   586.22 ms
llama_print_timings:      sample time =     8.43 ms /    19 runs   (    0.44 ms per token,  2253.86 tokens per second)
llama_print_timings: prompt eval time =   586.18 ms /    38 tokens (   15.43 ms per token,    64.83 tokens per second)
llama_print_timings:        eval time =  1569.47 ms /    18 runs   (   87.19 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2193.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =  1022.01 ms
llama_print_timings:      sample time =    94.84 ms /   159 runs   (    0.60 ms per token,  1676.53 tokens per second)
llama_print_timings: prompt eval time =  1021.97 ms /   270 tokens (    3.79 ms per token,   264.20 tokens per second)
llama_print_timings:        eval time = 14851.36 ms /   158 runs   (   94.00 ms per token,    10.64 tokens per second)
llama_print_timings:       total time = 16306.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$As she left the empty, desolate playground, walking down the path to her house, she heard a series of giggles from her own window. They were coming from that woman who had once called herself her mother. She turned away without saying a word, letting her feet lead her somewhere else.



llama_print_timings:        load time =   695.13 ms
llama_print_timings:      sample time =    29.00 ms /    64 runs   (    0.45 ms per token,  2207.05 tokens per second)
llama_print_timings: prompt eval time =   695.09 ms /   101 tokens (    6.88 ms per token,   145.30 tokens per second)
llama_print_timings:        eval time =  5575.49 ms /    63 runs   (   88.50 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  6401.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She finally found herself in front of the door to her friend Neng's home. During their four years at university, Neng had been her teacher and closest friend, who she spent two years studying as a graduate student in astrophysics. After reading about space exploration, to the present day when the school closed down, Neng had always been her most close person, other than her father. Neng had studied abroad at Cambridge University, his home filled with beautiful European books, paintings and records, a piano; there were also many exquisite pipe tobacco tins on their delicate wooden shelves, each one containing the roots of the Mediterranean sea bamboo or Turkey sea bubble stone, in which Neng's father gave her a pipe that has been soaking up his wisdom. These smoking pipes were like the presence of Neng, who had never mentioned anything about him before, but this small, warm and elegant world was a haven for her to escape the storm in the real world. But that happened before Neng's home 


llama_print_timings:        load time =  1027.13 ms
llama_print_timings:      sample time =   132.17 ms /   292 runs   (    0.45 ms per token,  2209.23 tokens per second)
llama_print_timings: prompt eval time =  1027.10 ms /   277 tokens (    3.71 ms per token,   269.69 tokens per second)
llama_print_timings:        eval time = 25996.31 ms /   291 runs   (   89.33 ms per token,    11.19 tokens per second)
llama_print_timings:       total time = 27620.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁 opened the door to Neng Wan's house and found that after being ransacked, the messy room had become tidied up. Those damaged paintings that had been tore down were put back on the wall with clean pictured frames, the knocked-down piano also stood erect in its original spot, though it was unable to be played because it had been broken… Neng Wan stood by her writing desk. She touched her forehead, face and hands, all of which were cold. In fact, Ye Wenjie noticed an empty sleeping pill bottle on the writing desk. She stood still for a while, then turned around and left without saying anything, feeling no sadness anymore. She felt like a counter was set to zero when exposed to excessive radiation… However, before she opened the door, she looked back at Neng Wan with a last glance. It was found that Neng teacher had put on lipstick and worn high heels.



llama_print_timings:        load time =  1012.53 ms
llama_print_timings:      sample time =    98.43 ms /   214 runs   (    0.46 ms per token,  2174.13 tokens per second)
llama_print_timings: prompt eval time =  1012.48 ms /   264 tokens (    3.84 ms per token,   260.75 tokens per second)
llama_print_timings:        eval time = 18866.08 ms /   213 runs   (   88.57 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 20326.80 ms


translated 58.


In [15]:
# Run the chapter-level translation with the already-constructed `llm`.
# Presumably "three_body" selects the book and 4 the chapter number — confirm
# against translate_chapter's definition.
# NOTE(review): the captured output of this cell prints a fresh
# "llama.cpp: loading model from ..." banner before every translated paragraph,
# so the model appears to be loaded again per paragraph instead of reusing the
# `llm` passed in here — verify in translate_paragraph / translate_chapter.
# NOTE(review): at least one paragraph in the output is only a "```" fence
# (2 sampled tokens); check the results for empty translations before saving.
translate_chapter(llm, "three_body", 4)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

In two years, Great Xing'an Mountain.



llama_print_timings:        load time =   594.99 ms
llama_print_timings:      sample time =     5.73 ms /    12 runs   (    0.48 ms per token,  2092.78 tokens per second)
llama_print_timings: prompt eval time =   594.96 ms /    37 tokens (   16.08 ms per token,    62.19 tokens per second)
llama_print_timings:        eval time =   940.77 ms /    11 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1562.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Sunshine and rain, day and night. "



llama_print_timings:        load time =   589.29 ms
llama_print_timings:      sample time =     6.89 ms /    14 runs   (    0.49 ms per token,  2031.05 tokens per second)
llama_print_timings: prompt eval time =   589.27 ms /    37 tokens (   15.93 ms per token,    62.79 tokens per second)
llama_print_timings:        eval time =  1086.38 ms /    13 runs   (   83.57 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  1705.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

With the loud song, a tall pine tree like the Batang temple was knocked down. Ye Wenjie felt the earth trembled slightly. She took her axes and short saws to remove branches from the giant tree. When she was working on this huge tree, she often thought that this giant is her father. Two years ago on a sad night in the morgue, when she was preparing her father's corpse, he felt the sense of these expansive tree bark is just like her father's wounds on his body.



llama_print_timings:        load time =   783.21 ms
llama_print_timings:      sample time =    83.86 ms /   118 runs   (    0.71 ms per token,  1407.02 tokens per second)
llama_print_timings: prompt eval time =   783.17 ms /   157 tokens (    4.99 ms per token,   200.47 tokens per second)
llama_print_timings:        eval time = 10991.10 ms /   117 runs   (   93.94 ms per token,    10.64 tokens per second)
llama_print_timings:       total time = 12144.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The 41,000 men of the 6 divisions of Inner Mongolia Production Construction Corps are scattered across this vast expanse of forests and grasslands. When they arrived in the world, many of them were filled with a romantic expectation: if the Soviet Union began to march its tank formations across the border into China, they would immediately arm themselves and create a first barrier against imperialism on behalf of the nation. This was actually one of their strategic considerations when forming the corps. But, in fact, what they yearned for seemed just like distant mountains running wild in the sky; it was clearly visible, but could not be reached. So, instead of burning fires, these young people soon discovered that with this vast world, the biggest cities were nothing more than sheepfolds. In the vast expanse of forests and grasslands between, burning their youth would not bring any benefits like a stack of cow dung does. But it was their fate to burn; they are the generation that burn


llama_print_timings:        load time =  1045.00 ms
llama_print_timings:      sample time =   117.60 ms /   265 runs   (    0.44 ms per token,  2253.46 tokens per second)
llama_print_timings: prompt eval time =  1044.95 ms /   289 tokens (    3.62 ms per token,   276.57 tokens per second)
llama_print_timings:        eval time = 23609.57 ms /   264 runs   (   89.43 ms per token,    11.18 tokens per second)
llama_print_timings:       total time = 25175.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁 saw the felling only in a manner of crazy. High and straight, the Korean pine trees, the Cedar tree with its lifelong greenness, the towering Bai Hua trees, the Yanshan trees soaring up to the clouds, the Sibieria cold-hardy poplar trees, as well as black birches, zetmaki willows, sea willow and mongolian oaks. Where they passed, there is only a pile of tree trunks.



llama_print_timings:        load time =   757.27 ms
llama_print_timings:      sample time =    49.66 ms /   112 runs   (    0.44 ms per token,  2255.56 tokens per second)
llama_print_timings: prompt eval time =   757.24 ms /   129 tokens (    5.87 ms per token,   170.36 tokens per second)
llama_print_timings:        eval time =  9509.28 ms /   111 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time = 10484.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The well-trimmed larch was being dragged away by a crawler tractor. At the other end of the trunk, Ye Wenjie gently caressed the newly sawed cut surface. She often subconsciously did that, thinking it was a huge wound that she could feel the pain of the tree. Suddenly, she noticed that on the nearby tree stump, there was also a hand in touching it, which conveyed trembling from its soul. The hands were white and belonged to a male. Ye Wenjie looked up and saw that the man touching the tree stump is Bao Moxin, a thin young man wearing glasses who is a reporter for the People's Liberation Army newspaper, only arriving in the regiment yesterday to interview. Ye Wenjie read his articles and found them very good, with an aspirational and sensitive style that did not fit the boorish environment of this vast farm operation. She could never forget it.



llama_print_timings:        load time =   892.13 ms
llama_print_timings:      sample time =    96.41 ms /   215 runs   (    0.45 ms per token,  2229.97 tokens per second)
llama_print_timings: prompt eval time =   892.09 ms /   215 tokens (    4.15 ms per token,   241.01 tokens per second)
llama_print_timings:        eval time = 18770.67 ms /   214 runs   (   87.71 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 20097.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Masteel, come here. White Mullin called a young man nearby and he came over. White journalists asked him, "How old is this tree?"



llama_print_timings:        load time =   675.14 ms
llama_print_timings:      sample time =    15.53 ms /    35 runs   (    0.44 ms per token,  2253.99 tokens per second)
llama_print_timings: prompt eval time =   675.11 ms /    83 tokens (    8.13 ms per token,   122.94 tokens per second)
llama_print_timings:        eval time =  2932.81 ms /    34 runs   (   86.26 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3676.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   593.30 ms
llama_print_timings:      sample time =     0.92 ms /     2 runs   (    0.46 ms per token,  2164.50 tokens per second)
llama_print_timings: prompt eval time =   593.26 ms /    46 tokens (   12.90 ms per token,    77.54 tokens per second)
llama_print_timings:        eval time =    85.56 ms /     1 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =   682.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I counted, it's three hundred and thirty-plus years old. How long did you use the saw to cut it down?



llama_print_timings:        load time =   580.44 ms
llama_print_timings:      sample time =    12.27 ms /    28 runs   (    0.44 ms per token,  2281.99 tokens per second)
llama_print_timings: prompt eval time =   580.40 ms /    47 tokens (   12.35 ms per token,    80.98 tokens per second)
llama_print_timings:        eval time =  2350.35 ms /    27 runs   (   87.05 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  2984.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It won’t be more than ten minutes, I’ll tell you when I find out where I am going to be assigned, and the red flag will follow me wherever I go,” said Ma. He looked excited and pleased that he was getting media attention. It was a great honor to be featured in the communications report of the “Big Production Daily.”



llama_print_timings:        load time =   672.71 ms
llama_print_timings:      sample time =    42.18 ms /    75 runs   (    0.56 ms per token,  1778.30 tokens per second)
llama_print_timings: prompt eval time =   672.66 ms /    89 tokens (    7.56 ms per token,   132.31 tokens per second)
llama_print_timings:        eval time =  6454.87 ms /    74 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  7311.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

For three hundred and thirty years, ten generations of people have come and gone. When it was still a seed in the Ming Dynasty, it experienced so many storms and saw so many things in those long years. Yet you can cut it down in minutes with ease; does that make you feel anything?



llama_print_timings:        load time =   626.95 ms
llama_print_timings:      sample time =    29.89 ms /    66 runs   (    0.45 ms per token,  2207.95 tokens per second)
llama_print_timings: prompt eval time =   626.91 ms /    77 tokens (    8.14 ms per token,   122.82 tokens per second)
llama_print_timings:        eval time =  5680.76 ms /    65 runs   (   87.40 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  6439.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You want me to feel what?” Ma Gang stared for a moment. “Not just one tree. We have more than enough of those here, with older pines in abundance.”



llama_print_timings:        load time =   636.85 ms
llama_print_timings:      sample time =    18.53 ms /    40 runs   (    0.46 ms per token,  2158.43 tokens per second)
llama_print_timings: prompt eval time =   636.80 ms /    66 tokens (    9.65 ms per token,   103.64 tokens per second)
llama_print_timings:        eval time =  3247.01 ms /    39 runs   (   83.26 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  3965.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Go ahead and do your own thing.” White Mow Lin shook his head and sighed softly sitting on a tree stump.



llama_print_timings:        load time =   596.76 ms
llama_print_timings:      sample time =    14.16 ms /    29 runs   (    0.49 ms per token,  2048.02 tokens per second)
llama_print_timings: prompt eval time =   596.72 ms /    54 tokens (   11.05 ms per token,    90.50 tokens per second)
llama_print_timings:        eval time =  2446.08 ms /    28 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3107.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He also shook his head, disappointed that the reporter did not report on his interest. "Intellectuals are always so annoying," he said, glancing at Lei Wenjie nearby who it seemed he had included in his remark as well.



llama_print_timings:        load time =   642.95 ms
llama_print_timings:      sample time =    25.38 ms /    56 runs   (    0.45 ms per token,  2206.46 tokens per second)
llama_print_timings: prompt eval time =   642.91 ms /    75 tokens (    8.57 ms per token,   116.66 tokens per second)
llama_print_timings:        eval time =  4824.33 ms /    55 runs   (   87.72 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  5579.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The huge tree had been dragged away, and the stone and root had scratched the bark of it. The thick layer of leaf litter on the ground made a deep crack and soon there was water seeping out from it. The stagnant water, dark red in color like blood, filled the depression created by the fallen tree.



llama_print_timings:        load time =   687.58 ms
llama_print_timings:      sample time =    37.12 ms /    72 runs   (    0.52 ms per token,  1939.55 tokens per second)
llama_print_timings: prompt eval time =   687.55 ms /    98 tokens (    7.02 ms per token,   142.54 tokens per second)
llama_print_timings:        eval time =  6388.36 ms /    71 runs   (   89.98 ms per token,    11.11 tokens per second)
llama_print_timings:       total time =  7237.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Leaf, come here and rest a bit,” said Bai Mu Lin pointed to the empty side of the tree trunk. Leaf really did feel tired and walked over to sit down next to the reporter on his back.



llama_print_timings:        load time =   640.53 ms
llama_print_timings:      sample time =    22.05 ms /    49 runs   (    0.45 ms per token,  2222.12 tokens per second)
llama_print_timings: prompt eval time =   640.49 ms /    76 tokens (    8.43 ms per token,   118.66 tokens per second)
llama_print_timings:        eval time =  4260.38 ms /    48 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  4996.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

For a while, Bai Moxin remained silent. Then he said, "I can see you have the same feeling as me. There are only we two in this feeling."


llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 4299.79 MB (+ 2048.00 MB per state)
llama_model_load_internal: offloading 0 repeating layers to GPU
llama_mo

Yan Jie remained silent, and Bai Mowen expected her not to respond. Yan Jie was usually quiet and seldom interacted with people, so some people who were new mistakenly thought that she was a mute.



llama_print_timings:        load time =   628.83 ms
llama_print_timings:      sample time =    22.59 ms /    51 runs   (    0.44 ms per token,  2257.14 tokens per second)
llama_print_timings: prompt eval time =   628.79 ms /    67 tokens (    9.38 ms per token,   106.55 tokens per second)
llama_print_timings:        eval time =  4280.96 ms /    50 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  5010.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But then, when I arrived at the same location a year later for the stationery business trip, I found that this small forest area was like another world! I had a deep sense of pity and regret. The water source had been depleted by the excessive use of the water in the river nearby and the soil had also become extremely poor. The only way to prevent further exploitation is to increase reforestation efforts and protect natural resources.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Where did you get this idea?” Yue Wenjie asked, but she didn't reveal whether she agreed or disagreed with the idea. She could speak already made him feel very grateful.


llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 4299.79 MB (+ 2048.00 MB per state)
llama_model_load_internal: offloading 0 repeating layers to GPU
llama_mo

$“I just read a book that really touched me…Can you read English?” Seeing Minjie nodded, Baishu Lin took out a blue-covered book from his bag and passed it to Minjie. “This is the book that was published in 1962, which has a big influence in the western world.” Minjie turned around and received the book when she saw that its title was Silent Spring, written by Rachel Carson. “Where did you get it?” she whispered.



llama_print_timings:        load time =   735.63 ms
llama_print_timings:      sample time =    50.34 ms /   112 runs   (    0.45 ms per token,  2224.74 tokens per second)
llama_print_timings: prompt eval time =   735.60 ms /   130 tokens (    5.66 ms per token,   176.73 tokens per second)
llama_print_timings:        eval time =  9464.83 ms /   111 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time = 10419.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The book has caught the attention of our superiors and we've been asked to prepare an internal reference for it. I will be translating the parts about the forest from Chinese into English.



llama_print_timings:        load time =   611.11 ms
llama_print_timings:      sample time =    18.22 ms /    40 runs   (    0.46 ms per token,  2195.39 tokens per second)
llama_print_timings: prompt eval time =   611.07 ms /    51 tokens (   11.98 ms per token,    83.46 tokens per second)
llama_print_timings:        eval time =  3297.43 ms /    39 runs   (   84.55 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  3988.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She turned the page and was immediately drawn in by the brief introduction, describing a quiet village dying from poisonous pesticide. The simple language hinted at the author's concerns about it all.



llama_print_timings:        load time =   635.39 ms
llama_print_timings:      sample time =    18.47 ms /    42 runs   (    0.44 ms per token,  2274.08 tokens per second)
llama_print_timings: prompt eval time =   635.33 ms /    77 tokens (    8.25 ms per token,   121.20 tokens per second)
llama_print_timings:        eval time =  3404.81 ms /    41 runs   (   83.04 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  4121.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I want to write a letter to the Central Committee, reflecting on this irresponsible behavior of the construction bureau.



llama_print_timings:        load time =   613.41 ms
llama_print_timings:      sample time =    11.50 ms /    26 runs   (    0.44 ms per token,  2260.87 tokens per second)
llama_print_timings: prompt eval time =   613.37 ms /    52 tokens (   11.80 ms per token,    84.78 tokens per second)
llama_print_timings:        eval time =  2133.60 ms /    25 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2796.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving her head down for a long time, she only understood his words at last and didn't say anything.



llama_print_timings:        load time =   609.43 ms
llama_print_timings:      sample time =    11.03 ms /    25 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   609.41 ms /    52 tokens (   11.72 ms per token,    85.33 tokens per second)
llama_print_timings:        eval time =  2011.79 ms /    24 runs   (   83.82 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  2669.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You want to look at it, but you should not let others see. This thing... you know...” White Moli said and then looked around again before standing up and departing.



llama_print_timings:        load time =   628.29 ms
llama_print_timings:      sample time =    18.34 ms /    40 runs   (    0.46 ms per token,  2181.62 tokens per second)
llama_print_timings: prompt eval time =   628.23 ms /    65 tokens (    9.67 ms per token,   103.47 tokens per second)
llama_print_timings:        eval time =  3265.36 ms /    39 runs   (   83.73 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  3973.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In her final years, Yelu Wenjie recalled the impact of Silent Spring on her own life. Before this, the human evil had imprinted an irreparable scar in her youthful soul. But it was through this book that she first reasoned about humanity's dark side. This should have been a book with a limited theme—describing the harm of pesticide abuse to the environment—but author Rachel Carson's unique perspective, which was so shocking for Ye Wenjie, made her see that the behavior of using pesticides (at least in her eyes) was no different than "the Cultural Revolution", causing as much damage to our world. How many seemingly normal and justified human behaviors are actually evil?



llama_print_timings:        load time =   857.32 ms
llama_print_timings:      sample time =    74.25 ms /   165 runs   (    0.45 ms per token,  2222.10 tokens per second)
llama_print_timings: prompt eval time =   857.28 ms /   205 tokens (    4.18 ms per token,   239.13 tokens per second)
llama_print_timings:        eval time = 14529.08 ms /   164 runs   (   88.59 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 15714.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thinking further, a conclusion makes her shudder with fear and sink into the abyss: Perhaps, the relationship between humanity and evil is like the relationship between the ocean and iceberg, which is made of the same substance. It is only that the iceberg appears distinct from the whole body of water due to its different form… The real morality awareness in humans is impossible, just as they cannot lift their hair away from the earth with their own strength. To achieve this, only supernatural forces can be used.



llama_print_timings:        load time =   762.11 ms
llama_print_timings:      sample time =    48.88 ms /   111 runs   (    0.44 ms per token,  2270.73 tokens per second)
llama_print_timings: prompt eval time =   762.07 ms /   140 tokens (    5.44 ms per token,   183.71 tokens per second)
llama_print_timings:        eval time =  9578.59 ms /   110 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 10556.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This idea decided Yvonne's life.



llama_print_timings:        load time =   588.45 ms
llama_print_timings:      sample time =     4.84 ms /    11 runs   (    0.44 ms per token,  2272.26 tokens per second)
llama_print_timings: prompt eval time =   588.41 ms /    39 tokens (   15.09 ms per token,    66.28 tokens per second)
llama_print_timings:        eval time =   877.21 ms /    10 runs   (   87.72 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  1486.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Four days later, Ye Wenjie went to return the book. Bai Moxin stayed in the only guestroom in the regiment and woke up when he saw her. He quickly got up when he saw her.


llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 4299.79 MB (+ 2048.00 MB per state)
llama_model_load_internal: offloading 0 repeating layers to GPU
llama_mo

“Are you working today?” Wenjie asked.



llama_print_timings:        load time =   591.00 ms
llama_print_timings:      sample time =     5.70 ms /    13 runs   (    0.44 ms per token,  2279.90 tokens per second)
llama_print_timings: prompt eval time =   590.95 ms /    40 tokens (   14.77 ms per token,    67.69 tokens per second)
llama_print_timings:        eval time =  1016.99 ms /    12 runs   (   84.75 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  1632.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We’ve been in the company for a long time now, and can’t keep changing battalion all the time. We need to participate in work activities, be able to do ‘three combinations’ after all. Oh, we are doing our labor in Radar Hill where there are so many trees, and I’m afraid of catching瘴气 due to the deep underground rotted leaves.” Bai Mu Lin said.



llama_print_timings:        load time =   666.51 ms
llama_print_timings:      sample time =    40.57 ms /    92 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   666.46 ms /    87 tokens (    7.66 ms per token,   130.54 tokens per second)
llama_print_timings:        eval time =  7710.25 ms /    91 runs   (   84.73 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  8555.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Radar Peak?” Wen Jie heard the name and was very surprised.



llama_print_timings:        load time =   594.26 ms
llama_print_timings:      sample time =     8.35 ms /    19 runs   (    0.44 ms per token,  2276.27 tokens per second)
llama_print_timings: prompt eval time =   594.23 ms /    42 tokens (   14.15 ms per token,    70.68 tokens per second)
llama_print_timings:        eval time =  1524.13 ms /    18 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2154.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, it's an urgent assignment from the group. We need to set up a perimeter patrol around it.”



llama_print_timings:        load time =   599.29 ms
llama_print_timings:      sample time =    12.24 ms /    28 runs   (    0.44 ms per token,  2287.96 tokens per second)
llama_print_timings: prompt eval time =   599.24 ms /    50 tokens (   11.98 ms per token,    83.44 tokens per second)
llama_print_timings:        eval time =  2295.04 ms /    27 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2947.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The radar peak is a mysterious place. That steep and peculiar mountain peak originally had no name, only because of the huge parabolic antenna on its summit. In fact, anyone with common knowledge would know that it's not an antenna; although it changes direction every day, but has never sustained rotation in succession. The antenna produces a low-pitched buzzing sound in the wind, and is heard from far away. The men of the team only knew that it was a military base; local people said that three years ago when building the base, huge human resources had been mobilized to erect a high-voltage line to mount a parabolic antenna on its summit, and open a road to the peak leading to a large amount of materials being transported up by the road. But after the base was built, they decided to demolish that road and only left a small path that can be traversed through the forest; there are often helicopters landing and taking off on the peak.



llama_print_timings:        load time =   862.34 ms
llama_print_timings:      sample time =    98.87 ms /   219 runs   (    0.45 ms per token,  2215.10 tokens per second)
llama_print_timings: prompt eval time =   862.29 ms /   199 tokens (    4.33 ms per token,   230.78 tokens per second)
llama_print_timings:        eval time = 19239.31 ms /   218 runs   (   88.25 ms per token,    11.33 tokens per second)
llama_print_timings:       total time = 20546.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That antenna is not always visible. When the wind is too strong, it will be destroyed and when it stands up again, many strange things happen: Animals in the woods become nervous, birds start flying out in large groups, and people also experience unexplained symptoms such as headaches and dizziness. In addition, locals report that they lose more hair around the radar peak than usual.



llama_print_timings:        load time =   714.82 ms
llama_print_timings:      sample time =    38.95 ms /    87 runs   (    0.45 ms per token,  2233.80 tokens per second)
llama_print_timings: prompt eval time =   714.77 ms /   109 tokens (    6.56 ms per token,   152.50 tokens per second)
llama_print_timings:        eval time =  7478.70 ms /    86 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  8367.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =  1038.02 ms
llama_print_timings:      sample time =   113.97 ms /   241 runs   (    0.47 ms per token,  2114.54 tokens per second)
llama_print_timings: prompt eval time =  1037.98 ms /   294 tokens (    3.53 ms per token,   283.24 tokens per second)
llama_print_timings:        eval time = 21588.53 ms /   240 runs   (   89.95 ms per token,    11.12 tokens per second)
llama_print_timings:       total time = 23137.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He took the book carefully and put it under his pillow, at the same time taking out some pages filled with densely written notes. He handed them to Wen Jie, “Is this letter ready?”



llama_print_timings:        load time =   638.34 ms
llama_print_timings:      sample time =    20.13 ms /    44 runs   (    0.46 ms per token,  2186.23 tokens per second)
llama_print_timings: prompt eval time =   638.29 ms /    77 tokens (    8.29 ms per token,   120.63 tokens per second)
llama_print_timings:        eval time =  3702.93 ms /    43 runs   (   86.11 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  4430.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sure. What can I help you with?



llama_print_timings:        load time =   606.44 ms
llama_print_timings:      sample time =     4.82 ms /    11 runs   (    0.44 ms per token,  2280.27 tokens per second)
llama_print_timings: prompt eval time =   606.40 ms /    33 tokens (   18.38 ms per token,    54.42 tokens per second)
llama_print_timings:        eval time =   854.57 ms /    10 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1482.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I told you to write a letter to the central government.” The handwriting on the paper was scrawled and difficult for Ye Wenjie to read. But after reading it, she could clearly see the sound argumentation and the rich contents: from the historical rich mountains turned into bare hills due to the destruction of vegetation, to the severe increase in the silt content of the Yellow River, there is a conclusion that shows the serious consequences of building an army of immigrants in Inner Mongolia. Finally, Ye Wenjie noticed that his writing was so similar to that of The Quiet Days that it made her, who had a background in science, feel very comfortable.



llama_print_timings:        load time =   774.66 ms
llama_print_timings:      sample time =    64.19 ms /   144 runs   (    0.45 ms per token,  2243.51 tokens per second)
llama_print_timings: prompt eval time =   774.62 ms /   156 tokens (    4.97 ms per token,   201.39 tokens per second)
llama_print_timings:        eval time = 12400.44 ms /   143 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 13461.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She sincerely applauded his writing.



llama_print_timings:        load time =   591.41 ms
llama_print_timings:      sample time =     5.31 ms /    12 runs   (    0.44 ms per token,  2261.16 tokens per second)
llama_print_timings: prompt eval time =   591.37 ms /    42 tokens (   14.08 ms per token,    71.02 tokens per second)
llama_print_timings:        eval time =   925.19 ms /    11 runs   (   84.11 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  1538.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Bai Mu Lei nodded, "I have sent it." said to the new paper to copy, but shaking hands, a word of which can also write. First people with oil sawed all, may even be no food porridge, let alone write a letter.



llama_print_timings:        load time =   670.35 ms
llama_print_timings:      sample time =    27.39 ms /    58 runs   (    0.47 ms per token,  2117.25 tokens per second)
llama_print_timings: prompt eval time =   670.31 ms /    91 tokens (    7.37 ms per token,   135.76 tokens per second)
llama_print_timings:        eval time =  4953.63 ms /    57 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  5743.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I'll do it for you.” said Ye Wenjie, taking the pen from Bai Moxin.



llama_print_timings:        load time =   616.95 ms
llama_print_timings:      sample time =    11.95 ms /    27 runs   (    0.44 ms per token,  2258.85 tokens per second)
llama_print_timings: prompt eval time =   616.90 ms /    54 tokens (   11.42 ms per token,    87.53 tokens per second)
llama_print_timings:        eval time =  2210.07 ms /    26 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2878.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You write very well.” White Moli looked at the first line of the copy of the letter on the paper and said. He gave Vinci a glass of water, but he still shook his hand when he filled it. Vinci quickly moved the paper to avoid dropping any more.



llama_print_timings:        load time =   646.15 ms
llama_print_timings:      sample time =    27.09 ms /    60 runs   (    0.45 ms per token,  2214.76 tokens per second)
llama_print_timings: prompt eval time =   646.11 ms /    81 tokens (    7.98 ms per token,   125.37 tokens per second)
llama_print_timings:        eval time =  4963.21 ms /    59 runs   (   84.12 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  5727.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you studying physics?" White Moxin asked.



llama_print_timings:        load time =   596.31 ms
llama_print_timings:      sample time =     5.95 ms /    13 runs   (    0.46 ms per token,  2186.71 tokens per second)
llama_print_timings: prompt eval time =   596.27 ms /    41 tokens (   14.54 ms per token,    68.76 tokens per second)
llama_print_timings:        eval time =   986.17 ms /    12 runs   (   82.18 ms per token,    12.17 tokens per second)
llama_print_timings:       total time =  1608.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Astrophysics is useless now.” Wenjie replied without looking up.



llama_print_timings:        load time =   592.94 ms
llama_print_timings:      sample time =     8.35 ms /    19 runs   (    0.44 ms per token,  2275.72 tokens per second)
llama_print_timings: prompt eval time =   592.90 ms /    48 tokens (   12.35 ms per token,    80.96 tokens per second)
llama_print_timings:        eval time =  1531.41 ms /    18 runs   (   85.08 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2161.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That’s researching stars, how can it be useless? Now universities have reopened, but graduate programs are no longer accepting applicants like you with such advanced abilities, so, o…”



llama_print_timings:        load time =   634.27 ms
llama_print_timings:      sample time =    20.72 ms /    46 runs   (    0.45 ms per token,  2220.40 tokens per second)
llama_print_timings: prompt eval time =   634.22 ms /    66 tokens (    9.61 ms per token,   104.06 tokens per second)
llama_print_timings:        eval time =  3870.57 ms /    45 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  4595.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But Wen Jie didn't answer, she just focused on writing down the words. She wouldn't tell Baizhilin that she was lucky to have been admitted to a construction army division. In reality, there wasn't much for her to say or feel about it either.



llama_print_timings:        load time =   637.61 ms
llama_print_timings:      sample time =    28.63 ms /    63 runs   (    0.45 ms per token,  2200.18 tokens per second)
llama_print_timings: prompt eval time =   637.58 ms /    70 tokens (    9.11 ms per token,   109.79 tokens per second)
llama_print_timings:        eval time =  5233.47 ms /    62 runs   (   84.41 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  5997.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The room was quiet, and only the scratching of an ink pen on paper could be heard. Wenjie could smell the sawdust from a nearby reporter's clothes. She had the feeling that it was her first time having a warm sensation and being relaxed about the world around her since her father passed away.



llama_print_timings:        load time =   666.81 ms
llama_print_timings:      sample time =    32.99 ms /    72 runs   (    0.46 ms per token,  2182.28 tokens per second)
llama_print_timings: prompt eval time =   666.77 ms /    88 tokens (    7.58 ms per token,   131.98 tokens per second)
llama_print_timings:        eval time =  6068.29 ms /    71 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  6879.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She finished the letter after about an hour, and then wrote the address and the recipient on the envelope as instructed by White Mistlin. After getting up to leave, she said: “Give me your coat, I'll wash it for you.” She was surprised at her own actions.



llama_print_timings:        load time =   676.56 ms
llama_print_timings:      sample time =    29.26 ms /    65 runs   (    0.45 ms per token,  2221.54 tokens per second)
llama_print_timings: prompt eval time =   676.51 ms /    93 tokens (    7.27 ms per token,   137.47 tokens per second)
llama_print_timings:        eval time =  5464.75 ms /    64 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  6269.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, that’s not the case!” White Moxin shook his hand repeatedly and said, “The female soldiers of your construction battalion are doing men's work all day. Please go back and rest! The mounting begins at 6 o'clock tomorrow. Oh, Wenjie, I'll be returning to the brigade headquarters tomorrow, maybe I can help you.”



llama_print_timings:        load time =   680.07 ms
llama_print_timings:      sample time =    68.75 ms /    85 runs   (    0.81 ms per token,  1236.31 tokens per second)
llama_print_timings: prompt eval time =   680.03 ms /   107 tokens (    6.36 ms per token,   157.35 tokens per second)
llama_print_timings:        eval time =  8101.18 ms /    84 runs   (   96.44 ms per token,    10.37 tokens per second)
llama_print_timings:       total time =  9087.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thanks, but I think this is fine. It's quiet here.



llama_print_timings:        load time =   604.41 ms
llama_print_timings:      sample time =     7.51 ms /    16 runs   (    0.47 ms per token,  2130.21 tokens per second)
llama_print_timings: prompt eval time =   604.37 ms /    59 tokens (   10.24 ms per token,    97.62 tokens per second)
llama_print_timings:        eval time =  1257.20 ms /    15 runs   (   83.81 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1894.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Are you avoiding something?



llama_print_timings:        load time =   617.40 ms
llama_print_timings:      sample time =     3.38 ms /     7 runs   (    0.48 ms per token,  2073.46 tokens per second)
llama_print_timings: prompt eval time =   617.36 ms /    36 tokens (   17.15 ms per token,    58.31 tokens per second)
llama_print_timings:        eval time =   512.50 ms /     6 runs   (   85.42 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1144.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm leaving.



llama_print_timings:        load time =   597.99 ms
llama_print_timings:      sample time =     2.62 ms /     6 runs   (    0.44 ms per token,  2287.46 tokens per second)
llama_print_timings: prompt eval time =   597.96 ms /    44 tokens (   13.59 ms per token,    73.58 tokens per second)
llama_print_timings:        eval time =   440.48 ms /     5 runs   (   88.10 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  1049.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Bai Muling looked at her slender figure disappearing in the moonlight and then, he looked up at the radar peak of the distant mountains which saw a giant antenna standing slowly, emitting cold light.



llama_print_timings:        load time =   657.79 ms
llama_print_timings:      sample time =    20.76 ms /    46 runs   (    0.45 ms per token,  2216.12 tokens per second)
llama_print_timings: prompt eval time =   657.75 ms /    82 tokens (    8.02 ms per token,   124.67 tokens per second)
llama_print_timings:        eval time =  3818.38 ms /    45 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  4567.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three weeks later, on a day in the noon, Ye Wenjie was recalled urgently from the timber yard. As soon as she entered the office, she found that the atmosphere was not right; her leader and his guiding officer were there, along with an unfamiliar stranger who was sitting in front of a black briefcase on his desk. The two items next to it were clearly taken out from the briefcase: an envelope and a book. The envelope had been opened, while the book she had seen before was the one labeled SILENT SPRING.



llama_print_timings:        load time =   739.92 ms
llama_print_timings:      sample time =    77.14 ms /   127 runs   (    0.61 ms per token,  1646.27 tokens per second)
llama_print_timings: prompt eval time =   739.87 ms /   130 tokens (    5.69 ms per token,   175.71 tokens per second)
llama_print_timings:        eval time = 11469.20 ms /   126 runs   (   91.03 ms per token,    10.99 tokens per second)
llama_print_timings:       total time = 12555.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This generation has a special sensitivity towards their political situation, but this sensitivity is stronger in Yuwenjie. She felt the world around her was shrinking like a pocket, pressing on to her.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“This is the politics director from the army political department, ” the guide pointed to the stranger. “I hope you will cooperate and tell the truth.”



llama_print_timings:        load time =   617.65 ms
llama_print_timings:      sample time =    14.84 ms /    34 runs   (    0.44 ms per token,  2290.80 tokens per second)
llama_print_timings: prompt eval time =   617.61 ms /    62 tokens (    9.96 ms per token,   100.39 tokens per second)
llama_print_timings:        eval time =  2830.13 ms /    33 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3512.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhang boss asked, "Is this letter written by you?" while pulling the envelope out. Lei Wenjie reached for it but Zhang boss didn't give it to her and still held on to the letter as she showed her one page at a time until she showed her the last page of the letter.



llama_print_timings:        load time =   703.82 ms
llama_print_timings:      sample time =    31.36 ms /    70 runs   (    0.45 ms per token,  2232.43 tokens per second)
llama_print_timings: prompt eval time =   703.77 ms /   104 tokens (    6.77 ms per token,   147.77 tokens per second)
llama_print_timings:        eval time =  5999.71 ms /    69 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  6841.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, it wasn’t me.” Wenjie shivered and shook her head.



llama_print_timings:        load time =   594.85 ms
llama_print_timings:      sample time =     9.68 ms /    22 runs   (    0.44 ms per token,  2272.02 tokens per second)
llama_print_timings: prompt eval time =   594.80 ms /    46 tokens (   12.93 ms per token,    77.34 tokens per second)
llama_print_timings:        eval time =  1782.68 ms /    21 runs   (   84.89 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  2420.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's your signature.



llama_print_timings:        load time =   586.34 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2260.98 tokens per second)
llama_print_timings: prompt eval time =   586.31 ms /    37 tokens (   15.85 ms per token,    63.11 tokens per second)
llama_print_timings:        eval time =   515.12 ms /     6 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1114.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, but I'm just copying someone else's work.



llama_print_timings:        load time =   596.51 ms
llama_print_timings:      sample time =     7.11 ms /    15 runs   (    0.47 ms per token,  2110.30 tokens per second)
llama_print_timings: prompt eval time =   596.47 ms /    40 tokens (   14.91 ms per token,    67.06 tokens per second)
llama_print_timings:        eval time =  1214.93 ms /    14 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  1844.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Helping who? 一般, when something happened in the company, Yee Man Kit would rarely defend herself, eating all the loss silently and bearing all the injury quietly. But this time is different, she knows what it means.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

This is the interview that was copied by the reporter from the "Dà Jìnshí Bào" newspaper last week. His name is ...



llama_print_timings:        load time =   614.57 ms
llama_print_timings:      sample time =    15.41 ms /    34 runs   (    0.45 ms per token,  2206.36 tokens per second)
llama_print_timings: prompt eval time =   614.50 ms /    55 tokens (   11.17 ms per token,    89.50 tokens per second)
llama_print_timings:        eval time =  2785.28 ms /    33 runs   (   84.40 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  3469.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“You, Lei Wenjie!” Zhang boss glared at her with eyes like two black gun barrels. “I warn you that accusing someone will only make your trouble even bigger. We have investigated it clearly from Bai Muliang. He merely took the letter to Beijing as per your request and didn’t know its content.”



llama_print_timings:        load time =   681.25 ms
llama_print_timings:      sample time =    36.19 ms /    80 runs   (    0.45 ms per token,  2210.74 tokens per second)
llama_print_timings: prompt eval time =   681.20 ms /    98 tokens (    6.95 ms per token,   143.86 tokens per second)
llama_print_timings:        eval time =  6788.80 ms /    79 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  7633.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He...said that?” Wenjie's eyesight blurred.



llama_print_timings:        load time =   594.28 ms
llama_print_timings:      sample time =     9.10 ms /    20 runs   (    0.45 ms per token,  2197.80 tokens per second)
llama_print_timings: prompt eval time =   594.25 ms /    45 tokens (   13.21 ms per token,    75.73 tokens per second)
llama_print_timings:        eval time =  1631.29 ms /    19 runs   (   85.86 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2266.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Zhang Manager didn't answer her, but instead picked up the book and showed it to the lieutenant commander and guide officer. "You wrote this letter must have been inspired by this one. "He put the book in front of the major and the guide officer. "This is called 《The Silent Spring》, which was published in America in 1962. It has made a big impact on the capitalist world." He then took another book out of his briefcase, the cover was white and black letters. "This is the Chinese translation of this book, which is sent out as an internal reference by the relevant department so that it can be used for criticism. Now, the superior authorities have made a clear judgment on this book: It is a big evil weed that goes against Marxism-Leninism and advocates the end times by using environmental issues as a pretext. Its essence is very reactionary."



llama_print_timings:        load time =   843.56 ms
llama_print_timings:      sample time =    86.94 ms /   194 runs   (    0.45 ms per token,  2231.40 tokens per second)
llama_print_timings: prompt eval time =   843.51 ms /   195 tokens (    4.33 ms per token,   231.18 tokens per second)
llama_print_timings:        eval time = 17045.50 ms /   193 runs   (   88.32 ms per token,    11.32 tokens per second)
llama_print_timings:       total time = 18277.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's not my book either, " said Minjie helplessly.



llama_print_timings:        load time =   603.49 ms
llama_print_timings:      sample time =     8.85 ms /    20 runs   (    0.44 ms per token,  2260.91 tokens per second)
llama_print_timings: prompt eval time =   603.46 ms /    43 tokens (   14.03 ms per token,    71.26 tokens per second)
llama_print_timings:        eval time =  1610.29 ms /    19 runs   (   84.75 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  2252.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Mr. Bai Muli is one of the book translators assigned by his superior. He brought the book with him which is completely legal, and he also has a duty to take care of it so you should not steal it when he is working – now that you have discovered ideas for attacking socialism in this book.” Lei Wenjie was silent. She knew she had fallen into a trap and any struggle would be futile.



llama_print_timings:        load time =   695.68 ms
llama_print_timings:      sample time =    42.91 ms /    94 runs   (    0.46 ms per token,  2190.68 tokens per second)
llama_print_timings: prompt eval time =   695.63 ms /   109 tokens (    6.38 ms per token,   156.69 tokens per second)
llama_print_timings:        eval time =  7875.38 ms /    93 runs   (   84.68 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  8766.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Although the general public may not know it, Bai Moli had no intention to frame Ye Wenjie when he wrote his letter to the Central Committee. Most of his letters did not even receive a reply and those who received one found themselves promoted or confronted with disaster in a single night. At that time, political tensions were extremely complex and as a journalist, Bai Moli believed himself well-versed on this systematic web of sensitivities, but he overestimated his own abilities and this letter triggered an unknown trap for him. Upon hearing news about it, fear overcame all else and he decided to sacrifice Ye Wenjie in order to protect himself.



llama_print_timings:        load time =   800.42 ms
llama_print_timings:      sample time =    67.40 ms /   148 runs   (    0.46 ms per token,  2195.78 tokens per second)
llama_print_timings: prompt eval time =   800.38 ms /   163 tokens (    4.91 ms per token,   203.65 tokens per second)
llama_print_timings:        eval time = 12926.97 ms /   147 runs   (   87.94 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 14031.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After half a century, historians have come to the consensus that this event of 1969 was a turning point in human history.



llama_print_timings:        load time =   610.87 ms
llama_print_timings:      sample time =    13.82 ms /    31 runs   (    0.45 ms per token,  2243.29 tokens per second)
llama_print_timings: prompt eval time =   610.82 ms /    55 tokens (   11.11 ms per token,    90.04 tokens per second)
llama_print_timings:        eval time =  2629.64 ms /    30 runs   (   87.65 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  3302.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

白沐霖 accidentally became a landmark figure in history, but he himself had no chance to know this. Historians were disappointed to record his boring later life. White Mulsanne worked for The Big Production Paper until 1975 when Inner Mongolia Construction Army was dissolved and transferred to work at the Northeast city's Chinese Culture Society, then went to Canada to teach in a Chinese school till l991, and died of lung cancer there. He didnt talk about Yew Wenjie to anyone during his later life, didnt know if he felt remorse or regret.



llama_print_timings:        load time =   779.73 ms
llama_print_timings:      sample time =    58.45 ms /   130 runs   (    0.45 ms per token,  2224.31 tokens per second)
llama_print_timings: prompt eval time =   779.70 ms /   155 tokens (    5.03 ms per token,   198.79 tokens per second)
llama_print_timings:        eval time = 11033.76 ms /   129 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time = 12075.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Little Ye, the commander treated you very well,” said the regiment commander who had just finished a cigarette. Looking down at the ground, he said, “Your family background is poor, but we don’t treat you as an outsider. Regarding your failure to fit in with the masses and not pushing for progress, I and the guidance counselor have spoken to you repeatedly, hoping to help you. Who knew you would commit such a major error?”



llama_print_timings:        load time =   696.71 ms
llama_print_timings:      sample time =    43.92 ms /    98 runs   (    0.45 ms per token,  2231.48 tokens per second)
llama_print_timings: prompt eval time =   696.67 ms /   117 tokens (    5.95 ms per token,   167.94 tokens per second)
llama_print_timings:        eval time =  8364.84 ms /    97 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  9260.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I had already suspected that her opposition to the Cultural Revolution was deep-seated.



llama_print_timings:        load time =   612.95 ms
llama_print_timings:      sample time =     8.36 ms /    19 runs   (    0.44 ms per token,  2273.00 tokens per second)
llama_print_timings: prompt eval time =   612.91 ms /    58 tokens (   10.57 ms per token,    94.63 tokens per second)
llama_print_timings:        eval time =  1499.19 ms /    18 runs   (   83.29 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  2148.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Send two men to pick her up and bring her along with the evidence back to headquarters.” said Zhang manager without expression.


llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 4299.79 MB (+ 2048.00 MB per state)
llama_model_load_internal: offloading 0 repeating layers to GPU
llama_mo

The three female inmates were taken away one after another, and now only Yeyanjie was left alone in the cell. The small heap of coal in the corner of the cell had run out without anyone adding any more, and soon the stove went out leaving her chilly. She had to wrap herself up in her quilt.



llama_print_timings:        load time =   662.38 ms
llama_print_timings:      sample time =    32.09 ms /    72 runs   (    0.45 ms per token,  2243.55 tokens per second)
llama_print_timings: prompt eval time =   662.33 ms /    87 tokens (    7.61 ms per token,   131.35 tokens per second)
llama_print_timings:        eval time =  6067.58 ms /    71 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  6871.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It was dark when two people arrived, one of whom was an older female official and the other person who accompanied her introduced herself as the military representative of the Supreme People's Procuratorate in a period known as the Cultural Revolution.



llama_print_timings:        load time =   671.91 ms
llama_print_timings:      sample time =    21.44 ms /    49 runs   (    0.44 ms per token,  2285.34 tokens per second)
llama_print_timings: prompt eval time =   671.78 ms /    91 tokens (    7.38 ms per token,   135.46 tokens per second)
llama_print_timings:        eval time =  4081.37 ms /    48 runs   (   85.03 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  4847.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hello, Miss Cheng.” The female officer introduces herself. She is in her forties, wears a military coat and glasses with thick frame, has softly drawn facial features indicating she was once very beautiful, smiles warmly as she speaks, giving people an easygoing demeanor.



llama_print_timings:        load time =   712.47 ms
llama_print_timings:      sample time =    28.89 ms /    66 runs   (    0.44 ms per token,  2284.21 tokens per second)
llama_print_timings: prompt eval time =   712.43 ms /   124 tokens (    5.75 ms per token,   174.05 tokens per second)
llama_print_timings:        eval time =  5519.47 ms /    65 runs   (   84.91 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6362.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's so cold, the stove?” Program Lan Hua looked at the look of the guard director outside and turned back to Man Jie. “Well, young than you thought younger.” Saying this, she sat down on her bed close to Man Jie. She looked into her bag and continued mumbling in a voice like a grandmother, “Le Yang, you're so silly… you read too much and get even more confused. You, you…” She found the documents she was looking for and put them in her bosom holding them close to her chest, looking at Man Jie with love in her eyes. “However, young people make mistakes all the time. I used to do it when I was a member of the literature team in the Fourth Front Army. At a political study session, I said we should join Russia and become part of the Soviet Socialist Union as one new republic so that the international communist force would be stronger. How silly! Everyone had their moments of youth. Just like what you've just been saying, don't have any ideals or burdens, if you make mistakes, recognize th


llama_print_timings:        load time =   970.08 ms
llama_print_timings:      sample time =   160.85 ms /   250 runs   (    0.64 ms per token,  1554.26 tokens per second)
llama_print_timings: prompt eval time =   970.03 ms /   253 tokens (    3.83 ms per token,   260.82 tokens per second)
llama_print_timings:        eval time = 23592.64 ms /   249 runs   (   94.75 ms per token,    10.55 tokens per second)
llama_print_timings:       total time = 25293.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Tang Yunhua's words bridged the distance between Ye Wenjie and her, but Ye Wenjie learned to be careful in disaster. She dared not accept this generous goodwill readily.



llama_print_timings:        load time =   642.63 ms
llama_print_timings:      sample time =    21.85 ms /    49 runs   (    0.45 ms per token,  2242.56 tokens per second)
llama_print_timings: prompt eval time =   642.60 ms /    67 tokens (    9.59 ms per token,   104.26 tokens per second)
llama_print_timings:        eval time =  4056.41 ms /    48 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  4795.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She put the stack of papers on the bed in front of her, handed her a pen and said, "Come, sign your name first, we can talk more clearly and resolve any psychological knot you have." She gave her an air of coaxing like a mother lulling a child to sleep.



llama_print_timings:        load time =   665.09 ms
llama_print_timings:      sample time =    29.37 ms /    65 runs   (    0.45 ms per token,  2213.52 tokens per second)
llama_print_timings: prompt eval time =   665.05 ms /    83 tokens (    8.01 ms per token,   124.80 tokens per second)
llama_print_timings:        eval time =  5467.65 ms /    64 runs   (   85.43 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  6264.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie stood silently and did not move to take the pen.



llama_print_timings:        load time =   600.73 ms
llama_print_timings:      sample time =     9.14 ms /    19 runs   (    0.48 ms per token,  2077.87 tokens per second)
llama_print_timings: prompt eval time =   600.69 ms /    47 tokens (   12.78 ms per token,    78.24 tokens per second)
llama_print_timings:        eval time =  1537.54 ms /    18 runs   (   85.42 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2179.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Li Hua smiled broadly, “You can trust me. I guarantee that the contents of this document have nothing to do with your case. Sign it.”



llama_print_timings:        load time =   618.56 ms
llama_print_timings:      sample time =    14.86 ms /    33 runs   (    0.45 ms per token,  2220.58 tokens per second)
llama_print_timings: prompt eval time =   618.51 ms /    61 tokens (   10.14 ms per token,    98.62 tokens per second)
llama_print_timings:        eval time =  2740.49 ms /    32 runs   (   85.64 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3423.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The follower who stood aside said, “Lei Weijie, the director is really trying to help you. She has been worrying about your matter these days.” 



llama_print_timings:        load time =   717.51 ms
llama_print_timings:      sample time =    16.98 ms /    37 runs   (    0.46 ms per token,  2178.78 tokens per second)
llama_print_timings: prompt eval time =   717.47 ms /   127 tokens (    5.65 ms per token,   177.01 tokens per second)
llama_print_timings:        eval time =  3154.53 ms /    36 runs   (   87.63 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  3948.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You picked up the documents and looked at them under the dim light in the prison cell. The contents of the files did not involve your case, but documented activities and conversations about your dead father between he and some people. The materials were provided by your sister Lei Wenxue who was a radical red guardsman. She had actively reported on her father to the police station many times in order to discredit his reputation. However, when you read this file, you immediately realized that it did not come from your sister's handwriting as it was cold and detailed with a great deal of information about your father's activities. It was like reading a list of people he met with and talked to during the period in question, something that would never be written by a childlike game.



llama_print_timings:        load time =   934.38 ms
llama_print_timings:      sample time =    73.53 ms /   165 runs   (    0.45 ms per token,  2243.89 tokens per second)
llama_print_timings: prompt eval time =   934.32 ms /   237 tokens (    3.94 ms per token,   253.66 tokens per second)
llama_print_timings:        eval time = 14382.20 ms /   164 runs   (   87.70 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 15647.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The content of the material she didn't understand very well, but she could feel that it had something to do with a major national defense project. As the daughter of a physicist, Ye Wenjie guessed that it must have been China's Two Bomb Project. In those days, to undermine someone at a high position, you would need black materials in various areas under his control. But since the Two-Bomb Project was an important central protection area, colluders found it difficult to interfere with it during the Cultural Revolution.



llama_print_timings:        load time =   747.14 ms
llama_print_timings:      sample time =    53.04 ms /   117 runs   (    0.45 ms per token,  2206.09 tokens per second)
llama_print_timings: prompt eval time =   747.10 ms /   132 tokens (    5.66 ms per token,   176.68 tokens per second)
llama_print_timings:        eval time = 10059.26 ms /   116 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 11045.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a result of the family background issue, my father was not able to pass the political screening for the Two Bomb Project and only conducted some theoretical work on its periphery, but it is easier for us to use him than to use the core figures in the Two-Bomb Engineering. I am not sure about the authenticity of the contents mentioned above, but can confirm that each punctuation mark has lethal political implications. In addition to the ultimate target, many people's destinies may fall into the abyss due to this material. The end of the document is my sister's signature, and I am asked to sign it as an additional witness. Notice that there are already three signatures on that position.



llama_print_timings:        load time =   777.67 ms
llama_print_timings:      sample time =    68.42 ms /   153 runs   (    0.45 ms per token,  2236.22 tokens per second)
llama_print_timings: prompt eval time =   777.63 ms /   156 tokens (    4.98 ms per token,   200.61 tokens per second)
llama_print_timings:        eval time = 13445.81 ms /   152 runs   (   88.46 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 14530.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know what my father and these people said.



llama_print_timings:        load time =   593.71 ms
llama_print_timings:      sample time =     6.50 ms /    14 runs   (    0.46 ms per token,  2152.52 tokens per second)
llama_print_timings: prompt eval time =   593.67 ms /    53 tokens (   11.20 ms per token,    89.28 tokens per second)
llama_print_timings:        eval time =  1125.65 ms /    13 runs   (   86.59 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  1747.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"How could you not know this? So many of these conversations are had at your home and your little sister knows but you don't?"



llama_print_timings:        load time =   608.37 ms
llama_print_timings:      sample time =    14.18 ms /    31 runs   (    0.46 ms per token,  2186.18 tokens per second)
llama_print_timings: prompt eval time =   608.34 ms /    53 tokens (   11.48 ms per token,    87.12 tokens per second)
llama_print_timings:        eval time =  2502.96 ms /    30 runs   (   83.43 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  3176.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know.



llama_print_timings:        load time =   591.59 ms
llama_print_timings:      sample time =     3.08 ms /     7 runs   (    0.44 ms per token,  2274.94 tokens per second)
llama_print_timings: prompt eval time =   591.55 ms /    34 tokens (   17.40 ms per token,    57.48 tokens per second)
llama_print_timings:        eval time =   510.72 ms /     6 runs   (   85.12 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  1116.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

These conversation contents are real, you have to believe the organization.



llama_print_timings:        load time =   580.92 ms
llama_print_timings:      sample time =     6.58 ms /    15 runs   (    0.44 ms per token,  2281.02 tokens per second)
llama_print_timings: prompt eval time =   580.88 ms /    42 tokens (   13.83 ms per token,    72.30 tokens per second)
llama_print_timings:        eval time =  1190.85 ms /    14 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  1800.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I didn't say it was true, but I don't know for sure so I can't sign.



llama_print_timings:        load time =   581.28 ms
llama_print_timings:      sample time =    11.03 ms /    25 runs   (    0.44 ms per token,  2266.75 tokens per second)
llama_print_timings: prompt eval time =   581.25 ms /    46 tokens (   12.64 ms per token,    79.14 tokens per second)
llama_print_timings:        eval time =  2043.72 ms /    24 runs   (   85.15 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  2674.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving her alone, he took out a roll of paper and some writing instruments from his bag. 他从袋子里拿出一张纸和几支写字笔



llama_print_timings:        load time =   636.97 ms
llama_print_timings:      sample time =    15.48 ms /    35 runs   (    0.44 ms per token,  2260.69 tokens per second)
llama_print_timings: prompt eval time =   636.93 ms /    73 tokens (    8.73 ms per token,   114.61 tokens per second)
llama_print_timings:        eval time =  2896.72 ms /    34 runs   (   85.20 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3600.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hey, let's make a deal. Your case is very flexible and you don't have to go through the legal process. Just attend a learning class and write a few self-criticisms good enough, and you can return to the militia team.” “The case is not too serious for you, ” he said. “It depends on your own feelings about it. You know, I'm very clear with you that sentencing you as an enemy of the revolution is totally possible. But now in our system, both public security and legal authorities are left-leaning. The right-wing way to do things is only a methodical issue, whereas the ultimate direction must be determined by the military commandants.” “Of course, we can only say this privately,” he added.



llama_print_timings:        load time =   778.99 ms
llama_print_timings:      sample time =    74.62 ms /   167 runs   (    0.45 ms per token,  2237.95 tokens per second)
llama_print_timings: prompt eval time =   778.92 ms /   157 tokens (    4.96 ms per token,   201.56 tokens per second)
llama_print_timings:        eval time = 14368.80 ms /   166 runs   (   86.56 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 15484.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The person accompanying said, "Cheng Representative is really in your best interest. You can see that there are three witnesses who have signed, and what difference would it make if you sign not." Ye Wenjie, don't be silly for a moment.



llama_print_timings:        load time =   634.27 ms
llama_print_timings:      sample time =    26.63 ms /    59 runs   (    0.45 ms per token,  2215.30 tokens per second)
llama_print_timings: prompt eval time =   634.23 ms /    70 tokens (    9.06 ms per token,   110.37 tokens per second)
llama_print_timings:        eval time =  4992.70 ms /    58 runs   (   86.08 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  5747.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, Xiao Ye, looking at you this smart kid just ruined it. I really want to save you! You must cooperate with me. Look at me, am I going to hurt you?” a novel by the same classmate two million years later



llama_print_timings:        load time =   639.04 ms
llama_print_timings:      sample time =    25.20 ms /    56 runs   (    0.45 ms per token,  2221.96 tokens per second)
llama_print_timings: prompt eval time =   638.99 ms /    75 tokens (    8.52 ms per token,   117.37 tokens per second)
llama_print_timings:        eval time =  4675.30 ms /    55 runs   (   85.01 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  5426.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie didn't attend the military representative, she saw her father's blood.



llama_print_timings:        load time =   605.66 ms
llama_print_timings:      sample time =    10.09 ms /    23 runs   (    0.44 ms per token,  2279.03 tokens per second)
llama_print_timings: prompt eval time =   605.62 ms /    43 tokens (   14.08 ms per token,    71.00 tokens per second)
llama_print_timings:        eval time =  1846.16 ms /    22 runs   (   83.92 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  2496.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

程丽华 silently looked at the document for a while and then put it back into her briefcase. She stood up slowly, her face still expressing a warm emotion that did not vanish despite her actions. As she moved to the corner where a bucket of water was kept, she picked it up, splashed half of the water on Le Wenjie's body and on the bedding sheets in one steady yet swift movement. With an angry remark, she threw the bucket out and went away.



llama_print_timings:        load time =   823.68 ms
llama_print_timings:      sample time =    47.30 ms /   107 runs   (    0.44 ms per token,  2262.25 tokens per second)
llama_print_timings: prompt eval time =   823.64 ms /   178 tokens (    4.63 ms per token,   216.11 tokens per second)
llama_print_timings:        eval time =  9371.92 ms /   106 runs   (   88.41 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 10407.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The warden of the detention center walked out last, and looked at wet clothes-stained Wenjie with a cold glance. “Thud”, he closed the door shut and locked it tightly.



llama_print_timings:        load time =   620.30 ms
llama_print_timings:      sample time =    19.75 ms /    45 runs   (    0.44 ms per token,  2278.94 tokens per second)
llama_print_timings: prompt eval time =   620.26 ms /    60 tokens (   10.34 ms per token,    96.73 tokens per second)
llama_print_timings:        eval time =  3737.73 ms /    44 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  4444.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In this freezing winter in Inner Mongolia, the cold seeped through her wet clothes like a huge hand squeezing her. She could hear her own teeth chattering and eventually this sound disappeared as well. The deep cold permeating into her bones made the world before her become milky white. The entire universe was a massive ice block to which she, as the only living creature within it, was connected. Without firewood in hand, the little girl realized that all she had were illusions…



llama_print_timings:        load time =   733.57 ms
llama_print_timings:      sample time =    48.31 ms /   109 runs   (    0.44 ms per token,  2256.17 tokens per second)
llama_print_timings: prompt eval time =   733.52 ms /   128 tokens (    5.73 ms per token,   174.50 tokens per second)
llama_print_timings:        eval time =  9365.83 ms /   108 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10317.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She was enveloped in a cloud of ice and then saw a building, from which she could see a girl waving a flag. Her slim frame and the expanse of the flag created an ironic contrast. That was Ye Wenjie's sister Yeyan Shu. Since he separated from his reactionary academic authorities, Ye Wenjie had never heard anything about her since she was brutally murdered in a mass campaign two years ago. In a daze, the flag-waving person changed into Li Baolin, whose glasses reflected the fire below; then again into Cheng Lei, into mother Sholing and even father. The flag-holder kept changing and waving the flag without stopping, like a eternal pendulum counting down Ye Wenjie's remaining life.



llama_print_timings:        load time =   840.66 ms
llama_print_timings:      sample time =    78.04 ms /   174 runs   (    0.45 ms per token,  2229.57 tokens per second)
llama_print_timings: prompt eval time =   840.62 ms /   187 tokens (    4.50 ms per token,   222.46 tokens per second)
llama_print_timings:        eval time = 15205.47 ms /   173 runs   (   87.89 ms per token,    11.38 tokens per second)
llama_print_timings:       total time = 16406.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The flag faded, everything faded, and that piece of universe-filled ice wrapped her up again, this time in black.



llama_print_timings:        load time =   627.26 ms
llama_print_timings:      sample time =    12.54 ms /    28 runs   (    0.45 ms per token,  2232.32 tokens per second)
llama_print_timings: prompt eval time =   627.23 ms /    60 tokens (   10.45 ms per token,    95.66 tokens per second)
llama_print_timings:        eval time =  2396.51 ms /    27 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  3079.12 ms


translated 102.


In [None]:
# Translate chapter 5 of the "three_body" book using the shared `llm` instance.
# NOTE(review): this cell is shown without an execution count, and the
# `range(5, 37)` loop cell further down also translates chapter 5 -- running
# both repeats the work. Confirm whether this cell is still needed.
translate_chapter(llm, "three_body", 5)

In [None]:
# Translate chapter 6 of the "three_body" book using the shared `llm` instance.
# NOTE(review): this cell is shown without an execution count, and the
# `range(5, 37)` loop cell further down also translates chapter 6 -- running
# both repeats the work. Confirm whether this cell is still needed.
translate_chapter(llm, "three_body", 6)

In [18]:
# Batch-translate chapters 5 through 36 of "three_body", one call per chapter
# (the upper bound 37 of range() is exclusive).
# NOTE(review): the captured output of this cell prints a
# "llama.cpp: loading model from ..." banner before every translated paragraph,
# which suggests the model is re-created per paragraph rather than reusing the
# `llm` passed in here -- confirm in translate_paragraph; reusing one instance
# would avoid the repeated load cost.
# NOTE(review): consider silencing or capturing the llama.cpp timing logs
# (e.g. with %%capture) so the notebook output stays readable.
for i in range(5, 37):
    translate_chapter(llm, "three_body", i)

llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

She had been in darkness for so long that she couldn't tell if it had only been a few moments or several days. The heavy crashing sounds reverberated from every direction, as though there were some immense mechanical gadgets digging under her very core of being. Despite the darkness, the sound seemed to become more and more real, and she finally realized that the source was not heaven nor hell. She made an effort to open her eyelids, which felt heavy as stones……First, she saw a lamp deep embedded within the ceiling, surrounded by iron mesh to protect it from impacts. It emitted dim light, and the ceiling seemed to be made of metal.



llama_print_timings:        load time =   840.15 ms
llama_print_timings:      sample time =    70.76 ms /   145 runs   (    0.49 ms per token,  2049.27 tokens per second)
llama_print_timings: prompt eval time =   840.11 ms /   170 tokens (    4.94 ms per token,   202.35 tokens per second)
llama_print_timings:        eval time = 12852.83 ms /   144 runs   (   89.26 ms per token,    11.20 tokens per second)
llama_print_timings:       total time = 14007.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She heard a male voice calling her name softly.



llama_print_timings:        load time =   588.15 ms
llama_print_timings:      sample time =     5.93 ms /    12 runs   (    0.49 ms per token,  2023.61 tokens per second)
llama_print_timings: prompt eval time =   588.13 ms /    40 tokens (   14.70 ms per token,    68.01 tokens per second)
llama_print_timings:        eval time =   925.84 ms /    11 runs   (   84.17 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  1540.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You're running a fever.



llama_print_timings:        load time =   641.33 ms
llama_print_timings:      sample time =     4.54 ms /     9 runs   (    0.50 ms per token,  1981.51 tokens per second)
llama_print_timings: prompt eval time =   641.31 ms /    40 tokens (   16.03 ms per token,    62.37 tokens per second)
llama_print_timings:        eval time =   675.68 ms /     8 runs   (   84.46 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1337.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Where are we?” Ye Wenjie asked helplessly, feeling that her voice wasn’t coming from herself.



llama_print_timings:        load time =   601.36 ms
llama_print_timings:      sample time =    12.54 ms /    28 runs   (    0.45 ms per token,  2233.21 tokens per second)
llama_print_timings: prompt eval time =   601.32 ms /    52 tokens (   11.56 ms per token,    86.48 tokens per second)
llama_print_timings:        eval time =  2295.02 ms /    27 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2953.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She felt a weakness when she was on the plane. Then, she fell asleep again and woke up with numbness disappearing and pain coming in turn: Joints of head and limbs were so painful that her breath was hot, mouth saliva is also very painful and thick.



llama_print_timings:        load time =   698.82 ms
llama_print_timings:      sample time =    29.40 ms /    65 runs   (    0.45 ms per token,  2210.66 tokens per second)
llama_print_timings: prompt eval time =   698.77 ms /   108 tokens (    6.47 ms per token,   154.56 tokens per second)
llama_print_timings:        eval time =  5463.07 ms /    64 runs   (   85.36 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  6293.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Wenjie turned around and saw two people dressed in the same military coat as Xiang Representative, only that they all wore red star-patterned hats with the open coats revealing the red patches on their army uniforms. One soldier was wearing glasses.



llama_print_timings:        load time =   688.27 ms
llama_print_timings:      sample time =    28.26 ms /    64 runs   (    0.44 ms per token,  2264.60 tokens per second)
llama_print_timings: prompt eval time =   688.23 ms /   102 tokens (    6.75 ms per token,   148.21 tokens per second)
llama_print_timings:        eval time =  5348.90 ms /    63 runs   (   84.90 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6160.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She tried to stand up, but she successfully did it. She saw the other side of the window, and outside was a roll of cloudy sea moved away by the sun; she immediately retracted her gaze and saw that the narrow cabin was full of army-green iron boxes. She guessed herself to be on a helicopter.



llama_print_timings:        load time =   701.33 ms
llama_print_timings:      sample time =    32.55 ms /    72 runs   (    0.45 ms per token,  2211.71 tokens per second)
llama_print_timings: prompt eval time =   701.30 ms /   114 tokens (    6.15 ms per token,   162.56 tokens per second)
llama_print_timings:        eval time =  6087.75 ms /    71 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  6932.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Lie down, ” said the soldier with glasses. He helped her lie back down and cover her up with her coat.



llama_print_timings:        load time =   604.82 ms
llama_print_timings:      sample time =    12.80 ms /    28 runs   (    0.46 ms per token,  2187.33 tokens per second)
llama_print_timings: prompt eval time =   604.78 ms /    62 tokens (    9.75 ms per token,   102.52 tokens per second)
llama_print_timings:        eval time =  2318.87 ms /    27 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2980.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Ye Wenjie, is this paper yours?” another soldier asked her. He held out a translated Chinese science magazine to her. She saw that the title of the article in the issue was ‘The Possible Existence of Energy Interfaces within the Solar Radiation Layer and their Reflective Properties’. On the cover, she saw a photo of the 1966 issue of Celestial Physics Magazine.



llama_print_timings:        load time =   683.11 ms
llama_print_timings:      sample time =    40.01 ms /    90 runs   (    0.44 ms per token,  2249.55 tokens per second)
llama_print_timings: prompt eval time =   683.06 ms /   101 tokens (    6.76 ms per token,   147.86 tokens per second)
llama_print_timings:        eval time =  7593.97 ms /    89 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  8453.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Of course, this is a no-brainer."



llama_print_timings:        load time =   587.95 ms
llama_print_timings:      sample time =     5.68 ms /    13 runs   (    0.44 ms per token,  2289.14 tokens per second)
llama_print_timings: prompt eval time =   587.91 ms /    51 tokens (   11.53 ms per token,    86.75 tokens per second)
llama_print_timings:        eval time =  1015.08 ms /    12 runs   (   84.59 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1628.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is Red Bank Base's Li Zhi-cheng政委. I am Yang Hui-ning, the chief engineer of the base. In one hour, you can rest."



llama_print_timings:        load time =   610.79 ms
llama_print_timings:      sample time =    18.16 ms /    40 runs   (    0.45 ms per token,  2202.64 tokens per second)
llama_print_timings: prompt eval time =   610.75 ms /    61 tokens (   10.01 ms per token,    99.88 tokens per second)
llama_print_timings:        eval time =  3371.93 ms /    39 runs   (   86.46 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4064.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are Yang Weining? Ye Wenjie didn't say anything, only looked at him with a surprised expression and realized that he was trying to hide the fact that they knew each other. Yang Weining once was my research supervisor, and he graduated when I entered college for my freshman year. I still remember how he first came to our home: then he had just been admitted to graduate school, and he visited with his professor to discuss their project proposal. He told me that he wanted to do something practical and applied instead of studying theoretical physics. My father replied at that time: I don't have any objections, but we are a Department of Theoretical Physics after all, how do you explain this request? Yang Weining answered: I want to contribute to the society in some way, which is also what I think is most important. My father then stayed silent for a while before finally saying: studying theoretical physics is not easy to make mistakes in thinking.



llama_print_timings:        load time =   896.26 ms
llama_print_timings:      sample time =    96.72 ms /   203 runs   (    0.48 ms per token,  2098.95 tokens per second)
llama_print_timings: prompt eval time =   896.21 ms /   220 tokens (    4.07 ms per token,   245.48 tokens per second)
llama_print_timings:        eval time = 18031.71 ms /   202 runs   (   89.27 ms per token,    11.20 tokens per second)
llama_print_timings:       total time = 19364.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Yang Weining is a very talented person with solid mathematical foundations and agile thinking. During his short graduate school career, he maintained an uncertain distance from his advisor and they remained strangers to each other. Yao Wenjie met Yang Weining frequently at that time, possibly influenced by her father. She paid little attention to him and didn't know if he had also paid attention to her. Later, Yang Weining graduated smoothly and broke off relations with his advisor soon after.



llama_print_timings:        load time =   746.05 ms
llama_print_timings:      sample time =    48.82 ms /   108 runs   (    0.45 ms per token,  2212.03 tokens per second)
llama_print_timings: prompt eval time =   746.02 ms /   123 tokens (    6.07 ms per token,   164.88 tokens per second)
llama_print_timings:        eval time =  9352.15 ms /   107 runs   (   87.40 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 10316.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After closing her eyes once more, Yue Wenjie was left alone in the small cabin. She could hear the engine's roar through the narrow space but managed to catch what two soldiers were discussing.



llama_print_timings:        load time =   630.16 ms
llama_print_timings:      sample time =    20.50 ms /    46 runs   (    0.45 ms per token,  2244.23 tokens per second)
llama_print_timings: prompt eval time =   630.12 ms /    76 tokens (    8.29 ms per token,   120.61 tokens per second)
llama_print_timings:        eval time =  3918.98 ms /    45 runs   (   87.09 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  4640.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I still think this is not a good idea.” This was Lei Zhiqing’s voice.



llama_print_timings:        load time =   579.63 ms
llama_print_timings:      sample time =    11.17 ms /    24 runs   (    0.47 ms per token,  2148.42 tokens per second)
llama_print_timings: prompt eval time =   579.59 ms /    45 tokens (   12.88 ms per token,    77.64 tokens per second)
llama_print_timings:        eval time =  1970.52 ms /    23 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2600.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A: “Can you provide what I need from a regular channel?”



llama_print_timings:        load time =   596.94 ms
llama_print_timings:      sample time =     7.19 ms /    15 runs   (    0.48 ms per token,  2087.39 tokens per second)
llama_print_timings: prompt eval time =   596.88 ms /    47 tokens (   12.70 ms per token,    78.74 tokens per second)
llama_print_timings:        eval time =  1254.13 ms /    14 runs   (   89.58 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  1884.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, I have worked very hard. This specialty is not available in the military and if you look for it from civilian sources, the problems get even worse as the confidentiality requirement for this project requires a base isolation work period for a long time. So what should be done about family members who follow army along? They can't go to the base either because nobody wants them there.” “You can always force them to come here but with this kind of job, if they are not comfortable it is impossible for them to do anything.”



llama_print_timings:        load time =   740.22 ms
llama_print_timings:      sample time =    53.98 ms /   114 runs   (    0.47 ms per token,  2111.89 tokens per second)
llama_print_timings: prompt eval time =   740.18 ms /   134 tokens (    5.52 ms per token,   181.04 tokens per second)
llama_print_timings:        eval time = 10004.42 ms /   113 runs   (   88.53 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 10988.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thus, the only solution is this.



llama_print_timings:        load time =   583.25 ms
llama_print_timings:      sample time =     4.40 ms /    10 runs   (    0.44 ms per token,  2271.18 tokens per second)
llama_print_timings: prompt eval time =   583.22 ms /    36 tokens (   16.20 ms per token,    61.73 tokens per second)
llama_print_timings:        eval time =   764.31 ms /     9 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1367.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's completely unconventional.



llama_print_timings:        load time =   576.21 ms
llama_print_timings:      sample time =     4.45 ms /    10 runs   (    0.44 ms per token,  2249.72 tokens per second)
llama_print_timings: prompt eval time =   576.16 ms /    38 tokens (   15.16 ms per token,    65.95 tokens per second)
llama_print_timings:        eval time =   790.04 ms /     9 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  1385.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This project goes against the norm and if anything goes wrong, I will be responsible.”



llama_print_timings:        load time =   589.55 ms
llama_print_timings:      sample time =     8.36 ms /    19 runs   (    0.44 ms per token,  2273.27 tokens per second)
llama_print_timings: prompt eval time =   589.51 ms /    44 tokens (   13.40 ms per token,    74.64 tokens per second)
llama_print_timings:        eval time =  1545.82 ms /    18 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2171.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

My Yang Boss, can you handle this responsibility? You are buried in technology, and “Red Bank” is different from other national defense priority projects. It is complex on technical level and outside it is even more complex.



llama_print_timings:        load time =   625.23 ms
llama_print_timings:      sample time =    20.57 ms /    46 runs   (    0.45 ms per token,  2235.83 tokens per second)
llama_print_timings: prompt eval time =   625.18 ms /    71 tokens (    8.81 ms per token,   113.57 tokens per second)
llama_print_timings:        eval time =  3878.93 ms /    45 runs   (   86.20 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  4595.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's a fact.



llama_print_timings:        load time =   573.37 ms
llama_print_timings:      sample time =     3.17 ms /     7 runs   (    0.45 ms per token,  2211.69 tokens per second)
llama_print_timings: prompt eval time =   573.35 ms /    36 tokens (   15.93 ms per token,    62.79 tokens per second)
llama_print_timings:        eval time =   515.73 ms /     6 runs   (   85.95 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  1103.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When the plane landed in the evening, Ye Wenjie declined Yang Weining and Lei Zhicheng's assistance as they tried to help her down. She struggled to walk down the aircraft while a strong wind tried to knock her down, causing an audible howl from the still rotating blades of the rotor. The smell of forest in the wind was familiar to Ye Wenjie; she knew this wind and vice versa. It was the wind of Baichengling Mountain.



llama_print_timings:        load time =   681.76 ms
llama_print_timings:      sample time =    50.23 ms /   110 runs   (    0.46 ms per token,  2190.14 tokens per second)
llama_print_timings: prompt eval time =   681.72 ms /   111 tokens (    6.14 ms per token,   162.82 tokens per second)
llama_print_timings:        eval time =  9534.70 ms /   109 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 10443.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She soon heard another sound, a deep and rumbling hum that formed the backdrop to the world. This was the sound of the parabolic antennae in the wind some distance away. Only when it was closer did she truly feel the immense size of the radar array. Ye Wenjie's life had circled back to this spot—now she was at the top of the peak.



llama_print_timings:        load time =   696.64 ms
llama_print_timings:      sample time =    39.23 ms /    86 runs   (    0.46 ms per token,  2192.20 tokens per second)
llama_print_timings: prompt eval time =   696.60 ms /   108 tokens (    6.45 ms per token,   155.04 tokens per second)
llama_print_timings:        eval time =  7344.71 ms /    85 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  8219.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving her construction battalion in the direction of a forest, she could only see a murky sea of trees.



llama_print_timings:        load time =   630.80 ms
llama_print_timings:      sample time =    11.11 ms /    25 runs   (    0.44 ms per token,  2250.43 tokens per second)
llama_print_timings: prompt eval time =   630.77 ms /    61 tokens (   10.34 ms per token,    96.71 tokens per second)
llama_print_timings:        eval time =  2111.97 ms /    24 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  2792.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The helicopter clearly wasn't meant for her, and several soldiers passed by as they rolled out the military-green crates from the helicopter. No one looked at her at all, and she went on with Reyes, Yang Wenning, and three others toward the base's main gate. Ye Wenjie noticed how wide was the radar peak below, and how a small cluster of white buildings looked like an elaborate puzzle next to it. They continued walking towards the gates guarded by two soldiers.



llama_print_timings:        load time =   765.41 ms
llama_print_timings:      sample time =    50.33 ms /   111 runs   (    0.45 ms per token,  2205.36 tokens per second)
llama_print_timings: prompt eval time =   765.37 ms /   145 tokens (    5.28 ms per token,   189.45 tokens per second)
llama_print_timings:        eval time =  9725.48 ms /   110 runs   (   88.41 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 10716.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Zhicheng turned to Ye Wenjie and said gravely, “Ye Wenjie, your counterrevolutionary crimes are beyond doubt, and you will have to face the trial according to law. Here, you have a chance to atone for your crime by helping with a scientific research project of national defense that needs your expertise, specifically, please Yang Jianengineer explain it to you in detail. You should consider this carefully.” He pointed toward the satellite and then nodded to say goodbye to Yang Wenning before entering the base with the soldiers carrying the materials.



llama_print_timings:        load time =   771.57 ms
llama_print_timings:      sample time =    57.52 ms /   127 runs   (    0.45 ms per token,  2207.74 tokens per second)
llama_print_timings: prompt eval time =   771.53 ms /   148 tokens (    5.21 ms per token,   191.83 tokens per second)
llama_print_timings:        eval time = 11092.30 ms /   126 runs   (   88.03 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 12125.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Young Wang Ning and the others walked away. Yang Wei Ning gestured for Lei Wen Jie to follow him further, apparently so as not to be heard by the guards. At this point, he no longer concealed their acquaintance: “Lei Wen Jie, I can clearly tell you, this is not an opportunity.” He turned his head toward the base and explained to her, “It’s a top-secret research project and, with your identity, entering that door would…” He paused for a while before adding, seemingly to intensify the sound of the antenna on the wind, “probably permanently.”



llama_print_timings:        load time =   844.03 ms
llama_print_timings:      sample time =    61.48 ms /   138 runs   (    0.45 ms per token,  2244.52 tokens per second)
llama_print_timings: prompt eval time =   843.98 ms /   191 tokens (    4.42 ms per token,   226.31 tokens per second)
llama_print_timings:        eval time = 12119.24 ms /   137 runs   (   88.46 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 13240.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I'm going in.” said Ye Wenjie softly.



llama_print_timings:        load time =   598.00 ms
llama_print_timings:      sample time =     7.84 ms /    17 runs   (    0.46 ms per token,  2167.54 tokens per second)
llama_print_timings: prompt eval time =   597.98 ms /    41 tokens (   14.58 ms per token,    68.56 tokens per second)
llama_print_timings:        eval time =  1381.69 ms /    16 runs   (   86.36 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  2014.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining was surprised by her quick response. “You don't have to make a decision this quickly, you can go back on the plane first, it will take off three hours later and I'll give you a ride back.”



llama_print_timings:        load time =   633.02 ms
llama_print_timings:      sample time =    23.24 ms /    51 runs   (    0.46 ms per token,  2194.30 tokens per second)
llama_print_timings: prompt eval time =   632.98 ms /    70 tokens (    9.04 ms per token,   110.59 tokens per second)
llama_print_timings:        eval time =  4334.29 ms /    50 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  5073.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't want to go back, let's go in.” Lei Wenjie's voice remained light, but there was a firmness in it. Now besides the world after death she did not know whether there is another place, she most wants to go to that place with isolation from the world on the top of the mountain, where she has a kind of familiar safety.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Be careful, you need to think about what this means.



llama_print_timings:        load time =   590.52 ms
llama_print_timings:      sample time =     6.62 ms /    13 runs   (    0.51 ms per token,  1964.93 tokens per second)
llama_print_timings: prompt eval time =   590.50 ms /    41 tokens (   14.40 ms per token,    69.43 tokens per second)
llama_print_timings:        eval time =  1082.40 ms /    12 runs   (   90.20 ms per token,    11.09 tokens per second)
llama_print_timings:       total time =  1703.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I can stay here for a long time.



llama_print_timings:        load time =   592.71 ms
llama_print_timings:      sample time =     4.44 ms /    10 runs   (    0.44 ms per token,  2252.76 tokens per second)
llama_print_timings: prompt eval time =   592.67 ms /    38 tokens (   15.60 ms per token,    64.12 tokens per second)
llama_print_timings:        eval time =   753.27 ms /     9 runs   (   83.70 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  1366.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Yang Weining bowed his head in silence, looking far off. He seems to be forcing Yang Wei Ning to think for a while before he started. Ye Wenjie stood silently watching the distant place where Dawson's peak disappeared into thick nights. In this freezing weather, there is no much time, and Yang Weining decided to leave quickly. Like pushing off Yang Wei Ning in his haste. However, Yang Wei Ning紧ly followed him and entered the Red Bank Base through the gate. Two guards slammed the heavy doors shut after they passed by them.



llama_print_timings:        load time =   765.28 ms
llama_print_timings:      sample time =    63.37 ms /   127 runs   (    0.50 ms per token,  2004.20 tokens per second)
llama_print_timings: prompt eval time =   765.23 ms /   147 tokens (    5.21 ms per token,   192.10 tokens per second)
llama_print_timings:        eval time = 11140.51 ms /   126 runs   (   88.42 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 12197.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After walking a while, Yang Weining stopped and pointed at the antenna to Wen Jie. “This is a large-scale weapons research project that could be more significant than an atomic bomb or hydrogen bomb.”



llama_print_timings:        load time =   658.55 ms
llama_print_timings:      sample time =    20.34 ms /    46 runs   (    0.44 ms per token,  2261.11 tokens per second)
llama_print_timings: prompt eval time =   658.52 ms /    71 tokens (    9.27 ms per token,   107.82 tokens per second)
llama_print_timings:        eval time =  3949.70 ms /    45 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  4699.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he passed by the largest building in the base, Yang Wenning straightened and pushed open the door. Ye Wenjie saw the words "Launch Master Control Room" on the door when she reached the gate, then entered. As soon as she stepped inside, a hot and oily aroma welcomed her. She saw a large hall filled with various equipment and signal lights flashing on show screens. Dozens of dressed in military attire's operation staff were sitting in densely arranged rows of instruments before them, seemingly hidden behind the deep trench. Operational commands come and go, appearing to be tense and chaotic. "It's warmer in here. You wait a while, I will arrange your quarters then come back." Yang Wenning said to Ye Wenjie, pointing to a chair next to a table where someone was already sitting with a gun.



llama_print_timings:        load time =   905.30 ms
llama_print_timings:      sample time =   132.55 ms /   192 runs   (    0.69 ms per token,  1448.48 tokens per second)
llama_print_timings: prompt eval time =   905.25 ms /   218 tokens (    4.15 ms per token,   240.82 tokens per second)
llama_print_timings:        eval time = 18950.31 ms /   191 runs   (   99.22 ms per token,    10.08 tokens per second)
llama_print_timings:       total time = 20459.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I'll wait outside,” said Ye Wenjie, stopping in her tracks.



llama_print_timings:        load time =   617.45 ms
llama_print_timings:      sample time =     9.48 ms /    20 runs   (    0.47 ms per token,  2109.48 tokens per second)
llama_print_timings: prompt eval time =   617.41 ms /    44 tokens (   14.03 ms per token,    71.27 tokens per second)
llama_print_timings:        eval time =  1700.86 ms /    19 runs   (   89.52 ms per token,    11.17 tokens per second)
llama_print_timings:       total time =  2362.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining smiled and said, "You will be a staff member of the base from now on. You can go anywhere except for a few restricted places." Then he had a look that suggested hesitation about this statement's other meaning: you cannot leave here anymore.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“I'm still going outside.” Lei Wenjie insisted.



llama_print_timings:        load time =   617.47 ms
llama_print_timings:      sample time =     8.18 ms /    18 runs   (    0.45 ms per token,  2199.68 tokens per second)
llama_print_timings: prompt eval time =   617.43 ms /    42 tokens (   14.70 ms per token,    68.02 tokens per second)
llama_print_timings:        eval time =  1537.69 ms /    17 runs   (   90.45 ms per token,    11.06 tokens per second)
llama_print_timings:       total time =  2192.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Wenning looked at the guard who didn't pay attention to them, it seems he understood Ye Wenjie. She walked out of the control room with him "You come to this place while I get back in a few minutes. We need someone to heat the room quickly as our base is still not good enough for warmth." said Yang Wenning before walking away fast.



llama_print_timings:        load time =   694.69 ms
llama_print_timings:      sample time =    52.13 ms /    83 runs   (    0.63 ms per token,  1592.30 tokens per second)
llama_print_timings: prompt eval time =   694.65 ms /   101 tokens (    6.88 ms per token,   145.40 tokens per second)
llama_print_timings:        eval time =  7805.23 ms /    82 runs   (   95.19 ms per token,    10.51 tokens per second)
llama_print_timings:       total time =  8734.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hello, Miss Ye. I am the computer program. Welcome to CyberWorld.”



llama_print_timings:        load time =   724.70 ms
llama_print_timings:      sample time =     8.44 ms /    19 runs   (    0.44 ms per token,  2251.72 tokens per second)
llama_print_timings: prompt eval time =   724.66 ms /   112 tokens (    6.47 ms per token,   154.55 tokens per second)
llama_print_timings:        eval time =  1594.52 ms /    18 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  2356.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The People's Liberation Army Rocket Force, Red Bottom Engineering No. 147 conducted a regular firing operation and the authorization confirmation has been completed in 30 seconds.



llama_print_timings:        load time =   626.65 ms
llama_print_timings:      sample time =    19.19 ms /    41 runs   (    0.47 ms per token,  2136.31 tokens per second)
llama_print_timings: prompt eval time =   626.61 ms /    57 tokens (   10.99 ms per token,    90.97 tokens per second)
llama_print_timings:        eval time =  3556.85 ms /    40 runs   (   88.92 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  4273.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Target Category: Class III; Coordinate Sequence Number: BN20l97F; Location check finished, 25 seconds remaining!”



llama_print_timings:        load time =   639.22 ms
llama_print_timings:      sample time =    15.25 ms /    33 runs   (    0.46 ms per token,  2164.08 tokens per second)
llama_print_timings: prompt eval time =   639.18 ms /    61 tokens (   10.48 ms per token,    95.44 tokens per second)
llama_print_timings:        eval time =  2860.18 ms /    32 runs   (   89.38 ms per token,    11.19 tokens per second)
llama_print_timings:       total time =  3569.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Document number: 22; Additional information: None; Continuous transmission: No; Final document checked, 20 seconds count down!



llama_print_timings:        load time =   619.95 ms
llama_print_timings:      sample time =    14.90 ms /    32 runs   (    0.47 ms per token,  2147.36 tokens per second)
llama_print_timings: prompt eval time =   619.91 ms /    59 tokens (   10.51 ms per token,    95.17 tokens per second)
llama_print_timings:        eval time =  2778.89 ms /    31 runs   (   89.64 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  3466.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Energy report, all right!”



llama_print_timings:        load time =   591.50 ms
llama_print_timings:      sample time =     4.60 ms /    10 runs   (    0.46 ms per token,  2174.86 tokens per second)
llama_print_timings: prompt eval time =   591.46 ms /    37 tokens (   15.99 ms per token,    62.56 tokens per second)
llama_print_timings:        eval time =   849.44 ms /     9 runs   (   94.38 ms per token,    10.60 tokens per second)
llama_print_timings:       total time =  1461.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Encoding report: Normal.”



llama_print_timings:        load time =   587.35 ms
llama_print_timings:      sample time =     3.56 ms /     8 runs   (    0.45 ms per token,  2245.93 tokens per second)
llama_print_timings: prompt eval time =   587.33 ms /    37 tokens (   15.87 ms per token,    63.00 tokens per second)
llama_print_timings:        eval time =   601.10 ms /     7 runs   (   85.87 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1203.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The amplifier report is normal.”



llama_print_timings:        load time =   665.71 ms
llama_print_timings:      sample time =     8.27 ms /     9 runs   (    0.92 ms per token,  1088.80 tokens per second)
llama_print_timings: prompt eval time =   665.67 ms /    38 tokens (   17.52 ms per token,    57.09 tokens per second)
llama_print_timings:        eval time =   824.88 ms /     8 runs   (  103.11 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =  1529.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Monitoring report: Within the scope of authorization!



llama_print_timings:        load time =   588.42 ms
llama_print_timings:      sample time =     5.31 ms /    12 runs   (    0.44 ms per token,  2261.16 tokens per second)
llama_print_timings: prompt eval time =   588.38 ms /    39 tokens (   15.09 ms per token,    66.28 tokens per second)
llama_print_timings:        eval time =   928.23 ms /    11 runs   (   84.38 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  1540.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The program is not reversible, count down from 5 seconds!



llama_print_timings:        load time =   593.43 ms
llama_print_timings:      sample time =     6.62 ms /    15 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   593.38 ms /    41 tokens (   14.47 ms per token,    69.10 tokens per second)
llama_print_timings:        eval time =  1214.48 ms /    14 runs   (   86.75 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  1837.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Everything quietened down, and after about fifteen seconds, with a loud bell sound, the red light on the overhead wires began to flash rapidly.



llama_print_timings:        load time =   629.14 ms
llama_print_timings:      sample time =    18.99 ms /    33 runs   (    0.58 ms per token,  1737.67 tokens per second)
llama_print_timings: prompt eval time =   629.10 ms /    55 tokens (   11.44 ms per token,    87.43 tokens per second)
llama_print_timings:        eval time =  2840.22 ms /    32 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  3553.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Launching! All units, pay attention to monitoring!”



llama_print_timings:        load time =   594.46 ms
llama_print_timings:      sample time =     6.03 ms /    13 runs   (    0.46 ms per token,  2155.17 tokens per second)
llama_print_timings: prompt eval time =   594.44 ms /    39 tokens (   15.24 ms per token,    65.61 tokens per second)
llama_print_timings:        eval time =  1020.02 ms /    12 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  1642.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She felt a slight itch on her face and knew that an enormous electric field had appeared. She looked up through the antenna's direction, seeing a thin cloud of light in the night sky that seemed faint but nonetheless flickered at her glance. The light disappeared after the cloud moved far away from its point of origin, leaving another light-emitting cloud behind. In the control room, orders were being barked out, though she could only hear the few words:



llama_print_timings:        load time =   759.35 ms
llama_print_timings:      sample time =    45.37 ms /   102 runs   (    0.44 ms per token,  2248.08 tokens per second)
llama_print_timings: prompt eval time =   759.30 ms /   144 tokens (    5.27 ms per token,   189.65 tokens per second)
llama_print_timings:        eval time =  8798.87 ms /   101 runs   (   87.12 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  9761.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The power amplifier failed and the third magnetron has burned!



llama_print_timings:        load time =   606.08 ms
llama_print_timings:      sample time =     7.21 ms /    15 runs   (    0.48 ms per token,  2081.31 tokens per second)
llama_print_timings: prompt eval time =   606.05 ms /    45 tokens (   13.47 ms per token,    74.25 tokens per second)
llama_print_timings:        eval time =  1196.19 ms /    14 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1834.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The redundancy unit has been successfully launched!”



llama_print_timings:        load time =   584.04 ms
llama_print_timings:      sample time =     4.94 ms /    11 runs   (    0.45 ms per token,  2227.62 tokens per second)
llama_print_timings: prompt eval time =   584.00 ms /    37 tokens (   15.78 ms per token,    63.36 tokens per second)
llama_print_timings:        eval time =   854.06 ms /    10 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1459.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Breakpoint set at line 1, continue.”



llama_print_timings:        load time =   592.02 ms
llama_print_timings:      sample time =     5.77 ms /    13 runs   (    0.44 ms per token,  2251.86 tokens per second)
llama_print_timings: prompt eval time =   591.98 ms /    39 tokens (   15.18 ms per token,    65.88 tokens per second)
llama_print_timings:        eval time =  1016.97 ms /    12 runs   (   84.75 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  1634.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……



llama_print_timings:        load time =  1068.23 ms
llama_print_timings:      sample time =     1.37 ms /     3 runs   (    0.46 ms per token,  2189.78 tokens per second)
llama_print_timings: prompt eval time =  1068.18 ms /    31 tokens (   34.46 ms per token,    29.02 tokens per second)
llama_print_timings:        eval time =   179.19 ms /     2 runs   (   89.59 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  1253.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁 heard another "呼啦啦" sound, and in a hazy state, she saw the dark shadows emerge from the dense forest at the foot of the mountain, hovering to the sky above before they crashed into it. She was not expecting that so many birds would be awaken in the cold winter forest. Then she witnessed a horrible incident: a bird flock flew into the range directed by the antenna, and as the "yuèguāng" light shone on the background, she clearly saw each bird falling from the sky.



llama_print_timings:        load time =   702.86 ms
llama_print_timings:      sample time =    64.92 ms /   122 runs   (    0.53 ms per token,  1879.38 tokens per second)
llama_print_timings: prompt eval time =   702.82 ms /   123 tokens (    5.71 ms per token,   175.01 tokens per second)
llama_print_timings:        eval time = 10831.08 ms /   121 runs   (   89.51 ms per token,    11.17 tokens per second)
llama_print_timings:       total time = 11829.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This process lasted about fifteen minutes, and the red light on the track faded away, while Ye Wenjie's itching sensation vanished. In the control room, chaotic orders still rained down, although the loud male voice had stopped after a few moments.



llama_print_timings:        load time =   659.53 ms
llama_print_timings:      sample time =    28.02 ms /    61 runs   (    0.46 ms per token,  2177.09 tokens per second)
llama_print_timings: prompt eval time =   659.49 ms /    82 tokens (    8.04 ms per token,   124.34 tokens per second)
llama_print_timings:        eval time =  5283.95 ms /    60 runs   (   88.07 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  6069.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The 147th launch of the Hongbian project has been completed. The launch system has been shut down, and Hongbian has entered a monitoring state. Please pass control to the monitoring department and upload data about broken points.



llama_print_timings:        load time =   642.25 ms
llama_print_timings:      sample time =    22.31 ms /    48 runs   (    0.46 ms per token,  2151.89 tokens per second)
llama_print_timings: prompt eval time =   642.21 ms /    68 tokens (    9.44 ms per token,   105.88 tokens per second)
llama_print_timings:        eval time =  4139.31 ms /    47 runs   (   88.07 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  4883.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Please fill in the launch log carefully and attend the launching meeting with your team leader. Thank you.



llama_print_timings:        load time =   605.30 ms
llama_print_timings:      sample time =    10.02 ms /    22 runs   (    0.46 ms per token,  2196.05 tokens per second)
llama_print_timings: prompt eval time =   605.27 ms /    53 tokens (   11.42 ms per token,    87.56 tokens per second)
llama_print_timings:        eval time =  1791.46 ms /    21 runs   (   85.31 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2441.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Everything is quiet, only the antennae still emitting echoes in the wind. Ye Wenjie looked up at the night sky and saw that the bird flock had returned to the forest. She looked at the antennas once more, feeling like they were a huge hand spreading out towards the infinite sky, possessing an extraordinary power. She looked at the "palm" facing the night sky and didn't see the BN20197F target destroyed by it among sparse cloud layers behind. Only 1969 was cold in the starry sky.



llama_print_timings:        load time =   746.19 ms
llama_print_timings:      sample time =    55.25 ms /   124 runs   (    0.45 ms per token,  2244.18 tokens per second)
llama_print_timings: prompt eval time =   746.15 ms /   136 tokens (    5.49 ms per token,   182.27 tokens per second)
llama_print_timings:        eval time = 10663.78 ms /   123 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 11662.66 ms


translated 61.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Thirty-eight years later.



llama_print_timings:        load time =   590.00 ms
llama_print_timings:      sample time =     4.11 ms /     9 runs   (    0.46 ms per token,  2191.38 tokens per second)
llama_print_timings: prompt eval time =   589.97 ms /    33 tokens (   17.88 ms per token,    55.94 tokens per second)
llama_print_timings:        eval time =   676.61 ms /     8 runs   (   84.58 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1283.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao thought that the four people who came to find him were a strange combination of two police officers and two military officers, if those two military officers were normal W.Z.D, but these were two army officers.



llama_print_timings:        load time =   627.79 ms
llama_print_timings:      sample time =    22.69 ms /    48 runs   (    0.47 ms per token,  2115.66 tokens per second)
llama_print_timings: prompt eval time =   627.77 ms /    66 tokens (    9.51 ms per token,   105.13 tokens per second)
llama_print_timings:        eval time =  4083.21 ms /    47 runs   (   86.88 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  4808.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao had a bad first impression of the police who came to see him. In fact, that young man in uniform was alright; he behaved very politely and did not arouse hatred in Wang Miao's heart. But the plainclothes officer gave off a strong smell of smoke, his skin is red and fat, with a loud voice like an animal, and his appearance was even worse than expected.



llama_print_timings:        load time =   685.81 ms
llama_print_timings:      sample time =    41.41 ms /    92 runs   (    0.45 ms per token,  2221.79 tokens per second)
llama_print_timings: prompt eval time =   685.77 ms /   110 tokens (    6.23 ms per token,   160.40 tokens per second)
llama_print_timings:        eval time =  8056.37 ms /    91 runs   (   88.53 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  8922.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hey, Wang Miao?” the man asked directly calling out his name which made Wang Miao very uncomfortable. He didn’t even look up at all before turning to the younger guy and motioning him to follow him. The latter took out a policeman id and showed it to Wang Miao.



llama_print_timings:        load time =   672.67 ms
llama_print_timings:      sample time =    30.68 ms /    68 runs   (    0.45 ms per token,  2216.72 tokens per second)
llama_print_timings: prompt eval time =   672.62 ms /   104 tokens (    6.47 ms per token,   154.62 tokens per second)
llama_print_timings:        eval time =  5762.83 ms /    67 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  6573.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Don't smoke in my house, please.



llama_print_timings:        load time =   609.12 ms
llama_print_timings:      sample time =     5.52 ms /    11 runs   (    0.50 ms per token,  1993.48 tokens per second)
llama_print_timings: prompt eval time =   609.09 ms /    44 tokens (   13.84 ms per token,    72.24 tokens per second)
llama_print_timings:        eval time =   886.42 ms /    10 runs   (   88.64 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1520.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Oh, I'm sorry, Professor Wang. This is Captain Shiqiang." The young officer smiled and gave a nod to the name "Shi".



llama_print_timings:        load time =   618.33 ms
llama_print_timings:      sample time =    15.77 ms /    35 runs   (    0.45 ms per token,  2218.98 tokens per second)
llama_print_timings: prompt eval time =   618.28 ms /    60 tokens (   10.30 ms per token,    97.04 tokens per second)
llama_print_timings:        eval time =  3043.66 ms /    34 runs   (   89.52 ms per token,    11.17 tokens per second)
llama_print_timings:       total time =  3733.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yeah, let's talk in the hallway.



llama_print_timings:        load time =   661.68 ms
llama_print_timings:      sample time =    14.60 ms /    14 runs   (    1.04 ms per token,   959.17 tokens per second)
llama_print_timings: prompt eval time =   661.64 ms /    69 tokens (    9.59 ms per token,   104.29 tokens per second)
llama_print_timings:        eval time =  1339.44 ms /    13 runs   (  103.03 ms per token,     9.71 tokens per second)
llama_print_timings:       total time =  2064.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He turned his head slightly towards the young policeman.



llama_print_timings:        load time =   608.93 ms
llama_print_timings:      sample time =     5.34 ms /    12 runs   (    0.44 ms per token,  2247.61 tokens per second)
llama_print_timings: prompt eval time =   608.88 ms /    42 tokens (   14.50 ms per token,    68.98 tokens per second)
llama_print_timings:        eval time =   922.40 ms /    11 runs   (   83.86 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1554.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Professor Wang, have you had any contact with members of the Society for Science Boundaries lately?"



llama_print_timings:        load time =   654.43 ms
llama_print_timings:      sample time =    18.41 ms /    25 runs   (    0.74 ms per token,  1357.59 tokens per second)
llama_print_timings: prompt eval time =   654.39 ms /    54 tokens (   12.12 ms per token,    82.52 tokens per second)
llama_print_timings:        eval time =  2263.95 ms /    24 runs   (   94.33 ms per token,    10.60 tokens per second)
llama_print_timings:       total time =  2997.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I am not allowed to contact this influential academic organization, called "Science Boundaries," with its members being prominent scholars. How can that be?



llama_print_timings:        load time =   621.24 ms
llama_print_timings:      sample time =    15.89 ms /    35 runs   (    0.45 ms per token,  2203.20 tokens per second)
llama_print_timings: prompt eval time =   621.20 ms /    63 tokens (    9.86 ms per token,   101.42 tokens per second)
llama_print_timings:        eval time =  2929.73 ms /    34 runs   (   86.17 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3620.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Look at you!” said Shiqiang angrily, “Is it against the law for us to talk about him? Did we say we can’t have contact with him?” His smoking habits seemed to be his own business as he spewed out the cigarette smoke in his mouth into Wang Meng's face.



llama_print_timings:        load time =   642.47 ms
llama_print_timings:      sample time =    32.35 ms /    72 runs   (    0.45 ms per token,  2225.38 tokens per second)
llama_print_timings: prompt eval time =   642.43 ms /    74 tokens (    8.68 ms per token,   115.19 tokens per second)
llama_print_timings:        eval time =  6270.43 ms /    71 runs   (   88.32 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  7053.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's okay, this is my personal information and I don't need to answer your questions.



llama_print_timings:        load time =   581.71 ms
llama_print_timings:      sample time =     9.80 ms /    22 runs   (    0.45 ms per token,  2245.59 tokens per second)
llama_print_timings: prompt eval time =   581.66 ms /    45 tokens (   12.93 ms per token,    77.36 tokens per second)
llama_print_timings:        eval time =  1847.98 ms /    21 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  2472.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Everything is private now, like a famous scholar you should be responsible for public safety.” said Shi Qiang as he put out his cigarette and picked another one from the bent up cigarette box.



llama_print_timings:        load time =   624.91 ms
llama_print_timings:      sample time =    21.25 ms /    47 runs   (    0.45 ms per token,  2211.56 tokens per second)
llama_print_timings: prompt eval time =   624.87 ms /    73 tokens (    8.56 ms per token,   116.83 tokens per second)
llama_print_timings:        eval time =  3870.21 ms /    46 runs   (   84.13 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  4589.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I have the right not to respond, ” Wang Moxi said and turned around to go back into her room.



llama_print_timings:        load time =   606.31 ms
llama_print_timings:      sample time =    11.68 ms /    26 runs   (    0.45 ms per token,  2226.03 tokens per second)
llama_print_timings: prompt eval time =   606.26 ms /    52 tokens (   11.66 ms per token,    85.77 tokens per second)
llama_print_timings:        eval time =  2071.95 ms /    25 runs   (   82.88 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  2729.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Wait a minute!” Shi Qiang said sharply, and waved his hand toward the young police officer next to him. “Give me their address and phone number. We’ll go over there together tomorrow afternoon.”



llama_print_timings:        load time =   643.73 ms
llama_print_timings:      sample time =    37.10 ms /    50 runs   (    0.74 ms per token,  1347.85 tokens per second)
llama_print_timings: prompt eval time =   643.70 ms /    61 tokens (   10.55 ms per token,    94.76 tokens per second)
llama_print_timings:        eval time =  4609.55 ms /    49 runs   (   94.07 ms per token,    10.63 tokens per second)
llama_print_timings:       total time =  5421.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What are you doing?” Wang Mo asked angrily, which made the neighbors come out to see what was going on.



llama_print_timings:        load time =   600.32 ms
llama_print_timings:      sample time =    12.71 ms /    28 runs   (    0.45 ms per token,  2202.30 tokens per second)
llama_print_timings: prompt eval time =   600.28 ms /    55 tokens (   10.91 ms per token,    91.62 tokens per second)
llama_print_timings:        eval time =  2365.75 ms /    27 runs   (   87.62 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  3022.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Shit! You, you bastard!” the young policeman shouted angrily as he grabbed Shi Qian by the arm and dragged him to a secluded spot. Obviously not just Miao Mu was uncomfortable with his crude remarks.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Professor Wang, please don’t take it personally.” A major officer hurried forward. “There is an important meeting this afternoon and we have been asked to invite several scholars and experts. Our superior gave us this directive.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I am very busy this afternoon.



llama_print_timings:        load time =   584.77 ms
llama_print_timings:      sample time =     3.59 ms /     8 runs   (    0.45 ms per token,  2229.03 tokens per second)
llama_print_timings: prompt eval time =   584.72 ms /    35 tokens (   16.71 ms per token,    59.86 tokens per second)
llama_print_timings:        eval time =   615.05 ms /     7 runs   (   87.86 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  1215.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's clear. The head has already contacted the leaders of the superconducting center to inform them that you must be present at this meeting. If necessary, we can just postpone the meeting for you.



llama_print_timings:        load time =   623.33 ms
llama_print_timings:      sample time =    20.84 ms /    47 runs   (    0.44 ms per token,  2254.74 tokens per second)
llama_print_timings: prompt eval time =   623.31 ms /    64 tokens (    9.74 ms per token,   102.68 tokens per second)
llama_print_timings:        eval time =  4029.43 ms /    46 runs   (   87.60 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  4743.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He and his colleagues didn't say anything, turned and went downstairs. The two officers looked at them as they walked away, it seemed like both of them had a deep breath.



llama_print_timings:        load time =   609.39 ms
llama_print_timings:      sample time =    18.13 ms /    41 runs   (    0.44 ms per token,  2261.82 tokens per second)
llama_print_timings: prompt eval time =   609.35 ms /    56 tokens (   10.88 ms per token,    91.90 tokens per second)
llama_print_timings:        eval time =  3465.25 ms /    40 runs   (   86.63 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  4154.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He’s such a jerk.” The lieutenant quietly said to his colleague.



llama_print_timings:        load time =   584.71 ms
llama_print_timings:      sample time =     8.70 ms /    19 runs   (    0.46 ms per token,  2183.91 tokens per second)
llama_print_timings: prompt eval time =   584.66 ms /    45 tokens (   12.99 ms per token,    76.97 tokens per second)
llama_print_timings:        eval time =  1501.34 ms /    18 runs   (   83.41 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  2125.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He had a bad record. In the past, he acted recklessly when attempting to rescue hostages despite their lives and led to the deaths of an entire family. He also seemed to have a relationship with a criminal gang and used them to handle other competitors. Last year, he abused his position by torturing a prisoner, causing him to suffer physical injury.”



llama_print_timings:        load time =   692.71 ms
llama_print_timings:      sample time =    35.71 ms /    77 runs   (    0.46 ms per token,  2156.50 tokens per second)
llama_print_timings: prompt eval time =   692.67 ms /   107 tokens (    6.47 ms per token,   154.47 tokens per second)
llama_print_timings:        eval time =  6656.98 ms /    76 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  7510.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Such a person can't enter the combat center.



llama_print_timings:        load time =   585.93 ms
llama_print_timings:      sample time =     5.90 ms /    13 runs   (    0.45 ms per token,  2204.14 tokens per second)
llama_print_timings: prompt eval time =   585.90 ms /    37 tokens (   15.84 ms per token,    63.15 tokens per second)
llama_print_timings:        eval time =  1055.89 ms /    12 runs   (   87.99 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  1668.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The boss named him, should there be something special about him. However, he was restricted to very little except for the affairs of public security.”



llama_print_timings:        load time =   655.84 ms
llama_print_timings:      sample time =    31.51 ms /    32 runs   (    0.98 ms per token,  1015.52 tokens per second)
llama_print_timings: prompt eval time =   655.79 ms /    62 tokens (   10.58 ms per token,    94.54 tokens per second)
llama_print_timings:        eval time =  3108.98 ms /    31 runs   (  100.29 ms per token,     9.97 tokens per second)
llama_print_timings:       total time =  3901.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is the command center? Wang Miao looked at the two officers in front of him with a puzzled expression.



llama_print_timings:        load time =   603.07 ms
llama_print_timings:      sample time =    11.44 ms /    25 runs   (    0.46 ms per token,  2185.51 tokens per second)
llama_print_timings: prompt eval time =   603.02 ms /    47 tokens (   12.83 ms per token,    77.94 tokens per second)
llama_print_timings:        eval time =  2157.29 ms /    24 runs   (   89.89 ms per token,    11.13 tokens per second)
llama_print_timings:       total time =  2810.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Jiang Moxue's car entered a large estate in the outskirts of the city, where only the door number and not any unit name was visible on the gate. He knew this place belonged to the military instead of the police force.



llama_print_timings:        load time =   655.10 ms
llama_print_timings:      sample time =    25.99 ms /    55 runs   (    0.47 ms per token,  2116.44 tokens per second)
llama_print_timings: prompt eval time =   655.06 ms /    72 tokens (    9.10 ms per token,   109.91 tokens per second)
llama_print_timings:        eval time =  4859.43 ms /    54 runs   (   89.99 ms per token,    11.11 tokens per second)
llama_print_timings:       total time =  5633.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are at the meeting room. When you entered, you were surprised by the chaos around you. The floor was covered with computer devices which are not tidily arranged on the tables or on the ground; a bunch of network switches have been stacked randomly on servers and projectors stand in different angles in corner of the hall; fog like mist is floating in half-air…Assistant: In a chaotic meeting room, there are many computer devices which haven't been tidily arranged on tables or on the ground.



llama_print_timings:        load time =   828.11 ms
llama_print_timings:      sample time =    76.16 ms /   109 runs   (    0.70 ms per token,  1431.27 tokens per second)
llama_print_timings: prompt eval time =   828.07 ms /   177 tokens (    4.68 ms per token,   213.75 tokens per second)
llama_print_timings:        eval time = 10281.92 ms /   108 runs   (   95.20 ms per token,    10.50 tokens per second)
llama_print_timings:       total time = 11459.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The meeting table was also filled with documents and junk, with participants looking exhausted. Their clothes were wrinkled and the ties of those who wore them had been loosened; it seemed like they had worked through the night. The meeting was chaired by an army major general called Chang WeiSi. Half of the attendees were military officers, while the others were experts and scholars. Some of these were very famous scientists specializing in fundamental scientific research.



llama_print_timings:        load time =   756.42 ms
llama_print_timings:      sample time =    56.61 ms /   103 runs   (    0.55 ms per token,  1819.34 tokens per second)
llama_print_timings: prompt eval time =   756.38 ms /   129 tokens (    5.86 ms per token,   170.55 tokens per second)
llama_print_timings:        eval time =  9188.72 ms /   102 runs   (   90.09 ms per token,    11.10 tokens per second)
llama_print_timings:       total time = 10198.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

To his surprise, there were four foreigners present who surprised him even more: two of them were military officers, an American Air Force colonel and a British Army brigadier, serving as NATO liaison officers; while the other two were actually CIA officers working here as observers.



llama_print_timings:        load time =   684.27 ms
llama_print_timings:      sample time =    29.37 ms /    62 runs   (    0.47 ms per token,  2111.36 tokens per second)
llama_print_timings: prompt eval time =   684.24 ms /    87 tokens (    7.86 ms per token,   127.15 tokens per second)
llama_print_timings:        eval time =  5633.15 ms /    61 runs   (   92.35 ms per token,    10.83 tokens per second)
llama_print_timings:       total time =  6453.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

From everyone's face, Wang Miao read out a sentence: We have tried our best. Please kill the fucking end!



llama_print_timings:        load time =   686.59 ms
llama_print_timings:      sample time =    32.54 ms /    29 runs   (    1.12 ms per token,   891.16 tokens per second)
llama_print_timings: prompt eval time =   686.55 ms /    54 tokens (   12.71 ms per token,    78.65 tokens per second)
llama_print_timings:        eval time =  3096.22 ms /    28 runs   (  110.58 ms per token,     9.04 tokens per second)
llama_print_timings:       total time =  3931.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao saw Shi Qiang, he suddenly changed his bad manners of yesterday and greeted him politely. However, that silly smile on his face didn't make Wang Miao happy. He did not want to be sitting next to Shi Qiang but he had no choice since there was only that one empty seat available in the room which already had a strong smoke smell.



llama_print_timings:        load time =   713.35 ms
llama_print_timings:      sample time =    61.09 ms /    86 runs   (    0.71 ms per token,  1407.78 tokens per second)
llama_print_timings: prompt eval time =   713.31 ms /    98 tokens (    7.28 ms per token,   137.39 tokens per second)
llama_print_timings:        eval time =  8419.51 ms /    85 runs   (   99.05 ms per token,    10.10 tokens per second)
llama_print_timings:       total time =  9408.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he was sending the file, Shi Qiang crept up to Wang Miao and whispered, "Professor Wang, it seems like you are studying something new ... a type of material?"


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Nanomaterials.” Wang Miao replied simply.



llama_print_timings:        load time =   630.27 ms
llama_print_timings:      sample time =     6.88 ms /    14 runs   (    0.49 ms per token,  2034.88 tokens per second)
llama_print_timings: prompt eval time =   630.23 ms /    42 tokens (   15.01 ms per token,    66.64 tokens per second)
llama_print_timings:        eval time =  1217.37 ms /    13 runs   (   93.64 ms per token,    10.68 tokens per second)
llama_print_timings:       total time =  1878.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I heard that it's very powerful and not used for crime, is that right?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

What does it mean?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Oh, I heard that it can lift a truck with just one hair. And if criminals steal some of this to make knives, they could cut open an entire car in a single stroke!



llama_print_timings:        load time =   645.73 ms
llama_print_timings:      sample time =    19.85 ms /    43 runs   (    0.46 ms per token,  2166.68 tokens per second)
llama_print_timings: prompt eval time =   645.69 ms /    70 tokens (    9.22 ms per token,   108.41 tokens per second)
llama_print_timings:        eval time =  3953.72 ms /    42 runs   (   94.14 ms per token,    10.62 tokens per second)
llama_print_timings:       total time =  4690.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Hmm, not even a knife is needed to cut through passing cars like butter. Just use a thin thread made from materials that are one thousandth the size of hair!


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

$Strong took the document out of his bag and then put it back in again, obviously no longer interested. “You’re right, fish can commit crimes! I handled a murder case once. The woman who her husband's dick was cut off from! Know what she used? Frozen Pollack fillets stored in the freezer!”



llama_print_timings:        load time =   696.71 ms
llama_print_timings:      sample time =    33.98 ms /    74 runs   (    0.46 ms per token,  2177.69 tokens per second)
llama_print_timings: prompt eval time =   696.67 ms /   108 tokens (    6.45 ms per token,   155.02 tokens per second)
llama_print_timings:        eval time =  6622.94 ms /    73 runs   (   90.73 ms per token,    11.02 tokens per second)
llama_print_timings:       total time =  7474.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm not interested. How, let me come to the meeting just for this?



llama_print_timings:        load time =   605.66 ms
llama_print_timings:      sample time =     8.83 ms /    19 runs   (    0.46 ms per token,  2152.49 tokens per second)
llama_print_timings: prompt eval time =   605.62 ms /    49 tokens (   12.36 ms per token,    80.91 tokens per second)
llama_print_timings:        eval time =  1600.36 ms /    18 runs   (   88.91 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  2246.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“Fish? Nanomaterials? No, no. None of that’s relevant.” Strong put his mouth close to Wang Meng's ear. “Don't be nice to those guys, they discriminate against us and want to steal information from us but won't tell us anything.”



llama_print_timings:        load time =   718.98 ms
llama_print_timings:      sample time =    61.93 ms /    68 runs   (    0.91 ms per token,  1097.94 tokens per second)
llama_print_timings: prompt eval time =   718.94 ms /   100 tokens (    7.19 ms per token,   139.09 tokens per second)
llama_print_timings:        eval time =  6620.00 ms /    67 runs   (   98.81 ms per token,    10.12 tokens per second)
llama_print_timings:       total time =  7614.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General Cai Weishi said, "Comrades, the meeting is now in order. Firstly, let me introduce the current situation to our comrades."



llama_print_timings:        load time =   628.07 ms
llama_print_timings:      sample time =    16.36 ms /    36 runs   (    0.45 ms per token,  2200.76 tokens per second)
llama_print_timings: prompt eval time =   628.02 ms /    68 tokens (    9.24 ms per token,   108.28 tokens per second)
llama_print_timings:        eval time =  3079.02 ms /    35 runs   (   87.97 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  3780.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The term “war zone”, which was unusual, puzzled Wang Miao. He also noticed that the general didn't seem to plan on introducing him to anything further. That in itself corroborated Xue Qiang's words. In his brief speech, General Jiang mentioned “comrades” twice and looked at Wang Miao among two NATO soldiers and two CIA agents. He felt the general had overlooked addressing them as "Sirs".



llama_print_timings:        load time =   706.54 ms
llama_print_timings:      sample time =    46.27 ms /   101 runs   (    0.46 ms per token,  2182.98 tokens per second)
llama_print_timings: prompt eval time =   706.49 ms /   118 tokens (    5.99 ms per token,   167.02 tokens per second)
llama_print_timings:        eval time =  9037.75 ms /   100 runs   (   90.38 ms per token,    11.06 tokens per second)
llama_print_timings:       total time =  9954.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“They are also gay, ” he whispered to Wang Meng. “Everyone here calls them that.”



llama_print_timings:        load time =   687.75 ms
llama_print_timings:      sample time =    11.22 ms /    24 runs   (    0.47 ms per token,  2139.61 tokens per second)
llama_print_timings: prompt eval time =   687.72 ms /    66 tokens (   10.42 ms per token,    95.97 tokens per second)
llama_print_timings:        eval time =  2163.08 ms /    23 runs   (   94.05 ms per token,    10.63 tokens per second)
llama_print_timings:       total time =  2900.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

While being perplexed, Wang Miao also left some impression of Shi Qiang's observation ability.



llama_print_timings:        load time =   630.51 ms
llama_print_timings:      sample time =    11.92 ms /    26 runs   (    0.46 ms per token,  2181.94 tokens per second)
llama_print_timings: prompt eval time =   630.48 ms /    47 tokens (   13.41 ms per token,    74.55 tokens per second)
llama_print_timings:        eval time =  2380.91 ms /    25 runs   (   95.24 ms per token,    10.50 tokens per second)
llama_print_timings:       total time =  3067.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Dashi, you put out your cigarette. The smoke is getting too thick.” said Chang Weisi, who was looking through his paperwork.



llama_print_timings:        load time =   663.60 ms
llama_print_timings:      sample time =    16.20 ms /    34 runs   (    0.48 ms per token,  2099.41 tokens per second)
llama_print_timings: prompt eval time =   663.56 ms /    59 tokens (   11.25 ms per token,    88.91 tokens per second)
llama_print_timings:        eval time =  3139.93 ms /    33 runs   (   95.15 ms per token,    10.51 tokens per second)
llama_print_timings:       total time =  3877.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He took the just-lit cigarette and looked around for a cigarette ashtray, but he “ziyang” a cigarette into his tea cup. He caught this opportunity to raise his hand to speak, before Constant winston answered him. "First of all, I have an idea, which was mentioned before - information equalization!".



llama_print_timings:        load time =   730.48 ms
llama_print_timings:      sample time =    34.87 ms /    77 runs   (    0.45 ms per token,  2208.39 tokens per second)
llama_print_timings: prompt eval time =   730.44 ms /    98 tokens (    7.45 ms per token,   134.17 tokens per second)
llama_print_timings:        eval time =  7134.46 ms /    76 runs   (   93.87 ms per token,    10.65 tokens per second)
llama_print_timings:       total time =  8024.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General Zhang Wei Si raised his head. “No military operation is information-equivalent, and I understand that you attendees are not to be introduced to further background information,” he said.



llama_print_timings:        load time =   640.77 ms
llama_print_timings:      sample time =    18.97 ms /    42 runs   (    0.45 ms per token,  2213.79 tokens per second)
llama_print_timings: prompt eval time =   640.73 ms /    67 tokens (    9.56 ms per token,   104.57 tokens per second)
llama_print_timings:        eval time =  3551.06 ms /    41 runs   (   86.61 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  4277.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But we're different, ” Shi Qiang said. “The police have been involved since the establishment of the combat center, but until now, we don't even know what their function is. And you're pushing us out. You're getting familiar with our jobs one by one, and then driving us away.”



llama_print_timings:        load time =   693.70 ms
llama_print_timings:      sample time =    31.57 ms /    70 runs   (    0.45 ms per token,  2217.65 tokens per second)
llama_print_timings: prompt eval time =   693.65 ms /    85 tokens (    8.16 ms per token,   122.54 tokens per second)
llama_print_timings:        eval time =  5957.39 ms /    69 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  6792.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Although the other officers were quietly stopping Shi Qiang, Shi Qiang had the audacity to speak to Cao WeiSi in such a disrespectful manner. Wang Miao was both surprised and resentful of Cao's response.



llama_print_timings:        load time =   641.31 ms
llama_print_timings:      sample time =    36.69 ms /    57 runs   (    0.64 ms per token,  1553.64 tokens per second)
llama_print_timings: prompt eval time =   641.27 ms /    70 tokens (    9.16 ms per token,   109.16 tokens per second)
llama_print_timings:        eval time =  5035.38 ms /    56 runs   (   89.92 ms per token,    11.12 tokens per second)
llama_print_timings:       total time =  5841.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I said, Dashi, now it turns out that you haven't improved your bad habits in the army. Can you represent the police? You have been suspended for a few months due to your disgraceful behavior, and you will be removed from the Public Security Force soon. I recruited you because of your experience in urban policing, but you should cherish this opportunity.”



llama_print_timings:        load time =   668.55 ms
llama_print_timings:      sample time =    48.07 ms /    84 runs   (    0.57 ms per token,  1747.52 tokens per second)
llama_print_timings: prompt eval time =   668.50 ms /    89 tokens (    7.51 ms per token,   133.13 tokens per second)
llama_print_timings:        eval time =  7302.70 ms /    83 runs   (   87.98 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  8189.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said with a deep voice, "Then I'm doing the crime and punishment thing? Didn't you say those are all just junk skills?"



llama_print_timings:        load time =   625.73 ms
llama_print_timings:      sample time =    18.21 ms /    35 runs   (    0.52 ms per token,  1921.60 tokens per second)
llama_print_timings: prompt eval time =   625.70 ms /    59 tokens (   10.61 ms per token,    94.29 tokens per second)
llama_print_timings:        eval time =  3026.72 ms /    34 runs   (   89.02 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  3735.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“But it will do, ” Cheng Weisi nodded to Shi Qiang. “It will suffice for now, during the wartime.”



llama_print_timings:        load time =   624.95 ms
llama_print_timings:      sample time =    16.84 ms /    35 runs   (    0.48 ms per token,  2077.89 tokens per second)
llama_print_timings: prompt eval time =   624.92 ms /    59 tokens (   10.59 ms per token,    94.41 tokens per second)
llama_print_timings:        eval time =  3074.03 ms /    34 runs   (   90.41 ms per token,    11.06 tokens per second)
llama_print_timings:       total time =  3775.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We can't think in the usual way any more,” said a CIA intelligence official in standard Mandarin. “We have to do something totally different.”



llama_print_timings:        load time =   611.96 ms
llama_print_timings:      sample time =    15.67 ms /    34 runs   (    0.46 ms per token,  2169.75 tokens per second)
llama_print_timings: prompt eval time =   611.92 ms /    56 tokens (   10.93 ms per token,    91.52 tokens per second)
llama_print_timings:        eval time =  3029.49 ms /    33 runs   (   91.80 ms per token,    10.89 tokens per second)
llama_print_timings:       total time =  3710.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That British officer apparently also speaks Chinese, he nodded and said, "Being or not being..."



llama_print_timings:        load time =   625.60 ms
llama_print_timings:      sample time =    12.96 ms /    23 runs   (    0.56 ms per token,  1774.28 tokens per second)
llama_print_timings: prompt eval time =   625.56 ms /    54 tokens (   11.58 ms per token,    86.32 tokens per second)
llama_print_timings:        eval time =  2004.96 ms /    22 runs   (   91.13 ms per token,    10.97 tokens per second)
llama_print_timings:       total time =  2691.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What did he say?” Xue Qiang asked Wang Miao.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Nothing. Wang Miao mechanically answered. These people seemed to be daydreaming, where was the war? The war is over now, isn't it? He looked up and saw from the window facing the courtyard outside, a city full of traffic flowing like water in spring sunlight; some people were taking their dogs for walks on the lawn while children were playing.



llama_print_timings:        load time =   685.28 ms
llama_print_timings:      sample time =    38.06 ms /    83 runs   (    0.46 ms per token,  2180.94 tokens per second)
llama_print_timings: prompt eval time =   685.24 ms /   100 tokens (    6.85 ms per token,   145.93 tokens per second)
llama_print_timings:        eval time =  7149.10 ms /    82 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  8008.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Which is more real, the world inside or outside of $?



llama_print_timings:        load time =   577.69 ms
llama_print_timings:      sample time =     6.88 ms /    15 runs   (    0.46 ms per token,  2181.18 tokens per second)
llama_print_timings: prompt eval time =   577.65 ms /    37 tokens (   15.61 ms per token,    64.05 tokens per second)
llama_print_timings:        eval time =  1200.84 ms /    14 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1809.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General Chang said, “Recently, the enemy’s attacks have become more obvious. The target is still the top scientific professionals in our country.” 



llama_print_timings:        load time =   618.10 ms
llama_print_timings:      sample time =    15.41 ms /    34 runs   (    0.45 ms per token,  2205.79 tokens per second)
llama_print_timings: prompt eval time =   618.05 ms /    60 tokens (   10.30 ms per token,    97.08 tokens per second)
llama_print_timings:        eval time =  2870.06 ms /    33 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3556.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He draws out the topmost piece of paper, which is written in large print and appears to have been rushed. Both Chinese and English names are listed.



llama_print_timings:        load time =   620.49 ms
llama_print_timings:      sample time =    15.67 ms /    34 runs   (    0.46 ms per token,  2170.03 tokens per second)
llama_print_timings: prompt eval time =   620.46 ms /    63 tokens (    9.85 ms per token,   101.54 tokens per second)
llama_print_timings:        eval time =  2916.34 ms /    33 runs   (   88.37 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  3610.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Professor Wang, what are your impressions of this list?" 常伟思 asked as he showed it to Wang Miao.



llama_print_timings:        load time =   768.68 ms
llama_print_timings:      sample time =    30.50 ms /    30 runs   (    1.02 ms per token,   983.57 tokens per second)
llama_print_timings: prompt eval time =   768.59 ms /    52 tokens (   14.78 ms per token,    67.66 tokens per second)
llama_print_timings:        eval time =  3006.84 ms /    29 runs   (  103.68 ms per token,     9.64 tokens per second)
llama_print_timings:       total time =  3912.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I know three of them are prominent scholars in the field of physics,” Wang Mo said with a somewhat absentminded look on his face. The color of those two words stuck out like beacons to him, for they were not the same as the rest of those he mentioned. How could she have been here? What happened to her?



llama_print_timings:        load time =   697.56 ms
llama_print_timings:      sample time =    34.88 ms /    73 runs   (    0.48 ms per token,  2093.01 tokens per second)
llama_print_timings: prompt eval time =   697.51 ms /    91 tokens (    7.66 ms per token,   130.46 tokens per second)
llama_print_timings:        eval time =  6568.69 ms /    72 runs   (   91.23 ms per token,    10.96 tokens per second)
llama_print_timings:       total time =  7426.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He pointed at the name on the document with a yellowed, thick finger. "Not really familiar," he said. "Want to get to know him?"



llama_print_timings:        load time =   649.50 ms
llama_print_timings:      sample time =    15.76 ms /    34 runs   (    0.46 ms per token,  2157.91 tokens per second)
llama_print_timings: prompt eval time =   649.46 ms /    70 tokens (    9.28 ms per token,   107.78 tokens per second)
llama_print_timings:        eval time =  2974.35 ms /    33 runs   (   90.13 ms per token,    11.09 tokens per second)
llama_print_timings:       total time =  3692.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Now, Wang Miao knew that it was right to send this former warrior, who had a rough exterior and sharp eyes like knives, to help them. Even if he might not be a good police officer, he was definitely a hard-core character.



llama_print_timings:        load time =   638.93 ms
llama_print_timings:      sample time =    24.73 ms /    54 runs   (    0.46 ms per token,  2183.49 tokens per second)
llama_print_timings: prompt eval time =   638.89 ms /    75 tokens (    8.52 ms per token,   117.39 tokens per second)
llama_print_timings:        eval time =  4539.77 ms /    53 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  5289.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It was a year ago, and Wang Miao was the leader of the nanomaterial section of the "Chinese 2" high-energy accelerator project. On an afternoon rest break in Lingshan's construction site, he suddenly found himself attracted by the composition on his field of view. As a landscape photography enthusiast, real scenes often formed art compositions in his eyes. The main subject of the composition was the superconductive coil they were installing; it had three stories high and was halfway installed, appearing as a massive metal block and a messy network of low-temperature cryogenic refrigerant pipes that resembled a monster from an industrial era. In front of this metal giant, appeared a young female silhouette. The light distribution in the composition was also remarkable: The metal giant was buried in the shadow of a temporary construction roof and even more showed its rough texture; but a beam of golden sunshine, passing through the holes in the roof, shone on the soft hair of this girl's


llama_print_timings:        load time =  1062.98 ms
llama_print_timings:      sample time =   123.17 ms /   248 runs   (    0.50 ms per token,  2013.40 tokens per second)
llama_print_timings: prompt eval time =  1062.93 ms /   284 tokens (    3.74 ms per token,   267.19 tokens per second)
llama_print_timings:        eval time = 23628.57 ms /   247 runs   (   95.66 ms per token,    10.45 tokens per second)
llama_print_timings:       total time = 25266.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Look what you're looking at, get to work!”



llama_print_timings:        load time =   587.74 ms
llama_print_timings:      sample time =     6.19 ms /    14 runs   (    0.44 ms per token,  2262.81 tokens per second)
llama_print_timings: prompt eval time =   587.71 ms /    38 tokens (   15.47 ms per token,    64.66 tokens per second)
llama_print_timings:        eval time =  1145.70 ms /    13 runs   (   88.13 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  1760.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Moxiao was startled, but then he found that the figure wasn't him, but a young engineer. The latter also looked as silly as him. Wang Moxiao came back from his dream and saw that the woman wasn't just a regular employee, for the general manager was accompanying her and introducing something to her, with an extremely respectful look on his face.



llama_print_timings:        load time =   700.75 ms
llama_print_timings:      sample time =    39.25 ms /    85 runs   (    0.46 ms per token,  2165.66 tokens per second)
llama_print_timings: prompt eval time =   700.71 ms /   102 tokens (    6.87 ms per token,   145.57 tokens per second)
llama_print_timings:        eval time =  7489.26 ms /    84 runs   (   89.16 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  8369.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Who is she?” Wang Mo asked the director.



llama_print_timings:        load time =   589.90 ms
llama_print_timings:      sample time =     5.37 ms /    12 runs   (    0.45 ms per token,  2236.72 tokens per second)
llama_print_timings: prompt eval time =   589.85 ms /    41 tokens (   14.39 ms per token,    69.51 tokens per second)
llama_print_timings:        eval time =   940.77 ms /    11 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1553.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You should know her, ” the director said, drawing a large circle with his hand. “The first experiment of the two-hundred-billion investment project could be to validate an ultra-string model she has proposed.”



llama_print_timings:        load time =   695.12 ms
llama_print_timings:      sample time =    22.40 ms /    51 runs   (    0.44 ms per token,  2276.68 tokens per second)
llama_print_timings: prompt eval time =   695.07 ms /   103 tokens (    6.75 ms per token,   148.19 tokens per second)
llama_print_timings:        eval time =  4450.38 ms /    50 runs   (   89.01 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  5245.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What? Yang Dong is a woman?”



llama_print_timings:        load time =   603.48 ms
llama_print_timings:      sample time =     4.88 ms /    11 runs   (    0.44 ms per token,  2256.41 tokens per second)
llama_print_timings: prompt eval time =   603.46 ms /    41 tokens (   14.72 ms per token,    67.94 tokens per second)
llama_print_timings:        eval time =   844.39 ms /    10 runs   (   84.44 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1468.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, we also only knew about her the day before yesterday.



llama_print_timings:        load time =   596.12 ms
llama_print_timings:      sample time =     6.14 ms /    14 runs   (    0.44 ms per token,  2279.39 tokens per second)
llama_print_timings: prompt eval time =   596.08 ms /    45 tokens (   13.25 ms per token,    75.49 tokens per second)
llama_print_timings:        eval time =  1176.07 ms /    13 runs   (   90.47 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  1799.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The engineer asked, "Is there something wrong with her psychologically that prevents her from ever appearing on media? It's like if you died, no one can see you on TV.



llama_print_timings:        load time =   790.60 ms
llama_print_timings:      sample time =    18.50 ms /    40 runs   (    0.46 ms per token,  2161.69 tokens per second)
llama_print_timings: prompt eval time =   790.51 ms /    70 tokens (   11.29 ms per token,    88.55 tokens per second)
llama_print_timings:        eval time =  3502.21 ms /    39 runs   (   89.80 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  4376.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, we don’t really know what the gender of Qian Zhongshu is either.” Wang Mo said with a little bit of sour grapes. “I think that she had some uncommon experiences during her childhood and thus got autism.”



llama_print_timings:        load time =   651.88 ms
llama_print_timings:      sample time =    26.30 ms /    57 runs   (    0.46 ms per token,  2167.14 tokens per second)
llama_print_timings: prompt eval time =   651.85 ms /    69 tokens (    9.45 ms per token,   105.85 tokens per second)
llama_print_timings:        eval time =  4921.86 ms /    56 runs   (   87.89 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  5694.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Dong and the Chief Engineer passed by, smiling at them as they went past. She didn't say a word, but Wang Meng remembered her clear eyes.



llama_print_timings:        load time =   629.13 ms
llama_print_timings:      sample time =    17.36 ms /    39 runs   (    0.45 ms per token,  2246.28 tokens per second)
llama_print_timings: prompt eval time =   629.11 ms /    64 tokens (    9.83 ms per token,   101.73 tokens per second)
llama_print_timings:        eval time =  3422.85 ms /    38 runs   (   90.08 ms per token,    11.10 tokens per second)
llama_print_timings:       total time =  4129.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That night, Wang Miao sat in his study and enjoyed the scenery photographs hung on the wall. His eyes fell on a landscape picture - it was an arid mountain valley with white mountains rising from the distant end. The half-decayed trees made up nearly three-quarters of the landscape. Wang Miao imagined the figure he had been envisioning in his mind impressed onto the picture, making her appear smaller than before, at the bottom of the valley. To his surprise, the whole landscape came to life like the photograph had recognized her. He then took her eyes and imaged them as the background sky for other photos, which all also came to life and displayed a beautiful sight that he had never imagined before. Previously, Wang Miao thought his photography works were missing something; now he knew it was her.



llama_print_timings:        load time =   955.87 ms
llama_print_timings:      sample time =    88.73 ms /   175 runs   (    0.51 ms per token,  1972.16 tokens per second)
llama_print_timings: prompt eval time =   955.82 ms /   245 tokens (    3.90 ms per token,   256.32 tokens per second)
llama_print_timings:        eval time = 15878.06 ms /   174 runs   (   91.25 ms per token,    10.96 tokens per second)
llama_print_timings:       total time = 17241.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The physicists on this list committed suicide within two months of each other, Chang Waisi said.



llama_print_timings:        load time =   616.20 ms
llama_print_timings:      sample time =    11.35 ms /    25 runs   (    0.45 ms per token,  2203.61 tokens per second)
llama_print_timings: prompt eval time =   616.16 ms /    51 tokens (   12.08 ms per token,    82.77 tokens per second)
llama_print_timings:        eval time =  2062.74 ms /    24 runs   (   85.95 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  2729.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A clear blue sky, a world without her. 



llama_print_timings:        load time =   640.93 ms
llama_print_timings:      sample time =     5.40 ms /    12 runs   (    0.45 ms per token,  2223.87 tokens per second)
llama_print_timings: prompt eval time =   640.88 ms /    77 tokens (    8.32 ms per token,   120.15 tokens per second)
llama_print_timings:        eval time =   943.93 ms /    11 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1608.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   589.39 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   589.35 ms /    45 tokens (   13.10 ms per token,    76.35 tokens per second)
llama_print_timings:        eval time =    83.69 ms /     1 runs   (   83.69 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =   676.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In less than two months, General Chang repeated.



llama_print_timings:        load time =   606.18 ms
llama_print_timings:      sample time =     5.36 ms /    12 runs   (    0.45 ms per token,  2237.14 tokens per second)
llama_print_timings: prompt eval time =   606.14 ms /    42 tokens (   14.43 ms per token,    69.29 tokens per second)
llama_print_timings:        eval time =   908.95 ms /    11 runs   (   82.63 ms per token,    12.10 tokens per second)
llama_print_timings:       total time =  1538.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You mean the last one. Satisfied, sitting next to Wang Meng, the big Shi said in a low voice, "She's the last suicide victim, she killed herself with too much sleeping pills last night and died without any pain." For an instant, Wang Meng suddenly felt a little grateful to big Shi.



llama_print_timings:        load time =   677.59 ms
llama_print_timings:      sample time =    32.57 ms /    73 runs   (    0.45 ms per token,  2241.26 tokens per second)
llama_print_timings: prompt eval time =   677.55 ms /    97 tokens (    6.99 ms per token,   143.16 tokens per second)
llama_print_timings:        eval time =  6323.08 ms /    72 runs   (   87.82 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  7148.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why? Wang Miao asked, the scenery paintings from those photos were still rotating in his mind like a slide show.



llama_print_timings:        load time =   745.98 ms
llama_print_timings:      sample time =    29.67 ms /    28 runs   (    1.06 ms per token,   943.71 tokens per second)
llama_print_timings: prompt eval time =   745.92 ms /    58 tokens (   12.86 ms per token,    77.76 tokens per second)
llama_print_timings:        eval time =  2783.64 ms /    27 runs   (  103.10 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =  3664.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Cheng Weisays, “Now we can only say with certainty that the causes of their suicides are the same. But the reasons themselves are very hard to figure out, and may not be understandable for non-professionals.” 



llama_print_timings:        load time =   657.72 ms
llama_print_timings:      sample time =    23.28 ms /    52 runs   (    0.45 ms per token,  2233.87 tokens per second)
llama_print_timings: prompt eval time =   657.68 ms /    85 tokens (    7.74 ms per token,   129.24 tokens per second)
llama_print_timings:        eval time =  4482.81 ms /    51 runs   (   87.90 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  5244.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yanmo flipped through the copies of those letters, all of them were long writings.



llama_print_timings:        load time =   597.08 ms
llama_print_timings:      sample time =     9.69 ms /    22 runs   (    0.44 ms per token,  2269.91 tokens per second)
llama_print_timings: prompt eval time =   597.03 ms /    49 tokens (   12.18 ms per token,    82.07 tokens per second)
llama_print_timings:        eval time =  1843.64 ms /    21 runs   (   87.79 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  2484.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Dr. Ding, could you show Professor Wang Yang Dun's will?” “She writes the shortest and most concise of all the wills,” the old man explained.



llama_print_timings:        load time =   604.54 ms
llama_print_timings:      sample time =    17.82 ms /    39 runs   (    0.46 ms per token,  2189.17 tokens per second)
llama_print_timings: prompt eval time =   604.52 ms /    58 tokens (   10.42 ms per token,    95.94 tokens per second)
llama_print_timings:        eval time =  3318.27 ms /    38 runs   (   87.32 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  4004.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He took out a white envelope from the desk and handed it to Wang Meng. "That person who is always low-headed is the boyfriend of Yang Dong," said Mr. Lai Sheng in a low voice. "I also saw that guy on the Large High Energy Collider Construction Site." He was one of the members of the theoretical group, famous for his discovery of a monopole but now he has gone missing. Wang Meng took out a white birch bark from the envelope. It was not paper and smelled sweet; there is a line of beautiful characters on it:


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Everything leads to this conclusion: physics has never existed and will never exist. I know that it is irresponsible of me, but there is no other choice.



llama_print_timings:        load time =   614.42 ms
llama_print_timings:      sample time =    16.16 ms /    35 runs   (    0.46 ms per token,  2166.38 tokens per second)
llama_print_timings: prompt eval time =   614.38 ms /    59 tokens (   10.41 ms per token,    96.03 tokens per second)
llama_print_timings:        eval time =  2934.76 ms /    34 runs   (   86.32 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3622.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She walked away without signing the agreement.



llama_print_timings:        load time =   582.59 ms
llama_print_timings:      sample time =     4.29 ms /     9 runs   (    0.48 ms per token,  2099.37 tokens per second)
llama_print_timings: prompt eval time =   582.56 ms /    37 tokens (   15.74 ms per token,    63.51 tokens per second)
llama_print_timings:        eval time =   677.56 ms /     8 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1279.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Physics...doesn't exist? Wang Mumu looked around blankly.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

General Zhang closed the folder and said, “Some specific information related to the construction of three new high-energy accelerators and their experimental results is involved. We will not discuss that here; first, we need to investigate ‘Science Boundaries’ Society. The UNESCO declared 2005 as the International Year of Physics, which was gradually born during the frequent academic conferences and exchanges in the international physics community. Dr. Ding, you are a theoretical physicist, can you further describe its situation?”



llama_print_timings:        load time =   748.60 ms
llama_print_timings:      sample time =    50.05 ms /   111 runs   (    0.45 ms per token,  2217.78 tokens per second)
llama_print_timings: prompt eval time =   748.56 ms /   138 tokens (    5.42 ms per token,   184.35 tokens per second)
llama_print_timings:        eval time =  9513.47 ms /   110 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 10492.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi nodded and said, “I have no direct connection with ‘Science Boundaries’, but this organization is very famous in the academic community. Its purpose is to say that since the late half of the last century, the classical theory of physics has gradually lost its simple and powerful characteristics, and theoretical images become increasingly complex, vague, and uncertain, experimental verification also becomes more difficult, which indicates that the exploration of the modern physics seems to have encountered many difficulties and obstacles. ‘Science Boundaries’ seeks a new thinking approach – simply put, it attempts to use scientific methods to find out the limits of science – to determine whether there is a bottom line in terms of depth and accuracy where science enters no longer possible.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“That's good,” said Chang Weisi. “According to our investigation, the scholars who committed suicide were mostly associated with ‘Scientific Boundaries’ and some of them even were members. However, we have not found any crimes such as cult manipulation or use of illegal drugs in these scholars' cases. In other words, even if ‘Scientific Boundaries’ has had an impact on these scholars, it is through legitimate academic exchanges.”



llama_print_timings:        load time =   712.31 ms
llama_print_timings:      sample time =    49.34 ms /   109 runs   (    0.45 ms per token,  2209.16 tokens per second)
llama_print_timings: prompt eval time =   712.24 ms /   116 tokens (    6.14 ms per token,   162.87 tokens per second)
llama_print_timings:        eval time =  9475.97 ms /   108 runs   (   87.74 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 10414.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Including the names of your contacts, the meeting venue and time, the contents of your conversations if you have exchanged written materials or email …”



llama_print_timings:        load time =   610.38 ms
llama_print_timings:      sample time =    15.60 ms /    34 runs   (    0.46 ms per token,  2179.07 tokens per second)
llama_print_timings: prompt eval time =   610.36 ms /    64 tokens (    9.54 ms per token,   104.86 tokens per second)
llama_print_timings:        eval time =  2854.82 ms /    33 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3537.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Shut up!" said Chang Shi angrily.



llama_print_timings:        load time =   588.94 ms
llama_print_timings:      sample time =     6.92 ms /    15 runs   (    0.46 ms per token,  2167.63 tokens per second)
llama_print_timings: prompt eval time =   588.89 ms /    43 tokens (   13.70 ms per token,    73.02 tokens per second)
llama_print_timings:        eval time =  1227.20 ms /    14 runs   (   87.66 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  1848.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The policeman next to him whispered, “Don't be silent and nobody will treat you as a mute.”



llama_print_timings:        load time =   654.31 ms
llama_print_timings:      sample time =    11.86 ms /    26 runs   (    0.46 ms per token,  2192.43 tokens per second)
llama_print_timings: prompt eval time =   654.27 ms /    77 tokens (    8.50 ms per token,   117.69 tokens per second)
llama_print_timings:        eval time =  2214.69 ms /    25 runs   (   88.59 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  2921.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He felt very uncomfortable like a fly had just been eaten up, but he still managed to answer the question. “I met ‘Science Border’ through San Yu-Fei, a Japanese-Chinese physicist who currently works for a Japanese company and lives in this city. She used to work at a laboratory of Mitsubishi Electric Co., Ltd. researching nanomaterials, and we first met each other at a technical seminar earlier this year.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“At first I was not interested in these issues, just as a pastime. I am an application researcher and my level is not high in this area, mainly to listen to them discuss and argue with each other. These people have very deep thoughts and innovative views, and I feel that their communication has opened up many of my own ideas. But the topics discussed at these meetings are purely theoretical, with no special elements.”



llama_print_timings:        load time =   755.74 ms
llama_print_timings:      sample time =    40.51 ms /    89 runs   (    0.46 ms per token,  2197.10 tokens per second)
llama_print_timings: prompt eval time =   755.70 ms /   138 tokens (    5.48 ms per token,   182.61 tokens per second)
llama_print_timings:        eval time =  7820.94 ms /    88 runs   (   88.87 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  8765.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Professor Wang, we hope you will accept our invitation to join the ‘Scientific Boundaries’ Society. This is the main reason why we asked you here today.” Generral Ceng said, “We hope to receive some internal information about this organization through your channel.”



llama_print_timings:        load time =   644.33 ms
llama_print_timings:      sample time =    27.58 ms /    61 runs   (    0.45 ms per token,  2211.75 tokens per second)
llama_print_timings: prompt eval time =   644.28 ms /    76 tokens (    8.48 ms per token,   117.96 tokens per second)
llama_print_timings:        eval time =  5266.48 ms /    60 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  6035.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Are you asking me to go undercover?” Wang Mo asked nervously.



llama_print_timings:        load time =   596.90 ms
llama_print_timings:      sample time =     8.44 ms /    17 runs   (    0.50 ms per token,  2014.46 tokens per second)
llama_print_timings: prompt eval time =   596.87 ms /    48 tokens (   12.43 ms per token,    80.42 tokens per second)
llama_print_timings:        eval time =  1463.42 ms /    16 runs   (   91.46 ms per token,    10.93 tokens per second)
llama_print_timings:       total time =  2099.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Haha, undercover agent!” Big Shi laughed loudly.



llama_print_timings:        load time =   599.86 ms
llama_print_timings:      sample time =     7.46 ms /    16 runs   (    0.47 ms per token,  2145.63 tokens per second)
llama_print_timings: prompt eval time =   599.81 ms /    43 tokens (   13.95 ms per token,    71.69 tokens per second)
llama_print_timings:        eval time =  1269.86 ms /    15 runs   (   84.66 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1902.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Cheng Weisi looked at Dashichang with a critical look, and said to Wang Miao, "We only have some information about this. Our organization doesn't have any other means."


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Sorry, Commander. I can't do that.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Professor Wang, ‘Scientific Boundaries’ is an organization composed of top international scholars, and conducting an investigation into it is a highly complex and sensitive matter. We are walking on thin ice. Without help from the scholarly community, we can't move forward, so we have made this abrupt request to hope that you can understand. However, if you do not agree, we also respect your decision.”



llama_print_timings:        load time =   704.68 ms
llama_print_timings:      sample time =    58.02 ms /    92 runs   (    0.63 ms per token,  1585.58 tokens per second)
llama_print_timings: prompt eval time =   704.64 ms /   102 tokens (    6.91 ms per token,   144.76 tokens per second)
llama_print_timings:        eval time =  8465.85 ms /    91 runs   (   93.03 ms per token,    10.75 tokens per second)
llama_print_timings:       total time =  9439.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm busy with work and don't have time.



llama_print_timings:        load time =   585.73 ms
llama_print_timings:      sample time =     6.50 ms /    14 runs   (    0.46 ms per token,  2155.50 tokens per second)
llama_print_timings: prompt eval time =   585.71 ms /    48 tokens (   12.20 ms per token,    81.95 tokens per second)
llama_print_timings:        eval time =  1090.92 ms /    13 runs   (   83.92 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  1706.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Cheng WeiSi nodded, “Okay, Professor Wang, we don't want to keep you any longer. Thank you for coming to the meeting.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

He was blank for a few seconds, then realized that he had to leave.



llama_print_timings:        load time =   580.16 ms
llama_print_timings:      sample time =     7.61 ms /    17 runs   (    0.45 ms per token,  2232.73 tokens per second)
llama_print_timings: prompt eval time =   580.12 ms /    44 tokens (   13.18 ms per token,    75.85 tokens per second)
llama_print_timings:        eval time =  1402.12 ms /    16 runs   (   87.63 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  2015.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Chang WeiSi politely took WangMuo to the conference room door, ShiGai was behind him and said loudly, "This is fine. I totally don't agree with this proposal. We already have so many bookworms who can't see far ahead, why let them go?"


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Wang Miao turned around and walked to the side of the large Shi. He tried his best to restrain his anger, "You shouldn't speak like that if you are a qualified officer."



llama_print_timings:        load time =   625.33 ms
llama_print_timings:      sample time =    19.39 ms /    43 runs   (    0.45 ms per token,  2217.29 tokens per second)
llama_print_timings: prompt eval time =   625.30 ms /    60 tokens (   10.42 ms per token,    95.95 tokens per second)
llama_print_timings:        eval time =  3663.92 ms /    42 runs   (   87.24 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  4377.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I am not.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Those scholars committed suicide for unknown reasons. You shouldn't speak of them in such a disrespectful manner. The contributions they made to human society with their wisdom, are irreplaceable and unsurpassed by anyone else.



llama_print_timings:        load time =   635.40 ms
llama_print_timings:      sample time =    23.94 ms /    53 runs   (    0.45 ms per token,  2213.87 tokens per second)
llama_print_timings: prompt eval time =   635.36 ms /    67 tokens (    9.48 ms per token,   105.45 tokens per second)
llama_print_timings:        eval time =  4439.25 ms /    52 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  5183.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is he saying they are better than me?" Da Shi looked up at Wang Miao from his chair. "I'm not going to be fooled by a few words of praise.



llama_print_timings:        load time =   618.31 ms
llama_print_timings:      sample time =    19.10 ms /    42 runs   (    0.45 ms per token,  2198.95 tokens per second)
llama_print_timings: prompt eval time =   618.28 ms /    64 tokens (    9.66 ms per token,   103.51 tokens per second)
llama_print_timings:        eval time =  3593.81 ms /    41 runs   (   87.65 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  4298.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You're saying that I would?



llama_print_timings:        load time =   586.37 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2247.75 tokens per second)
llama_print_timings: prompt eval time =   586.33 ms /    36 tokens (   16.29 ms per token,    61.40 tokens per second)
llama_print_timings:        eval time =   699.58 ms /     8 runs   (   87.45 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  1303.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I'm just trying to be responsible for your safety.” Big Story looked at Wang Miao and gave him a dumb smile.



llama_print_timings:        load time =   607.75 ms
llama_print_timings:      sample time =    13.39 ms /    29 runs   (    0.46 ms per token,  2165.47 tokens per second)
llama_print_timings: prompt eval time =   607.70 ms /    55 tokens (   11.05 ms per token,    90.50 tokens per second)
llama_print_timings:        eval time =  2440.15 ms /    28 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  3109.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In that situation, I am much safer than you. You should know that a person's discernment is proportional to his knowledge.



llama_print_timings:        load time =   615.29 ms
llama_print_timings:      sample time =    14.29 ms /    30 runs   (    0.48 ms per token,  2098.64 tokens per second)
llama_print_timings: prompt eval time =   615.27 ms /    55 tokens (   11.19 ms per token,    89.39 tokens per second)
llama_print_timings:        eval time =  2651.20 ms /    29 runs   (   91.42 ms per token,    10.94 tokens per second)
llama_print_timings:       total time =  3331.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's not necessarily so, like you are.



llama_print_timings:        load time =   615.61 ms
llama_print_timings:      sample time =     5.44 ms /    12 runs   (    0.45 ms per token,  2205.48 tokens per second)
llama_print_timings: prompt eval time =   615.57 ms /    39 tokens (   15.78 ms per token,    63.36 tokens per second)
llama_print_timings:        eval time =  1031.79 ms /    11 runs   (   93.80 ms per token,    10.66 tokens per second)
llama_print_timings:       total time =  1671.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Dash, if you say one more word, then get out of here!” Chang Wei Si yelled angrily.



llama_print_timings:        load time =   635.85 ms
llama_print_timings:      sample time =    12.56 ms /    28 runs   (    0.45 ms per token,  2228.77 tokens per second)
llama_print_timings: prompt eval time =   635.81 ms /    53 tokens (   12.00 ms per token,    83.36 tokens per second)
llama_print_timings:        eval time =  2432.91 ms /    27 runs   (   90.11 ms per token,    11.10 tokens per second)
llama_print_timings:       total time =  3126.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That's fine, ” said Wang Miao, turning to General Chang, “I have changed my mind and decided to follow your advice and join the ‘Science Bound’.”



llama_print_timings:        load time =   632.62 ms
llama_print_timings:      sample time =    18.30 ms /    41 runs   (    0.45 ms per token,  2239.95 tokens per second)
llama_print_timings: prompt eval time =   632.57 ms /    61 tokens (   10.37 ms per token,    96.43 tokens per second)
llama_print_timings:        eval time =  3553.02 ms /    40 runs   (   88.83 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  4267.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay, ” Big Story nods, “just go in and do it without being too obvious. Some things are easy to do, like checking out their computers for emails or URLs……”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Enough! You misunderstand, I was not going undercover. I just wanted to prove your ignorance and stupidity!”



llama_print_timings:        load time =   600.67 ms
llama_print_timings:      sample time =    12.33 ms /    28 runs   (    0.44 ms per token,  2271.07 tokens per second)
llama_print_timings: prompt eval time =   600.64 ms /    49 tokens (   12.26 ms per token,    81.58 tokens per second)
llama_print_timings:        eval time =  2364.74 ms /    27 runs   (   87.58 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  3021.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“If you're still alive after a while, then it can only mean one thing. . . I'm afraid.” Big Shi tilted his head and gave a grim laughter.



llama_print_timings:        load time =   625.09 ms
llama_print_timings:      sample time =    19.36 ms /    43 runs   (    0.45 ms per token,  2220.96 tokens per second)
llama_print_timings: prompt eval time =   625.05 ms /    62 tokens (   10.08 ms per token,    99.19 tokens per second)
llama_print_timings:        eval time =  3545.96 ms /    42 runs   (   84.43 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  4257.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I will certainly live on, but I really don't want to see you anymore!



llama_print_timings:        load time =   610.65 ms
llama_print_timings:      sample time =     8.39 ms /    19 runs   (    0.44 ms per token,  2263.52 tokens per second)
llama_print_timings: prompt eval time =   610.61 ms /    47 tokens (   12.99 ms per token,    76.97 tokens per second)
llama_print_timings:        eval time =  1555.36 ms /    18 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2203.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Constable Wei Si had always taken Wang Mu to the subway and arranged for a car to take him. At the farewell, he said, "Huo Shi Jiong has that temperament. In fact, he is an experienced criminal investigator and anti-terrorist expert." Twenty years ago, he used to be a soldier in my regiment."



llama_print_timings:        load time =   660.12 ms
llama_print_timings:      sample time =    34.87 ms /    77 runs   (    0.45 ms per token,  2208.14 tokens per second)
llama_print_timings: prompt eval time =   660.07 ms /    83 tokens (    7.95 ms per token,   125.74 tokens per second)
llama_print_timings:        eval time =  6782.58 ms /    76 runs   (   89.24 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  7600.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Walking to the front of the car, Chang Wei Si added, "You must have a lot of questions for me."



llama_print_timings:        load time =   599.30 ms
llama_print_timings:      sample time =    12.55 ms /    28 runs   (    0.45 ms per token,  2231.08 tokens per second)
llama_print_timings: prompt eval time =   599.25 ms /    51 tokens (   11.75 ms per token,    85.11 tokens per second)
llama_print_timings:        eval time =  2396.55 ms /    27 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  3051.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What did you mean by those things?”



llama_print_timings:        load time =   600.58 ms
llama_print_timings:      sample time =     4.43 ms /    10 runs   (    0.44 ms per token,  2256.83 tokens per second)
llama_print_timings: prompt eval time =   600.54 ms /    41 tokens (   14.65 ms per token,    68.27 tokens per second)
llama_print_timings:        eval time =   802.98 ms /     9 runs   (   89.22 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  1423.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The military is definitely involved in war.



llama_print_timings:        load time =   588.04 ms
llama_print_timings:      sample time =     4.25 ms /     9 runs   (    0.47 ms per token,  2115.16 tokens per second)
llama_print_timings: prompt eval time =   588.00 ms /    38 tokens (   15.47 ms per token,    64.63 tokens per second)
llama_print_timings:        eval time =   725.35 ms /     8 runs   (   90.67 ms per token,    11.03 tokens per second)
llama_print_timings:       total time =  1333.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He looked around the bright spring scenery with a confused expression, "Where is the war? There isn't any hot spot anywhere in the world now. It seems like it has been the most peaceful era ever."



llama_print_timings:        load time =   629.66 ms
llama_print_timings:      sample time =    20.64 ms /    46 runs   (    0.45 ms per token,  2228.47 tokens per second)
llama_print_timings: prompt eval time =   629.64 ms /    66 tokens (    9.54 ms per token,   104.82 tokens per second)
llama_print_timings:        eval time =  3882.59 ms /    45 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  4609.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You will soon find out everything, and everyone will. Professor Wang, have there been any significant changes in your life that suddenly turned it upside down, changing the world around you radically?



llama_print_timings:        load time =   657.69 ms
llama_print_timings:      sample time =    18.41 ms /    40 runs   (    0.46 ms per token,  2172.38 tokens per second)
llama_print_timings: prompt eval time =   657.64 ms /    85 tokens (    7.74 ms per token,   129.25 tokens per second)
llama_print_timings:        eval time =  3391.16 ms /    39 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  4133.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No.”



llama_print_timings:        load time =   597.51 ms
llama_print_timings:      sample time =     1.76 ms /     4 runs   (    0.44 ms per token,  2271.44 tokens per second)
llama_print_timings: prompt eval time =   597.47 ms /    33 tokens (   18.11 ms per token,    55.23 tokens per second)
llama_print_timings:        eval time =   252.16 ms /     3 runs   (   84.05 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =   857.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have a life that is accidental, while the world has so many variables. Yet you have nothing to worry about in your own life.



llama_print_timings:        load time =   615.31 ms
llama_print_timings:      sample time =    13.15 ms /    30 runs   (    0.44 ms per token,  2281.02 tokens per second)
llama_print_timings: prompt eval time =   615.27 ms /    52 tokens (   11.83 ms per token,    84.52 tokens per second)
llama_print_timings:        eval time =  2444.84 ms /    29 runs   (   84.30 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  3118.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't understand.



llama_print_timings:        load time =   583.27 ms
llama_print_timings:      sample time =     3.35 ms /     7 runs   (    0.48 ms per token,  2086.44 tokens per second)
llama_print_timings: prompt eval time =   583.25 ms /    38 tokens (   15.35 ms per token,    65.15 tokens per second)
llama_print_timings:        eval time =   520.80 ms /     6 runs   (   86.80 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  1118.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Most people are like this.



llama_print_timings:        load time =   581.13 ms
llama_print_timings:      sample time =     3.51 ms /     8 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   581.09 ms /    36 tokens (   16.14 ms per token,    61.95 tokens per second)
llama_print_timings:        eval time =   643.38 ms /     7 runs   (   91.91 ms per token,    10.88 tokens per second)
llama_print_timings:       total time =  1240.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's true, most people's lives are accidental.



llama_print_timings:        load time =   580.37 ms
llama_print_timings:      sample time =     7.02 ms /    15 runs   (    0.47 ms per token,  2136.75 tokens per second)
llama_print_timings: prompt eval time =   580.33 ms /    37 tokens (   15.68 ms per token,    63.76 tokens per second)
llama_print_timings:        eval time =  1213.88 ms /    14 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  1826.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They have been coming and going in this way for generations.



llama_print_timings:        load time =   592.25 ms
llama_print_timings:      sample time =     6.10 ms /    14 runs   (    0.44 ms per token,  2293.58 tokens per second)
llama_print_timings: prompt eval time =   592.18 ms /    44 tokens (   13.46 ms per token,    74.30 tokens per second)
llama_print_timings:        eval time =  1114.57 ms /    13 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1734.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's all a matter of chance.



llama_print_timings:        load time =   597.93 ms
llama_print_timings:      sample time =     4.43 ms /    10 runs   (    0.44 ms per token,  2255.81 tokens per second)
llama_print_timings: prompt eval time =   597.89 ms /    34 tokens (   17.59 ms per token,    56.87 tokens per second)
llama_print_timings:        eval time =   761.54 ms /     9 runs   (   84.62 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1379.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I admit today my understanding is poor, aren't you saying that I...?”



llama_print_timings:        load time =   619.95 ms
llama_print_timings:      sample time =     8.33 ms /    19 runs   (    0.44 ms per token,  2280.91 tokens per second)
llama_print_timings: prompt eval time =   619.90 ms /    56 tokens (   11.07 ms per token,    90.34 tokens per second)
llama_print_timings:        eval time =  1497.67 ms /    18 runs   (   83.20 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  2155.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, the entire history of humanity is also accidental. From stone age to nowadays, there have been no significant changes at all. It's really lucky. But since it's lucky, then it has a certain end; and now I tell you that the end has come, so be prepared for your thoughts.



llama_print_timings:        load time =   639.36 ms
llama_print_timings:      sample time =    32.22 ms /    71 runs   (    0.45 ms per token,  2203.94 tokens per second)
llama_print_timings: prompt eval time =   639.32 ms /    76 tokens (    8.41 ms per token,   118.88 tokens per second)
llama_print_timings:        eval time =  5999.72 ms /    70 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  6785.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General stopped Wang Miao from asking more questions.



llama_print_timings:        load time =   595.21 ms
llama_print_timings:      sample time =     4.85 ms /    11 runs   (    0.44 ms per token,  2266.64 tokens per second)
llama_print_timings: prompt eval time =   595.17 ms /    50 tokens (   11.90 ms per token,    84.01 tokens per second)
llama_print_timings:        eval time =   855.42 ms /    10 runs   (   85.54 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1472.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After getting on the bus, the driver asked Wang Miao's address. After that, he asked casually, "Oh, it is not you who picked me up?" I see the cars are identical.



llama_print_timings:        load time =   638.43 ms
llama_print_timings:      sample time =    20.25 ms /    44 runs   (    0.46 ms per token,  2172.30 tokens per second)
llama_print_timings: prompt eval time =   638.39 ms /    69 tokens (    9.25 ms per token,   108.08 tokens per second)
llama_print_timings:        eval time =  3762.77 ms /    43 runs   (   87.51 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  4495.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm not the one who picked up Dr. Ding, it was somebody else.



llama_print_timings:        load time =   604.82 ms
llama_print_timings:      sample time =     8.83 ms /    20 runs   (    0.44 ms per token,  2264.24 tokens per second)
llama_print_timings: prompt eval time =   604.78 ms /    41 tokens (   14.75 ms per token,    67.79 tokens per second)
llama_print_timings:        eval time =  1651.86 ms /    19 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2296.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao was moved and asked the driver about Ding Yi's residence. The driver told him. That night, Wang Miao went to see Ding Yi.



llama_print_timings:        load time =   626.25 ms
llama_print_timings:      sample time =    17.30 ms /    39 runs   (    0.44 ms per token,  2253.68 tokens per second)
llama_print_timings: prompt eval time =   626.20 ms /    61 tokens (   10.27 ms per token,    97.41 tokens per second)
llama_print_timings:        eval time =  3288.45 ms /    38 runs   (   86.54 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3992.36 ms


translated 139.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

When he opened the door, Wang Miao could smell alcohol. He saw Ding Yi lying on the sofa with the TV on and his eyes fixed on the ceiling. Looking around, he didn't find much furniture or decorations in the spacious living room except for a billiard table in one corner.



llama_print_timings:        load time =   718.12 ms
llama_print_timings:      sample time =    32.82 ms /    72 runs   (    0.46 ms per token,  2193.78 tokens per second)
llama_print_timings: prompt eval time =   718.09 ms /   115 tokens (    6.24 ms per token,   160.15 tokens per second)
llama_print_timings:        eval time =  6279.60 ms /    71 runs   (   88.45 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  7143.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   609.14 ms
llama_print_timings:      sample time =     0.44 ms /     1 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   609.11 ms /    54 tokens (   11.28 ms per token,    88.65 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   611.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"I bought this house three months ago," said Ding Yi. "Why would she ever come into the family?" He smiled drunkenly and shook his head.



llama_print_timings:        load time =   627.61 ms
llama_print_timings:      sample time =    16.34 ms /    37 runs   (    0.44 ms per token,  2264.24 tokens per second)
llama_print_timings: prompt eval time =   627.57 ms /    66 tokens (    9.51 ms per token,   105.17 tokens per second)
llama_print_timings:        eval time =  3084.36 ms /    36 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3784.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Dong's life is all that Wang Mu wants to know, but he doesn't know how to ask.



llama_print_timings:        load time =   601.58 ms
llama_print_timings:      sample time =    12.56 ms /    27 runs   (    0.47 ms per token,  2149.00 tokens per second)
llama_print_timings: prompt eval time =   601.54 ms /    49 tokens (   12.28 ms per token,    81.46 tokens per second)
llama_print_timings:        eval time =  2221.18 ms /    26 runs   (   85.43 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2879.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She was like a star, always so far away and her light shone cold on me. Ding Yi stood by the window and stared at the night sky, as if looking for the stars that had disappeared long ago.



llama_print_timings:        load time =   648.88 ms
llama_print_timings:      sample time =    24.07 ms /    48 runs   (    0.50 ms per token,  1994.52 tokens per second)
llama_print_timings: prompt eval time =   648.84 ms /    71 tokens (    9.14 ms per token,   109.43 tokens per second)
llama_print_timings:        eval time =  4259.15 ms /    47 runs   (   90.62 ms per token,    11.04 tokens per second)
llama_print_timings:       total time =  5017.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao also fell silent. It was strange that he just wanted to listen to her voice, but in the moment when they looked at each other's eyes on a sunset the previous year, she hadn't spoken, and he had never heard her voice before.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Ding Yi waved his hand as if to drive away something, freeing himself from the sad thoughts.



llama_print_timings:        load time =   597.44 ms
llama_print_timings:      sample time =    10.97 ms /    24 runs   (    0.46 ms per token,  2187.39 tokens per second)
llama_print_timings: prompt eval time =   597.40 ms /    54 tokens (   11.06 ms per token,    90.39 tokens per second)
llama_print_timings:        eval time =  2041.71 ms /    23 runs   (   88.77 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  2688.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are right, Professor Wang. Don't get involved with the military and police, they're a bunch of ignorant assholes. The suicides of those physicists have nothing to do with 'the science boundary,' I tried to explain it to them but couldn't make sense out of it.



llama_print_timings:        load time =   638.85 ms
llama_print_timings:      sample time =    30.11 ms /    66 runs   (    0.46 ms per token,  2192.11 tokens per second)
llama_print_timings: prompt eval time =   638.81 ms /    77 tokens (    8.30 ms per token,   120.54 tokens per second)
llama_print_timings:        eval time =  5646.12 ms /    65 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  6420.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They seem to have done some research as well.



llama_print_timings:        load time =   588.41 ms
llama_print_timings:      sample time =     4.97 ms /    11 runs   (    0.45 ms per token,  2213.73 tokens per second)
llama_print_timings: prompt eval time =   588.37 ms /    38 tokens (   15.48 ms per token,    64.59 tokens per second)
llama_print_timings:        eval time =   849.26 ms /    10 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1459.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Dong was not among the two scientists who had no ties with “Science Boundaries.”



llama_print_timings:        load time =   653.93 ms
llama_print_timings:      sample time =    11.24 ms /    25 runs   (    0.45 ms per token,  2224.79 tokens per second)
llama_print_timings: prompt eval time =   653.89 ms /    74 tokens (    8.84 ms per token,   113.17 tokens per second)
llama_print_timings:        eval time =  2045.11 ms /    24 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  2748.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Ding Yi, you know that I'm involved in this as well. So, I want to know the reason why Yang Dong made such a choice. I think you must have some knowledge of it," Wang Mupu said awkwardly, trying to hide his real intentions.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

If you know everything, you will only dive deeper into it. Now that both the person and the matter have been involved in this, even if you understand it now, the trouble is bigger than before.



llama_print_timings:        load time =   621.40 ms
llama_print_timings:      sample time =    19.26 ms /    43 runs   (    0.45 ms per token,  2232.26 tokens per second)
llama_print_timings: prompt eval time =   621.36 ms /    65 tokens (    9.56 ms per token,   104.61 tokens per second)
llama_print_timings:        eval time =  3618.67 ms /    42 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  4327.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm an application researcher, not as sensitive as the theoretical ones.



llama_print_timings:        load time =   584.14 ms
llama_print_timings:      sample time =     7.62 ms /    17 runs   (    0.45 ms per token,  2230.39 tokens per second)
llama_print_timings: prompt eval time =   584.08 ms /    44 tokens (   13.27 ms per token,    75.33 tokens per second)
llama_print_timings:        eval time =  1353.81 ms /    16 runs   (   84.61 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1972.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Alright, have you played billiards before?" DingYi walked up to the billiards table.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I played around with it a bit in school.



llama_print_timings:        load time =   591.46 ms
llama_print_timings:      sample time =     4.79 ms /    11 runs   (    0.44 ms per token,  2294.53 tokens per second)
llama_print_timings: prompt eval time =   591.42 ms /    38 tokens (   15.56 ms per token,    64.25 tokens per second)
llama_print_timings:        eval time =   856.87 ms /    10 runs   (   85.69 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1470.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi said, "I like to play this game because it reminds me of particle collisions in an accelerator." She picked up the black and white balls, put the black ball near a hole, then moved the white ball closer, only ten centimeters away from the black ball, and asked Wang Meng, "Can you make the black ball fall into the hole?"



llama_print_timings:        load time =   678.70 ms
llama_print_timings:      sample time =    38.00 ms /    81 runs   (    0.47 ms per token,  2131.35 tokens per second)
llama_print_timings: prompt eval time =   678.66 ms /    91 tokens (    7.46 ms per token,   134.09 tokens per second)
llama_print_timings:        eval time =  7185.25 ms /    80 runs   (   89.82 ms per token,    11.13 tokens per second)
llama_print_timings:       total time =  8037.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Everyone can get close.”



llama_print_timings:        load time =   634.49 ms
llama_print_timings:      sample time =     3.99 ms /     8 runs   (    0.50 ms per token,  2006.52 tokens per second)
llama_print_timings: prompt eval time =   634.45 ms /    36 tokens (   17.62 ms per token,    56.74 tokens per second)
llama_print_timings:        eval time =   722.60 ms /     7 runs   (  103.23 ms per token,     9.69 tokens per second)
llama_print_timings:       total time =  1375.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Try it.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Wang Miao took the golf club and gently hit a white ball, which knocked another black ball into the hole.



llama_print_timings:        load time =   623.02 ms
llama_print_timings:      sample time =    12.50 ms /    27 runs   (    0.46 ms per token,  2160.35 tokens per second)
llama_print_timings: prompt eval time =   622.98 ms /    50 tokens (   12.46 ms per token,    80.26 tokens per second)
llama_print_timings:        eval time =  2558.79 ms /    26 runs   (   98.41 ms per token,    10.16 tokens per second)
llama_print_timings:       total time =  3240.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“Well, let's put the billiards table in a new position.”Ding Yi called out to Wang Meng who looked very perplexed. The two lifted the heavy billiards table and moved it to the corner by the window in the living room. When they set down the table, Ding Yi took out the black ball she had just made a hole with and placed it at the same distance from the hole as before. “Will we be able to get this one in?”



llama_print_timings:        load time =   725.27 ms
llama_print_timings:      sample time =    49.14 ms /   106 runs   (    0.46 ms per token,  2157.28 tokens per second)
llama_print_timings: prompt eval time =   725.23 ms /   118 tokens (    6.15 ms per token,   162.71 tokens per second)
llama_print_timings:        eval time = 10130.17 ms /   105 runs   (   96.48 ms per token,    10.37 tokens per second)
llama_print_timings:       total time = 11084.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course.



llama_print_timings:        load time =   620.78 ms
llama_print_timings:      sample time =     1.81 ms /     4 runs   (    0.45 ms per token,  2207.51 tokens per second)
llama_print_timings: prompt eval time =   620.76 ms /    33 tokens (   18.81 ms per token,    53.16 tokens per second)
llama_print_timings:        eval time =   278.37 ms /     3 runs   (   92.79 ms per token,    10.78 tokens per second)
llama_print_timings:       total time =   907.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Let's play.



llama_print_timings:        load time =   613.51 ms
llama_print_timings:      sample time =     2.93 ms /     6 runs   (    0.49 ms per token,  2047.78 tokens per second)
llama_print_timings: prompt eval time =   613.47 ms /    34 tokens (   18.04 ms per token,    55.42 tokens per second)
llama_print_timings:        eval time =   459.88 ms /     5 runs   (   91.98 ms per token,    10.87 tokens per second)
llama_print_timings:       total time =  1086.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao once again easily hit the black ball into the hole.



llama_print_timings:        load time =   638.68 ms
llama_print_timings:      sample time =     7.36 ms /    16 runs   (    0.46 ms per token,  2174.50 tokens per second)
llama_print_timings: prompt eval time =   638.64 ms /    47 tokens (   13.59 ms per token,    73.59 tokens per second)
llama_print_timings:        eval time =  1463.95 ms /    15 runs   (   97.60 ms per token,    10.25 tokens per second)
llama_print_timings:       total time =  2136.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They moved the billiards table to the third corner of the living room, and Ding Yi gestured with her hand. “Play,” she said.



llama_print_timings:        load time =   671.55 ms
llama_print_timings:      sample time =    16.78 ms /    34 runs   (    0.49 ms per token,  2025.74 tokens per second)
llama_print_timings: prompt eval time =   671.51 ms /    68 tokens (    9.88 ms per token,   101.26 tokens per second)
llama_print_timings:        eval time =  3121.87 ms /    33 runs   (   94.60 ms per token,    10.57 tokens per second)
llama_print_timings:       total time =  3869.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I say, we...



llama_print_timings:        load time =   620.61 ms
llama_print_timings:      sample time =     2.77 ms /     6 runs   (    0.46 ms per token,  2163.72 tokens per second)
llama_print_timings: prompt eval time =   620.58 ms /    36 tokens (   17.24 ms per token,    58.01 tokens per second)
llama_print_timings:        eval time =   460.23 ms /     5 runs   (   92.05 ms per token,    10.86 tokens per second)
llama_print_timings:       total time =  1094.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Let's play.



llama_print_timings:        load time =   676.80 ms
llama_print_timings:      sample time =     2.71 ms /     6 runs   (    0.45 ms per token,  2209.94 tokens per second)
llama_print_timings: prompt eval time =   676.71 ms /    34 tokens (   19.90 ms per token,    50.24 tokens per second)
llama_print_timings:        eval time =   490.30 ms /     5 runs   (   98.06 ms per token,    10.20 tokens per second)
llama_print_timings:       total time =  1179.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao sighed and hit the black ball into the cave for the third time.



llama_print_timings:        load time =   637.83 ms
llama_print_timings:      sample time =     9.45 ms /    20 runs   (    0.47 ms per token,  2116.18 tokens per second)
llama_print_timings: prompt eval time =   637.76 ms /    47 tokens (   13.57 ms per token,    73.70 tokens per second)
llama_print_timings:        eval time =  1785.18 ms /    19 runs   (   93.96 ms per token,    10.64 tokens per second)
llama_print_timings:       total time =  2466.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They moved the billiard table twice, once to a corner near the door in the living room and again back to its original position. Ding Yi put black and white balls in the hole again and Weng Meng hit one of them into the hole. At this time, both persons were sweating.



llama_print_timings:        load time =   726.31 ms
llama_print_timings:      sample time =    35.69 ms /    65 runs   (    0.55 ms per token,  1821.09 tokens per second)
llama_print_timings: prompt eval time =   726.26 ms /    88 tokens (    8.25 ms per token,   121.17 tokens per second)
llama_print_timings:        eval time =  6443.00 ms /    64 runs   (  100.67 ms per token,     9.93 tokens per second)
llama_print_timings:       total time =  7334.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay, let's analyze the results,” said Dingyi as he lit a cigarette. “We performed five experiments altogether, four of them in different space and time positions and twice in the same position but at different times. Don't you find it shocking?” He opened his arms to highlight the fact that the results were exactly the same after all five experiments.



llama_print_timings:        load time =   746.93 ms
llama_print_timings:      sample time =    49.33 ms /    80 runs   (    0.62 ms per token,  1621.83 tokens per second)
llama_print_timings: prompt eval time =   746.88 ms /   104 tokens (    7.18 ms per token,   139.25 tokens per second)
llama_print_timings:        eval time =  8130.65 ms /    79 runs   (  102.92 ms per token,     9.72 tokens per second)
llama_print_timings:       total time =  9104.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What exactly do you mean?” asked Wang Miao, panting.



llama_print_timings:        load time =   644.15 ms
llama_print_timings:      sample time =     8.16 ms /    17 runs   (    0.48 ms per token,  2082.57 tokens per second)
llama_print_timings: prompt eval time =   644.11 ms /    46 tokens (   14.00 ms per token,    71.42 tokens per second)
llama_print_timings:        eval time =  1557.56 ms /    16 runs   (   97.35 ms per token,    10.27 tokens per second)
llama_print_timings:       total time =  2240.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Explain this incredible result in terms of physics."


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“In five trials, the mass of two balls has not changed; their positions, which are taken as reference points on the table, have not changed either; the vector of impulse is relatively constant during each impact, so black ball is knocked down.



llama_print_timings:        load time =   740.43 ms
llama_print_timings:      sample time =    25.17 ms /    54 runs   (    0.47 ms per token,  2145.07 tokens per second)
llama_print_timings: prompt eval time =   740.39 ms /   100 tokens (    7.40 ms per token,   135.06 tokens per second)
llama_print_timings:        eval time =  5153.87 ms /    53 runs   (   97.24 ms per token,    10.28 tokens per second)
llama_print_timings:       total time =  6011.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi picked up the bottle of white brandy that was lying on the floor and filled two cups, one with each hand. She handed a cup to Wang Miao but he turned it down. “We should celebrate because we have discovered a great law: the physical rules are uniform in time and space. All physics theories from Archimedes’ principle to string theory, as well as all scientific discoveries and thought achievements made by humans so far, are the sub-products of this great law; compared with Einstein and Hawking, we are really just dull people who have only worked out applications.”



llama_print_timings:        load time =   803.84 ms
llama_print_timings:      sample time =    60.86 ms /   130 runs   (    0.47 ms per token,  2136.19 tokens per second)
llama_print_timings: prompt eval time =   803.79 ms /   141 tokens (    5.70 ms per token,   175.42 tokens per second)
llama_print_timings:        eval time = 12594.13 ms /   129 runs   (   97.63 ms per token,    10.24 tokens per second)
llama_print_timings:       total time = 13684.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I still don't understand what you mean.



llama_print_timings:        load time =   625.53 ms
llama_print_timings:      sample time =     5.03 ms /    11 runs   (    0.46 ms per token,  2186.88 tokens per second)
llama_print_timings: prompt eval time =   625.50 ms /    37 tokens (   16.91 ms per token,    59.15 tokens per second)
llama_print_timings:        eval time =   951.12 ms /    10 runs   (   95.11 ms per token,    10.51 tokens per second)
llama_print_timings:       total time =  1599.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Imagine a different outcome: for the first time, the white ball hits the person in the hole; secondly, the black ball goes astray; thirdly, the black ball flies up into the ceiling; fourthly, the black ball behaves like a frightened sparrow inside the room and eventually falls into your pocket; fifthly, the black ball moves at nearly the speed of light, breaking the billiards table along the way and penetrating the wall until it leaves the solar system as described in Asimov’s novel The Billiard. What would you think when this happens?



llama_print_timings:        load time =   854.10 ms
llama_print_timings:      sample time =    59.41 ms /   127 runs   (    0.47 ms per token,  2137.62 tokens per second)
llama_print_timings: prompt eval time =   854.05 ms /   160 tokens (    5.34 ms per token,   187.34 tokens per second)
llama_print_timings:        eval time = 11813.73 ms /   126 runs   (   93.76 ms per token,    10.67 tokens per second)
llama_print_timings:       total time = 12942.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

DingYi stared at WangMu, who remained silent for a while before asking, "Is this really happening?"



llama_print_timings:        load time =   614.25 ms
llama_print_timings:      sample time =    12.23 ms /    27 runs   (    0.45 ms per token,  2207.33 tokens per second)
llama_print_timings: prompt eval time =   614.21 ms /    52 tokens (   11.81 ms per token,    84.66 tokens per second)
llama_print_timings:        eval time =  2317.80 ms /    26 runs   (   89.15 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  2990.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi took the two cups of wine in her hand and gulped them down, staring at the billiard table with eyes glossed over as if it were a demon. “Yes, this happened,” he said. Recently, experimental conditions for basic research are gradually maturing, and three expensive ‘billiard tables’ have been made in North America, Europe, and China, where you know exactly, your nano-center has made quite a profit from them.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

These high-energy accelerators will increase the energy of particle collisions by a quantity, which is something that humanity has never achieved before. Under the new collision energy levels, the same particles with the same collision energy will result in different experimental conditions, and there will be no regularities at all. Physics scientists are so confused that they repeat these ultra-high energy collisions under the same conditions multiple times, but each time the results are different.



llama_print_timings:        load time =   745.85 ms
llama_print_timings:      sample time =    42.29 ms /    95 runs   (    0.45 ms per token,  2246.45 tokens per second)
llama_print_timings: prompt eval time =   745.81 ms /   128 tokens (    5.83 ms per token,   171.63 tokens per second)
llama_print_timings:        eval time =  8304.15 ms /    94 runs   (   88.34 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  9246.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What does this mean?” Wang Miao asked, seeing that Ding Yi was staring at him but didn’t say anything. He added, “Oh, I work with nano technology as well and come in contact with microstructures of materials, but it is several levels shallower than what you do.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

This means that physical laws are not uniform in time and space.



llama_print_timings:        load time =   620.35 ms
llama_print_timings:      sample time =     6.65 ms /    14 runs   (    0.47 ms per token,  2106.85 tokens per second)
llama_print_timings: prompt eval time =   620.31 ms /    42 tokens (   14.77 ms per token,    67.71 tokens per second)
llama_print_timings:        eval time =  1150.58 ms /    13 runs   (   88.51 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  1801.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What does this mean?”



llama_print_timings:        load time =   592.22 ms
llama_print_timings:      sample time =     3.15 ms /     7 runs   (    0.45 ms per token,  2224.34 tokens per second)
llama_print_timings: prompt eval time =   592.18 ms /    36 tokens (   16.45 ms per token,    60.79 tokens per second)
llama_print_timings:        eval time =   566.00 ms /     6 runs   (   94.33 ms per token,    10.60 tokens per second)
llama_print_timings:       total time =  1172.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You should be able to figure it out yourself. The general has thought of it, and he is really clever.



llama_print_timings:        load time =   600.29 ms
llama_print_timings:      sample time =    10.93 ms /    24 runs   (    0.46 ms per token,  2195.39 tokens per second)
llama_print_timings: prompt eval time =   600.25 ms /    56 tokens (   10.72 ms per token,    93.29 tokens per second)
llama_print_timings:        eval time =  2004.73 ms /    23 runs   (   87.16 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2654.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao looked out the window and thought deeply, as if he could find answers to all his problems in the lights of the city.



llama_print_timings:        load time =   605.34 ms
llama_print_timings:      sample time =    13.50 ms /    30 runs   (    0.45 ms per token,  2221.56 tokens per second)
llama_print_timings: prompt eval time =   605.29 ms /    57 tokens (   10.62 ms per token,    94.17 tokens per second)
llama_print_timings:        eval time =  2502.32 ms /    29 runs   (   86.29 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3169.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That means that there is no universal physical law, so physics... doesn't exist.



llama_print_timings:        load time =   610.46 ms
llama_print_timings:      sample time =     8.38 ms /    19 runs   (    0.44 ms per token,  2266.76 tokens per second)
llama_print_timings: prompt eval time =   610.42 ms /    59 tokens (   10.35 ms per token,    96.65 tokens per second)
llama_print_timings:        eval time =  1496.10 ms /    18 runs   (   83.12 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  2144.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She continued, “I know that I am being irresponsible, but there is no choice.”



llama_print_timings:        load time =   637.49 ms
llama_print_timings:      sample time =     9.48 ms /    21 runs   (    0.45 ms per token,  2214.96 tokens per second)
llama_print_timings: prompt eval time =   637.45 ms /    77 tokens (    8.28 ms per token,   120.79 tokens per second)
llama_print_timings:        eval time =  1718.02 ms /    20 runs   (   85.90 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2397.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao picked up the white ball he had just hit five times on the billiard table, stroking it for a while and then gently letting it go. "This is really not conducive to a frontier theory explorer," he said.



llama_print_timings:        load time =   626.30 ms
llama_print_timings:      sample time =    25.21 ms /    56 runs   (    0.45 ms per token,  2221.52 tokens per second)
llama_print_timings: prompt eval time =   626.25 ms /    68 tokens (    9.21 ms per token,   108.58 tokens per second)
llama_print_timings:        eval time =  4658.19 ms /    55 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  5400.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

To make progress in theoretical physics, you need a religious-like obsession that can easily lead you into the abyss.



llama_print_timings:        load time =   600.44 ms
llama_print_timings:      sample time =    11.79 ms /    26 runs   (    0.45 ms per token,  2205.82 tokens per second)
llama_print_timings: prompt eval time =   600.42 ms /    56 tokens (   10.72 ms per token,    93.27 tokens per second)
llama_print_timings:        eval time =  2170.89 ms /    25 runs   (   86.84 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  2826.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi gave Wang Miao an address when she said goodbye. “If you have time, please visit Yang Dong’s mother. Yang Dong has been living with her all these years and her daughter is everything to her now. She’s very pitiful.”



llama_print_timings:        load time =   634.46 ms
llama_print_timings:      sample time =    26.28 ms /    59 runs   (    0.45 ms per token,  2245.48 tokens per second)
llama_print_timings: prompt eval time =   634.43 ms /    76 tokens (    8.35 ms per token,   119.79 tokens per second)
llama_print_timings:        eval time =  4923.61 ms /    58 runs   (   84.89 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5679.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao said, "Ding Yi, you know so much more than I do. Can't you just give me a little bit of information? You really believe in the fact that space-time is unevenly distributed?"



llama_print_timings:        load time =   623.00 ms
llama_print_timings:      sample time =    23.54 ms /    51 runs   (    0.46 ms per token,  2166.53 tokens per second)
llama_print_timings: prompt eval time =   622.96 ms /    63 tokens (    9.89 ms per token,   101.13 tokens per second)
llama_print_timings:        eval time =  4256.79 ms /    50 runs   (   85.14 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  4986.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know anything.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

They looked at each other for a long time before Ding Yi said, "That's a question."



llama_print_timings:        load time =   591.34 ms
llama_print_timings:      sample time =    11.15 ms /    24 runs   (    0.46 ms per token,  2152.08 tokens per second)
llama_print_timings: prompt eval time =   591.30 ms /    50 tokens (   11.83 ms per token,    84.56 tokens per second)
llama_print_timings:        eval time =  1982.55 ms /    23 runs   (   86.20 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2625.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Knowing that, Wang Miao simply accepted the British army colonel's words. It was a question of survival or death.



llama_print_timings:        load time =   596.57 ms
llama_print_timings:      sample time =    13.66 ms /    30 runs   (    0.46 ms per token,  2195.87 tokens per second)
llama_print_timings: prompt eval time =   596.53 ms /    53 tokens (   11.26 ms per token,    88.85 tokens per second)
llama_print_timings:        eval time =  2442.25 ms /    29 runs   (   84.22 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  3102.98 ms


translated 52.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The second day was a weekend, and Wang Miao got up very early to take his camera for a ride. As an avid photographer, he most yearned for wild wilderness themes, but after midlife, he had no more energy to pursue such luxury pleasures; Most of the time, he could only take pictures in urban landscapes. He purposely chose some corners of the city that gave off a wild atmosphere, like the dried-out lakes on park grounds, the newly unearthed soil at construction sites, and the emerging weeds peeking out from cement cracks. To eliminate the wasteful colours in the background of the city, he only used black-and-white film; Unexpectedly, this style of photography became a unique school, gradually becoming famous, with works selected for two large shows and joining the association of photographers. Every time he went out to take pictures, he rode his bike in the city at random, capturing inspiration and the compositions he needed. Sometimes he would turn for an entire day.



llama_print_timings:        load time =   871.73 ms
llama_print_timings:      sample time =   104.50 ms /   232 runs   (    0.45 ms per token,  2220.20 tokens per second)
llama_print_timings: prompt eval time =   871.69 ms /   210 tokens (    4.15 ms per token,   240.91 tokens per second)
llama_print_timings:        eval time = 20585.29 ms /   231 runs   (   89.11 ms per token,    11.22 tokens per second)
llama_print_timings:       total time = 21927.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Today, Wang Miao felt a bit strange. His photography was known for its calm and staid style, but today he found it difficult to find the sense of stability required to create such compositions. In his eyes, this emerging city seemed to be built on a river of sand, its stability being illusory. The two billiard balls that occupied him in the previous night kept flying aimlessly in his long dream and only revealed their existence occasionally by obscuring the white ball they were chasing after. The black ball was invisible at all times, appearing only when it briefly blocked the white one.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Is the underlying nature of matter really unsystematic? Is the stability and order of the world just a short-term dynamic equilibrium in the universe? Just a short-lived eddy in a chaotic flow?



llama_print_timings:        load time =   677.47 ms
llama_print_timings:      sample time =    19.80 ms /    45 runs   (    0.44 ms per token,  2272.50 tokens per second)
llama_print_timings: prompt eval time =   677.43 ms /    68 tokens (    9.96 ms per token,   100.38 tokens per second)
llama_print_timings:        eval time =  3890.24 ms /    44 runs   (   88.41 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  4654.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Unconsciously, he had ridden to the foot of the newly completed CCTV building. He stopped his bike and sat on the sidewalk, looking up at the A-shaped majestic structure that towered in the morning sunlight, trying to regain a sense of stability by following its pointed silhouette towards the deep blue sky. Suddenly, two words appeared in his mind: archer and farmer.



llama_print_timings:        load time =   694.17 ms
llama_print_timings:      sample time =    41.54 ms /    92 runs   (    0.45 ms per token,  2214.52 tokens per second)
llama_print_timings: prompt eval time =   694.13 ms /   120 tokens (    5.78 ms per token,   172.88 tokens per second)
llama_print_timings:        eval time =  7901.76 ms /    91 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  8781.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When scholars discuss in the “scientific boundary”, they often use an abbreviation called SF, which is not a reference to science fiction, but to the two words that the previous paragraph was referencing. This stems from two hypotheses related to the nature of cosmic laws.



llama_print_timings:        load time =   627.43 ms
llama_print_timings:      sample time =    26.87 ms /    60 runs   (    0.45 ms per token,  2233.31 tokens per second)
llama_print_timings: prompt eval time =   627.40 ms /    74 tokens (    8.48 ms per token,   117.95 tokens per second)
llama_print_timings:        eval time =  5031.47 ms /    59 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  5778.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The “Shooter Hypothesis” states that there is a legendary gunman who, on an empty target, will shoot holes in it every ten centimeters. The hypothetical alien scientists living on the surface of this target see their universe and discover the great equation: “There must be a hole at least ten centimeters away.” They turn the random act of the gunman into a law of their universe.



llama_print_timings:        load time =   680.01 ms
llama_print_timings:      sample time =    42.39 ms /    91 runs   (    0.47 ms per token,  2146.94 tokens per second)
llama_print_timings: prompt eval time =   679.97 ms /   111 tokens (    6.13 ms per token,   163.24 tokens per second)
llama_print_timings:        eval time =  7801.92 ms /    90 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  8670.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Farmer's Delusion had a sinister undercurrent of terror: there was a flock of chickens on a farm, and the farmer would feed them every noon at eleven. A scientist who watched this behavior for nearly a year did not find any exception, so he also discovered one of great laws in his universe: "every day morning at eleven o'clock, food will descend." He announced it to the chickens on Thanksgiving morning and it didn't show up that morning when the farmer came to get them.



llama_print_timings:        load time =   742.52 ms
llama_print_timings:      sample time =    53.54 ms /   117 runs   (    0.46 ms per token,  2185.40 tokens per second)
llama_print_timings: prompt eval time =   742.48 ms /   132 tokens (    5.62 ms per token,   177.78 tokens per second)
llama_print_timings:        eval time = 10025.27 ms /   116 runs   (   86.42 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11007.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The ground felt like slippery sand under his feet, and the A-shaped building seemed to sway. He quickly looked away.



llama_print_timings:        load time =   607.99 ms
llama_print_timings:      sample time =    13.31 ms /    30 runs   (    0.44 ms per token,  2253.94 tokens per second)
llama_print_timings: prompt eval time =   607.94 ms /    58 tokens (   10.48 ms per token,    95.40 tokens per second)
llama_print_timings:        eval time =  2443.32 ms /    29 runs   (   84.25 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  3109.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Just to shake off the unease, Wang Miao forced himself to take a roll of film. Before returning home for lunch, his wife took her children out. Luncheon was not yet finished and he sat on a stool and went to sleep. Due to last night's lack of sleep, he felt very sleepy after waking up and it was almost 5 o'clock in the afternoon. When he remembered the film that he had taken during the morning, he quickly crawled into that small dark room, which was converted from a closet.



llama_print_timings:        load time =   756.13 ms
llama_print_timings:      sample time =    53.27 ms /   119 runs   (    0.45 ms per token,  2233.86 tokens per second)
llama_print_timings: prompt eval time =   756.08 ms /   144 tokens (    5.25 ms per token,   190.46 tokens per second)
llama_print_timings:        eval time = 10286.44 ms /   118 runs   (   87.17 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 11281.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The film developed quickly and he began to look at which images were worth enlarging into photographs. In the first image, he saw something unusual. The slide showed a small piece of grassland outside a large shopping mall, but he noticed an odd thing in it: l200：00：00.



llama_print_timings:        load time =   654.19 ms
llama_print_timings:      sample time =    31.35 ms /    69 runs   (    0.45 ms per token,  2201.10 tokens per second)
llama_print_timings: prompt eval time =   654.15 ms /    95 tokens (    6.89 ms per token,   145.23 tokens per second)
llama_print_timings:        eval time =  5835.19 ms /    68 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  6629.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The second negative film also has a number: ll99: 49: 33.



llama_print_timings:        load time =   598.36 ms
llama_print_timings:      sample time =    10.78 ms /    22 runs   (    0.49 ms per token,  2041.20 tokens per second)
llama_print_timings: prompt eval time =   598.32 ms /    47 tokens (   12.73 ms per token,    78.55 tokens per second)
llama_print_timings:        eval time =  1727.38 ms /    21 runs   (   82.26 ms per token,    12.16 tokens per second)
llama_print_timings:       total time =  2374.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Each roll of film has a small row of numbers on each sheet!



llama_print_timings:        load time =   600.03 ms
llama_print_timings:      sample time =     6.94 ms /    15 runs   (    0.46 ms per token,  2162.63 tokens per second)
llama_print_timings: prompt eval time =   600.00 ms /    47 tokens (   12.77 ms per token,    78.33 tokens per second)
llama_print_timings:        eval time =  1195.66 ms /    14 runs   (   85.40 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1826.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The third card is 1199:40:l8, the fourth is 1199:28:51, the sixth is 1199:l5:41 and so on. Then, the seventh card is 1199:07:38, eighth is 1198:53:09... The thirteenth card is 1193:36:23, the sixteenth is l194:50:49 and then, the last card is 1194:16:37.



llama_print_timings:        load time =   761.54 ms
llama_print_timings:      sample time =    61.08 ms /   135 runs   (    0.45 ms per token,  2210.14 tokens per second)
llama_print_timings: prompt eval time =   761.50 ms /   147 tokens (    5.18 ms per token,   193.04 tokens per second)
llama_print_timings:        eval time = 11544.37 ms /   134 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time = 12579.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as he thought of the problem, Wang Mu immediately remembered that the camera used by his grandfather was a Leica M2 with no automatic features and which cannot even rearrange its images in any way.



llama_print_timings:        load time =   675.97 ms
llama_print_timings:      sample time =    19.39 ms /    44 runs   (    0.44 ms per token,  2269.56 tokens per second)
llama_print_timings: prompt eval time =   675.93 ms /   100 tokens (    6.76 ms per token,   147.94 tokens per second)
llama_print_timings:        eval time =  3705.44 ms /    43 runs   (   86.17 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  4469.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he re-examined each slide, Wang Mu soon discovered the first paradox of these numbers: they automatically adjusted to their backgrounds. If the backdrop was black, the numbers were white; if the backdrop was white, the figures became black, as if for maximum contrast to facilitate visibility to the observer. When Wang Mu looked at Slide 16, his heart raced faster and he felt a cold draft along his spine rising up:



llama_print_timings:        load time =   683.68 ms
llama_print_timings:      sample time =    44.39 ms /    99 runs   (    0.45 ms per token,  2230.38 tokens per second)
llama_print_timings: prompt eval time =   683.65 ms /   111 tokens (    6.16 ms per token,   162.36 tokens per second)
llama_print_timings:        eval time =  8522.88 ms /    98 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  9406.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This photo is a withered tree against an old wall, the wall showing black and white colors. In such background, it is impossible to display clearly that the numbers would stand in a normal position, but they appear to be upright, bend themselves, and are displayed along with the dark trunk of the tree as if they were attached to it!



llama_print_timings:        load time =   691.85 ms
llama_print_timings:      sample time =    33.03 ms /    74 runs   (    0.45 ms per token,  2240.39 tokens per second)
llama_print_timings: prompt eval time =   691.81 ms /   113 tokens (    6.12 ms per token,   163.34 tokens per second)
llama_print_timings:        eval time =  6328.55 ms /    73 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  7167.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as he started studying the mathematical relations of these numbers, he initially thought they might be some kind of code, but the intervals between them were not the same. He quickly understood that these numbers represented the time elapsed in hours, minutes, and seconds. He took out his camera notes which detailed precisely the shooting times, down to the minute. He noticed that the difference between two shots on the roll of film corresponded with the actual duration of the elapsed time. Obviously, this roll of film recorded a speed up of time. Wang Miao immediately realized it was what.



llama_print_timings:        load time =   752.84 ms
llama_print_timings:      sample time =    54.03 ms /   120 runs   (    0.45 ms per token,  2220.95 tokens per second)
llama_print_timings: prompt eval time =   752.80 ms /   141 tokens (    5.34 ms per token,   187.30 tokens per second)
llama_print_timings:        eval time = 10314.01 ms /   119 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 11311.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A countdown timer.



llama_print_timings:        load time =   570.89 ms
llama_print_timings:      sample time =     2.89 ms /     6 runs   (    0.48 ms per token,  2073.26 tokens per second)
llama_print_timings: prompt eval time =   570.85 ms /    34 tokens (   16.79 ms per token,    59.56 tokens per second)
llama_print_timings:        eval time =   436.11 ms /     5 runs   (   87.22 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  1019.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The countdown starts from 1200 hours and now there are still $1194$ hours left.



llama_print_timings:        load time =   587.91 ms
llama_print_timings:      sample time =    11.73 ms /    26 runs   (    0.45 ms per token,  2216.73 tokens per second)
llama_print_timings: prompt eval time =   587.87 ms /    49 tokens (   12.00 ms per token,    83.35 tokens per second)
llama_print_timings:        eval time =  2134.83 ms /    25 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2776.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, it was at the moment I took the last roll of film. Does this countdown still continue?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

He took out a new black-and-white film and put it into his Leica camera. He took some random photos in the room before rushing back to the balcony for more exterior shots. Once finished, he took out the developed film from the camera and went straight back to the darkroom to process it. When the processed film was unveiled, he saw that a number's ghostly appearance appeared on each of the developed frames: 1187:27:39. The first frame on this roll is exactly three-four seconds after the last frame on the previous roll. From here on out, every third to fourth frame will have a time delay of three-four seconds.



llama_print_timings:        load time =   881.70 ms
llama_print_timings:      sample time =    67.75 ms /   150 runs   (    0.45 ms per token,  2214.02 tokens per second)
llama_print_timings: prompt eval time =   881.66 ms /   212 tokens (    4.16 ms per token,   240.46 tokens per second)
llama_print_timings:        eval time = 13112.61 ms /   149 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 14305.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The countdown is still ongoing.



llama_print_timings:        load time =   580.61 ms
llama_print_timings:      sample time =     4.24 ms /     9 runs   (    0.47 ms per token,  2120.64 tokens per second)
llama_print_timings: prompt eval time =   580.57 ms /    35 tokens (   16.59 ms per token,    60.29 tokens per second)
llama_print_timings:        eval time =   652.02 ms /     8 runs   (   81.50 ms per token,    12.27 tokens per second)
llama_print_timings:       total time =  1250.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao once again put a new film into the camera and took some wild photos, and there were several which he intentionally left with exposed planes to record the moments.



llama_print_timings:        load time =   614.29 ms
llama_print_timings:      sample time =    16.62 ms /    38 runs   (    0.44 ms per token,  2286.68 tokens per second)
llama_print_timings: prompt eval time =   614.25 ms /    60 tokens (   10.24 ms per token,    97.68 tokens per second)
llama_print_timings:        eval time =  3138.35 ms /    37 runs   (   84.82 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  3825.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he took out the finished roll of film, his wife and child returned. Before putting in the next film, he gave the camera to his wife: "Come on, take this last roll."



llama_print_timings:        load time =   634.79 ms
llama_print_timings:      sample time =    18.22 ms /    41 runs   (    0.44 ms per token,  2249.78 tokens per second)
llama_print_timings: prompt eval time =   634.74 ms /    75 tokens (    8.46 ms per token,   118.16 tokens per second)
llama_print_timings:        eval time =  3346.90 ms /    40 runs   (   83.67 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  4063.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What are you taking pictures of? Your wife was surprised at her husband's camera. She and her son had never been interested in it, so they saw it as an outdated machine with a price tag of almost two thousand yuan.



llama_print_timings:        load time =   636.01 ms
llama_print_timings:      sample time =    23.08 ms /    50 runs   (    0.46 ms per token,  2166.75 tokens per second)
llama_print_timings: prompt eval time =   635.97 ms /    76 tokens (    8.37 ms per token,   119.50 tokens per second)
llama_print_timings:        eval time =  4121.74 ms /    49 runs   (   84.12 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  4860.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Everything is fine, just take a picture.



llama_print_timings:        load time =   602.87 ms
llama_print_timings:      sample time =     4.93 ms /    11 runs   (    0.45 ms per token,  2232.60 tokens per second)
llama_print_timings: prompt eval time =   602.82 ms /    54 tokens (   11.16 ms per token,    89.58 tokens per second)
llama_print_timings:        eval time =   846.26 ms /    10 runs   (   84.63 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1470.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Okay, Beibei, let's take a picture." The wife moved the camera to her son.



llama_print_timings:        load time =   613.85 ms
llama_print_timings:      sample time =    11.29 ms /    25 runs   (    0.45 ms per token,  2214.94 tokens per second)
llama_print_timings: prompt eval time =   613.81 ms /    49 tokens (   12.53 ms per token,    79.83 tokens per second)
llama_print_timings:        eval time =  2017.89 ms /    24 runs   (   84.08 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  2681.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

His mind suddenly flashed a vision of ghostly figures swirling like an open loosening noose in front of his son's face, and he shivered slightly. “No, don't photograph your son. Take some other pictures instead.”



llama_print_timings:        load time =   697.00 ms
llama_print_timings:      sample time =    25.38 ms /    57 runs   (    0.45 ms per token,  2246.22 tokens per second)
llama_print_timings: prompt eval time =   696.97 ms /    83 tokens (    8.40 ms per token,   119.09 tokens per second)
llama_print_timings:        eval time =  4777.47 ms /    56 runs   (   85.31 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5587.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"咔嚓" went the shutter, and then her wife said, "What's wrong with this?" She called out to Mankuo, who entered a dark closet. 



llama_print_timings:        load time =   624.34 ms
llama_print_timings:      sample time =    17.90 ms /    40 runs   (    0.45 ms per token,  2234.64 tokens per second)
llama_print_timings: prompt eval time =   624.30 ms /    79 tokens (    7.90 ms per token,   126.54 tokens per second)
llama_print_timings:        eval time =  3258.49 ms /    39 runs   (   83.55 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  3966.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's really a hassle." As the wife of a doctor, she couldn't understand why people still use such outdated and expensive equipment to take photos, and those photos are black and white too.



llama_print_timings:        load time =   626.30 ms
llama_print_timings:      sample time =    20.59 ms /    46 runs   (    0.45 ms per token,  2234.53 tokens per second)
llama_print_timings: prompt eval time =   626.26 ms /    68 tokens (    9.21 ms per token,   108.58 tokens per second)
llama_print_timings:        eval time =  3869.09 ms /    45 runs   (   85.98 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  4587.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the film was developed, Wang Miao saw that the ghostly countdown continued, on a chaotic set of photos including those with the lens cover that clearly showed off: 1l87:19:06, 1187:19:03, 1187:l8:59 and 1187:18:56.



llama_print_timings:        load time =   705.96 ms
llama_print_timings:      sample time =    39.20 ms /    87 runs   (    0.45 ms per token,  2219.61 tokens per second)
llama_print_timings: prompt eval time =   705.91 ms /   123 tokens (    5.74 ms per token,   174.24 tokens per second)
llama_print_timings:        eval time =  7437.64 ms /    86 runs   (   86.48 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  8322.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She knocked on the door twice, telling him that the filming was complete. As he grabbed his camera and took out a roll of film, he was obviously shaking in hand. Disregarding his wife's odd look, he returned to the dark room, shutting it firmly behind himself. He was very busy, spilling development fluid, fixer on the floor, and the film rolled out quickly. He closed his eyes silently, hoping that something bad does not happen...



llama_print_timings:        load time =   736.83 ms
llama_print_timings:      sample time =    47.17 ms /   104 runs   (    0.45 ms per token,  2204.74 tokens per second)
llama_print_timings: prompt eval time =   736.79 ms /   135 tokens (    5.46 ms per token,   183.23 tokens per second)
llama_print_timings:        eval time =  8940.96 ms /   103 runs   (   86.81 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  9894.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He looked through the magnifying glass at the wet and wet film, which disappeared gradually, leaving only his wife's indoor photos in the low-speed aperture, taken with not very professional operation. But Wang Meng thought it was the most beautiful photo he had ever seen.



llama_print_timings:        load time =   668.14 ms
llama_print_timings:      sample time =    26.85 ms /    60 runs   (    0.45 ms per token,  2234.72 tokens per second)
llama_print_timings: prompt eval time =   668.11 ms /    93 tokens (    7.18 ms per token,   139.20 tokens per second)
llama_print_timings:        eval time =  5204.39 ms /    59 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  5993.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao stepped out of the dark room, breathed a sigh of relief and found that he had already become wet with sweat. His wife had gone into the kitchen to cook, while his son had gone into his own room to play. He sat alone on the sofa, began to calmly think for a while.



llama_print_timings:        load time =   636.23 ms
llama_print_timings:      sample time =    31.53 ms /    70 runs   (    0.45 ms per token,  2220.04 tokens per second)
llama_print_timings: prompt eval time =   636.19 ms /    76 tokens (    8.37 ms per token,   119.46 tokens per second)
llama_print_timings:        eval time =  5957.83 ms /    69 runs   (   86.35 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  6740.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

First, these shots record time passing precisely at different intervals and display signs of intelligent behavior, which cannot be recorded on film. This can only be a force to cause light sensitivity in the camera, which will be what? Is it a camera problem? Or was there an unintentional device placed in the camera? He removes the lens and disassembles the camera to examine every flawless, shiny machine part carefully using magnifying glass, but he does not find any abnormalities. So, most likely, the source of sensitization is something strong enough to penetrate through the light from outside, which in technical terms is also impossible: where is the source of the rays? How are they aimed?



llama_print_timings:        load time =   816.10 ms
llama_print_timings:      sample time =    68.75 ms /   154 runs   (    0.45 ms per token,  2239.93 tokens per second)
llama_print_timings: prompt eval time =   816.05 ms /   175 tokens (    4.66 ms per token,   214.45 tokens per second)
llama_print_timings:        eval time = 13322.46 ms /   153 runs   (   87.07 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 14453.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At least in the current state of technology, this power is paranormal.



llama_print_timings:        load time =   583.85 ms
llama_print_timings:      sample time =     7.48 ms /    17 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   583.81 ms /    42 tokens (   13.90 ms per token,    71.94 tokens per second)
llama_print_timings:        eval time =  1350.10 ms /    16 runs   (   84.38 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  1966.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He then loaded a roll of film into his Leica camera and started taking random shots. When the roll was developed, he calmed down for a moment only to be thrust back on the edge of madness: the countdown timer was still present in the photos shown, indicating that it had never stopped running; just like his wife's shot, it had simply not been shown.



llama_print_timings:        load time =   696.29 ms
llama_print_timings:      sample time =    35.39 ms /    80 runs   (    0.44 ms per token,  2260.33 tokens per second)
llama_print_timings: prompt eval time =   696.25 ms /   116 tokens (    6.00 ms per token,   166.61 tokens per second)
llama_print_timings:        eval time =  6717.54 ms /    79 runs   (   85.03 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  7575.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The number 1186 is a prime number, meaning that it can be evenly divided by only one other number—itself.



llama_print_timings:        load time =   632.25 ms
llama_print_timings:      sample time =    14.55 ms /    30 runs   (    0.49 ms per token,  2061.15 tokens per second)
llama_print_timings: prompt eval time =   632.22 ms /    73 tokens (    8.66 ms per token,   115.47 tokens per second)
llama_print_timings:        eval time =  2488.84 ms /    29 runs   (   85.82 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3186.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mochong bursts out of the dark room, bursts out of his door, and knocks on the door of his neighbor. The open-door man is professor Zhang, who is now retired.



llama_print_timings:        load time =   636.70 ms
llama_print_timings:      sample time =    20.31 ms /    46 runs   (    0.44 ms per token,  2264.67 tokens per second)
llama_print_timings: prompt eval time =   636.66 ms /    56 tokens (   11.37 ms per token,    87.96 tokens per second)
llama_print_timings:        eval time =  3820.60 ms /    45 runs   (   84.90 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  4550.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Old Zhang, do you have a camera? Oh, not digital one, it needs to use the film! ”



llama_print_timings:        load time =   592.35 ms
llama_print_timings:      sample time =    11.85 ms /    26 runs   (    0.46 ms per token,  2194.46 tokens per second)
llama_print_timings: prompt eval time =   592.31 ms /    50 tokens (   11.85 ms per token,    84.41 tokens per second)
llama_print_timings:        eval time =  2074.21 ms /    25 runs   (   82.97 ms per token,    12.05 tokens per second)
llama_print_timings:       total time =  2720.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Hey, you the big photographer borrow my camera? That 20 thousand yuan's broken? I only have a digital one... Don't you feel uncomfortable? Your face is looking so miserable.”



llama_print_timings:        load time =   602.73 ms
llama_print_timings:      sample time =    22.43 ms /    50 runs   (    0.45 ms per token,  2229.06 tokens per second)
llama_print_timings: prompt eval time =   602.69 ms /    60 tokens (   10.04 ms per token,    99.55 tokens per second)
llama_print_timings:        eval time =  4140.42 ms /    49 runs   (   84.50 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  4845.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Here you are, " said Old Zhang. He handed me a very ordinary Kodak digital camera quickly.



llama_print_timings:        load time =   752.75 ms
llama_print_timings:      sample time =    20.90 ms /    25 runs   (    0.84 ms per token,  1196.40 tokens per second)
llama_print_timings: prompt eval time =   752.67 ms /    47 tokens (   16.01 ms per token,    62.44 tokens per second)
llama_print_timings:        eval time =  2312.76 ms /    24 runs   (   96.36 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =  3158.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sure, just delete the ones inside.



llama_print_timings:        load time =   586.45 ms
llama_print_timings:      sample time =     4.37 ms /    10 runs   (    0.44 ms per token,  2289.90 tokens per second)
llama_print_timings: prompt eval time =   586.41 ms /    39 tokens (   15.04 ms per token,    66.51 tokens per second)
llama_print_timings:        eval time =   794.96 ms /     9 runs   (   88.33 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  1401.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I think we should use the camera, because it's more reliable.” （我认为我们应该用那台相机，因为它更可靠些。）



llama_print_timings:        load time =   682.43 ms
llama_print_timings:      sample time =    14.92 ms /    33 runs   (    0.45 ms per token,  2211.65 tokens per second)
llama_print_timings: prompt eval time =   682.38 ms /   115 tokens (    5.93 ms per token,   168.53 tokens per second)
llama_print_timings:        eval time =  2756.99 ms /    32 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3504.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Quickly, take some photos just like before!



llama_print_timings:        load time =   607.47 ms
llama_print_timings:      sample time =     5.38 ms /    12 runs   (    0.45 ms per token,  2228.41 tokens per second)
llama_print_timings: prompt eval time =   607.43 ms /    40 tokens (   15.19 ms per token,    65.85 tokens per second)
llama_print_timings:        eval time =   878.98 ms /    11 runs   (   79.91 ms per token,    12.51 tokens per second)
llama_print_timings:       total time =  1510.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What's this for? Look at your face... what's wrong with you?!" The wife is horrified as she looks at him.



llama_print_timings:        load time =   595.81 ms
llama_print_timings:      sample time =    14.72 ms /    33 runs   (    0.45 ms per token,  2241.54 tokens per second)
llama_print_timings: prompt eval time =   595.77 ms /    53 tokens (   11.24 ms per token,    88.96 tokens per second)
llama_print_timings:        eval time =  2630.77 ms /    32 runs   (   82.21 ms per token,    12.16 tokens per second)
llama_print_timings:       total time =  3291.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Don't worry, take a picture!”



llama_print_timings:        load time =   590.25 ms
llama_print_timings:      sample time =     5.00 ms /    11 runs   (    0.45 ms per token,  2201.76 tokens per second)
llama_print_timings: prompt eval time =   590.22 ms /    37 tokens (   15.95 ms per token,    62.69 tokens per second)
llama_print_timings:        eval time =   890.12 ms /    10 runs   (   89.01 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  1502.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The wife put down the plate in her hand and looked at her husband with shock and apprehension in her eyes.



llama_print_timings:        load time =   601.28 ms
llama_print_timings:      sample time =    11.32 ms /    25 runs   (    0.45 ms per token,  2208.68 tokens per second)
llama_print_timings: prompt eval time =   601.26 ms /    50 tokens (   12.03 ms per token,    83.16 tokens per second)
llama_print_timings:        eval time =  2100.66 ms /    24 runs   (   87.53 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  2752.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mu put a Kodak camera into his six-year-old son's hand, “Peanut, help your dad take the picture. Just press it once here, this is a photo; and again, this is another photo. Take more pictures like this, at whatever you want.”



llama_print_timings:        load time =   657.67 ms
llama_print_timings:      sample time =    29.18 ms /    65 runs   (    0.45 ms per token,  2227.93 tokens per second)
llama_print_timings: prompt eval time =   657.64 ms /    85 tokens (    7.74 ms per token,   129.25 tokens per second)
llama_print_timings:        eval time =  5668.82 ms /    64 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  6459.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Son quickly grasped, small guy is very interested in, shot very fast. Wang Mu turned from the sofa and picked up his leica, also shot, mother so desperately out of question, tears rolled out.



llama_print_timings:        load time =   690.33 ms
llama_print_timings:      sample time =    22.04 ms /    49 runs   (    0.45 ms per token,  2223.23 tokens per second)
llama_print_timings: prompt eval time =   690.28 ms /    92 tokens (    7.50 ms per token,   133.28 tokens per second)
llama_print_timings:        eval time =  4250.19 ms /    48 runs   (   88.55 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  5040.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Wang Miao, I know you have been under a lot of pressure at work recently. Don't...”



llama_print_timings:        load time =   611.89 ms
llama_print_timings:      sample time =    12.12 ms /    26 runs   (    0.47 ms per token,  2145.04 tokens per second)
llama_print_timings: prompt eval time =   611.85 ms /    48 tokens (   12.75 ms per token,    78.45 tokens per second)
llama_print_timings:        eval time =  2259.66 ms /    25 runs   (   90.39 ms per token,    11.06 tokens per second)
llama_print_timings:       total time =  2925.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao finished the roll of film for his Leica camera and grabbed back the digital camera from his child. He thought about it before retreating to the bedroom. Since he was afraid of his wife and children's distraction, he took several pictures using the eye-level viewfinder without a display screen, even if he would have to watch the result eventually.



llama_print_timings:        load time =   699.38 ms
llama_print_timings:      sample time =    36.20 ms /    81 runs   (    0.45 ms per token,  2237.69 tokens per second)
llama_print_timings: prompt eval time =   699.34 ms /    95 tokens (    7.36 ms per token,   135.84 tokens per second)
llama_print_timings:        eval time =  7148.68 ms /    80 runs   (   89.36 ms per token,    11.19 tokens per second)
llama_print_timings:       total time =  8011.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao took out the roll of film and locked it in the darkroom. After processing, he looked at the bottom plate carefully and used both hands to hold up the magnifier—the countdown for ghosts continued on the plate.



llama_print_timings:        load time =   682.69 ms
llama_print_timings:      sample time =    23.05 ms /    51 runs   (    0.45 ms per token,  2212.49 tokens per second)
llama_print_timings: prompt eval time =   682.65 ms /    87 tokens (    7.85 ms per token,   127.44 tokens per second)
llama_print_timings:        eval time =  4355.51 ms /    50 runs   (   87.11 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  5143.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mochong came out of the dark room and started checking the digital photos on the camera's LCD screen. From the display, he saw that when his son took the photograph, there was no countdown timer appearing; however, in his own photos, it clearly appeared with the sync change on the film.



llama_print_timings:        load time =   690.51 ms
llama_print_timings:      sample time =    29.96 ms /    67 runs   (    0.45 ms per token,  2236.24 tokens per second)
llama_print_timings: prompt eval time =   690.49 ms /    87 tokens (    7.94 ms per token,   126.00 tokens per second)
llama_print_timings:        eval time =  5884.06 ms /    66 runs   (   89.15 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  6709.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Miao used different cameras to take pictures, in order to eliminate the possibility of camera or film failure. But unintentionally letting his child take the photos with him after using the camera, plus using it beforehand for himself and for his wife to take pictures, he obtained a more peculiar result: The countdown would only show up on his photographs when using different cameras and films.



llama_print_timings:        load time =   692.57 ms
llama_print_timings:      sample time =    38.12 ms /    85 runs   (    0.45 ms per token,  2229.98 tokens per second)
llama_print_timings: prompt eval time =   692.53 ms /    94 tokens (    7.37 ms per token,   135.73 tokens per second)
llama_print_timings:        eval time =  7483.68 ms /    84 runs   (   89.09 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  8349.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He desperately grabbed the roll of film like a bunch of snakes entangled together, or a knot that he couldn't unravel.



llama_print_timings:        load time =   634.42 ms
llama_print_timings:      sample time =    17.15 ms /    36 runs   (    0.48 ms per token,  2098.76 tokens per second)
llama_print_timings: prompt eval time =   634.39 ms /    63 tokens (   10.07 ms per token,    99.31 tokens per second)
llama_print_timings:        eval time =  3103.97 ms /    35 runs   (   88.68 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  3817.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He knew that he couldn't solve the problem by himself. So, where should he go? His colleagues in universities and research institutes are not a good choice. They are no different from him - technical minds only. He had a feeling that this issue transcended technology. He thought of Ting Yi, but now she is also caught up in a mental crisis. Finally, he remembered the scientific boundary group which was composed of deep thinkers and active people.



llama_print_timings:        load time =   692.69 ms
llama_print_timings:      sample time =    44.32 ms /    99 runs   (    0.45 ms per token,  2233.80 tokens per second)
llama_print_timings: prompt eval time =   692.65 ms /   110 tokens (    6.30 ms per token,   158.81 tokens per second)
llama_print_timings:        eval time =  8425.56 ms /    98 runs   (   85.98 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  9321.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, he dialed Shen Yufei's phone.



llama_print_timings:        load time =   585.73 ms
llama_print_timings:      sample time =     6.75 ms /    15 runs   (    0.45 ms per token,  2221.56 tokens per second)
llama_print_timings: prompt eval time =   585.70 ms /    41 tokens (   14.29 ms per token,    70.00 tokens per second)
llama_print_timings:        eval time =  1192.98 ms /    14 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  1807.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Doctor, I have something important that needs to be handled.



llama_print_timings:        load time =   599.60 ms
llama_print_timings:      sample time =     7.14 ms /    15 runs   (    0.48 ms per token,  2099.66 tokens per second)
llama_print_timings: prompt eval time =   599.56 ms /    54 tokens (   11.10 ms per token,    90.07 tokens per second)
llama_print_timings:        eval time =  1228.90 ms /    14 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  1862.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She just said those two words and hung up the phone.



llama_print_timings:        load time =   592.14 ms
llama_print_timings:      sample time =     7.10 ms /    13 runs   (    0.55 ms per token,  1831.24 tokens per second)
llama_print_timings: prompt eval time =   592.10 ms /    47 tokens (   12.60 ms per token,    79.38 tokens per second)
llama_print_timings:        eval time =  1050.14 ms /    12 runs   (   87.51 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1677.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei was surprised when Wang Moxiong ate an alarm after her usual manner of speaking, which resulted in some people calling her the female Ernest Hemingway. This time, she didn't even ask what happened and Wang Moxiong felt both relieved and more worried at the same time.



llama_print_timings:        load time =   667.22 ms
llama_print_timings:      sample time =    31.28 ms /    69 runs   (    0.45 ms per token,  2206.02 tokens per second)
llama_print_timings: prompt eval time =   667.18 ms /    87 tokens (    7.67 ms per token,   130.40 tokens per second)
llama_print_timings:        eval time =  5914.68 ms /    68 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  6726.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He stuffed the roll of film into a bag and carried his digital camera along with him as he stepped out of the doorway, facing his wife's worrying glances. Instead of driving himself to the location, he hailed a taxi, eager to be accompanied on this night in the shining city.



llama_print_timings:        load time =   650.32 ms
llama_print_timings:      sample time =    29.83 ms /    66 runs   (    0.45 ms per token,  2212.54 tokens per second)
llama_print_timings: prompt eval time =   650.27 ms /    84 tokens (    7.74 ms per token,   129.18 tokens per second)
llama_print_timings:        eval time =  5676.95 ms /    65 runs   (   87.34 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  6461.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei lives in a high-end villa district near the new ironclad line. The lights here are sparse, and there are several fishing ponds surrounded by luxurious villas. At night, there is a rural feeling here. Shen Yufei clearly has a lot of money, but Wang Meng does not know where she gets it from. Her former job and current position in the company cannot bring her as much money. However, there are no luxury marks in her villa, which is a gathering place of "scientific boundaries." The furnishings of that place look like a small library with a meeting room.



llama_print_timings:        load time =   738.29 ms
llama_print_timings:      sample time =    61.67 ms /   137 runs   (    0.45 ms per token,  2221.65 tokens per second)
llama_print_timings: prompt eval time =   738.25 ms /   131 tokens (    5.64 ms per token,   177.45 tokens per second)
llama_print_timings:        eval time = 11907.47 ms /   136 runs   (   87.55 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 12928.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the living room, Wang Miao saw Xin Yufei's husband Wei Cheng. This forty-something man, who had a sturdy scholarly appearance, was only known by his name. Shen Yufei introduced him when she said that he didn't have a job and spent most of his days at home, and was also used to the frequent visits of scholars.



llama_print_timings:        load time =   703.92 ms
llama_print_timings:      sample time =    38.52 ms /    86 runs   (    0.45 ms per token,  2232.37 tokens per second)
llama_print_timings: prompt eval time =   703.87 ms /   112 tokens (    6.28 ms per token,   159.12 tokens per second)
llama_print_timings:        eval time =  7466.72 ms /    85 runs   (   87.84 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  8345.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, he was not doing nothing, apparently studying something in his home, and greeting anyone with a half-hearted salute when meeting them. Then returning to his room on the first floor where he spent most of his day. One day, Wang Mo accidentally peered inside from the semi-open doorway and saw an amazing thing: a HP small machine. He couldn't be mistaken; it was exactly what they had at the superconductor research center four years ago, with a black gray casing and the RX8620 chip. Putting that million-dollar device in his home seemed strange because Wei Cheng was working alone on it every day.



llama_print_timings:        load time =   835.09 ms
llama_print_timings:      sample time =    64.92 ms /   145 runs   (    0.45 ms per token,  2233.48 tokens per second)
llama_print_timings: prompt eval time =   835.04 ms /   167 tokens (    5.00 ms per token,   199.99 tokens per second)
llama_print_timings:        eval time = 12696.17 ms /   144 runs   (   88.17 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 13829.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yu Fei has something to do upstairs, please wait a moment,” said Wei Cheng and went up the stairs.



llama_print_timings:        load time =   622.52 ms
llama_print_timings:      sample time =    13.75 ms /    30 runs   (    0.46 ms per token,  2182.45 tokens per second)
llama_print_timings: prompt eval time =   622.50 ms /    52 tokens (   11.97 ms per token,    83.53 tokens per second)
llama_print_timings:        eval time =  2584.40 ms /    29 runs   (   89.12 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  3270.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He had planned to wait, but he couldn't resist the urge and went upstairs. When he saw Wei Chengjing about to go into his room with a miniature computer. He seemed not displeased when he saw Wang Moben also came up to him, pointed at an empty room nearby, and said, "In that room. You can find her there."


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

When Wang Miao knocked on the door, it wasn't locked and he saw that Xin Yufei was sitting in front of a computer playing games. What surprised him most was that she was wearing an "virtual gear", which is currently very popular among game players. It consisted of a full-spectrum display helmet and a set of perception suits, whereby the player could feel the hits, stabbings, and fires in the games, as well as cool or cold temperatures. Even more realistic simulation was possible, such as experiencing exposure to wind and snow. At this time, Wang Miao remember what Dashi asked him to do: to remember the website address and e-mail address. He unconsciously looked at the monitor for a while and saw that the name of the game in full view on the screen was very unique.



llama_print_timings:        load time =   893.01 ms
llama_print_timings:      sample time =    96.55 ms /   184 runs   (    0.52 ms per token,  1905.79 tokens per second)
llama_print_timings: prompt eval time =   892.96 ms /   197 tokens (    4.53 ms per token,   220.61 tokens per second)
llama_print_timings:        eval time = 16627.98 ms /   183 runs   (   90.86 ms per token,    11.01 tokens per second)
llama_print_timings:       total time = 17967.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$"What happened to you?"



llama_print_timings:        load time =   858.64 ms
llama_print_timings:      sample time =     3.56 ms /     8 runs   (    0.44 ms per token,  2249.09 tokens per second)
llama_print_timings: prompt eval time =   858.60 ms /   181 tokens (    4.74 ms per token,   210.81 tokens per second)
llama_print_timings:        eval time =   628.82 ms /     7 runs   (   89.83 ms per token,    11.13 tokens per second)
llama_print_timings:       total time =  1503.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How is your leadership of the nanotechnology project going?”



llama_print_timings:        load time =   611.29 ms
llama_print_timings:      sample time =     6.88 ms /    15 runs   (    0.46 ms per token,  2179.28 tokens per second)
llama_print_timings: prompt eval time =   611.26 ms /    39 tokens (   15.67 ms per token,    63.80 tokens per second)
llama_print_timings:        eval time =  1252.21 ms /    14 runs   (   89.44 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  1894.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He was very surprised by this unrelated problem. "The nanotechnology project? What does it have to do with that?" He pointed at the roll of film.



llama_print_timings:        load time =   622.10 ms
llama_print_timings:      sample time =    15.95 ms /    36 runs   (    0.44 ms per token,  2256.63 tokens per second)
llama_print_timings: prompt eval time =   622.06 ms /    62 tokens (   10.03 ms per token,    99.67 tokens per second)
llama_print_timings:        eval time =  3049.92 ms /    35 runs   (   87.14 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  3742.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She didn't speak, but just looked at him quietly waiting for his answer to her question. That was how she had talked in the past. She rarely spoke.



llama_print_timings:        load time =   629.79 ms
llama_print_timings:      sample time =    16.07 ms /    35 runs   (    0.46 ms per token,  2177.29 tokens per second)
llama_print_timings: prompt eval time =   629.75 ms /    58 tokens (   10.86 ms per token,    92.10 tokens per second)
llama_print_timings:        eval time =  2899.97 ms /    34 runs   (   85.29 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  3601.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stop the research,” said Xin Yufei.



llama_print_timings:        load time =   591.81 ms
llama_print_timings:      sample time =     6.02 ms /    13 runs   (    0.46 ms per token,  2160.19 tokens per second)
llama_print_timings: prompt eval time =   591.77 ms /    40 tokens (   14.79 ms per token,    67.59 tokens per second)
llama_print_timings:        eval time =  1048.78 ms /    12 runs   (   87.40 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  1666.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I mean, what’s the point?” 汪淼解释道。 “The point is that I don't see the point.”



llama_print_timings:        load time =   627.66 ms
llama_print_timings:      sample time =    14.14 ms /    32 runs   (    0.44 ms per token,  2263.56 tokens per second)
llama_print_timings: prompt eval time =   627.62 ms /    46 tokens (   13.64 ms per token,    73.29 tokens per second)
llama_print_timings:        eval time =  2707.16 ms /    31 runs   (   87.33 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3398.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei remained silent and didn't repeat her own words.



llama_print_timings:        load time =   605.63 ms
llama_print_timings:      sample time =     7.57 ms /    17 runs   (    0.45 ms per token,  2246.60 tokens per second)
llama_print_timings: prompt eval time =   605.61 ms /    40 tokens (   15.14 ms per token,    66.05 tokens per second)
llama_print_timings:        eval time =  1392.21 ms /    16 runs   (   87.01 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  2030.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stop it?! That's a national key project!”



llama_print_timings:        load time =   604.21 ms
llama_print_timings:      sample time =     6.25 ms /    14 runs   (    0.45 ms per token,  2238.21 tokens per second)
llama_print_timings: prompt eval time =   604.19 ms /    39 tokens (   15.49 ms per token,    64.55 tokens per second)
llama_print_timings:        eval time =  1103.39 ms /    13 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1734.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei still didn't speak, just looked at him, with a calm expression in her eyes.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

You have to say the reason!



llama_print_timings:        load time =   601.88 ms
llama_print_timings:      sample time =     3.62 ms /     8 runs   (    0.45 ms per token,  2209.94 tokens per second)
llama_print_timings: prompt eval time =   601.83 ms /    38 tokens (   15.84 ms per token,    63.14 tokens per second)
llama_print_timings:        eval time =   627.02 ms /     7 runs   (   89.57 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  1245.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Take a break and try.”



llama_print_timings:        load time =   611.74 ms
llama_print_timings:      sample time =     3.64 ms /     8 runs   (    0.45 ms per token,  2199.01 tokens per second)
llama_print_timings: prompt eval time =   611.69 ms /    34 tokens (   17.99 ms per token,    55.58 tokens per second)
llama_print_timings:        eval time =   638.81 ms /     7 runs   (   91.26 ms per token,    10.96 tokens per second)
llama_print_timings:       total time =  1266.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Tell me what you know!”



llama_print_timings:        load time =   582.63 ms
llama_print_timings:      sample time =     3.96 ms /     9 runs   (    0.44 ms per token,  2273.88 tokens per second)
llama_print_timings: prompt eval time =   582.61 ms /    38 tokens (   15.33 ms per token,    65.22 tokens per second)
llama_print_timings:        eval time =   662.45 ms /     8 runs   (   82.81 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =  1262.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I can only tell you these.”



llama_print_timings:        load time =   579.88 ms
llama_print_timings:      sample time =     4.01 ms /     9 runs   (    0.45 ms per token,  2244.39 tokens per second)
llama_print_timings: prompt eval time =   579.86 ms /    38 tokens (   15.26 ms per token,    65.53 tokens per second)
llama_print_timings:        eval time =   666.68 ms /     8 runs   (   83.34 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  1264.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The project cannot stop, and it is impossible to stop.”



llama_print_timings:        load time =   580.83 ms
llama_print_timings:      sample time =     6.22 ms /    14 runs   (    0.44 ms per token,  2250.44 tokens per second)
llama_print_timings: prompt eval time =   580.78 ms /    39 tokens (   14.89 ms per token,    67.15 tokens per second)
llama_print_timings:        eval time =  1129.83 ms /    13 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1738.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Take a break and try.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

That's all there is to say about the ghost countdown. After that, Shen Yufei didn't say anything else related to it, just repeating the same sentence, "Let's pause for a moment and try."



llama_print_timings:        load time =   628.99 ms
llama_print_timings:      sample time =    22.79 ms /    51 runs   (    0.45 ms per token,  2237.82 tokens per second)
llama_print_timings: prompt eval time =   628.90 ms /    74 tokens (    8.50 ms per token,   117.67 tokens per second)
llama_print_timings:        eval time =  4409.40 ms /    50 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  5142.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I see now that ‘Science Boundaries’ is not the academic exchange organization as you claim, but it has a much more complex relationship with reality than I had imagined.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

On the contrary, you get this impression because ‘scientific boundary’ involves things that are simpler than what you imagine.



llama_print_timings:        load time =   621.60 ms
llama_print_timings:      sample time =    11.65 ms /    26 runs   (    0.45 ms per token,  2232.53 tokens per second)
llama_print_timings: prompt eval time =   621.57 ms /    51 tokens (   12.19 ms per token,    82.05 tokens per second)
llama_print_timings:        eval time =  2157.09 ms /    25 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  2831.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The desperate Wang Mole didn't leave without saying a word, and Xin Yufei silently watched him all the way to the gate of the courtyard. As she did this, another car came rushing towards them, which stopped in front of the gate. The man got out from the car and looked into the villa with the lights that were leaking out through the windows. Wang Mole recognised him with only one glance.



llama_print_timings:        load time =   671.75 ms
llama_print_timings:      sample time =    42.67 ms /    94 runs   (    0.45 ms per token,  2202.90 tokens per second)
llama_print_timings: prompt eval time =   671.70 ms /   100 tokens (    6.72 ms per token,   148.88 tokens per second)
llama_print_timings:        eval time =  8173.15 ms /    93 runs   (   87.88 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  9038.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This person is Pan Han, one of the most famous figures in "Science Boundaries." As a biologist, he predicted that eating genetically modified food would cause descendants to be born with heritable birth defects and predicted an ecological disaster caused by genetically modified crops. Unlike those empty-headed scholars who use doomsday phrases, his predictions were full of details and were all proven accurate at a shocking degree. There are even rumors that he comes from the future.



llama_print_timings:        load time =   704.33 ms
llama_print_timings:      sample time =    50.24 ms /   113 runs   (    0.44 ms per token,  2249.02 tokens per second)
llama_print_timings: prompt eval time =   704.28 ms /   115 tokens (    6.12 ms per token,   163.29 tokens per second)
llama_print_timings:        eval time =  9737.43 ms /   112 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 10668.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He made another groundbreaking creation, which was the establishment of China's first experimental community. Unlike those Western utopian communities that sought to return to nature, his "Chinese pastoralism" was not set in a remote wilderness but rather in the biggest city in China. The experimental community had no property at all, including food and daily necessities, and it received them from garbage. While people's initial expectation was different, "Chinese pastoralism" survived and flourished, with its permanent members reaching thousands of people, and the uninvited visitors were also numerous.



llama_print_timings:        load time =   776.86 ms
llama_print_timings:      sample time =   109.94 ms /   127 runs   (    0.87 ms per token,  1155.13 tokens per second)
llama_print_timings: prompt eval time =   776.81 ms /   137 tokens (    5.67 ms per token,   176.36 tokens per second)
llama_print_timings:        eval time = 12590.56 ms /   126 runs   (   99.93 ms per token,    10.01 tokens per second)
llama_print_timings:       total time = 13877.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

With these two successes as a basis, Pan Qian's social thought is increasingly influential. He argues that the technological revolution is a kind of malady affecting human society, with the rapid development of technology comparable to the proliferation of cancer cells. Ultimately, the result is depletion of organic matter and destruction of organs, leading to death of its host body. Pan Qian advocates eliminating "ruthless" technologies such as fossil fuels and nuclear energy, while retaining "gentle" technologies like solar and micro hydroelectric. He proposes that cities should be gradually disbanded and people evenly distributed among self-supporting small towns based on "gentle technology", which is the foundation of a new agricultural society.



llama_print_timings:        load time =   774.48 ms
llama_print_timings:      sample time =    75.09 ms /   169 runs   (    0.44 ms per token,  2250.66 tokens per second)
llama_print_timings: prompt eval time =   774.45 ms /   150 tokens (    5.16 ms per token,   193.69 tokens per second)
llama_print_timings:        eval time = 14890.21 ms /   168 runs   (   88.63 ms per token,    11.28 tokens per second)
llama_print_timings:       total time = 16010.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is he there?" Pan Han pointed to the second floor of the villa.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

She didn't answer and remained silent in front of him.



llama_print_timings:        load time =   588.33 ms
llama_print_timings:      sample time =     6.55 ms /    14 runs   (    0.47 ms per token,  2138.71 tokens per second)
llama_print_timings: prompt eval time =   588.29 ms /    42 tokens (   14.01 ms per token,    71.39 tokens per second)
llama_print_timings:        eval time =  1155.65 ms /    13 runs   (   88.90 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  1773.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   595.96 ms
llama_print_timings:      sample time =    12.89 ms /    29 runs   (    0.44 ms per token,  2249.81 tokens per second)
llama_print_timings: prompt eval time =   595.92 ms /    49 tokens (   12.16 ms per token,    82.23 tokens per second)
llama_print_timings:        eval time =  2323.69 ms /    28 runs   (   82.99 ms per token,    12.05 tokens per second)
llama_print_timings:       total time =  2978.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$ShenYuFei still didn't answer him, but just told the taxi driver, "Let's go. There is nothing." Then she gestured to the taxi driver to start driving. The taxi engine turned on and from that moment on, Wang Miao didn't hear anything anymore. He looked back in the distance to see Shen YuFei holding a lighted cigarette in her hand but without letting Peng Han walk into the villa.



llama_print_timings:        load time =   663.21 ms
llama_print_timings:      sample time =    51.47 ms /   102 runs   (    0.50 ms per token,  1981.81 tokens per second)
llama_print_timings: prompt eval time =   663.17 ms /    90 tokens (    7.37 ms per token,   135.71 tokens per second)
llama_print_timings:        eval time =  9110.12 ms /   101 runs   (   90.20 ms per token,    11.09 tokens per second)
llama_print_timings:       total time = 10007.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

By the time he arrived home, it was already late at night. Wang Mu got off a taxi at the entrance of his community and saw a black Santana parked in front of him, window down with a puff of smoke pouring out, and on the driver's seat sat Dashisheng who had packed himself like a mound.



llama_print_timings:        load time =   668.88 ms
llama_print_timings:      sample time =    33.63 ms /    75 runs   (    0.45 ms per token,  2229.95 tokens per second)
llama_print_timings: prompt eval time =   668.83 ms /    84 tokens (    7.96 ms per token,   125.59 tokens per second)
llama_print_timings:        eval time =  6335.71 ms /    74 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  7155.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Wow, Professor Wang and Professor Wang! How have you been the past few days?”



llama_print_timings:        load time =   588.75 ms
llama_print_timings:      sample time =     9.11 ms /    20 runs   (    0.46 ms per token,  2195.87 tokens per second)
llama_print_timings: prompt eval time =   588.72 ms /    44 tokens (   13.38 ms per token,    74.74 tokens per second)
llama_print_timings:        eval time =  1616.89 ms /    19 runs   (   85.10 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2246.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You're stalking me? That's so boring!”



llama_print_timings:        load time =   625.88 ms
llama_print_timings:      sample time =     7.79 ms /    17 runs   (    0.46 ms per token,  2182.85 tokens per second)
llama_print_timings: prompt eval time =   625.84 ms /    38 tokens (   16.47 ms per token,    60.72 tokens per second)
llama_print_timings:        eval time =  1398.19 ms /    16 runs   (   87.39 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  2059.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Don't misunderstand, I could just go straight through without saying a word. But I'm polite enough to say hello," said Dashi with his characteristic foolish smile and a sneer on his face. "So, what's there? Something useful for me to use?"


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I have said that I am not involved with you anymore, please stop following me!



llama_print_timings:        load time =   598.86 ms
llama_print_timings:      sample time =     7.92 ms /    18 runs   (    0.44 ms per token,  2273.30 tokens per second)
llama_print_timings: prompt eval time =   598.82 ms /    45 tokens (   13.31 ms per token,    75.15 tokens per second)
llama_print_timings:        eval time =  1443.84 ms /    17 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2077.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding-dong started his car, "I might as well make some extra money on night shifts. The game is delayed because of it."



llama_print_timings:        load time =   619.26 ms
llama_print_timings:      sample time =    14.97 ms /    32 runs   (    0.47 ms per token,  2137.61 tokens per second)
llama_print_timings: prompt eval time =   619.22 ms /    62 tokens (    9.99 ms per token,   100.13 tokens per second)
llama_print_timings:        eval time =  2652.22 ms /    31 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3338.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao walked into his home, his wife and children had already gone to bed. He heard her husband was rolling in bed restlessly and mumbled faintly, which may give her a terrible nightmare. Wang Miao took two pieces of lixilem, got into bed and eventually fell asleep after a long time struggle.



llama_print_timings:        load time =   684.33 ms
llama_print_timings:      sample time =    33.39 ms /    72 runs   (    0.46 ms per token,  2156.59 tokens per second)
llama_print_timings: prompt eval time =   684.28 ms /    95 tokens (    7.20 ms per token,   138.83 tokens per second)
llama_print_timings:        eval time =  6174.88 ms /    71 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  7009.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

His dream was chaotic, but one thing remained constant: a countdown. Indeed, the countdown was something that Wang Moxue had expected in advance. In his dream, he frenziedly beat an hanging countdown with maddening force, but all the beats were impotent and it hung steadfast in the sky, counting down without fail. It caused Wang Moxue to be very annoyed, finally waking up from the dream.



llama_print_timings:        load time =   739.72 ms
llama_print_timings:      sample time =    45.12 ms /   100 runs   (    0.45 ms per token,  2216.26 tokens per second)
llama_print_timings: prompt eval time =   739.67 ms /   134 tokens (    5.52 ms per token,   181.16 tokens per second)
llama_print_timings:        eval time =  8590.71 ms /    99 runs   (   86.77 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  9536.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He opened his eyes and saw a blurry ceiling. City lights shone through the curtains outside, projecting dim halo on it. But there was something that followed him from the dream to reality: ghost countdown. The countdown was still in front of his eyes, showing a small number in bright white light, emitting a burning sensation.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The prices are $1180, $1080, $980 and so on.



llama_print_timings:        load time =   634.66 ms
llama_print_timings:      sample time =    10.65 ms /    24 runs   (    0.44 ms per token,  2253.52 tokens per second)
llama_print_timings: prompt eval time =   634.62 ms /    73 tokens (    8.69 ms per token,   115.03 tokens per second)
llama_print_timings:        eval time =  1954.12 ms /    23 runs   (   84.96 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2637.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Moxuan turned his head and saw everything in the bedroom. He was sure he had woken up, but the countdown didn't disappear. He closed his eyes, but even with fully closed eyes, the countdown still shone like silver water on velvet. He opened his eyes again, and the countdown still took up the center of his visual field, no matter how much his sight moved.



llama_print_timings:        load time =   709.97 ms
llama_print_timings:      sample time =    49.99 ms /    89 runs   (    0.56 ms per token,  1780.36 tokens per second)
llama_print_timings: prompt eval time =   709.93 ms /   121 tokens (    5.87 ms per token,   170.44 tokens per second)
llama_print_timings:        eval time =  7942.56 ms /    88 runs   (   90.26 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =  8878.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A sudden sense of fear made Wang Meng jump off the bed and rush to the window, pulling back the curtains and pushing open the window. The city still sleeps soundlessly, with the countdown appearing before it like a title card on a movie screen.



llama_print_timings:        load time =   671.39 ms
llama_print_timings:      sample time =    25.03 ms /    57 runs   (    0.44 ms per token,  2277.63 tokens per second)
llama_print_timings: prompt eval time =   671.34 ms /    97 tokens (    6.92 ms per token,   144.49 tokens per second)
llama_print_timings:        eval time =  4747.38 ms /    56 runs   (   84.77 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  5528.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At that moment, Wang Miao felt as if he were suffocating. He couldn't help but cry out in a low voice. When his wife questioned him anxiously upon waking up from her dreams, he tried to remain calm and comforted her by saying everything is alright again, then lying back down and trying hard to stay awake through the rest of the night under the glow of the clock-like specter.



llama_print_timings:        load time =   675.82 ms
llama_print_timings:      sample time =    42.15 ms /    93 runs   (    0.45 ms per token,  2206.46 tokens per second)
llama_print_timings: prompt eval time =   675.78 ms /    99 tokens (    6.83 ms per token,   146.50 tokens per second)
llama_print_timings:        eval time =  7900.50 ms /    92 runs   (   85.87 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  8765.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the morning, Wang Mo tried to make himself look normal in front of his family, but his wife still saw something odd and asked him about his eyes. Was it that he couldn't see clearly anymore?



llama_print_timings:        load time =   648.30 ms
llama_print_timings:      sample time =    19.29 ms /    44 runs   (    0.44 ms per token,  2280.97 tokens per second)
llama_print_timings: prompt eval time =   648.27 ms /    65 tokens (    9.97 ms per token,   100.27 tokens per second)
llama_print_timings:        eval time =  3638.23 ms /    43 runs   (   84.61 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  4373.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After breakfast, Wang Miao called in sick and drove to the hospital. All the way, the countdown clock that was unrelentingly presenting itself in his eyes slowly became a part of this real world, changing its brightness according to the background. Wang Miao even stared at the rising sun, trying to make it fade away for a moment, but it didn't work; instead, the ghostly figures on the countdown clock became black, even more terrifying and sinister.



llama_print_timings:        load time =   762.09 ms
llama_print_timings:      sample time =    56.52 ms /   106 runs   (    0.53 ms per token,  1875.48 tokens per second)
llama_print_timings: prompt eval time =   762.05 ms /   133 tokens (    5.73 ms per token,   174.53 tokens per second)
llama_print_timings:        eval time =  9366.76 ms /   105 runs   (   89.21 ms per token,    11.21 tokens per second)
llama_print_timings:       total time = 10381.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$It was difficult to make an appointment at the Sunming Hospital. Wang Mu directly asked his wife's classmate, a famous ophthalmologist. He didn't mention the condition and just let the doctor check his eyes. After careful examination of both his eyes, the doctor told him that there wasn't any abnormality and that his eyes were normal.



llama_print_timings:        load time =   664.13 ms
llama_print_timings:      sample time =    34.95 ms /    79 runs   (    0.44 ms per token,  2260.57 tokens per second)
llama_print_timings: prompt eval time =   664.09 ms /    87 tokens (    7.63 ms per token,   131.01 tokens per second)
llama_print_timings:        eval time =  6592.70 ms /    78 runs   (   84.52 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  7414.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“My eyes always see something, no matter where I look,” Wang Mo said. At the same time, that string of numbers was before his face.



llama_print_timings:        load time =   623.93 ms
llama_print_timings:      sample time =    14.01 ms /    32 runs   (    0.44 ms per token,  2283.43 tokens per second)
llama_print_timings: prompt eval time =   623.89 ms /    63 tokens (    9.90 ms per token,   100.98 tokens per second)
llama_print_timings:        eval time =  2600.70 ms /    31 runs   (   83.89 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  3286.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The price is $1,175 and the discount rate is 11.



llama_print_timings:        load time =   651.33 ms
llama_print_timings:      sample time =     9.51 ms /    20 runs   (    0.48 ms per token,  2102.83 tokens per second)
llama_print_timings: prompt eval time =   651.29 ms /    74 tokens (    8.80 ms per token,   113.62 tokens per second)
llama_print_timings:        eval time =  1687.41 ms /    19 runs   (   88.81 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  2381.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Floaters, ” the doctor said as he began writing out a prescription. “A common eye disease in our age group. Crystalline opacities. Not too easy to treat, but nothing to worry about. Take some iodide and vitamin D maybe. Hopefully it will absorb off, but not necessarily so. But, again, this should have no impact on your vision.”



llama_print_timings:        load time =   692.02 ms
llama_print_timings:      sample time =    38.01 ms /    85 runs   (    0.45 ms per token,  2236.25 tokens per second)
llama_print_timings: prompt eval time =   691.98 ms /   112 tokens (    6.18 ms per token,   161.86 tokens per second)
llama_print_timings:        eval time =  7478.38 ms /    84 runs   (   89.03 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  8341.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You're talking aboutFloaters, what do they look like?



llama_print_timings:        load time =   598.34 ms
llama_print_timings:      sample time =     6.89 ms /    15 runs   (    0.46 ms per token,  2178.33 tokens per second)
llama_print_timings: prompt eval time =   598.31 ms /    43 tokens (   13.91 ms per token,    71.87 tokens per second)
llama_print_timings:        eval time =  1150.36 ms /    14 runs   (   82.17 ms per token,    12.17 tokens per second)
llama_print_timings:       total time =  1779.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Unpredictable, changing constantly, sometimes dark and mysterious, sometimes resembling a tadpole.”



llama_print_timings:        load time =   600.17 ms
llama_print_timings:      sample time =    10.65 ms /    24 runs   (    0.44 ms per token,  2252.46 tokens per second)
llama_print_timings: prompt eval time =   600.12 ms /    50 tokens (   12.00 ms per token,    83.32 tokens per second)
llama_print_timings:        eval time =  1956.49 ms /    23 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2604.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If it's a string of numbers, what would you like to say?



llama_print_timings:        load time =   611.68 ms
llama_print_timings:      sample time =     8.79 ms /    17 runs   (    0.52 ms per token,  1932.92 tokens per second)
llama_print_timings: prompt eval time =   611.64 ms /    39 tokens (   15.68 ms per token,    63.76 tokens per second)
llama_print_timings:        eval time =  1477.44 ms /    16 runs   (   92.34 ms per token,    10.83 tokens per second)
llama_print_timings:       total time =  2127.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor's pen ran out of ink. “Do you see a string of numbers?”



llama_print_timings:        load time =   594.36 ms
llama_print_timings:      sample time =     9.48 ms /    21 runs   (    0.45 ms per token,  2215.19 tokens per second)
llama_print_timings: prompt eval time =   594.32 ms /    46 tokens (   12.92 ms per token,    77.40 tokens per second)
llama_print_timings:        eval time =  1682.03 ms /    20 runs   (   84.10 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  2318.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, right in the middle of my field of vision.



llama_print_timings:        load time =   599.30 ms
llama_print_timings:      sample time =     5.72 ms /    13 runs   (    0.44 ms per token,  2272.33 tokens per second)
llama_print_timings: prompt eval time =   599.28 ms /    39 tokens (   15.37 ms per token,    65.08 tokens per second)
llama_print_timings:        eval time =  1025.18 ms /    12 runs   (   85.43 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1649.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor pushes aside the paper and pen, looking at him attentively. “I could see that you were overworked as soon as I saw you. Li Yao mentioned to me at the previous reunion that you have been under a lot of pressure at work.”



llama_print_timings:        load time =   656.79 ms
llama_print_timings:      sample time =    26.11 ms /    58 runs   (    0.45 ms per token,  2221.71 tokens per second)
llama_print_timings: prompt eval time =   656.74 ms /    84 tokens (    7.82 ms per token,   127.90 tokens per second)
llama_print_timings:        eval time =  4911.76 ms /    57 runs   (   86.17 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  5686.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You mean, this is due to a mental factor?”



llama_print_timings:        load time =   588.20 ms
llama_print_timings:      sample time =     5.87 ms /    13 runs   (    0.45 ms per token,  2215.78 tokens per second)
llama_print_timings: prompt eval time =   588.15 ms /    40 tokens (   14.70 ms per token,    68.01 tokens per second)
llama_print_timings:        eval time =  1019.30 ms /    12 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1633.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor nodded, “If it were a regular patient, I would recommend him to go to the psychiatry department. But there is no need for that; it’s just too tiresome. Let him rest for a few days and take some time off with his family, Li瑶, and Bean-bean, whoever they are. Don’t worry. He will recover soon.”



llama_print_timings:        load time =   660.44 ms
llama_print_timings:      sample time =    37.24 ms /    84 runs   (    0.44 ms per token,  2255.88 tokens per second)
llama_print_timings: prompt eval time =   660.39 ms /    93 tokens (    7.10 ms per token,   140.83 tokens per second)
llama_print_timings:        eval time =  7216.66 ms /    83 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  8045.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The time is 10:02, 10:01, 10:00, and 09:59.



llama_print_timings:        load time =   630.33 ms
llama_print_timings:      sample time =    15.08 ms /    33 runs   (    0.46 ms per token,  2188.76 tokens per second)
llama_print_timings: prompt eval time =   630.29 ms /    73 tokens (    8.63 ms per token,   115.82 tokens per second)
llama_print_timings:        eval time =  2781.60 ms /    32 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3479.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"I'm telling you what I saw, a countdown! Second after second, going by exactly. Is this psychological? "



llama_print_timings:        load time =   637.89 ms
llama_print_timings:      sample time =    13.24 ms /    29 runs   (    0.46 ms per token,  2190.33 tokens per second)
llama_print_timings: prompt eval time =   637.85 ms /    56 tokens (   11.39 ms per token,    87.80 tokens per second)
llama_print_timings:        eval time =  2439.23 ms /    28 runs   (   87.12 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  3137.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor smiled broadly, “Do you want to know how much of an impact the mental factors can have on vision? We had a girl, about fifteen or sixteen years old, who saw everything blurred and blinded in the classroom. All her physical tests showed that her eyes were perfectly fine. Then after psychological treatment for a whole month, her eyes suddenly recovered to normal vision.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Wang Mu knew that it was a waste of time here. He got up to leave, saying, "Okay, regardless of my eyesight, I have only one question for you: what is the force that can cause people to see something from afar?"



llama_print_timings:        load time =   636.17 ms
llama_print_timings:      sample time =    24.57 ms /    55 runs   (    0.45 ms per token,  2238.32 tokens per second)
llama_print_timings: prompt eval time =   636.13 ms /    74 tokens (    8.60 ms per token,   116.33 tokens per second)
llama_print_timings:        eval time =  4644.44 ms /    54 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  5390.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor thought for a moment and said, “Yes, I was part of the medical team on Shenzhou-19. One of the astronauts reported seeing an unseen flash during his spacewalks at the International Space Station, which happened when the sun's activity increased, high energy particles were hitting the retina, causing a glow.” But what you saw was neither flash nor countdown; there is no possibility it was related to the sun’s activity.



llama_print_timings:        load time =   803.62 ms
llama_print_timings:      sample time =    44.44 ms /   100 runs   (    0.44 ms per token,  2250.02 tokens per second)
llama_print_timings: prompt eval time =   803.57 ms /   172 tokens (    4.67 ms per token,   214.04 tokens per second)
llama_print_timings:        eval time =  8610.48 ms /    99 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  9615.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao walked into the main laboratory of the Nano Research Center, taking off his sunglasses before entering. Despite this, his colleagues showed worried expressions when they met him.



llama_print_timings:        load time =   612.94 ms
llama_print_timings:      sample time =    19.19 ms /    43 runs   (    0.45 ms per token,  2240.87 tokens per second)
llama_print_timings: prompt eval time =   612.90 ms /    66 tokens (    9.29 ms per token,   107.68 tokens per second)
llama_print_timings:        eval time =  3561.46 ms /    42 runs   (   84.80 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  4260.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the center of the laboratory, Han Mu saw that the reaction tank was still running. The main body of this giant equipment is a spherical ball made up of numerous pipelines. The code name for "Sword Fly" is a super strong nano material produced by molecular construction, where molecule probes are used to build materials molecules one by one like building with bricks, which requires many resources and such products can not be mass-produced as they are the world's most precious treasures.



llama_print_timings:        load time =   714.49 ms
llama_print_timings:      sample time =    51.36 ms /   110 runs   (    0.47 ms per token,  2141.83 tokens per second)
llama_print_timings: prompt eval time =   714.43 ms /   125 tokens (    5.72 ms per token,   174.96 tokens per second)
llama_print_timings:        eval time =  9655.64 ms /   109 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 10602.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The lab is now trying to replace the molecular construction method with a catalytic reaction in order to build large amounts of molecules simultaneously. The experiments are conducted inside the reactor black box, and it can complete the assembly of a large number of components simultaneously, which may take centuries with traditional manual operation but can be completed very quickly by the reactor black box. At the same time, this is an integration of real reaction and digital simulation. When synthesis is nearing completion, computer models are built based on the stages of the reaction to digitally simulate the remaining reaction process, greatly improving experimental efficiency.



llama_print_timings:        load time =   779.40 ms
llama_print_timings:      sample time =    55.76 ms /   126 runs   (    0.44 ms per token,  2259.52 tokens per second)
llama_print_timings: prompt eval time =   779.37 ms /   158 tokens (    4.93 ms per token,   202.73 tokens per second)
llama_print_timings:        eval time = 10818.98 ms /   125 runs   (   86.55 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 11849.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The experiment director rushed over to Wang Miao after he saw her. He began to report the series of malfunctions that had recently occurred in the reaction box. This is something that Wang Miao has been experiencing since she started working there. Now, the reaction box has been running for more than a year, with many sensors losing sensitivity and causing errors, which are urgently needed for maintenance. However, as the project's lead scientist, Wang Miao insisted on finishing the third batch of synthesis combinations before stopping the machine, and engineers had to add more correctional units to the reaction box, which have now also need correctional units themselves. The entire project team is exhausted due to these additional tasks. But the director was very careful not to bring up the issue of halting the experiment or suspending the trials as he understood Wang Miao's difficulty in dealing with such a serious matter. He simply stated all the difficulties and implications quite 


llama_print_timings:        load time =   828.13 ms
llama_print_timings:      sample time =    92.49 ms /   205 runs   (    0.45 ms per token,  2216.48 tokens per second)
llama_print_timings: prompt eval time =   828.09 ms /   183 tokens (    4.53 ms per token,   220.99 tokens per second)
llama_print_timings:        eval time = 17952.00 ms /   204 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 19201.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I looked up at the black box and thought it resembled a womb, with engineers working around it tirelessly to maintain its normal operation. The scene in front of me was haunted by a countdown.



llama_print_timings:        load time =   669.86 ms
llama_print_timings:      sample time =    21.62 ms /    47 runs   (    0.46 ms per token,  2174.21 tokens per second)
llama_print_timings: prompt eval time =   669.83 ms /    72 tokens (    9.30 ms per token,   107.49 tokens per second)
llama_print_timings:        eval time =  3978.54 ms /    46 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  4747.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The numbers 1174, 21, 11, 74 and 21 are repeated in the following sequence: 



llama_print_timings:        load time =   655.17 ms
llama_print_timings:      sample time =    14.38 ms /    32 runs   (    0.45 ms per token,  2225.47 tokens per second)
llama_print_timings: prompt eval time =   655.12 ms /    71 tokens (    9.23 ms per token,   108.38 tokens per second)
llama_print_timings:        eval time =  2686.87 ms /    31 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  3406.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei said, "Stop and try."



llama_print_timings:        load time =   615.71 ms
llama_print_timings:      sample time =     5.71 ms /    13 runs   (    0.44 ms per token,  2276.31 tokens per second)
llama_print_timings: prompt eval time =   615.67 ms /    48 tokens (   12.83 ms per token,    77.96 tokens per second)
llama_print_timings:        eval time =  1022.74 ms /    12 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1663.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How long will it take to fully update the peripheral sensory system?” he asked.



llama_print_timings:        load time =   623.48 ms
llama_print_timings:      sample time =     9.20 ms /    21 runs   (    0.44 ms per token,  2282.86 tokens per second)
llama_print_timings: prompt eval time =   623.44 ms /    43 tokens (   14.50 ms per token,    68.97 tokens per second)
llama_print_timings:        eval time =  1734.63 ms /    20 runs   (   86.73 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2400.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The experiment director suddenly saw hope and added, "Quickly do it, only three days' time is needed, Wang general, I promise!"



llama_print_timings:        load time =   613.16 ms
llama_print_timings:      sample time =    13.61 ms /    31 runs   (    0.44 ms per token,  2277.07 tokens per second)
llama_print_timings: prompt eval time =   613.12 ms /    60 tokens (   10.22 ms per token,    97.86 tokens per second)
llama_print_timings:        eval time =  2689.78 ms /    30 runs   (   89.66 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =  3364.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have not given up, the equipment does need to be repaired and therefore the test must be halted. It has nothing to do with me. Wang Meng in her mind said, then turned to the director, looking at him through the countdown display. “Please put an end to the test, shut it down for maintenance,” she said.



llama_print_timings:        load time =   666.89 ms
llama_print_timings:      sample time =    32.61 ms /    74 runs   (    0.44 ms per token,  2269.03 tokens per second)
llama_print_timings: prompt eval time =   666.85 ms /    87 tokens (    7.66 ms per token,   130.46 tokens per second)
llama_print_timings:        eval time =  6214.43 ms /    73 runs   (   85.13 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  7029.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Great, I will send you the updated plan soon. We can turn off the machine tomorrow afternoon.”



llama_print_timings:        load time =   623.59 ms
llama_print_timings:      sample time =    11.58 ms /    24 runs   (    0.48 ms per token,  2073.43 tokens per second)
llama_print_timings: prompt eval time =   623.55 ms /    52 tokens (   11.99 ms per token,    83.39 tokens per second)
llama_print_timings:        eval time =  2007.35 ms /    23 runs   (   87.28 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  2681.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Let's just stop now.”



llama_print_timings:        load time =   587.52 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2247.75 tokens per second)
llama_print_timings: prompt eval time =   587.48 ms /    36 tokens (   16.32 ms per token,    61.28 tokens per second)
llama_print_timings:        eval time =   695.18 ms /     8 runs   (   86.90 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1301.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The director looked at Wang Miao like he didn't recognize him, but then immediately restored to an excited state as if he were afraid of losing this chance. He picked up the telephone and gave an order for it to be switched off. The researchers and engineers in the project group were suddenly excited and began to flip the hundreds of complex switches in accordance with the procedure, and one after another the monitoring screens went dark. Finally, the main monitor screen showed that it was turned off.



llama_print_timings:        load time =   698.93 ms
llama_print_timings:      sample time =    46.05 ms /   105 runs   (    0.44 ms per token,  2279.88 tokens per second)
llama_print_timings: prompt eval time =   698.89 ms /   113 tokens (    6.18 ms per token,   161.69 tokens per second)
llama_print_timings:        eval time =  9000.96 ms /   104 runs   (   86.55 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  9909.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Almost at the same time, Wang Miao's countdown stopped moving and fixed to 174:10:07. After a few seconds, the digit shook a few times before disappearing entirely.



llama_print_timings:        load time =   648.99 ms
llama_print_timings:      sample time =    20.59 ms /    47 runs   (    0.44 ms per token,  2282.66 tokens per second)
llama_print_timings: prompt eval time =   648.96 ms /    76 tokens (    8.54 ms per token,   117.11 tokens per second)
llama_print_timings:        eval time =  3815.74 ms /    46 runs   (   82.95 ms per token,    12.06 tokens per second)
llama_print_timings:       total time =  4559.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When the reality without ghost countdown was before his eyes, Wang Miao let out a breath and sat down. Soon he realized that someone else was watching him nearby.



llama_print_timings:        load time =   648.13 ms
llama_print_timings:      sample time =    16.22 ms /    37 runs   (    0.44 ms per token,  2281.13 tokens per second)
llama_print_timings: prompt eval time =   648.09 ms /    75 tokens (    8.64 ms per token,   115.73 tokens per second)
llama_print_timings:        eval time =  3052.67 ms /    36 runs   (   84.80 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  3773.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He told the experiment director, "The system update is a job for the equipment department. You and your team can take a few days off now; everybody here has been working very hard recently."



llama_print_timings:        load time =   611.41 ms
llama_print_timings:      sample time =    17.65 ms /    40 runs   (    0.44 ms per token,  2265.90 tokens per second)
llama_print_timings: prompt eval time =   611.37 ms /    59 tokens (   10.36 ms per token,    96.50 tokens per second)
llama_print_timings:        eval time =  3319.30 ms /    39 runs   (   85.11 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  4010.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Mr. Wang, you are too tired. Here is Mr. Zhang, the chief engineer who is watching over you. You should also go home and rest.”



llama_print_timings:        load time =   608.81 ms
llama_print_timings:      sample time =    15.46 ms /    35 runs   (    0.44 ms per token,  2264.49 tokens per second)
llama_print_timings: prompt eval time =   608.77 ms /    51 tokens (   11.94 ms per token,    83.78 tokens per second)
llama_print_timings:        eval time =  2874.71 ms /    34 runs   (   84.55 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  3553.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yeah, I'm so tired.” Wang Moxi said helplessly. He picked up the phone and called Xin Yufen. She answered after only a short ringing sound.



llama_print_timings:        load time =   645.32 ms
llama_print_timings:      sample time =    22.05 ms /    44 runs   (    0.50 ms per token,  1995.56 tokens per second)
llama_print_timings: prompt eval time =   645.22 ms /    70 tokens (    9.22 ms per token,   108.49 tokens per second)
llama_print_timings:        eval time =  3787.38 ms /    43 runs   (   88.08 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  4532.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What are you behind?" Wang Mu asked, trying to keep his voice calm but couldn't.



llama_print_timings:        load time =   606.78 ms
llama_print_timings:      sample time =     9.72 ms /    22 runs   (    0.44 ms per token,  2262.21 tokens per second)
llama_print_timings: prompt eval time =   606.73 ms /    52 tokens (   11.67 ms per token,    85.71 tokens per second)
llama_print_timings:        eval time =  1792.88 ms /    21 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2443.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm sorry, I can't speak Chinese.



llama_print_timings:        load time =  1062.57 ms
llama_print_timings:      sample time =     5.67 ms /    13 runs   (    0.44 ms per token,  2291.15 tokens per second)
llama_print_timings: prompt eval time =  1062.53 ms /    31 tokens (   34.28 ms per token,    29.18 tokens per second)
llama_print_timings:        eval time =  1026.13 ms /    12 runs   (   85.51 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  2114.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is the end of a countdown?



llama_print_timings:        load time =   594.62 ms
llama_print_timings:      sample time =     4.43 ms /    10 runs   (    0.44 ms per token,  2255.30 tokens per second)
llama_print_timings: prompt eval time =   594.59 ms /    38 tokens (   15.65 ms per token,    63.91 tokens per second)
llama_print_timings:        eval time =   751.01 ms /     9 runs   (   83.45 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  1365.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm sorry, I can't speak Chinese.



llama_print_timings:        load time =  1061.14 ms
llama_print_timings:      sample time =     5.83 ms /    13 runs   (    0.45 ms per token,  2227.93 tokens per second)
llama_print_timings: prompt eval time =  1061.12 ms /    31 tokens (   34.23 ms per token,    29.21 tokens per second)
llama_print_timings:        eval time =   996.58 ms /    12 runs   (   83.05 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  2083.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Are you listening?”



llama_print_timings:        load time =   622.83 ms
llama_print_timings:      sample time =     6.19 ms /     6 runs   (    1.03 ms per token,   969.62 tokens per second)
llama_print_timings: prompt eval time =   622.79 ms /    35 tokens (   17.79 ms per token,    56.20 tokens per second)
llama_print_timings:        eval time =   496.67 ms /     5 runs   (   99.33 ms per token,    10.07 tokens per second)
llama_print_timings:       total time =  1146.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In.



llama_print_timings:        load time =   577.39 ms
llama_print_timings:      sample time =     1.31 ms /     3 runs   (    0.44 ms per token,  2283.11 tokens per second)
llama_print_timings: prompt eval time =   577.35 ms /    33 tokens (   17.50 ms per token,    57.16 tokens per second)
llama_print_timings:        eval time =   176.58 ms /     2 runs   (   88.29 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =   759.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

How can high-intensity nanomaterials be made? Is this really important enough to deserve so much attention?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

We should not judge what is worth paying attention to.



llama_print_timings:        load time =   611.67 ms
llama_print_timings:      sample time =    12.21 ms /    13 runs   (    0.94 ms per token,  1064.35 tokens per second)
llama_print_timings: prompt eval time =   611.63 ms /    40 tokens (   15.29 ms per token,    65.40 tokens per second)
llama_print_timings:        eval time =  1161.40 ms /    12 runs   (   96.78 ms per token,    10.33 tokens per second)
llama_print_timings:       total time =  1828.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Enough!” Wang Mo shouted, his fear and despair suddenly turned into fury. “You think a little magic trick can fool me? Stop technological progress? I admit I am not able to explain technologically at this moment, but that is because I haven’t gotten to the back of that disgusting magician yet!”



llama_print_timings:        load time =   693.18 ms
llama_print_timings:      sample time =    33.62 ms /    76 runs   (    0.44 ms per token,  2260.29 tokens per second)
llama_print_timings: prompt eval time =   693.14 ms /    94 tokens (    7.37 ms per token,   135.61 tokens per second)
llama_print_timings:        eval time =  6497.78 ms /    75 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  7344.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What you mean is to see the countdown on a larger scale?"



llama_print_timings:        load time =   580.28 ms
llama_print_timings:      sample time =     7.08 ms /    16 runs   (    0.44 ms per token,  2259.25 tokens per second)
llama_print_timings: prompt eval time =   580.24 ms /    45 tokens (   12.89 ms per token,    77.55 tokens per second)
llama_print_timings:        eval time =  1270.10 ms /    15 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1882.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufen's words made Wang Mole a little surprised. He had not prepared for this question, so he forced himself to remain calm to avoid being trapped. “Drop that silly trick of yours,” he replied confidently. “You can play magic tricks on such a big scale as well! You can project holographic images to the sky just like in the last war when NATO did it; powerful lasers even have the power to cover the entire surface of the moon!” Wang Mole was surprised, but he managed to hold back his surprise and added, “Considering a certain possibility that I haven't thought of yet, you could still perform your magic even on such a huge scale as the sun.”



llama_print_timings:        load time =   929.84 ms
llama_print_timings:      sample time =    69.09 ms /   153 runs   (    0.45 ms per token,  2214.66 tokens per second)
llama_print_timings: prompt eval time =   929.79 ms /   235 tokens (    3.96 ms per token,   252.75 tokens per second)
llama_print_timings:        eval time = 13577.43 ms /   152 runs   (   89.33 ms per token,    11.20 tokens per second)
llama_print_timings:       total time = 14823.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Can you handle it? We are friends, I want to help you, don't follow Yang Dun's path.”



llama_print_timings:        load time =   606.68 ms
llama_print_timings:      sample time =    12.11 ms /    27 runs   (    0.45 ms per token,  2229.75 tokens per second)
llama_print_timings: prompt eval time =   606.64 ms /    52 tokens (   11.67 ms per token,    85.72 tokens per second)
llama_print_timings:        eval time =  2240.21 ms /    26 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  2901.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he heard the name, Wang Moxiong shivered with fear. But then his rage kicked in and he ignored it entirely: "Can I accept this challenge?"



llama_print_timings:        load time =   632.80 ms
llama_print_timings:      sample time =    17.73 ms /    39 runs   (    0.45 ms per token,  2200.03 tokens per second)
llama_print_timings: prompt eval time =   632.76 ms /    63 tokens (   10.04 ms per token,    99.56 tokens per second)
llama_print_timings:        eval time =  3238.70 ms /    38 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  3950.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes.



llama_print_timings:        load time =   604.39 ms
llama_print_timings:      sample time =     1.36 ms /     3 runs   (    0.45 ms per token,  2202.64 tokens per second)
llama_print_timings: prompt eval time =   604.35 ms /    33 tokens (   18.31 ms per token,    54.60 tokens per second)
llama_print_timings:        eval time =   175.54 ms /     2 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =   785.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You want it this way?’ Wang Mu's voice weakened.



llama_print_timings:        load time =   588.82 ms
llama_print_timings:      sample time =     7.15 ms /    16 runs   (    0.45 ms per token,  2236.82 tokens per second)
llama_print_timings: prompt eval time =   588.78 ms /    43 tokens (   13.69 ms per token,    73.03 tokens per second)
llama_print_timings:        eval time =  1308.60 ms /    15 runs   (   87.24 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1930.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is there an internet-enabled computer near you? Great, go to this URL: https://modernout.com/pages/morse-code-chart. Print it out and keep it with you.”



llama_print_timings:        load time =   638.41 ms
llama_print_timings:      sample time =    19.90 ms /    45 runs   (    0.44 ms per token,  2260.85 tokens per second)
llama_print_timings: prompt eval time =   638.26 ms /    73 tokens (    8.74 ms per token,   114.37 tokens per second)
llama_print_timings:        eval time =  3857.25 ms /    44 runs   (   87.66 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  4585.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao saw only a Morse code translator on the webpage.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I don't understand, this is...



llama_print_timings:        load time =   578.30 ms
llama_print_timings:      sample time =     4.56 ms /    10 runs   (    0.46 ms per token,  2194.91 tokens per second)
llama_print_timings: prompt eval time =   578.26 ms /    37 tokens (   15.63 ms per token,    63.99 tokens per second)
llama_print_timings:        eval time =   775.87 ms /     9 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1374.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the next two days, try to find a place where you can observe cosmic background radiation. Please refer to my subsequent email for details.



llama_print_timings:        load time =   623.17 ms
llama_print_timings:      sample time =    13.52 ms /    30 runs   (    0.45 ms per token,  2219.26 tokens per second)
llama_print_timings: prompt eval time =   623.13 ms /    57 tokens (   10.93 ms per token,    91.47 tokens per second)
llama_print_timings:        eval time =  2568.95 ms /    29 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  3253.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is what I do.



llama_print_timings:        load time =   628.29 ms
llama_print_timings:      sample time =     3.20 ms /     7 runs   (    0.46 ms per token,  2189.55 tokens per second)
llama_print_timings: prompt eval time =   628.26 ms /    37 tokens (   16.98 ms per token,    58.89 tokens per second)
llama_print_timings:        eval time =   524.89 ms /     6 runs   (   87.48 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1166.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I know the nanotechnology research project has been stopped, are you planning to restart it?



llama_print_timings:        load time =   607.50 ms
llama_print_timings:      sample time =     9.30 ms /    21 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =   607.46 ms /    46 tokens (   13.21 ms per token,    75.73 tokens per second)
llama_print_timings:        eval time =  1793.59 ms /    20 runs   (   89.68 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =  2443.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, three days later.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The countdown will continue.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

What scale am I looking at it on?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After a long silence, the woman who represented an extraordinary power beyond human understanding sealed off Wang Meng's all possibilities.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

In three days, namely the fourteenth day, from one o'clock in the morning to five o'clock in the evening, the entire universe will be shining for you.
translated 170.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

He picked up the phone and realized it was already past one o'clock in the morning.



llama_print_timings:        load time =   611.04 ms
llama_print_timings:      sample time =     9.22 ms /    20 runs   (    0.46 ms per token,  2170.14 tokens per second)
llama_print_timings: prompt eval time =   611.01 ms /    56 tokens (   10.91 ms per token,    91.65 tokens per second)
llama_print_timings:        eval time =  1699.88 ms /    19 runs   (   89.47 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  2351.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I'm Wang Miao, really sorry to bother you at this time.”



llama_print_timings:        load time =   593.16 ms
llama_print_timings:      sample time =     8.21 ms /    18 runs   (    0.46 ms per token,  2191.38 tokens per second)
llama_print_timings: prompt eval time =   593.12 ms /    44 tokens (   13.48 ms per token,    74.18 tokens per second)
llama_print_timings:        eval time =  1439.69 ms /    17 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2068.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No problem, I'm having insomnia.”



llama_print_timings:        load time =   591.54 ms
llama_print_timings:      sample time =     5.83 ms /    13 runs   (    0.45 ms per token,  2228.32 tokens per second)
llama_print_timings: prompt eval time =   591.50 ms /    37 tokens (   15.99 ms per token,    62.55 tokens per second)
llama_print_timings:        eval time =  1093.64 ms /    12 runs   (   91.14 ms per token,    10.97 tokens per second)
llama_print_timings:       total time =  1712.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I… I have something to ask you. Do you know any organization in China that observes the cosmic background radiation?” Wang Miao felt like he wanted to talk to someone, but he soon thought it was best not to make this matter known to more people.



llama_print_timings:        load time =   660.79 ms
llama_print_timings:      sample time =    25.07 ms /    56 runs   (    0.45 ms per token,  2234.19 tokens per second)
llama_print_timings: prompt eval time =   660.75 ms /    87 tokens (    7.59 ms per token,   131.67 tokens per second)
llama_print_timings:        eval time =  4804.72 ms /    55 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  5575.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"The cosmic background radiation? Why do you have such a fascination with it? It seems like you've really got something on your mind... Have you gone to visit Yang Dun mother yet?”



llama_print_timings:        load time =   620.06 ms
llama_print_timings:      sample time =    18.77 ms /    42 runs   (    0.45 ms per token,  2237.73 tokens per second)
llama_print_timings: prompt eval time =   620.01 ms /    59 tokens (   10.51 ms per token,    95.16 tokens per second)
llama_print_timings:        eval time =  3669.86 ms /    41 runs   (   89.51 ms per token,    11.17 tokens per second)
llama_print_timings:       total time =  4372.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm sorry, I forgot.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

It's okay, a lot of people in the scientific community are... like that. But you'd better go see her. She's older now and doesn't want to hire a caregiver; if she needs help with anything it might be troublesome for you to help her. By the way, the cosmic background radiation is something you could ask Yang Dong's mother about; she retired before and specialized in astronomy and knows these institutes quite well.



llama_print_timings:        load time =   814.05 ms
llama_print_timings:      sample time =    56.92 ms /   102 runs   (    0.56 ms per token,  1792.11 tokens per second)
llama_print_timings: prompt eval time =   814.00 ms /   116 tokens (    7.02 ms per token,   142.51 tokens per second)
llama_print_timings:        eval time =  9263.09 ms /   101 runs   (   91.71 ms per token,    10.90 tokens per second)
llama_print_timings:       total time = 10325.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Okay, I'll go right after work.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Thank you. I really can't face anything related to Yang Dong any more.



llama_print_timings:        load time =   602.43 ms
llama_print_timings:      sample time =     9.78 ms /    19 runs   (    0.51 ms per token,  1943.73 tokens per second)
llama_print_timings: prompt eval time =   602.40 ms /    47 tokens (   12.82 ms per token,    78.02 tokens per second)
llama_print_timings:        eval time =  1606.97 ms /    18 runs   (   89.28 ms per token,    11.20 tokens per second)
llama_print_timings:       total time =  2251.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the phone call, Wang Moxi sat down at her computer and started printing the Morse code chart she had displayed on the web page. At this time, he calmed down, took his thoughts off the countdown timer, thought about the topic of ‘scientific boundary’ and Xin Yufei's game. About Xin Yufei, he was certain of only one thing, which was that she didn't like playing games, as he had envisioned her. This woman who spoke like an electric telegraph did not have a muzzle on, but rather, from outside to inside, she was cold. Wang Moxi always subconsciously associated her with the long-gone DOS operating system, which presented a black screen filled with a simple and indifferent “C://” prompt in neon lights that would output whatever you typed in, with no extra words or variations. Now he knew that after the “//” prompt was Xin Yufei's abyss.



llama_print_timings:        load time =   904.24 ms
llama_print_timings:      sample time =   109.36 ms /   213 runs   (    0.51 ms per token,  1947.71 tokens per second)
llama_print_timings: prompt eval time =   904.19 ms /   223 tokens (    4.05 ms per token,   246.63 tokens per second)
llama_print_timings:        eval time = 19511.99 ms /   212 runs   (   92.04 ms per token,    10.87 tokens per second)
llama_print_timings:       total time = 20912.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She's really into playing games and wearing VR gear? She doesn't have any kids, so those VR gears can only be used by her to buy back for personal use, which is somewhat surprising.



llama_print_timings:        load time =   647.68 ms
llama_print_timings:      sample time =    22.16 ms /    49 runs   (    0.45 ms per token,  2211.59 tokens per second)
llama_print_timings: prompt eval time =   647.64 ms /    66 tokens (    9.81 ms per token,   101.91 tokens per second)
llama_print_timings:        eval time =  4176.67 ms /    48 runs   (   87.01 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  4923.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Miao went to the center experiment hall and found that the experimental hall was empty. He then went to the staff workshop to borrow VR devices and key cards, but there were no staffs at all in the staff workshop.



llama_print_timings:        load time =   768.88 ms
llama_print_timings:      sample time =    23.19 ms /    52 runs   (    0.45 ms per token,  2242.73 tokens per second)
llama_print_timings: prompt eval time =   768.84 ms /   145 tokens (    5.30 ms per token,   188.60 tokens per second)
llama_print_timings:        eval time =  4350.75 ms /    51 runs   (   85.31 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5222.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When the game starts, Wang Moxi finds himself in a barren land during dawn. The desert is dark brown and details are not clearly visible. In the distance, a small part of white light can be seen on the horizon. The rest of the sky is twinkling with stars. Suddenly, a loud bang, two peaks emitting red light crash down onto the distant ground. The entire desert is engulfed in the red light after the dust settles and Wang Moxi reads the big letters: "Thrice".



llama_print_timings:        load time =   744.46 ms
llama_print_timings:      sample time =    78.90 ms /   115 runs   (    0.69 ms per token,  1457.58 tokens per second)
llama_print_timings: prompt eval time =   744.41 ms /   139 tokens (    5.36 ms per token,   186.72 tokens per second)
llama_print_timings:        eval time = 10721.34 ms /   114 runs   (   94.05 ms per token,    10.63 tokens per second)
llama_print_timings:       total time = 11823.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then a registration interface appeared, and Wang Meng registered with the ID "Hai Ren" and successfully logged in.



llama_print_timings:        load time =   625.09 ms
llama_print_timings:      sample time =    11.82 ms /    25 runs   (    0.47 ms per token,  2115.42 tokens per second)
llama_print_timings: prompt eval time =   625.06 ms /    52 tokens (   12.02 ms per token,    83.19 tokens per second)
llama_print_timings:        eval time =  2178.67 ms /    24 runs   (   90.78 ms per token,    11.02 tokens per second)
llama_print_timings:       total time =  2858.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The wasteland still existed, but the compressor in V's power suit started with a hissing sound. Wang Moxi felt an overwhelming coldness as he chased after two moving silhouettes against the background of early dawn… Wang Moxi caught up with them, and they were both male figures wearing ragged robes, covered in dirty animal hide, each carrying a broad, short bronze sword. One of them had a wooden crate about half his height on his back. The man looked at Wang Sen and the dirtiness of his face matched that of the animal fur, but his eyes shone with an uncanny light. “Cold, ” he said.



llama_print_timings:        load time =   835.05 ms
llama_print_timings:      sample time =    67.85 ms /   149 runs   (    0.46 ms per token,  2195.92 tokens per second)
llama_print_timings: prompt eval time =   835.00 ms /   175 tokens (    4.77 ms per token,   209.58 tokens per second)
llama_print_timings:        eval time = 13344.95 ms /   148 runs   (   90.17 ms per token,    11.09 tokens per second)
llama_print_timings:       total time = 14487.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes. It's really cold.” Wang Sen exclaimed in agreement.



llama_print_timings:        load time =   591.56 ms
llama_print_timings:      sample time =     7.08 ms /    16 runs   (    0.44 ms per token,  2260.84 tokens per second)
llama_print_timings: prompt eval time =   591.51 ms /    41 tokens (   14.43 ms per token,    69.31 tokens per second)
llama_print_timings:        eval time =  1287.05 ms /    15 runs   (   85.80 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1910.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the Warring States period, and I am King Chou.



llama_print_timings:        load time =   595.05 ms
llama_print_timings:      sample time =     7.30 ms /    16 runs   (    0.46 ms per token,  2192.68 tokens per second)
llama_print_timings: prompt eval time =   595.02 ms /    44 tokens (   13.52 ms per token,    73.95 tokens per second)
llama_print_timings:        eval time =  1274.42 ms /    15 runs   (   84.96 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1902.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is Zhou Wenwang a person from the Warring States Period?" Wang Miao asked.



llama_print_timings:        load time =   604.73 ms
llama_print_timings:      sample time =    11.35 ms /    23 runs   (    0.49 ms per token,  2026.79 tokens per second)
llama_print_timings: prompt eval time =   604.69 ms /    46 tokens (   13.15 ms per token,    76.07 tokens per second)
llama_print_timings:        eval time =  1931.99 ms /    22 runs   (   87.82 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  2589.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He's still alive,” said the other man without carrying his boxes, “and King Xu is a genius as well. I am one of King Wu's disciples, so my ID is ‘King Wu Disciple’.”



llama_print_timings:        load time =   642.63 ms
llama_print_timings:      sample time =    24.92 ms /    53 runs   (    0.47 ms per token,  2126.55 tokens per second)
llama_print_timings: prompt eval time =   642.59 ms /    75 tokens (    8.57 ms per token,   116.72 tokens per second)
llama_print_timings:        eval time =  4543.27 ms /    52 runs   (   87.37 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  5297.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“My ID is ‘Sea Man’, ” Wang Miao said, “What are you carrying?”



llama_print_timings:        load time =   767.33 ms
llama_print_timings:      sample time =    19.59 ms /    23 runs   (    0.85 ms per token,  1174.01 tokens per second)
llama_print_timings: prompt eval time =   767.27 ms /    52 tokens (   14.76 ms per token,    67.77 tokens per second)
llama_print_timings:        eval time =  2040.24 ms /    22 runs   (   92.74 ms per token,    10.78 tokens per second)
llama_print_timings:       total time =  2893.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

周文王 opened the long rectangular box and revealed a five-tiered square grid within, with sand flowing from one layer to another.



llama_print_timings:        load time =   834.47 ms
llama_print_timings:      sample time =    16.06 ms /    32 runs   (    0.50 ms per token,  1992.65 tokens per second)
llama_print_timings: prompt eval time =   834.37 ms /   101 tokens (    8.26 ms per token,   121.05 tokens per second)
llama_print_timings:        eval time =  2738.95 ms /    31 runs   (   88.35 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  3644.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhou Wenwang explained, “The hourglass, complete once every eight hours, needs to be inverted three times a day. But I often forget and have to rely on my followers for reminding me.”



llama_print_timings:        load time =   617.75 ms
llama_print_timings:      sample time =    22.16 ms /    47 runs   (    0.47 ms per token,  2120.94 tokens per second)
llama_print_timings: prompt eval time =   617.69 ms /    62 tokens (    9.96 ms per token,   100.37 tokens per second)
llama_print_timings:        eval time =  3881.41 ms /    46 runs   (   84.38 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  4597.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You seem to be on a long trip. Is it necessary to carry this heavy stopwatch?



llama_print_timings:        load time =   601.69 ms
llama_print_timings:      sample time =     8.94 ms /    20 runs   (    0.45 ms per token,  2236.39 tokens per second)
llama_print_timings: prompt eval time =   601.65 ms /    48 tokens (   12.53 ms per token,    79.78 tokens per second)
llama_print_timings:        eval time =  1688.95 ms /    19 runs   (   88.89 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  2330.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

How do you measure time?



llama_print_timings:        load time =   600.38 ms
llama_print_timings:      sample time =     3.15 ms /     7 runs   (    0.45 ms per token,  2221.52 tokens per second)
llama_print_timings: prompt eval time =   600.35 ms /    37 tokens (   16.23 ms per token,    61.63 tokens per second)
llama_print_timings:        eval time =   527.98 ms /     6 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  1142.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's quite convenient to use a small sundial, or simply look at the sun to get an idea of the time.



llama_print_timings:        load time =   609.03 ms
llama_print_timings:      sample time =    12.64 ms /    28 runs   (    0.45 ms per token,  2215.54 tokens per second)
llama_print_timings: prompt eval time =   608.99 ms /    50 tokens (   12.18 ms per token,    82.10 tokens per second)
llama_print_timings:        eval time =  2373.94 ms /    27 runs   (   87.92 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  3039.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhou Wenwang and his followers looked at each other, then stared at Wang Mankou as if he was an idiot. “Sun? How can we know the time by looking at the sun? This is New Dynasty.”



llama_print_timings:        load time =   644.64 ms
llama_print_timings:      sample time =    24.44 ms /    55 runs   (    0.44 ms per token,  2250.41 tokens per second)
llama_print_timings: prompt eval time =   644.60 ms /    70 tokens (    9.21 ms per token,   108.59 tokens per second)
llama_print_timings:        eval time =  4756.25 ms /    54 runs   (   88.08 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  5509.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, you mean the term "cold like a fish". That sounds really cold!



llama_print_timings:        load time =   642.57 ms
llama_print_timings:      sample time =     8.35 ms /    19 runs   (    0.44 ms per token,  2275.99 tokens per second)
llama_print_timings: prompt eval time =   642.53 ms /    58 tokens (   11.08 ms per token,    90.27 tokens per second)
llama_print_timings:        eval time =  1515.89 ms /    18 runs   (   84.22 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  2195.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mao also felt cold, but he couldn't take off the sensor suit without permission. In normal cases, that would be considered cheating and his ID would be terminated by the game. He said: “It will be warmer once the sun comes out.”



llama_print_timings:        load time =   625.53 ms
llama_print_timings:      sample time =    25.79 ms /    55 runs   (    0.47 ms per token,  2132.53 tokens per second)
llama_print_timings: prompt eval time =   625.49 ms /    67 tokens (    9.34 ms per token,   107.12 tokens per second)
llama_print_timings:        eval time =  4843.19 ms /    54 runs   (   89.69 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =  5587.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you pretending to be a great prophet? Even Zhou Wen Wang is not a prophet!" Followers mocked Wang Mao with disdain.



llama_print_timings:        load time =   621.49 ms
llama_print_timings:      sample time =    16.57 ms /    37 runs   (    0.45 ms per token,  2233.09 tokens per second)
llama_print_timings: prompt eval time =   621.46 ms /    61 tokens (   10.19 ms per token,    98.16 tokens per second)
llama_print_timings:        eval time =  3258.90 ms /    36 runs   (   90.52 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  3955.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This requires a seer? Who doesn't see that the sun will rise in two hours?" Wang Mo pointed to the sky.



llama_print_timings:        load time =   621.78 ms
llama_print_timings:      sample time =    13.09 ms /    29 runs   (    0.45 ms per token,  2216.11 tokens per second)
llama_print_timings: prompt eval time =   621.74 ms /    59 tokens (   10.54 ms per token,    94.90 tokens per second)
llama_print_timings:        eval time =  2421.04 ms /    28 runs   (   86.47 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3101.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is the New Age!" said the follower.



llama_print_timings:        load time =   584.36 ms
llama_print_timings:      sample time =     5.71 ms /    13 runs   (    0.44 ms per token,  2275.51 tokens per second)
llama_print_timings: prompt eval time =   584.32 ms /    40 tokens (   14.61 ms per token,    68.46 tokens per second)
llama_print_timings:        eval time =  1062.69 ms /    12 runs   (   88.56 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  1673.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is the Epoch?



llama_print_timings:        load time =   581.13 ms
llama_print_timings:      sample time =     3.54 ms /     8 runs   (    0.44 ms per token,  2256.70 tokens per second)
llama_print_timings: prompt eval time =   581.09 ms /    36 tokens (   16.14 ms per token,    61.95 tokens per second)
llama_print_timings:        eval time =   629.02 ms /     7 runs   (   89.86 ms per token,    11.13 tokens per second)
llama_print_timings:       total time =  1226.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Besides the Heng Dynasty, all dynasties are chaotic," said Zhou Wenwang. This was like answering a question from an ignorant child.



llama_print_timings:        load time =   600.78 ms
llama_print_timings:      sample time =    17.81 ms /    39 runs   (    0.46 ms per token,  2189.41 tokens per second)
llama_print_timings: prompt eval time =   600.74 ms /    55 tokens (   10.92 ms per token,    91.55 tokens per second)
llama_print_timings:        eval time =  3237.31 ms /    38 runs   (   85.19 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3917.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's just as I expected. The morning light in the sky fades away quickly, and the night sky is covered by the darkness again. Stars in the celestial spheres twinkle brightly.



llama_print_timings:        load time =   623.17 ms
llama_print_timings:      sample time =    20.79 ms /    45 runs   (    0.46 ms per token,  2164.71 tokens per second)
llama_print_timings: prompt eval time =   623.13 ms /    57 tokens (   10.93 ms per token,    91.47 tokens per second)
llama_print_timings:        eval time =  3802.07 ms /    44 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4517.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Now it's not dawn but dusk?" asked Wang Miao.



llama_print_timings:        load time =   583.04 ms
llama_print_timings:      sample time =     7.54 ms /    17 runs   (    0.44 ms per token,  2255.24 tokens per second)
llama_print_timings: prompt eval time =   583.00 ms /    43 tokens (   13.56 ms per token,    73.76 tokens per second)
llama_print_timings:        eval time =  1439.38 ms /    16 runs   (   89.96 ms per token,    11.12 tokens per second)
llama_print_timings:       total time =  2056.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's morning, but the sun may not rise; this is a strange era.”



llama_print_timings:        load time =   595.03 ms
llama_print_timings:      sample time =     8.85 ms /    20 runs   (    0.44 ms per token,  2258.87 tokens per second)
llama_print_timings: prompt eval time =   595.00 ms /    46 tokens (   12.93 ms per token,    77.31 tokens per second)
llama_print_timings:        eval time =  1623.50 ms /    19 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  2257.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's cold and Wang Mu is very unhappy. "Look at that, the sun won't rise for a long time," he said, pointing to the dim horizon.



llama_print_timings:        load time =   616.79 ms
llama_print_timings:      sample time =    17.21 ms /    38 runs   (    0.45 ms per token,  2208.28 tokens per second)
llama_print_timings: prompt eval time =   616.76 ms /    64 tokens (    9.64 ms per token,   103.77 tokens per second)
llama_print_timings:        eval time =  3163.10 ms /    37 runs   (   85.49 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3857.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How could you have such an idea?” the follower said, turning to Zhou Wenwang. “Ji Chang, please give me some dried fish.”



llama_print_timings:        load time =   623.02 ms
llama_print_timings:      sample time =    16.81 ms /    38 runs   (    0.44 ms per token,  2260.96 tokens per second)
llama_print_timings: prompt eval time =   622.98 ms /    67 tokens (    9.30 ms per token,   107.55 tokens per second)
llama_print_timings:        eval time =  3200.96 ms /    37 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3899.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, ” said Zhou Wenwang firmly. “I'm just barely managing to get by myself and make sure I can reach Changle instead of you.”



llama_print_timings:        load time =   604.32 ms
llama_print_timings:      sample time =    17.89 ms /    40 runs   (    0.45 ms per token,  2236.14 tokens per second)
llama_print_timings: prompt eval time =   604.27 ms /    56 tokens (   10.79 ms per token,    92.67 tokens per second)
llama_print_timings:        eval time =  3447.15 ms /    39 runs   (   88.39 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  4131.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he spoke, Wang Moxi noticed that the horizon in another direction was also beginning to glow. He could not tell if it was south or north but he knew it must have been a different direction than where the sun had appeared last time.



llama_print_timings:        load time =   618.11 ms
llama_print_timings:      sample time =    23.17 ms /    52 runs   (    0.45 ms per token,  2244.09 tokens per second)
llama_print_timings: prompt eval time =   618.08 ms /    61 tokens (   10.13 ms per token,    98.69 tokens per second)
llama_print_timings:        eval time =  4487.95 ms /    51 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  5211.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This dawn comes soon, and in a few moments the world's sun rises. It is a small blue sun that resembles a bright moon but still gives him some warmth. He looks clearly at all the details of the earth. But this daylight is short-lived and the sun has left behind a shallow arc on the horizon, leaving cold and darkness to everything once more.



llama_print_timings:        load time =   703.59 ms
llama_print_timings:      sample time =    37.37 ms /    83 runs   (    0.45 ms per token,  2220.80 tokens per second)
llama_print_timings: prompt eval time =   703.55 ms /   110 tokens (    6.40 ms per token,   156.35 tokens per second)
llama_print_timings:        eval time =  7104.44 ms /    82 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  7977.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They stopped in front of a dead tree. Zhou Wenwang and his followers wielded bronze swords to chop wood while Wang Meng collected the small pieces of wood into one place. The followers lit matches, crackling, crackling, and finally made a pile of fire. Wang Meng's sense suit's front part was heated but his back still cold.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Followers say, “When you burn dry wood, the fire will grow.”



llama_print_timings:        load time =   593.85 ms
llama_print_timings:      sample time =     8.03 ms /    18 runs   (    0.45 ms per token,  2241.87 tokens per second)
llama_print_timings: prompt eval time =   593.81 ms /    46 tokens (   12.91 ms per token,    77.47 tokens per second)
llama_print_timings:        eval time =  1475.31 ms /    17 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  2104.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shut up! That's the job of King Xu Shang.



llama_print_timings:        load time =   583.40 ms
llama_print_timings:      sample time =     7.73 ms /    16 runs   (    0.48 ms per token,  2070.13 tokens per second)
llama_print_timings: prompt eval time =   583.36 ms /    40 tokens (   14.58 ms per token,    68.57 tokens per second)
llama_print_timings:        eval time =  1281.14 ms /    15 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1898.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Anyway, those scattered all over the road have become that way. If your theories can work, don't say to just burn them or eat them because then they're not worth a life.



llama_print_timings:        load time =   640.49 ms
llama_print_timings:      sample time =    18.89 ms /    42 runs   (    0.45 ms per token,  2223.75 tokens per second)
llama_print_timings: prompt eval time =   640.44 ms /    74 tokens (    8.65 ms per token,   115.55 tokens per second)
llama_print_timings:        eval time =  3514.98 ms /    41 runs   (   85.73 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4240.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After the bonfire had been extinguished, the three continued on their journey. Due to their lack of communication, the game time passed faster. Within six turns of the sandglass on his back, the king's days off was up. The sun had not risen at all in that time and there were no shadows cast at all.



llama_print_timings:        load time =   662.14 ms
llama_print_timings:      sample time =    33.31 ms /    73 runs   (    0.46 ms per token,  2191.67 tokens per second)
llama_print_timings: prompt eval time =   662.10 ms /    92 tokens (    7.20 ms per token,   138.95 tokens per second)
llama_print_timings:        eval time =  6442.88 ms /    72 runs   (   89.48 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  7256.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It seems like the sun won't come out today," said Wang Miao. At the same time, she looked at her HP and saw that it was quickly decreasing due to the cold.



llama_print_timings:        load time =   607.20 ms
llama_print_timings:      sample time =    18.95 ms /    43 runs   (    0.44 ms per token,  2268.89 tokens per second)
llama_print_timings: prompt eval time =   607.17 ms /    60 tokens (   10.12 ms per token,    98.82 tokens per second)
llama_print_timings:        eval time =  3626.36 ms /    42 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  4319.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You're just another prophet, " the follower said. "It is an era of chaos! "



llama_print_timings:        load time =   617.20 ms
llama_print_timings:      sample time =    12.30 ms /    26 runs   (    0.47 ms per token,  2112.96 tokens per second)
llama_print_timings: prompt eval time =   617.16 ms /    63 tokens (    9.80 ms per token,   102.08 tokens per second)
llama_print_timings:        eval time =  2195.09 ms /    25 runs   (   87.80 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  2869.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When the sun rose, it was so big that it filled at least one fifth of the sky. The warmth felt so good on his skin, making Xia Wang very happy but he looked around and saw that all his followers had a worried expression, as if a devil had arrived.



llama_print_timings:        load time =   706.36 ms
llama_print_timings:      sample time =    26.92 ms /    61 runs   (    0.44 ms per token,  2266.14 tokens per second)
llama_print_timings: prompt eval time =   706.32 ms /   123 tokens (    5.74 ms per token,   174.14 tokens per second)
llama_print_timings:        eval time =  5155.77 ms /    60 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  5983.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Quickly, find some shade!” Followers shouted and Wang Miao followed them to a low rock behind which they sat down. The shadow gradually shortened and the ground around was so bright it was blinding, the frozen soil rapidly melted under their feet turning into mud, sweat poured out quickly… when the big sun rose above their heads, three people wrapped themselves in animal skins to block out the light shining through all gaps and holes. They moved to another side of the rock and hid within a newly-emerged shadow…



llama_print_timings:        load time =   790.34 ms
llama_print_timings:      sample time =    52.77 ms /   119 runs   (    0.44 ms per token,  2255.07 tokens per second)
llama_print_timings: prompt eval time =   790.30 ms /   165 tokens (    4.79 ms per token,   208.78 tokens per second)
llama_print_timings:        eval time = 10116.55 ms /   118 runs   (   85.73 ms per token,    11.66 tokens per second)
llama_print_timings:       total time = 11145.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the sun sets, the air is still unusually hot and the three men are sitting on a rock and following the lead of one of them who is miserable and says “What hellish expedition! I can’t stand it anymore; and I have no food to eat. Please give me some fish or something, and don’t let us drink the water.”



llama_print_timings:        load time =   659.75 ms
llama_print_timings:      sample time =    34.05 ms /    76 runs   (    0.45 ms per token,  2232.08 tokens per second)
llama_print_timings: prompt eval time =   659.71 ms /    95 tokens (    6.94 ms per token,   144.00 tokens per second)
llama_print_timings:        eval time =  6456.68 ms /    75 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  7269.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhou Wen Wang said, "You can only dehydrate now."



llama_print_timings:        load time =   615.31 ms
llama_print_timings:      sample time =     8.33 ms /    18 runs   (    0.46 ms per token,  2161.12 tokens per second)
llama_print_timings: prompt eval time =   615.28 ms /    50 tokens (   12.31 ms per token,    81.26 tokens per second)
llama_print_timings:        eval time =  1552.76 ms /    17 runs   (   91.34 ms per token,    10.95 tokens per second)
llama_print_timings:       total time =  2206.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After dehydration, you're not going to throw me away?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Of course not. I promise to take you to ChaoGou.



llama_print_timings:        load time =   633.89 ms
llama_print_timings:      sample time =     7.94 ms /    16 runs   (    0.50 ms per token,  2014.61 tokens per second)
llama_print_timings: prompt eval time =   633.87 ms /    42 tokens (   15.09 ms per token,    66.26 tokens per second)
llama_print_timings:        eval time =  1398.96 ms /    15 runs   (   93.26 ms per token,    10.72 tokens per second)
llama_print_timings:       total time =  2066.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The follower stripped off the wet robe and lay bare on the mud. In the twilight of the setting sun, Wang Ming saw that the sweat of the follower abruptly increased. He quickly realized it was not perspiration but that water inside his body was being completely drained out, forming a few small streams on the sand below… Ten minutes later, when all the water had been eliminated, the whole body lay flat and still on the mud like an undiluted candle melted into soft skin.



llama_print_timings:        load time =   833.35 ms
llama_print_timings:      sample time =    60.91 ms /   114 runs   (    0.53 ms per token,  1871.71 tokens per second)
llama_print_timings: prompt eval time =   833.31 ms /   153 tokens (    5.45 ms per token,   183.61 tokens per second)
llama_print_timings:        eval time = 10620.37 ms /   113 runs   (   93.99 ms per token,    10.64 tokens per second)
llama_print_timings:       total time = 11734.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He asked, "Did he die?" Wang Mu had remembered that there were many soft-skinned people along the way. Some of them were broken and had not yet returned. That was the one the followers wanted to burn to keep warm.



llama_print_timings:        load time =   645.61 ms
llama_print_timings:      sample time =    23.56 ms /    51 runs   (    0.46 ms per token,  2164.50 tokens per second)
llama_print_timings: prompt eval time =   645.56 ms /    76 tokens (    8.49 ms per token,   117.73 tokens per second)
llama_print_timings:        eval time =  4572.07 ms /    50 runs   (   91.44 ms per token,    10.94 tokens per second)
llama_print_timings:       total time =  5328.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said, “No. I'm sorry to say it is impossible.” He picked up the soft hide from his followers and smoothed out its dirt before putting it onto a rock. It looked like a soft rubber ball which he then put into water. He exclaimed excitedly, “It will come back to life! It will come back to life! Just like with the mushroom.”



llama_print_timings:        load time =   704.77 ms
llama_print_timings:      sample time =    38.73 ms /    84 runs   (    0.46 ms per token,  2169.03 tokens per second)
llama_print_timings: prompt eval time =   704.73 ms /    98 tokens (    7.19 ms per token,   139.06 tokens per second)
llama_print_timings:        eval time =  7650.46 ms /    83 runs   (   92.17 ms per token,    10.85 tokens per second)
llama_print_timings:       total time =  8532.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“His bones are also soft?”



llama_print_timings:        load time =   617.85 ms
llama_print_timings:      sample time =     4.52 ms /    10 runs   (    0.45 ms per token,  2212.88 tokens per second)
llama_print_timings: prompt eval time =   617.81 ms /    38 tokens (   16.26 ms per token,    61.51 tokens per second)
llama_print_timings:        eval time =   843.13 ms /     9 runs   (   93.68 ms per token,    10.67 tokens per second)
llama_print_timings:       total time =  1481.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, they are all dried and this makes it easier to carry.



llama_print_timings:        load time =   616.81 ms
llama_print_timings:      sample time =     7.16 ms /    16 runs   (    0.45 ms per token,  2233.39 tokens per second)
llama_print_timings: prompt eval time =   616.76 ms /    43 tokens (   14.34 ms per token,    69.72 tokens per second)
llama_print_timings:        eval time =  1372.81 ms /    15 runs   (   91.52 ms per token,    10.93 tokens per second)
llama_print_timings:       total time =  2021.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Does everyone in this world have the ability to dehydrate?



llama_print_timings:        load time =   597.59 ms
llama_print_timings:      sample time =     7.25 ms /    15 runs   (    0.48 ms per token,  2070.11 tokens per second)
llama_print_timings: prompt eval time =   597.55 ms /    39 tokens (   15.32 ms per token,    65.27 tokens per second)
llama_print_timings:        eval time =  1258.89 ms /    14 runs   (   89.92 ms per token,    11.12 tokens per second)
llama_print_timings:       total time =  1890.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, you can too. Otherwise, we wouldn't be able to survive in this chaotic age.



llama_print_timings:        load time =   674.73 ms
llama_print_timings:      sample time =    16.39 ms /    25 runs   (    0.66 ms per token,  1525.60 tokens per second)
llama_print_timings: prompt eval time =   674.71 ms /    81 tokens (    8.33 ms per token,   120.05 tokens per second)
llama_print_timings:        eval time =  2358.84 ms /    24 runs   (   98.28 ms per token,    10.17 tokens per second)
llama_print_timings:       total time =  3107.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mo received the soft leather, it felt very light. Around half a roll of it would fit comfortably under his arm.



llama_print_timings:        load time =   636.70 ms
llama_print_timings:      sample time =    13.89 ms /    30 runs   (    0.46 ms per token,  2160.45 tokens per second)
llama_print_timings: prompt eval time =   636.66 ms /    55 tokens (   11.58 ms per token,    86.39 tokens per second)
llama_print_timings:        eval time =  2603.16 ms /    29 runs   (   89.76 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  3303.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

汪淼 and Zhou Wenwang are still traveling together in this unpredictable world, facing the same challenges as before. Despite the irregularities of sunrise and sunset in this universe, they continue to support each other through their journeys. Whenever one of them feels exhausted, the other will come along side him or her and share the burden together, whether it be staying by the fireside to keep warm in cold weather, or jumping into lakes to cool off during hot days. Fortunately, game time can be sped up, so they can complete one month's journey within a half-hour. This makes their travel through this unpredictable universe still bearable after all.



llama_print_timings:        load time =   773.71 ms
llama_print_timings:      sample time =    72.28 ms /   156 runs   (    0.46 ms per token,  2158.12 tokens per second)
llama_print_timings: prompt eval time =   773.66 ms /   137 tokens (    5.65 ms per token,   177.08 tokens per second)
llama_print_timings:        eval time = 13940.00 ms /   155 runs   (   89.94 ms per token,    11.12 tokens per second)
llama_print_timings:       total time = 15049.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On this day, the long night had lasted for nearly a week (according to an hourglass), and Zhou Wenwang suddenly pointed at the sky and cheered: "Flying star! Flying star! Two flying stars! !"



llama_print_timings:        load time =   638.76 ms
llama_print_timings:      sample time =    25.49 ms /    54 runs   (    0.47 ms per token,  2118.64 tokens per second)
llama_print_timings: prompt eval time =   638.72 ms /    72 tokens (    8.87 ms per token,   112.73 tokens per second)
llama_print_timings:        eval time =  4892.18 ms /    53 runs   (   92.31 ms per token,    10.83 tokens per second)
llama_print_timings:       total time =  5653.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In fact, Wang Miao had noticed that strange celestial body before. It was larger than a star and could be seen as a ping-pong ball size disc moving rapidly through the sky. However, there were two of them this time.



llama_print_timings:        load time =   656.72 ms
llama_print_timings:      sample time =    25.80 ms /    52 runs   (    0.50 ms per token,  2015.89 tokens per second)
llama_print_timings: prompt eval time =   656.70 ms /    81 tokens (    8.11 ms per token,   123.34 tokens per second)
llama_print_timings:        eval time =  4578.32 ms /    51 runs   (   89.77 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  5353.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

King Chou explained, “When two flying stars appear, the stable epoch is coming!”



llama_print_timings:        load time =   656.57 ms
llama_print_timings:      sample time =    19.34 ms /    21 runs   (    0.92 ms per token,  1085.55 tokens per second)
llama_print_timings: prompt eval time =   656.48 ms /    49 tokens (   13.40 ms per token,    74.64 tokens per second)
llama_print_timings:        eval time =  1982.08 ms /    20 runs   (   99.10 ms per token,    10.09 tokens per second)
llama_print_timings:       total time =  2728.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have seen it before.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Only one.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Only two?”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“No, there will be three at most.”



llama_print_timings:        load time =  1832.19 ms
llama_print_timings:      sample time =     7.54 ms /    11 runs   (    0.69 ms per token,  1458.31 tokens per second)
llama_print_timings: prompt eval time =  1832.15 ms /    40 tokens (   45.80 ms per token,    21.83 tokens per second)
llama_print_timings:        eval time =   994.90 ms /    10 runs   (   99.49 ms per token,    10.05 tokens per second)
llama_print_timings:       total time =  2861.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Isn't it a sign of a better era when three stars appear in the sky?



llama_print_timings:        load time =   633.49 ms
llama_print_timings:      sample time =     9.11 ms /    20 runs   (    0.46 ms per token,  2194.67 tokens per second)
llama_print_timings: prompt eval time =   633.45 ms /    46 tokens (   13.77 ms per token,    72.62 tokens per second)
llama_print_timings:        eval time =  1783.24 ms /    19 runs   (   93.85 ms per token,    10.65 tokens per second)
llama_print_timings:       total time =  2457.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhou Wenwang stared at Wang Mankui with a fearful expression, “What are you talking about? Three flying stars… I hope they don’t appear.



llama_print_timings:        load time =   652.00 ms
llama_print_timings:      sample time =    19.00 ms /    39 runs   (    0.49 ms per token,  2052.52 tokens per second)
llama_print_timings: prompt eval time =   651.96 ms /    63 tokens (   10.35 ms per token,    96.63 tokens per second)
llama_print_timings:        eval time =  3627.57 ms /    38 runs   (   95.46 ms per token,    10.48 tokens per second)
llama_print_timings:       total time =  4371.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As Zhou Wenwang had said, the eternal era soon began to dawn. The sun rose and set with greater regularity, a day and night gradually becoming consistent, lasting roughly 18 hours. This change in weather patterns made it warmer at different times of year.



llama_print_timings:        load time =   671.11 ms
llama_print_timings:      sample time =    30.72 ms /    61 runs   (    0.50 ms per token,  1985.48 tokens per second)
llama_print_timings: prompt eval time =   671.06 ms /    77 tokens (    8.72 ms per token,   114.74 tokens per second)
llama_print_timings:        eval time =  5705.50 ms /    60 runs   (   95.09 ms per token,    10.52 tokens per second)
llama_print_timings:       total time =  6519.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"How long can the Heng Jian Yuan last?" asked Wang Miao.



llama_print_timings:        load time =   622.49 ms
llama_print_timings:      sample time =     8.87 ms /    19 runs   (    0.47 ms per token,  2142.78 tokens per second)
llama_print_timings: prompt eval time =   622.45 ms /    44 tokens (   14.15 ms per token,    70.69 tokens per second)
llama_print_timings:        eval time =  1670.54 ms /    18 runs   (   92.81 ms per token,    10.77 tokens per second)
llama_print_timings:       total time =  2333.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“A day or a century, who knows how long? ” Zhou Wenwang sat on the hourglass and looked up at noon sun. “According to records, there was a stable period in the Western Zhou dynasty for two centuries,” he sighed. “Life must have been so much better when you lived during that time.”



llama_print_timings:        load time =   694.51 ms
llama_print_timings:      sample time =    33.95 ms /    75 runs   (    0.45 ms per token,  2209.46 tokens per second)
llama_print_timings: prompt eval time =   694.47 ms /    87 tokens (    7.98 ms per token,   125.27 tokens per second)
llama_print_timings:        eval time =  6895.52 ms /    74 runs   (   93.18 ms per token,    10.73 tokens per second)
llama_print_timings:       total time =  7748.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How long will the New Era last?”



llama_print_timings:        load time =   612.02 ms
llama_print_timings:      sample time =     4.51 ms /    10 runs   (    0.45 ms per token,  2217.79 tokens per second)
llama_print_timings: prompt eval time =   612.00 ms /    40 tokens (   15.30 ms per token,    65.36 tokens per second)
llama_print_timings:        eval time =   825.67 ms /     9 runs   (   91.74 ms per token,    10.90 tokens per second)
llama_print_timings:       total time =  1458.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As I have said before, the only constant is change.



llama_print_timings:        load time =   644.25 ms
llama_print_timings:      sample time =     5.86 ms /    13 runs   (    0.45 ms per token,  2217.67 tokens per second)
llama_print_timings: prompt eval time =   644.21 ms /    50 tokens (   12.88 ms per token,    77.61 tokens per second)
llama_print_timings:        eval time =  1106.54 ms /    12 runs   (   92.21 ms per token,    10.84 tokens per second)
llama_print_timings:       total time =  1777.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That means this is a completely chaotic world with no rhyme or reason?"



llama_print_timings:        load time =   778.57 ms
llama_print_timings:      sample time =    21.35 ms /    18 runs   (    1.19 ms per token,   843.17 tokens per second)
llama_print_timings: prompt eval time =   778.49 ms /    43 tokens (   18.10 ms per token,    55.24 tokens per second)
llama_print_timings:        eval time =  1850.44 ms /    17 runs   (  108.85 ms per token,     9.19 tokens per second)
llama_print_timings:       total time =  2730.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes. Civilization can only develop over a longer period of warm climate, and the majority of the time is spent collectively storing moisture for later use when the extended period comes back into existence. Production and construction are then resumed.



llama_print_timings:        load time =   664.94 ms
llama_print_timings:      sample time =    24.95 ms /    50 runs   (    0.50 ms per token,  2003.77 tokens per second)
llama_print_timings: prompt eval time =   664.90 ms /    79 tokens (    8.42 ms per token,   118.81 tokens per second)
llama_print_timings:        eval time =  4661.09 ms /    49 runs   (   95.12 ms per token,    10.51 tokens per second)
llama_print_timings:       total time =  5442.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"How can we know the time and duration of each Epoch?"



llama_print_timings:        load time =   613.47 ms
llama_print_timings:      sample time =     7.68 ms /    16 runs   (    0.48 ms per token,  2083.88 tokens per second)
llama_print_timings: prompt eval time =   613.45 ms /    46 tokens (   13.34 ms per token,    74.99 tokens per second)
llama_print_timings:        eval time =  1403.77 ms /    15 runs   (   93.58 ms per token,    10.69 tokens per second)
llama_print_timings:       total time =  2054.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

In my opinion, the sun does not follow any rule.



llama_print_timings:        load time =   609.17 ms
llama_print_timings:      sample time =     6.21 ms /    13 runs   (    0.48 ms per token,  2091.71 tokens per second)
llama_print_timings: prompt eval time =   609.13 ms /    38 tokens (   16.03 ms per token,    62.38 tokens per second)
llama_print_timings:        eval time =  1075.70 ms /    12 runs   (   89.64 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  1713.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's because you haven't understood the original nature of the world.



llama_print_timings:        load time =   615.43 ms
llama_print_timings:      sample time =     7.71 ms /    17 runs   (    0.45 ms per token,  2204.93 tokens per second)
llama_print_timings: prompt eval time =   615.39 ms /    41 tokens (   15.01 ms per token,    66.62 tokens per second)
llama_print_timings:        eval time =  1498.90 ms /    16 runs   (   93.68 ms per token,    10.67 tokens per second)
llama_print_timings:       total time =  2148.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Did you figure it out?



llama_print_timings:        load time =   628.38 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2250.08 tokens per second)
llama_print_timings: prompt eval time =   628.36 ms /    35 tokens (   17.95 ms per token,    55.70 tokens per second)
llama_print_timings:        eval time =   553.31 ms /     6 runs   (   92.22 ms per token,    10.84 tokens per second)
llama_print_timings:       total time =  1195.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, that is my goal. I will present Emperor Chun the precise ancient Chinese calendar.”



llama_print_timings:        load time =   644.59 ms
llama_print_timings:      sample time =     9.68 ms /    21 runs   (    0.46 ms per token,  2169.65 tokens per second)
llama_print_timings: prompt eval time =   644.55 ms /    54 tokens (   11.94 ms per token,    83.78 tokens per second)
llama_print_timings:        eval time =  1813.11 ms /    20 runs   (   90.66 ms per token,    11.03 tokens per second)
llama_print_timings:       total time =  2501.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You didn't seem to have this ability on the way.



llama_print_timings:        load time =   610.16 ms
llama_print_timings:      sample time =     7.00 ms /    14 runs   (    0.50 ms per token,  2000.29 tokens per second)
llama_print_timings: prompt eval time =   610.12 ms /    42 tokens (   14.53 ms per token,    68.84 tokens per second)
llama_print_timings:        eval time =  1241.68 ms /    13 runs   (   95.51 ms per token,    10.47 tokens per second)
llama_print_timings:       total time =  1884.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They traveled for a long time in the harsh era and then experienced a brief era of stability. Finally, they arrived at Chengxia.



llama_print_timings:        load time =   698.68 ms
llama_print_timings:      sample time =    15.04 ms /    33 runs   (    0.46 ms per token,  2193.42 tokens per second)
llama_print_timings: prompt eval time =   698.63 ms /    91 tokens (    7.68 ms per token,   130.25 tokens per second)
llama_print_timings:        eval time =  2983.19 ms /    32 runs   (   93.22 ms per token,    10.73 tokens per second)
llama_print_timings:       total time =  3750.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Mu heard a continuous, rumbling noise like thunder. This sound was being produced by many strange things on the Aiguo Danggaotang, it is huge and has several meters high: The pendulum of these pendula is made up of a block of rock that is suspended from a taut cable in the airbridge connecting two tall stone towers. These pendula all move in synchrony = they are driven by a group of armored soldiers who sing strange chants together. They pull on the rope hanging the pendulum made up of a rock, maintaining its swing. Wang Mu finds that the swings of all these pendula are synchronized, and if you look far away from them, they will make you feel dazzled, like standing tall walking clocks or like many huge and abstract symbols dropping from the sky.



llama_print_timings:        load time =   924.35 ms
llama_print_timings:      sample time =    86.60 ms /   188 runs   (    0.46 ms per token,  2170.95 tokens per second)
llama_print_timings: prompt eval time =   924.28 ms /   207 tokens (    4.47 ms per token,   223.96 tokens per second)
llama_print_timings:        eval time = 17845.66 ms /   187 runs   (   95.43 ms per token,    10.48 tokens per second)
llama_print_timings:       total time = 19174.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They walked along a long tunnel with narrow and dark walls, only a torch at intervals.



llama_print_timings:        load time =   755.09 ms
llama_print_timings:      sample time =    10.18 ms /    20 runs   (    0.51 ms per token,  1964.25 tokens per second)
llama_print_timings: prompt eval time =   755.05 ms /   124 tokens (    6.09 ms per token,   164.23 tokens per second)
llama_print_timings:        eval time =  1857.52 ms /    19 runs   (   97.76 ms per token,    10.23 tokens per second)
llama_print_timings:       total time =  2662.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In the Age of Chaos, the entire nation was plunged into turmoil, but Emperor Xu had remained awake and remained with the lifeless land. If one wished to survive during the Age of Chaos, they would have to live in this thick-walled building like a human living underground, escaping both the cold and the heat.”



llama_print_timings:        load time =   686.80 ms
llama_print_timings:      sample time =    35.40 ms /    78 runs   (    0.45 ms per token,  2203.70 tokens per second)
llama_print_timings: prompt eval time =   686.77 ms /   103 tokens (    6.67 ms per token,   149.98 tokens per second)
llama_print_timings:        eval time =  6761.72 ms /    77 runs   (   87.81 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  7611.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a long walk, I entered the Great Hall of King Chou located at the center of the pyramid. The hall was not very large, resembling more like a cave than anything else. Seated in high up on a platform covered with a huge sheet of tiger hide, an individual who is apparently King Chou stood out to me. However, my attention was first drawn by a man dressed all in black seated in a shadow-filled corner. His pale face seemed to float in the air like a ghost.



llama_print_timings:        load time =   704.81 ms
llama_print_timings:      sample time =    49.79 ms /   111 runs   (    0.45 ms per token,  2229.23 tokens per second)
llama_print_timings: prompt eval time =   704.76 ms /   117 tokens (    6.02 ms per token,   166.01 tokens per second)
llama_print_timings:        eval time =  9747.31 ms /   110 runs   (   88.61 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 10679.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is Fuxi, ” said the King of Xiong to Zhou Wenwang and Wang Miao as he introduced the black-clothed man. He appeared to have been there forever and the man was the newcomer. “He thought the sun was an unruly deity, who was constantly changing moods from daytime to nighttime and vice versa, so it was called a Wanjing Period; while when he was asleep his breathing was even, which meant we were in a Huajing Period. He proposed that the big frames outside should be kept moving at all times so that they would have a strong hypnotic effect on the sun god, which could make him fall into a long slumber,” said Fuxi. “But until now, we see that the sun god is still awake, and it only dozes off occasionally.”



llama_print_timings:        load time =   832.13 ms
llama_print_timings:      sample time =    89.50 ms /   189 runs   (    0.47 ms per token,  2111.76 tokens per second)
llama_print_timings: prompt eval time =   832.07 ms /   161 tokens (    5.17 ms per token,   193.49 tokens per second)
llama_print_timings:        eval time = 17813.96 ms /   188 runs   (   94.76 ms per token,    10.55 tokens per second)
llama_print_timings:       total time = 19071.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The King of Xiayan waved his hand and someone brought a pot made of clay to the small stone table in front of Wushui. The Duke of Dian later knew that it was an ingredient for cooking. Wushui took a deep breath and picked up the pot, then drank it. The noisy sound of liquid pouring into his stomach resembled a beating heart from the depths of the darkness. After half of the pot of sauce had been drunk down, he threw away the pot and climbed to the top of the bronze large pot placed on the floor in the corner of the palace, then jumped inside it.



llama_print_timings:        load time =   809.69 ms
llama_print_timings:      sample time =    64.92 ms /   144 runs   (    0.45 ms per token,  2218.18 tokens per second)
llama_print_timings: prompt eval time =   809.66 ms /   147 tokens (    5.51 ms per token,   181.56 tokens per second)
llama_print_timings:        eval time = 13002.79 ms /   143 runs   (   90.93 ms per token,    11.00 tokens per second)
llama_print_timings:       total time = 14113.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Han Chang sat down and soon a feast was prepared.



llama_print_timings:        load time =   599.43 ms
llama_print_timings:      sample time =     6.72 ms /    15 runs   (    0.45 ms per token,  2230.81 tokens per second)
llama_print_timings: prompt eval time =   599.39 ms /    51 tokens (   11.75 ms per token,    85.09 tokens per second)
llama_print_timings:        eval time =  1225.11 ms /    14 runs   (   87.51 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1856.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The stupid magic.



llama_print_timings:        load time =   597.21 ms
llama_print_timings:      sample time =     2.46 ms /     5 runs   (    0.49 ms per token,  2035.00 tokens per second)
llama_print_timings: prompt eval time =   597.17 ms /    50 tokens (   11.94 ms per token,    83.73 tokens per second)
llama_print_timings:        eval time =   347.93 ms /     4 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =   956.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The king asked, "What have you learned from the sun?"



llama_print_timings:        load time =   604.58 ms
llama_print_timings:      sample time =     6.18 ms /    14 runs   (    0.44 ms per token,  2266.47 tokens per second)
llama_print_timings: prompt eval time =   604.53 ms /    52 tokens (   11.63 ms per token,    86.02 tokens per second)
llama_print_timings:        eval time =  1131.45 ms /    13 runs   (   87.03 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1763.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The sun is not a god, but the sunlight; the darkness of night follows it. The world moves in balance between these two energies, which are not under our control. However, their pattern can be predicted,” said Zhou Wenwang as he pulled out his bronze sword and proceeded to draw giant yin-yang fishes on the floor where the torchlight shone through. He then rapidly painted 64 hexagrams around him in a way that seemed to have hidden meanings – like year circles glowing between light and darkness – “Your Majesty, this is the message of the universe. With it, I will give you an accurate, millennial calendar for your dynasty.”



llama_print_timings:        load time =   755.43 ms
llama_print_timings:      sample time =    66.73 ms /   150 runs   (    0.44 ms per token,  2247.70 tokens per second)
llama_print_timings: prompt eval time =   755.38 ms /   142 tokens (    5.32 ms per token,   187.98 tokens per second)
llama_print_timings:        eval time = 13417.94 ms /   149 runs   (   90.05 ms per token,    11.10 tokens per second)
llama_print_timings:       total time = 14481.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Ji Chang, I need to know when the next Long-lasting Era will come."



llama_print_timings:        load time =   613.98 ms
llama_print_timings:      sample time =     9.78 ms /    22 runs   (    0.44 ms per token,  2249.26 tokens per second)
llama_print_timings: prompt eval time =   613.96 ms /    50 tokens (   12.28 ms per token,    81.44 tokens per second)
llama_print_timings:        eval time =  1870.28 ms /    21 runs   (   89.06 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  2528.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I will instantly foretell your fortune,” said Chou Wen Wang as he approached the middle of the yin and yang fish, sitting down with his legs crossed and looked up at the ceiling of the vast hall. His fingers were moving simultaneously in complex movements, combining to form a high-speed calculator. In silence, only the sound of boiling water from the great pot was heard, as if the wizard was mumbling in his dreams.



llama_print_timings:        load time =   699.76 ms
llama_print_timings:      sample time =    44.68 ms /    99 runs   (    0.45 ms per token,  2215.95 tokens per second)
llama_print_timings: prompt eval time =   699.73 ms /   122 tokens (    5.74 ms per token,   174.35 tokens per second)
llama_print_timings:        eval time =  8445.17 ms /    98 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  9350.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The King of Zhou stood up from the Yin-Yang Diagram and looked up. "After this period of forty days, a Period of Constant will follow for five days. Then, a period of twenty-three days will be followed by another period of eighteen days, followed by a period of eight days. And after that, the long-term Constant era you have been expecting will come, which lasts three years and nine months, with warm weather. It is a Golden Age."



llama_print_timings:        load time =   741.30 ms
llama_print_timings:      sample time =    45.96 ms /   103 runs   (    0.45 ms per token,  2241.08 tokens per second)
llama_print_timings: prompt eval time =   741.25 ms /   138 tokens (    5.37 ms per token,   186.17 tokens per second)
llama_print_timings:        eval time =  8875.62 ms /   102 runs   (   87.02 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  9827.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We need to confirm your predictions first.



llama_print_timings:        load time =   589.26 ms
llama_print_timings:      sample time =     3.98 ms /     9 runs   (    0.44 ms per token,  2259.60 tokens per second)
llama_print_timings: prompt eval time =   589.22 ms /    48 tokens (   12.28 ms per token,    81.46 tokens per second)
llama_print_timings:        eval time =   698.21 ms /     8 runs   (   87.28 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1304.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao heard a loud noise from above, and a stone slate in the ceiling of the palace shifted open to reveal a rectangular hole. Wang Miao adjusted her direction to look at this rectangular hole which led out of the pyramid, and at its end she saw several twinkling stars.



llama_print_timings:        load time =   706.75 ms
llama_print_timings:      sample time =    31.01 ms /    70 runs   (    0.44 ms per token,  2257.48 tokens per second)
llama_print_timings: prompt eval time =   706.71 ms /    95 tokens (    7.44 ms per token,   134.43 tokens per second)
llama_print_timings:        eval time =  6114.06 ms /    69 runs   (   88.61 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  6965.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The game was sped up, with two soldiers guarding the hourglass. It revolved once in twenty seconds, marking eight hours of passage. The windows above flitted不定格， with regular intervals of light streaming in through them. Sometimes they were weak like a crescent moon; sometimes they were very strong and threw down bright squares of light on the ground, dazzling all the torches nearby. Wang Mao counted the turns of the hourglass until it reached one hundred twenty, at which point the regular stream of light from the windows began to resume, as per the prediction made by Zhou Wenwang. The light stream continued for another fifteen turns before becoming irregular again, beginning a new period of continuous light. Then was an interval of darkness followed by another period of constant light, and so on until Wang Mao counted twenty days had passed. Finally, when the final eight-day period of fluctuating light had ended, the game time was restored to normal.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Hou Wang, I will erect a monument for you that is even taller than this palace. ”



llama_print_timings:        load time =   620.21 ms
llama_print_timings:      sample time =    11.44 ms /    24 runs   (    0.48 ms per token,  2098.27 tokens per second)
llama_print_timings: prompt eval time =   620.18 ms /    62 tokens (   10.00 ms per token,    99.97 tokens per second)
llama_print_timings:        eval time =  2083.46 ms /    23 runs   (   90.59 ms per token,    11.04 tokens per second)
llama_print_timings:       total time =  2757.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Chou Wenwang bowed deep and said, “May your kingdom awaken and prosper!”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The King of Xi, standing on the stone platform, spreading out his arms, seemed to be embracing the whole world. He shouted in a very strange way, using the pitch he had adapted from a song he heard a while ago: "Dip your body—"


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

They all ran towards the door. Under Zhou Wenwang's direction, Wang Moran followed him along a long tunnel to the outside of the pyramid. After walking out of the door, Wang Sen saw that it was noon in the sky with a calm sunlight illuminating the earth. He seemed to smell the scent of spring when a gentle breeze blew past. Zhou Wenwang and Wang Moran arrived at a lakeside not far from the pyramid. The ice on the lake had melted, and the sunlight was dancing in small waves.



llama_print_timings:        load time =   750.65 ms
llama_print_timings:      sample time =    60.69 ms /   131 runs   (    0.46 ms per token,  2158.62 tokens per second)
llama_print_timings: prompt eval time =   750.61 ms /   142 tokens (    5.29 ms per token,   189.18 tokens per second)
llama_print_timings:        eval time = 11947.05 ms /   130 runs   (   91.90 ms per token,    10.88 tokens per second)
llama_print_timings:       total time = 12986.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The first team of soldiers shouted with joy, "dip! dip!" and ran towards a large stone-built high silo-like structure which was built in the shape of a barn. During their journey, Wang Mo from afar noticed such structures that he had been told by Zhou Wenwang were called “drying rooms.” The soldiers opened the stone door to the drying room and pushed out many dusty and worn-out leather sheets. Each soldier carried or supported a few of these sheets as they ran towards the lakeside, where they threw them into the lake. Those sheets immediately expanded upon entering water and floated on the surface of the lake, like cutouts made of thin human figures. And soon, the lake was filled with human shapes, who gradually came to life and rose up from the lakes, struggling to get out of the deep waters. "dip!", one of them shouted triumphantly, attracting cheers from others: "dip! dip!". The same thing also happened in other lakes and ponds around the world, so that all of creation was revived.


llama_print_timings:        load time =  1110.47 ms
llama_print_timings:      sample time =   109.37 ms /   238 runs   (    0.46 ms per token,  2176.20 tokens per second)
llama_print_timings: prompt eval time =  1110.43 ms /   320 tokens (    3.47 ms per token,   288.18 tokens per second)
llama_print_timings:        eval time = 21782.75 ms /   237 runs   (   91.91 ms per token,    10.88 tokens per second)
llama_print_timings:       total time = 23409.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, my fingers!



llama_print_timings:        load time =   624.12 ms
llama_print_timings:      sample time =     2.78 ms /     6 runs   (    0.46 ms per token,  2159.05 tokens per second)
llama_print_timings: prompt eval time =   624.08 ms /    41 tokens (   15.22 ms per token,    65.70 tokens per second)
llama_print_timings:        eval time =   420.10 ms /     5 runs   (   84.02 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  1057.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He looked in the direction of the voice and saw a newly revived person standing in the lake. With one hand, which had lost its middle finger, he cried out with tears as blood oozed from the severed joint to fall into the lake. The other resurrectees crowded around him happily, but none paid attention to him.



llama_print_timings:        load time =   665.05 ms
llama_print_timings:      sample time =    37.06 ms /    73 runs   (    0.51 ms per token,  1969.78 tokens per second)
llama_print_timings: prompt eval time =   665.01 ms /    95 tokens (    7.00 ms per token,   142.85 tokens per second)
llama_print_timings:        eval time =  6574.40 ms /    72 runs   (   91.31 ms per token,    10.95 tokens per second)
llama_print_timings:       total time =  7412.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Enough is enough!” a resurrected said. “Some of us lost an arm and leg, some of us have holes in our heads! If we don't keep immersing ourselves in water, I fear we will be eaten alive by rats from the Era of Chaos!”



llama_print_timings:        load time =   644.28 ms
llama_print_timings:      sample time =    33.13 ms /    64 runs   (    0.52 ms per token,  1931.78 tokens per second)
llama_print_timings: prompt eval time =   644.24 ms /    82 tokens (    7.86 ms per token,   127.28 tokens per second)
llama_print_timings:        eval time =  5530.07 ms /    63 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  6326.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How long have we been dehydrated?” the other resurrected man asked.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Just look at how thick the sand is on top of the Great King's palace. I just heard that the current Great King has not been the same since his dehydration and it is unclear if he is the son or grandson of the old Great King.”



llama_print_timings:        load time =   619.21 ms
llama_print_timings:      sample time =    48.91 ms /    58 runs   (    0.84 ms per token,  1185.92 tokens per second)
llama_print_timings: prompt eval time =   619.18 ms /    66 tokens (    9.38 ms per token,   106.59 tokens per second)
llama_print_timings:        eval time =  5430.51 ms /    57 runs   (   95.27 ms per token,    10.50 tokens per second)
llama_print_timings:       total time =  6279.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After eight days of immersion, all dehydrated people had been resurrected. At this point, the world once again gained new life. In these eight days, people enjoyed 20 hours a day of constant sunlight and accurate phases of dawn and dusk. They were bathed in the scent of spring and heartily praised the sun and the gods who govern the universe. By the eighth night, the fireflies on earth were as plentiful as the stars in the sky, as if the ruins of civilization had once again been revived with new vitality after a long dark age. Everyone would bustle up all night and rejoice at the dawn of the new life.



llama_print_timings:        load time =   771.72 ms
llama_print_timings:      sample time =    68.72 ms /   153 runs   (    0.45 ms per token,  2226.52 tokens per second)
llama_print_timings: prompt eval time =   771.68 ms /   148 tokens (    5.21 ms per token,   191.79 tokens per second)
llama_print_timings:        eval time = 13622.19 ms /   152 runs   (   89.62 ms per token,    11.16 tokens per second)
llama_print_timings:       total time = 14715.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But the sun never rose again.



llama_print_timings:        load time =   586.81 ms
llama_print_timings:      sample time =     3.59 ms /     8 runs   (    0.45 ms per token,  2229.03 tokens per second)
llama_print_timings: prompt eval time =   586.79 ms /    35 tokens (   16.77 ms per token,    59.65 tokens per second)
llama_print_timings:        eval time =   628.50 ms /     7 runs   (   89.79 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  1230.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The various clocks indicate that dawn has passed, but the horizons in all directions are still black. Another ten hours have passed, and not even a faint ray of light; no trace of the sun's shadow appears throughout the night. A whole day has gone by, but the endless night continues on; another day goes by, and cold like an enormous hand descends upon the earth in the dark night.



llama_print_timings:        load time =   666.38 ms
llama_print_timings:      sample time =    38.98 ms /    88 runs   (    0.44 ms per token,  2257.57 tokens per second)
llama_print_timings: prompt eval time =   666.34 ms /   104 tokens (    6.41 ms per token,   156.08 tokens per second)
llama_print_timings:        eval time =  7656.96 ms /    87 runs   (   88.01 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  8502.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Please believe me, this is just temporary. I have seen the yang energies converging in the universe and the sun will soon rise up. The Great Age and spring are continuing!” Standing in front of King Zhou’s stone seat, which he was sitting on, Lord Chunyu bowed down and prayed.



llama_print_timings:        load time =   653.13 ms
llama_print_timings:      sample time =    47.03 ms /    72 runs   (    0.65 ms per token,  1530.94 tokens per second)
llama_print_timings: prompt eval time =   653.10 ms /    85 tokens (    7.68 ms per token,   130.15 tokens per second)
llama_print_timings:        eval time =  6511.61 ms /    71 runs   (   91.71 ms per token,    10.90 tokens per second)
llama_print_timings:       total time =  7380.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Let's still burn the jade cymbals.” said King Xi, with a sigh.



llama_print_timings:        load time =   590.81 ms
llama_print_timings:      sample time =    11.11 ms /    25 runs   (    0.44 ms per token,  2251.24 tokens per second)
llama_print_timings: prompt eval time =   590.77 ms /    46 tokens (   12.84 ms per token,    77.86 tokens per second)
llama_print_timings:        eval time =  2032.36 ms /    24 runs   (   84.68 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2675.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The prime minister cried out, "Oh great king, there are three stars in the sky!!" from the doorway.



llama_print_timings:        load time =   631.03 ms
llama_print_timings:      sample time =    12.16 ms /    26 runs   (    0.47 ms per token,  2138.33 tokens per second)
llama_print_timings: prompt eval time =   630.99 ms /    70 tokens (    9.01 ms per token,   110.94 tokens per second)
llama_print_timings:        eval time =  2158.89 ms /    25 runs   (   86.36 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  2847.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The great hall was filled with stunned silence, only King Xi of the Shang made a light smile. "You don't know what it means for three celestial stars to appear, Heng Wang, let me tell him."



llama_print_timings:        load time =   663.22 ms
llama_print_timings:      sample time =    22.56 ms /    51 runs   (    0.44 ms per token,  2260.64 tokens per second)
llama_print_timings: prompt eval time =   663.18 ms /    84 tokens (    7.89 ms per token,   126.66 tokens per second)
llama_print_timings:        eval time =  4327.62 ms /    50 runs   (   86.55 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  5093.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This means long and cold winters, so cold that stones can be turned into powder.



llama_print_timings:        load time =   600.09 ms
llama_print_timings:      sample time =     8.85 ms /    20 runs   (    0.44 ms per token,  2260.91 tokens per second)
llama_print_timings: prompt eval time =   600.04 ms /    54 tokens (   11.11 ms per token,    89.99 tokens per second)
llama_print_timings:        eval time =  1637.14 ms /    19 runs   (   86.17 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  2277.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“脱水——/”纣王 again used that singing voice to shout. In fact, people had been slowly dehydrating outside and were re-becoming human being in order to get through the impending long night. Fortunately, they were moved into thousands of shelters and countless others were thrown away in the wasteland. Zhou Wenwang stood up slowly and walked towards the bronze Great Dish that was on the fire. He climbed on it and jumped in before stopping for a few seconds. Maybe he saw the smirking face of Fuxi, who had been cooked long enough to make soup out of him.



llama_print_timings:        load time =   764.59 ms
llama_print_timings:      sample time =    63.10 ms /   142 runs   (    0.44 ms per token,  2250.47 tokens per second)
llama_print_timings: prompt eval time =   764.54 ms /   147 tokens (    5.20 ms per token,   192.27 tokens per second)
llama_print_timings:        eval time = 12316.24 ms /   141 runs   (   87.35 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 13371.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“With charcoal fire, ” said the king of Shi Le helplessly. He turned to the others and said, “Let’s just EXIT the game at this point since it's getting less and less interesting.”



llama_print_timings:        load time =   625.05 ms
llama_print_timings:      sample time =    24.53 ms /    51 runs   (    0.48 ms per token,  2079.51 tokens per second)
llama_print_timings: prompt eval time =   625.03 ms /    64 tokens (    9.77 ms per token,   102.39 tokens per second)
llama_print_timings:        eval time =  4705.98 ms /    50 runs   (   94.12 ms per token,    10.62 tokens per second)
llama_print_timings:       total time =  5445.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The EXIT sign above the door glowed red, and people headed for it. Wang Sen followed as well, passing through the door and long tunnel until he arrived outside the pyramid underneath a cold blast of wind. The corner of the sky revealed that the game was speeding up.



llama_print_timings:        load time =   728.63 ms
llama_print_timings:      sample time =    38.82 ms /    65 runs   (    0.60 ms per token,  1674.31 tokens per second)
llama_print_timings: prompt eval time =   728.54 ms /    98 tokens (    7.43 ms per token,   134.52 tokens per second)
llama_print_timings:        eval time =  6264.71 ms /    64 runs   (   97.89 ms per token,    10.22 tokens per second)
llama_print_timings:       total time =  7181.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

10 days later. Snow is still falling, but the snowflakes are thick and dark as if they had been congealed in the darkness. Someone whispers into Wang Mu's ear: “It's being condensed carbon dioxide. ” When he turns to look, he sees the follower of Zhou Wenwang.



llama_print_timings:        load time =   719.42 ms
llama_print_timings:      sample time =    35.61 ms /    76 runs   (    0.47 ms per token,  2134.05 tokens per second)
llama_print_timings: prompt eval time =   719.38 ms /    89 tokens (    8.08 ms per token,   123.72 tokens per second)
llama_print_timings:        eval time =  7018.73 ms /    75 runs   (   93.58 ms per token,    10.69 tokens per second)
llama_print_timings:       total time =  7908.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ten days have passed, but the snow is still falling. The snowflakes are thin and translucent, with a superb blue shimmering under the torchlight from the Pyramid door. Like countless swirling cloud-sheets, they dance in the light.



llama_print_timings:        load time =   708.18 ms
llama_print_timings:      sample time =    29.68 ms /    62 runs   (    0.48 ms per token,  2089.09 tokens per second)
llama_print_timings: prompt eval time =   708.14 ms /    77 tokens (    9.20 ms per token,   108.74 tokens per second)
llama_print_timings:        eval time =  5708.20 ms /    61 runs   (   93.58 ms per token,    10.69 tokens per second)
llama_print_timings:       total time =  6558.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This snowflake is already solidified oxygen and nitrogen, the atmosphere layer is vanishing at absolute zero.



llama_print_timings:        load time =   641.82 ms
llama_print_timings:      sample time =    12.04 ms /    27 runs   (    0.45 ms per token,  2241.78 tokens per second)
llama_print_timings: prompt eval time =   641.77 ms /    52 tokens (   12.34 ms per token,    81.03 tokens per second)
llama_print_timings:        eval time =  2362.28 ms /    26 runs   (   90.86 ms per token,    11.01 tokens per second)
llama_print_timings:       total time =  3060.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The pyramid is buried under the snow, with water at the bottom, solid oxygen and nitrogen in the middle, and dry ice on top. The night sky becomes very clear and stars glisten like a silver flame against the backdrop of the sky:



llama_print_timings:        load time =   667.76 ms
llama_print_timings:      sample time =    25.86 ms /    57 runs   (    0.45 ms per token,  2204.01 tokens per second)
llama_print_timings: prompt eval time =   667.74 ms /    89 tokens (    7.50 ms per token,   133.28 tokens per second)
llama_print_timings:        eval time =  4832.94 ms /    56 runs   (   86.30 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  5622.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This night lasted for forty-eight years, and civilization number 137 was destroyed in the bitter cold. It had evolved to the level of Warring States.



llama_print_timings:        load time =   597.46 ms
llama_print_timings:      sample time =    18.48 ms /    39 runs   (    0.47 ms per token,  2110.96 tokens per second)
llama_print_timings: prompt eval time =   597.42 ms /    56 tokens (   10.67 ms per token,    93.74 tokens per second)
llama_print_timings:        eval time =  3356.16 ms /    38 runs   (   88.32 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  4040.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The seeds of civilization remain, and they will once again be revived to start the evolutionary journey in the Three-Body Universe. Welcome back!



llama_print_timings:        load time =   606.29 ms
llama_print_timings:      sample time =    19.72 ms /    34 runs   (    0.58 ms per token,  1723.96 tokens per second)
llama_print_timings: prompt eval time =   606.27 ms /    56 tokens (   10.83 ms per token,    92.37 tokens per second)
llama_print_timings:        eval time =  2981.41 ms /    33 runs   (   90.35 ms per token,    11.07 tokens per second)
llama_print_timings:       total time =  3677.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he was about to leave, Wang Miao noticed the three stars in the night sky. They were very close to each other and seemed to be dancing a strange dance in the abyss of space.



llama_print_timings:        load time =   623.54 ms
llama_print_timings:      sample time =    19.37 ms /    43 runs   (    0.45 ms per token,  2220.04 tokens per second)
llama_print_timings: prompt eval time =   623.50 ms /    68 tokens (    9.17 ms per token,   109.06 tokens per second)
llama_print_timings:        eval time =  3644.70 ms /    42 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  4356.80 ms


translated 132.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After removing his V-mask, Wang Miao found that his underwear had been soaked with cold sweat. It felt like he had just woken up from a nightmare. He drove out of the Nanocenter and went to Yang Dong's mother's house based on her address.



llama_print_timings:        load time =   805.90 ms
llama_print_timings:      sample time =    31.79 ms /    66 runs   (    0.48 ms per token,  2076.12 tokens per second)
llama_print_timings: prompt eval time =   805.83 ms /    80 tokens (   10.07 ms per token,    99.28 tokens per second)
llama_print_timings:        eval time =  5805.77 ms /    65 runs   (   89.32 ms per token,    11.20 tokens per second)
llama_print_timings:       total time =  6751.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Innovation, innovation, innovation…



llama_print_timings:        load time =   605.91 ms
llama_print_timings:      sample time =     4.81 ms /    11 runs   (    0.44 ms per token,  2287.38 tokens per second)
llama_print_timings: prompt eval time =   605.87 ms /    42 tokens (   14.43 ms per token,    69.32 tokens per second)
llama_print_timings:        eval time =   843.30 ms /    10 runs   (   84.33 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1470.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$This concept was swirling in Wang Miao's head. Why did the sun of that world run without any rule? A spherical planet, no matter its orbit is a round or long ellipsoid, must have a periodical motion around its star; there are no irregularities……Wang Miao felt very frustrated. He shook his head to dismiss all these thoughts, it was just a game after all, but he failed.



llama_print_timings:        load time =   694.36 ms
llama_print_timings:      sample time =    42.28 ms /    95 runs   (    0.45 ms per token,  2246.77 tokens per second)
llama_print_timings: prompt eval time =   694.29 ms /   120 tokens (    5.79 ms per token,   172.84 tokens per second)
llama_print_timings:        eval time =  8169.32 ms /    94 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  9052.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Innovation, innovation, innovation…



llama_print_timings:        load time =   603.14 ms
llama_print_timings:      sample time =     4.90 ms /    11 runs   (    0.45 ms per token,  2246.27 tokens per second)
llama_print_timings: prompt eval time =   603.12 ms /    42 tokens (   14.36 ms per token,    69.64 tokens per second)
llama_print_timings:        eval time =   863.33 ms /    10 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  1487.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Goddamn it! Stop thinking about it! Why? Why do you have to think about it?



llama_print_timings:        load time =   589.89 ms
llama_print_timings:      sample time =    10.63 ms /    23 runs   (    0.46 ms per token,  2162.87 tokens per second)
llama_print_timings: prompt eval time =   589.85 ms /    47 tokens (   12.55 ms per token,    79.68 tokens per second)
llama_print_timings:        eval time =  1836.77 ms /    22 runs   (   83.49 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  2473.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Soon, Wang Miao found the answer. He had not played electronic games for many years, and the technology of soft-hardware in electronic games has obviously evolved a lot in those years. Among which are the virtual reality scenes and the added effects that cannot be compared with his school days. But Wang Miao knew that the realistic value of the game was not just in these aspects. Remember when he was in college, a professor showed up two large pictures in an information class: one is a complicated painting called "Sky and Water Flow", and the other is a single piece of sky full of nothing but a thin layer of white clouds. The professor asked which picture contained more information, the answer was that the latter was 1-2 order of magnitude more than the former!



llama_print_timings:        load time =   817.96 ms
llama_print_timings:      sample time =    73.39 ms /   162 runs   (    0.45 ms per token,  2207.33 tokens per second)
llama_print_timings: prompt eval time =   817.91 ms /   174 tokens (    4.70 ms per token,   212.74 tokens per second)
llama_print_timings:        eval time = 13821.44 ms /   161 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time = 14971.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Three-Body Problem is just like this. He felt that there was something hidden, but he couldn't say it clearly. Suddenly, he realized that what was extraordinary about the Three-Body Problem was that its designer went against the common trend of other games - The designers of other games always strive to increase the amount of information they display in order to produce a sense of realism; whereas the designer of the Three-Body Problem, however, was working to minimize information. They were like a vast empty sky photograph that hid some kind of tremendous reality behind it.



llama_print_timings:        load time =   739.53 ms
llama_print_timings:      sample time =    67.65 ms /   124 runs   (    0.55 ms per token,  1833.02 tokens per second)
llama_print_timings: prompt eval time =   739.50 ms /   131 tokens (    5.65 ms per token,   177.15 tokens per second)
llama_print_timings:        eval time = 10917.59 ms /   123 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time = 11964.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He relaxed his mind and let it return to the world of Three Body.



llama_print_timings:        load time =   579.01 ms
llama_print_timings:      sample time =     7.54 ms /    17 runs   (    0.44 ms per token,  2254.64 tokens per second)
llama_print_timings: prompt eval time =   578.97 ms /    48 tokens (   12.06 ms per token,    82.91 tokens per second)
llama_print_timings:        eval time =  1364.27 ms /    16 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1978.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Flying stars! The important thing is the flying star that you don't notice. One flying star, two flying stars, three flying stars... What do these mean?



llama_print_timings:        load time =   604.10 ms
llama_print_timings:      sample time =    16.08 ms /    36 runs   (    0.45 ms per token,  2238.81 tokens per second)
llama_print_timings: prompt eval time =   604.06 ms /    61 tokens (    9.90 ms per token,   100.98 tokens per second)
llama_print_timings:        eval time =  2986.69 ms /    35 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  3661.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He was thinking about it while the car had already driven to his destination.



llama_print_timings:        load time =   574.11 ms
llama_print_timings:      sample time =     7.39 ms /    16 runs   (    0.46 ms per token,  2165.38 tokens per second)
llama_print_timings: prompt eval time =   574.07 ms /    43 tokens (   13.35 ms per token,    74.90 tokens per second)
llama_print_timings:        eval time =  1272.84 ms /    15 runs   (   84.86 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1878.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he arrived at the building where she was, Wang Mole saw a woman in her early sixties with white hair and a slender frame. She was wearing glasses and carrying a big basket of food upstairs, making it difficult for her to move. Wang Mole guessed that she might be the person he was looking for. After he explained his reason for coming, she expressed a genuine sense of gratitude and showed him that gentle quality people with a lot of education tend to have.



llama_print_timings:        load time =   753.12 ms
llama_print_timings:      sample time =    47.36 ms /   106 runs   (    0.45 ms per token,  2238.41 tokens per second)
llama_print_timings: prompt eval time =   753.09 ms /   145 tokens (    5.19 ms per token,   192.54 tokens per second)
llama_print_timings:        eval time =  8853.27 ms /   105 runs   (   84.32 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  9820.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao entered the house with her, he was surprised to find that it wasn't as cold and desolate as he had expected—three children were playing together, none older than five or younger than walk-ing age. Yang mother told him that these were the neighbor's kids.



llama_print_timings:        load time =   657.06 ms
llama_print_timings:      sample time =    30.98 ms /    63 runs   (    0.49 ms per token,  2033.37 tokens per second)
llama_print_timings: prompt eval time =   657.01 ms /    91 tokens (    7.22 ms per token,   138.51 tokens per second)
llama_print_timings:        eval time =  5362.81 ms /    62 runs   (   86.50 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  6162.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They like to play here, today is Sunday, their parents have to work late, so they throw them to me … Oh, Nana, your picture painted? Yes, really good! Give a title, "The Ducks Under the Sun", well done, Coco, 6.9, Coco made. . . Do you want to eat any food at noon today, Yaoyao? Baked eggplant? Okay; Nana wants to eat something yesterday already eaten? Holland bean? Okay; You, Mimimi? Meatmeat? No, your mother said, not too much meatmeat is bad for digestion, try fishfish instead. See the big fishfish Coco bought back.



llama_print_timings:        load time =   787.21 ms
llama_print_timings:      sample time =    68.51 ms /   154 runs   (    0.44 ms per token,  2247.75 tokens per second)
llama_print_timings: prompt eval time =   787.17 ms /   168 tokens (    4.69 ms per token,   213.42 tokens per second)
llama_print_timings:        eval time = 13189.71 ms /   153 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 14287.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She must want a grandson or granddaughter, but will she want children even if Yang Dong is still alive? Seeing Yang mother and the children talking passionately about it, Wang Meng thought.



llama_print_timings:        load time =   603.12 ms
llama_print_timings:      sample time =    20.41 ms /    45 runs   (    0.45 ms per token,  2204.48 tokens per second)
llama_print_timings: prompt eval time =   603.10 ms /    64 tokens (    9.42 ms per token,   106.12 tokens per second)
llama_print_timings:        eval time =  3836.83 ms /    44 runs   (   87.20 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  4530.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mother Yang carried the basket into the kitchen and came out later, saying to Wang Meng, "Little Wang, I'm going to prepare the vegetables first. There are lots of pesticides residues on the vegetables these days, so at least it should be soaked for two hours or more before serving it to the children... You can go into Wendy's room and have a look."



llama_print_timings:        load time =   653.89 ms
llama_print_timings:      sample time =    39.49 ms /    87 runs   (    0.45 ms per token,  2202.81 tokens per second)
llama_print_timings: prompt eval time =   653.86 ms /    88 tokens (    7.43 ms per token,   134.58 tokens per second)
llama_print_timings:        eval time =  7342.41 ms /    86 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  8172.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Yang’s last suggestion made Wang Meng tense and uneasy. She obviously realized Wang Meng’s true motive behind this journey. After saying it, she turned her back to the kitchen and did not look at Wang Meng at all. Therefore, she could not see Wang Meng embarrassed at that moment, who was almost moved by her almost perfect understanding of human nature.



llama_print_timings:        load time =   691.30 ms
llama_print_timings:      sample time =    41.63 ms /    84 runs   (    0.50 ms per token,  2017.82 tokens per second)
llama_print_timings: prompt eval time =   691.26 ms /   105 tokens (    6.58 ms per token,   151.90 tokens per second)
llama_print_timings:        eval time =  7196.84 ms /    83 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  8079.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He turned around and walked towards the room where Yang mother had pointed. As he entered the door, a strange feeling overcame him, like returning to his youth. Some memories, as fragile as dewdrops in the morning, rose from the depths of his mind and were both hurtful and rosy.



llama_print_timings:        load time =   670.65 ms
llama_print_timings:      sample time =    30.30 ms /    68 runs   (    0.45 ms per token,  2244.59 tokens per second)
llama_print_timings: prompt eval time =   670.61 ms /   104 tokens (    6.45 ms per token,   155.08 tokens per second)
llama_print_timings:        eval time =  5759.39 ms /    67 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  6567.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As Wang Miao gently opened the door, the faint scent of forest washed over him. He did not expect that; he had been expecting something more luxurious and modern, something more than a shelter built by trees and bamboo sticks.



llama_print_timings:        load time =   773.12 ms
llama_print_timings:      sample time =    27.53 ms /    56 runs   (    0.49 ms per token,  2034.44 tokens per second)
llama_print_timings: prompt eval time =   773.07 ms /   160 tokens (    4.83 ms per token,   206.97 tokens per second)
llama_print_timings:        eval time =  4709.51 ms /    55 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  5609.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He went to the writing desk and saw that there was nothing academic on it. Neither any female-related items had been taken away, nor had they ever existed here beforehand. He noticed a black-and-white photo of Yang Dong's mother and herself as young children that was framed in a wooden frame. They stood at the same height, and their hair had been blown by the strong wind to hang together like an interlocking design on the background, which was made up of a huge metal structure with jagged edges that extended beyond the camera's field of vision. From these observations, he suspected it was a paraboloid antenna or something similar.



llama_print_timings:        load time =   796.68 ms
llama_print_timings:      sample time =    64.86 ms /   144 runs   (    0.45 ms per token,  2220.34 tokens per second)
llama_print_timings: prompt eval time =   796.63 ms /   168 tokens (    4.74 ms per token,   210.89 tokens per second)
llama_print_timings:        eval time = 12297.95 ms /   143 runs   (   86.00 ms per token,    11.63 tokens per second)
llama_print_timings:       total time = 13389.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the photo, Xiao Yang Dong's big eyes shine with a kind of fear that makes Wang Mu feel trembled. It seems as though the outside world is causing her fear in this moment. Wang Mu notices two things about the photograph. The first is Xiao Yang Dong's large and bright eyes. The second is a thick leather book at the writing desk. At first, Wang Mu was confused by the materials of the book because on the cover is written: “Yang Dong's Birchbark Notebook”. He touched the book curiously but hesitated before retracting his hand.



llama_print_timings:        load time =   754.80 ms
llama_print_timings:      sample time =    64.56 ms /   133 runs   (    0.49 ms per token,  2060.04 tokens per second)
llama_print_timings: prompt eval time =   754.77 ms /   149 tokens (    5.07 ms per token,   197.41 tokens per second)
llama_print_timings:        eval time = 11446.11 ms /   132 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 12494.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang's mother said, "Look at that, it's a painting when young Winter."



llama_print_timings:        load time =   578.75 ms
llama_print_timings:      sample time =     9.74 ms /    22 runs   (    0.44 ms per token,  2259.19 tokens per second)
llama_print_timings: prompt eval time =   578.71 ms /    48 tokens (   12.06 ms per token,    82.94 tokens per second)
llama_print_timings:        eval time =  1818.22 ms /    21 runs   (   86.58 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  2441.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Mopu holds up a birch bamboo pad and turns the pages one by one. Each painting has dates written on it, just as he saw when he first entered the door. Wang Mopu also discovered something that confounded him more than any other thing: From the date of the painting, Yang Dong was already three years old, but at this age, a normal child is able to paint relatively distinct objects and people; however, Yang Dong's painting still consists of random lines which he can't understand. Wang Mopu senses out strong frustration and despair in it, which are not typical of a normal child of this age.



llama_print_timings:        load time =   802.81 ms
llama_print_timings:      sample time =    64.07 ms /   143 runs   (    0.45 ms per token,  2231.86 tokens per second)
llama_print_timings: prompt eval time =   802.74 ms /   170 tokens (    4.72 ms per token,   211.78 tokens per second)
llama_print_timings:        eval time = 12279.57 ms /   142 runs   (   86.48 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 13373.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$YangMu slowly sat down on the bed frame, staring blankly at Wang Mo's birch bark notebook. Her daughter Yunyun had passed away in here, leaving her a lifeless body to sleep with. Wang Mo sat down next to Yang Mu, and he never felt so strong an urge to share his pain with someone else. Yang Mu took the birch bark notebook from Wang Mo's hand and held it close to her breast. She quietly said: “I did not know how to educate Yunyun properly. When she first showed a great interest in those abstract theories, I told her that the world of men was difficult for women to enter into. She asked about Madame Curie. I told her that Madame Curie had never truly entered the world of men and that her success was due to hard work and perseverance rather than an inherent talent. But Madame Curie's success did not exceed Yunyun by much, so she did not enter as far into the world as Yunyun would have like to have done. Female thinking is different from male thought. This is not a matter of who is better 


llama_print_timings:        load time =  1070.77 ms
llama_print_timings:      sample time =   126.23 ms /   265 runs   (    0.48 ms per token,  2099.33 tokens per second)
llama_print_timings: prompt eval time =  1070.73 ms /   300 tokens (    3.57 ms per token,   280.18 tokens per second)
llama_print_timings:        eval time = 23644.62 ms /   264 runs   (   89.56 ms per token,    11.17 tokens per second)
llama_print_timings:       total time = 25299.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Winter didn't argue with me. Later, I really discovered something special about her, like if I told her a formula, other children would say “That formula is clever”. She then said the formula was beautiful and elegant, which looked as if she had seen a beautiful wild flower.”



llama_print_timings:        load time =   808.63 ms
llama_print_timings:      sample time =    27.50 ms /    61 runs   (    0.45 ms per token,  2218.02 tokens per second)
llama_print_timings: prompt eval time =   808.59 ms /   171 tokens (    4.73 ms per token,   211.48 tokens per second)
llama_print_timings:        eval time =  5177.26 ms /    60 runs   (   86.29 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  6109.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm so impressed with your daughter's education.



llama_print_timings:        load time =   584.53 ms
llama_print_timings:      sample time =     6.52 ms /    14 runs   (    0.47 ms per token,  2147.90 tokens per second)
llama_print_timings: prompt eval time =   584.50 ms /    44 tokens (   13.28 ms per token,    75.28 tokens per second)
llama_print_timings:        eval time =  1121.60 ms /    13 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  1736.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, it's a failure! Her world is too simple and only has those empty theories. Those things will collapse when they stop supporting her.”



llama_print_timings:        load time =   610.23 ms
llama_print_timings:      sample time =    14.55 ms /    32 runs   (    0.45 ms per token,  2198.86 tokens per second)
llama_print_timings: prompt eval time =   610.19 ms /    62 tokens (    9.84 ms per token,   101.61 tokens per second)
llama_print_timings:        eval time =  2607.57 ms /    31 runs   (   84.12 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  3284.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Mr. Ye, I don’t think that’s right. There have been some unimaginable events that have occurred and this is a far-reaching theoretical disaster with the scientists involved in it being more than just one person.”



llama_print_timings:        load time =   626.50 ms
llama_print_timings:      sample time =    23.96 ms /    53 runs   (    0.45 ms per token,  2212.02 tokens per second)
llama_print_timings: prompt eval time =   626.45 ms /    67 tokens (    9.35 ms per token,   106.95 tokens per second)
llama_print_timings:        eval time =  4409.78 ms /    52 runs   (   84.80 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5143.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Only one woman, and a woman should be like water, flowing anywhere.



llama_print_timings:        load time =   593.98 ms
llama_print_timings:      sample time =     7.77 ms /    17 runs   (    0.46 ms per token,  2189.03 tokens per second)
llama_print_timings: prompt eval time =   593.94 ms /    51 tokens (   11.65 ms per token,    85.87 tokens per second)
llama_print_timings:        eval time =  1302.89 ms /    16 runs   (   81.43 ms per token,    12.28 tokens per second)
llama_print_timings:       total time =  1931.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……



llama_print_timings:        load time =  1064.26 ms
llama_print_timings:      sample time =     1.38 ms /     3 runs   (    0.46 ms per token,  2181.82 tokens per second)
llama_print_timings: prompt eval time =  1064.22 ms /    31 tokens (   34.33 ms per token,    29.13 tokens per second)
llama_print_timings:        eval time =   167.86 ms /     2 runs   (   83.93 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1237.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he was saying goodbye, Wang Miao remembered that the other reason for his visit to Yang's home. So he asked Yang's mother about their observations on cosmic microwave background radiation.



llama_print_timings:        load time =   611.92 ms
llama_print_timings:      sample time =    19.53 ms /    44 runs   (    0.44 ms per token,  2253.06 tokens per second)
llama_print_timings: prompt eval time =   611.88 ms /    59 tokens (   10.37 ms per token,    96.42 tokens per second)
llama_print_timings:        eval time =  3593.99 ms /    43 runs   (   83.58 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  4292.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Oh, there are two places in China that are doing it, one is at the Urumqi Observation Base of the Chinese Academy of Sciences Space Environmental Monitoring Center, and the other is not far from Beijing on a site of the University of Beijing's joint Institute for Theoretical Astrophysics with CAAS. The former is actual ground observations while the latter just receives data from satellites but the data is more accurate and complete. There is one of my students at that second place, I'll help you contact him.” Yang mother said, looking to find a phone number and then calling the student who is at the second site, it seems to go smoothly.



llama_print_timings:        load time =   747.41 ms
llama_print_timings:      sample time =    66.28 ms /   147 runs   (    0.45 ms per token,  2217.90 tokens per second)
llama_print_timings: prompt eval time =   747.37 ms /   142 tokens (    5.26 ms per token,   190.00 tokens per second)
llama_print_timings:        eval time = 12657.66 ms /   146 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 13707.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay, I will give you the address. He is called Sahar Shah and he works in night shift tomorrow … Do you have experience of it?” asked Yang mother on the phone.



llama_print_timings:        load time =   632.89 ms
llama_print_timings:      sample time =    18.28 ms /    41 runs   (    0.45 ms per token,  2243.13 tokens per second)
llama_print_timings: prompt eval time =   632.85 ms /    71 tokens (    8.91 ms per token,   112.19 tokens per second)
llama_print_timings:        eval time =  3439.30 ms /    40 runs   (   85.98 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  4154.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm in nano, and I'm doing it for other things. Wang Mu is afraid that Yang Muduan will ask further, but she doesn't.



llama_print_timings:        load time =   604.53 ms
llama_print_timings:      sample time =    16.46 ms /    37 runs   (    0.44 ms per token,  2248.15 tokens per second)
llama_print_timings: prompt eval time =   604.51 ms /    58 tokens (   10.42 ms per token,    95.95 tokens per second)
llama_print_timings:        eval time =  3064.07 ms /    36 runs   (   85.11 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  3743.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Little Wang, why are you looking so bad?” Yang mother asked with concern. “You seem to be very weak.”



llama_print_timings:        load time =   648.34 ms
llama_print_timings:      sample time =    11.96 ms /    27 runs   (    0.44 ms per token,  2257.90 tokens per second)
llama_print_timings: prompt eval time =   648.25 ms /    54 tokens (   12.00 ms per token,    83.30 tokens per second)
llama_print_timings:        eval time =  2192.84 ms /    26 runs   (   84.34 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  2895.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Just like that.



llama_print_timings:        load time =   578.33 ms
llama_print_timings:      sample time =     2.24 ms /     5 runs   (    0.45 ms per token,  2230.15 tokens per second)
llama_print_timings: prompt eval time =   578.25 ms /    44 tokens (   13.14 ms per token,    76.09 tokens per second)
llama_print_timings:        eval time =   328.81 ms /     4 runs   (   82.20 ms per token,    12.17 tokens per second)
llama_print_timings:       total time =   916.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Wait a minute,” said Yang’s mother, who had just pulled out an wooden box from the closet. “It was given to me by an old soldier in the Base a few days ago. He said he came to see me……But don't take it, artificial cultivation, not something valuable, I have high blood pressure and don't need it either. You can cut it into thin slices for drinking tea, after seeing your poor color, I think you are short of blood. Young people should take care of themselves.”



llama_print_timings:        load time =   702.44 ms
llama_print_timings:      sample time =    58.26 ms /   115 runs   (    0.51 ms per token,  1974.01 tokens per second)
llama_print_timings: prompt eval time =   702.40 ms /   125 tokens (    5.62 ms per token,   177.96 tokens per second)
llama_print_timings:        eval time =  9877.36 ms /   114 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 10847.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He felt a warmth in his heart, and his eyes welled up. His strained heart was like a soft cushion put on it, making it feel like soft cotton. “Ms. Ye. I will visit you often.” He accepted the wooden box from her.



llama_print_timings:        load time =   640.63 ms
llama_print_timings:      sample time =    26.84 ms /    60 runs   (    0.45 ms per token,  2235.47 tokens per second)
llama_print_timings: prompt eval time =   640.58 ms /    82 tokens (    7.81 ms per token,   128.01 tokens per second)
llama_print_timings:        eval time =  5046.22 ms /    59 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  5807.25 ms


translated 37.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Driving along Jingmifu Road, Wang Miao turned eastward to the town of Beilitang and then up the mountain highway. Later on he reached the National Astronomical Observatory Center of the Chinese Academy of Sciences. There was a row of twenty-eight spheres with a diameter of nine meters spread out in front of him like a line of stunning steel plants, and two towering 50 meters telescope antennas stood at the end of the row. These telescopes reminded Wang Miao of the backdrop for Yang Dong's photo with her daughters.



llama_print_timings:        load time =   794.01 ms
llama_print_timings:      sample time =    57.61 ms /   128 runs   (    0.45 ms per token,  2221.72 tokens per second)
llama_print_timings: prompt eval time =   793.98 ms /   163 tokens (    4.87 ms per token,   205.30 tokens per second)
llama_print_timings:        eval time = 11048.86 ms /   127 runs   (   87.00 ms per token,    11.49 tokens per second)
llama_print_timings:       total time = 12099.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lee Wenjie's students are not involved in the projects these radio telescopes, but Sha Rishan is mainly responsible for receiving the observation data from three satellites launched in November 1989 and about to be retired: the COBE microwave background detectors satellite; the Wilkinson Microwave Anisotropy Probe (WMAP) satellite, launched in 2003; and Planek, a proton-antiproton accelerator launched by the European Space Agency in 2007.



llama_print_timings:        load time =   701.23 ms
llama_print_timings:      sample time =    60.21 ms /   121 runs   (    0.50 ms per token,  2009.77 tokens per second)
llama_print_timings: prompt eval time =   701.18 ms /   128 tokens (    5.48 ms per token,   182.55 tokens per second)
llama_print_timings:        eval time = 10642.95 ms /   120 runs   (   88.69 ms per token,    11.28 tokens per second)
llama_print_timings:       total time = 11613.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sharshin's lab is not big. The main room is packed with satellite data receiving equipment, and three terminals display data from the three satellites at a time.



llama_print_timings:        load time =   705.78 ms
llama_print_timings:      sample time =    18.89 ms /    40 runs   (    0.47 ms per token,  2118.08 tokens per second)
llama_print_timings: prompt eval time =   705.75 ms /   126 tokens (    5.60 ms per token,   178.53 tokens per second)
llama_print_timings:        eval time =  3375.19 ms /    39 runs   (   86.54 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  4164.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shay Shan immediately showed the enthusiasm of a lonely person when he saw Wang Mu, asking him what kind of data he wanted to know.



llama_print_timings:        load time =   607.85 ms
llama_print_timings:      sample time =    15.09 ms /    33 runs   (    0.46 ms per token,  2186.88 tokens per second)
llama_print_timings: prompt eval time =   607.81 ms /    60 tokens (   10.13 ms per token,    98.72 tokens per second)
llama_print_timings:        eval time =  2753.89 ms /    32 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3428.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I want to observe the overall fluctuations of cosmic background radiation.



llama_print_timings:        load time =   581.05 ms
llama_print_timings:      sample time =     7.53 ms /    17 runs   (    0.44 ms per token,  2258.54 tokens per second)
llama_print_timings: prompt eval time =   581.01 ms /    40 tokens (   14.53 ms per token,    68.85 tokens per second)
llama_print_timings:        eval time =  1427.43 ms /    16 runs   (   89.21 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  2042.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shayinmountain looked at Wangmiao with a strange look.



llama_print_timings:        load time =   597.19 ms
llama_print_timings:      sample time =     7.28 ms /    16 runs   (    0.46 ms per token,  2196.60 tokens per second)
llama_print_timings: prompt eval time =   597.15 ms /    54 tokens (   11.06 ms per token,    90.43 tokens per second)
llama_print_timings:        eval time =  1312.80 ms /    15 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1941.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yeah, the cosmic microwave background radiation is homogeneous on a large scale and its amplitude varies between 1% to 5%.



llama_print_timings:        load time =   614.37 ms
llama_print_timings:      sample time =    14.55 ms /    33 runs   (    0.44 ms per token,  2268.04 tokens per second)
llama_print_timings: prompt eval time =   614.32 ms /    61 tokens (   10.07 ms per token,    99.30 tokens per second)
llama_print_timings:        eval time =  2677.28 ms /    32 runs   (   83.66 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  3356.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sha Liangshan smiled, and since the beginning of this century, the Beijing Radio Observatory had opened to visitors. To make a bit more money, Sha laughed frequently when answering the questions of the tourists (he was used to their ridiculous level of scientific ignorance). "Mr. Wang, you ... are not in the field, right?"



llama_print_timings:        load time =   673.22 ms
llama_print_timings:      sample time =    35.24 ms /    78 runs   (    0.45 ms per token,  2213.52 tokens per second)
llama_print_timings: prompt eval time =   673.18 ms /   107 tokens (    6.29 ms per token,   158.95 tokens per second)
llama_print_timings:        eval time =  6559.78 ms /    77 runs   (   85.19 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  7393.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I study nanomaterials.



llama_print_timings:        load time =   581.21 ms
llama_print_timings:      sample time =     3.64 ms /     8 runs   (    0.45 ms per token,  2200.83 tokens per second)
llama_print_timings: prompt eval time =   581.17 ms /    36 tokens (   16.14 ms per token,    61.94 tokens per second)
llama_print_timings:        eval time =   572.11 ms /     7 runs   (   81.73 ms per token,    12.24 tokens per second)
llama_print_timings:       total time =  1169.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, that's right. But do you have any knowledge about the cosmic background radiation of Universe 3K?



llama_print_timings:        load time =   606.96 ms
llama_print_timings:      sample time =    12.39 ms /    27 runs   (    0.46 ms per token,  2179.00 tokens per second)
llama_print_timings: prompt eval time =   606.88 ms /    52 tokens (   11.67 ms per token,    85.68 tokens per second)
llama_print_timings:        eval time =  2271.59 ms /    26 runs   (   87.37 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2934.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know much about it. According to current cosmological theories, the universe was born about 14 billion years ago during a big bang. At that time, the temperature of the universe was extremely high, and then cooled down gradually, forming the ‘background radiation’ called cosmic microwave background. This residual background radiation can be observed in the millimeter wavelength region, which is believed to have been left over from the big bang nearly 14 billion years ago. In 1960, two Americans unintentionally found this cosmic microwave background while tuning a high-precision satellite receiver…



llama_print_timings:        load time =   745.60 ms
llama_print_timings:      sample time =    62.81 ms /   138 runs   (    0.46 ms per token,  2197.03 tokens per second)
llama_print_timings: prompt eval time =   745.50 ms /   127 tokens (    5.87 ms per token,   170.35 tokens per second)
llama_print_timings:        eval time = 11681.32 ms /   137 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time = 12711.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Enough, ” said Sharashan, waving his hand to interrupt Wang Mo. “You should know that the different parts of the universe are inversely proportional to their unevenities, and that the cosmic background radiation fluctuates slowly over time, reaching Planck's precision after a million years may not necessarily be able to detect such fluctuations. Do you understand what this means? It means that the entire universe is shaking like an old fluorescent light tube!”



llama_print_timings:        load time =   736.75 ms
llama_print_timings:      sample time =    48.37 ms /   108 runs   (    0.45 ms per token,  2232.88 tokens per second)
llama_print_timings: prompt eval time =   736.72 ms /   140 tokens (    5.26 ms per token,   190.03 tokens per second)
llama_print_timings:        eval time =  9419.07 ms /   107 runs   (   88.03 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 10372.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

And it was shining for me, Wang Mu said in his mind.



llama_print_timings:        load time =   580.31 ms
llama_print_timings:      sample time =     7.46 ms /    16 runs   (    0.47 ms per token,  2145.06 tokens per second)
llama_print_timings: prompt eval time =   580.26 ms /    42 tokens (   13.82 ms per token,    72.38 tokens per second)
llama_print_timings:        eval time =  1323.15 ms /    15 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  1935.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is just a joke, Miss Ye,” said Sharry mountain shook his head.



llama_print_timings:        load time =   586.30 ms
llama_print_timings:      sample time =     9.37 ms /    21 runs   (    0.45 ms per token,  2240.48 tokens per second)
llama_print_timings: prompt eval time =   586.26 ms /    47 tokens (   12.47 ms per token,    80.17 tokens per second)
llama_print_timings:        eval time =  1738.06 ms /    20 runs   (   86.90 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  2365.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But I hope it's just a joke. Wang Moxi said, thinking that he would tell him the truth about how little Yue Wenjie knows but afraid of bringing him into the troubled waters, however this was actually what was in his heart.



llama_print_timings:        load time =   627.64 ms
llama_print_timings:      sample time =    32.00 ms /    57 runs   (    0.56 ms per token,  1781.47 tokens per second)
llama_print_timings: prompt eval time =   627.60 ms /    69 tokens (    9.10 ms per token,   109.94 tokens per second)
llama_print_timings:        eval time =  4906.47 ms /    56 runs   (   87.62 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  5675.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Since it was explained by Professor Ye, let’s observe it, after all, it doesn’t cost us much effort, only one percent of precision is required. The old-fashioned COBE is sufficient.” said Sha Ri Shan as he got busy on the terminal, and soon a straight green line appeared on the screen, “See, this is the current real-time numerical curve of the overall background radiation in the cosmos. Oh, it should be called a linear instead of curve, the numerical value is 2.726+0.010K, which is caused by the relativistic effect of galaxy motion. It has been filtered out already. If there is a fluctuation with a amplitude more than one percent, the line will turn red and reveal it.” “I bet it still stays as a straight green line until the end of the world. To see such a change even in time to observe the Sun’s destruction, we may have to wait for longer than observing the Sun’s extinction.”



llama_print_timings:        load time =   843.35 ms
llama_print_timings:      sample time =    98.86 ms /   221 runs   (    0.45 ms per token,  2235.60 tokens per second)
llama_print_timings: prompt eval time =   843.30 ms /   197 tokens (    4.28 ms per token,   233.61 tokens per second)
llama_print_timings:        eval time = 19301.23 ms /   220 runs   (   87.73 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 20594.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This will not affect your normal work, right?"



llama_print_timings:        load time =   585.46 ms
llama_print_timings:      sample time =     5.88 ms /    12 runs   (    0.49 ms per token,  2042.55 tokens per second)
llama_print_timings: prompt eval time =   585.44 ms /    39 tokens (   15.01 ms per token,    66.62 tokens per second)
llama_print_timings:        eval time =   943.40 ms /    11 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1555.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course not. The precision required by the COBE observation data is already high enough. Well, if that great wave does appear, then the numerical values will automatically be recorded on the hard drive.



llama_print_timings:        load time =   626.00 ms
llama_print_timings:      sample time =    18.59 ms /    41 runs   (    0.45 ms per token,  2205.72 tokens per second)
llama_print_timings: prompt eval time =   625.96 ms /    69 tokens (    9.07 ms per token,   110.23 tokens per second)
llama_print_timings:        eval time =  3298.43 ms /    40 runs   (   82.46 ms per token,    12.13 tokens per second)
llama_print_timings:       total time =  4007.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Maybe it will be until midnight.



llama_print_timings:        load time =   577.90 ms
llama_print_timings:      sample time =     3.99 ms /     9 runs   (    0.44 ms per token,  2256.77 tokens per second)
llama_print_timings: prompt eval time =   577.86 ms /    37 tokens (   15.62 ms per token,    64.03 tokens per second)
llama_print_timings:        eval time =   668.90 ms /     8 runs   (   83.61 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  1264.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Wow, so precise? No problem, I'm just on the night shift. Did you eat anything? That's good, let me show you around.”



llama_print_timings:        load time =   605.42 ms
llama_print_timings:      sample time =    23.95 ms /    36 runs   (    0.67 ms per token,  1502.88 tokens per second)
llama_print_timings: prompt eval time =   605.38 ms /    58 tokens (   10.44 ms per token,    95.81 tokens per second)
llama_print_timings:        eval time =  3215.01 ms /    35 runs   (   91.86 ms per token,    10.89 tokens per second)
llama_print_timings:       total time =  3926.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This night has no moon. They walk along the long antenna array. Sharer Mountain points at the antenna and says, "Fascinating? Too bad it's for deaf ears only."



llama_print_timings:        load time =   617.10 ms
llama_print_timings:      sample time =    20.77 ms /    46 runs   (    0.45 ms per token,  2215.16 tokens per second)
llama_print_timings: prompt eval time =   617.05 ms /    65 tokens (    9.49 ms per token,   105.34 tokens per second)
llama_print_timings:        eval time =  3825.14 ms /    45 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  4534.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why?



llama_print_timings:        load time =   595.04 ms
llama_print_timings:      sample time =     1.34 ms /     3 runs   (    0.45 ms per token,  2230.48 tokens per second)
llama_print_timings: prompt eval time =   595.00 ms /    33 tokens (   18.03 ms per token,    55.46 tokens per second)
llama_print_timings:        eval time =   165.61 ms /     2 runs   (   82.81 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =   766.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Since they were built, they have been interfering on the frequency band. At first it was the paging stations in the late 1980s and now there are a wild fire of mobile communication networks. Most of these projects like survey at microwave frequencies, study of radio source, supernova remnants research etc., which can't be carried on normally. I have asked to no avail. We will not play with the Chinese Mobile Communications Corporation (China Mobile), Unicom and Netease? Without money, we have nothing but to find ways to solve the problems caused by mobile communication.”



llama_print_timings:        load time =   767.18 ms
llama_print_timings:      sample time =    59.37 ms /   132 runs   (    0.45 ms per token,  2223.27 tokens per second)
llama_print_timings: prompt eval time =   767.14 ms /   155 tokens (    4.95 ms per token,   202.05 tokens per second)
llama_print_timings:        eval time = 11256.15 ms /   131 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time = 12291.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Recently, many basic researches are running quite successfully, such as high energy physics. It should be better to build the observatory in a place far away from cities?”



llama_print_timings:        load time =   618.82 ms
llama_print_timings:      sample time =    16.98 ms /    37 runs   (    0.46 ms per token,  2179.03 tokens per second)
llama_print_timings: prompt eval time =   618.79 ms /    61 tokens (   10.14 ms per token,    98.58 tokens per second)
llama_print_timings:        eval time =  3080.24 ms /    36 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3775.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's still a money problem. For now, the only way is to block interference technically. Sigh, Professor Ye could have helped us if she was here.



llama_print_timings:        load time =   600.69 ms
llama_print_timings:      sample time =    16.61 ms /    37 runs   (    0.45 ms per token,  2227.71 tokens per second)
llama_print_timings: prompt eval time =   600.65 ms /    60 tokens (   10.01 ms per token,    99.89 tokens per second)
llama_print_timings:        eval time =  3102.76 ms /    36 runs   (   86.19 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  3776.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   694.30 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   694.26 ms /   116 tokens (    5.99 ms per token,   167.08 tokens per second)
llama_print_timings:        eval time =    92.88 ms /     1 runs   (   92.88 ms per token,    10.77 tokens per second)
llama_print_timings:       total time =   791.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I recently learned that she spent the last twenty years in Red Bank Base.



llama_print_timings:        load time =   587.94 ms
llama_print_timings:      sample time =     6.99 ms /    16 runs   (    0.44 ms per token,  2289.64 tokens per second)
llama_print_timings: prompt eval time =   587.90 ms /    46 tokens (   12.78 ms per token,    78.25 tokens per second)
llama_print_timings:        eval time =  1273.61 ms /    15 runs   (   84.91 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1892.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Red Bank?! Wang Mo stopped in his tracks, "Is it possible that those legends..."



llama_print_timings:        load time =   642.12 ms
llama_print_timings:      sample time =    10.01 ms /    22 runs   (    0.45 ms per token,  2198.90 tokens per second)
llama_print_timings: prompt eval time =   642.04 ms /    52 tokens (   12.35 ms per token,    80.99 tokens per second)
llama_print_timings:        eval time =  1825.66 ms /    21 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2512.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The majority of the things you say are true. One of the developers of Red Cap moved to Europe last year and wrote a book about it, most of which I understand is accurate. The creators of the program are still alive as far as I know.



llama_print_timings:        load time =   687.83 ms
llama_print_timings:      sample time =    34.05 ms /    53 runs   (    0.64 ms per token,  1556.67 tokens per second)
llama_print_timings: prompt eval time =   687.74 ms /    75 tokens (    9.17 ms per token,   109.05 tokens per second)
llama_print_timings:        eval time =  4715.86 ms /    52 runs   (   90.69 ms per token,    11.03 tokens per second)
llama_print_timings:       total time =  5558.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is really a legend!



llama_print_timings:        load time =   577.95 ms
llama_print_timings:      sample time =     3.08 ms /     7 runs   (    0.44 ms per token,  2271.99 tokens per second)
llama_print_timings: prompt eval time =   577.91 ms /    39 tokens (   14.82 ms per token,    67.48 tokens per second)
llama_print_timings:        eval time =   493.96 ms /     6 runs   (   82.33 ms per token,    12.15 tokens per second)
llama_print_timings:       total time =  1085.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

especially during that time, it was a legendary story.



llama_print_timings:        load time =   630.25 ms
llama_print_timings:      sample time =     6.16 ms /    14 runs   (    0.44 ms per token,  2271.25 tokens per second)
llama_print_timings: prompt eval time =   630.21 ms /    41 tokens (   15.37 ms per token,    65.06 tokens per second)
llama_print_timings:        eval time =  1136.42 ms /    13 runs   (   87.42 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  1793.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……



llama_print_timings:        load time =  1092.94 ms
llama_print_timings:      sample time =     1.38 ms /     3 runs   (    0.46 ms per token,  2180.23 tokens per second)
llama_print_timings: prompt eval time =  1092.90 ms /    31 tokens (   35.25 ms per token,    28.36 tokens per second)
llama_print_timings:        eval time =   173.29 ms /     2 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1271.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He asked them about the purpose of this strange observation. Wang Miao avoided answering, and he didn't ask again either. Obviously, the dignity of a professional prevented him from being interested in such an irrational observation.



llama_print_timings:        load time =   631.26 ms
llama_print_timings:      sample time =    22.29 ms /    49 runs   (    0.45 ms per token,  2198.10 tokens per second)
llama_print_timings: prompt eval time =   631.21 ms /    80 tokens (    7.89 ms per token,   126.74 tokens per second)
llama_print_timings:        eval time =  4171.11 ms /    48 runs   (   86.90 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  4901.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They then went to a bar for tourists that was open 24 hours and sat there for over two hours, with Sha Rui Shan drinking beer after beer, becoming increasingly gregarious, while Wang Moxuan's mind was already flitting between thoughts. It wasn't until ten past midnight that he finally agreed to Wang Moxuan's repeated suggestions and got up to return to the laboratory.



llama_print_timings:        load time =   670.88 ms
llama_print_timings:      sample time =    42.87 ms /    95 runs   (    0.45 ms per token,  2216.26 tokens per second)
llama_print_timings: prompt eval time =   670.83 ms /   107 tokens (    6.27 ms per token,   159.50 tokens per second)
llama_print_timings:        eval time =  8175.64 ms /    94 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  9043.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, the spotlights shining on the radio telescope array had been turned off, and the antennas in the night sky became abstract black two-dimensional patterns resembling symbols of the same azimuth facing up to the cosmos. This image made Wang Miao uneasy; he thought of the "big bows" from "The Three Body Problem."



llama_print_timings:        load time =   694.16 ms
llama_print_timings:      sample time =    36.03 ms /    79 runs   (    0.46 ms per token,  2192.80 tokens per second)
llama_print_timings: prompt eval time =   694.13 ms /   111 tokens (    6.25 ms per token,   159.91 tokens per second)
llama_print_timings:        eval time =  6814.97 ms /    78 runs   (   87.37 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  7670.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When they returned to the lab, the wave just appeared. The straight line became a curve, and there were irregular spikes on the top of it with changing color. Just like a snake that had woken up from hibernation, its blood began circulating.



llama_print_timings:        load time =   652.43 ms
llama_print_timings:      sample time =    30.84 ms /    56 runs   (    0.55 ms per token,  1815.88 tokens per second)
llama_print_timings: prompt eval time =   652.39 ms /    85 tokens (    7.68 ms per token,   130.29 tokens per second)
llama_print_timings:        eval time =  4862.17 ms /    55 runs   (   88.40 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  5651.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It must be a malfunction of the COBE satellite!" said Sharer Mountain nervously.



llama_print_timings:        load time =   587.35 ms
llama_print_timings:      sample time =    10.20 ms /    22 runs   (    0.46 ms per token,  2156.02 tokens per second)
llama_print_timings: prompt eval time =   587.32 ms /    48 tokens (   12.24 ms per token,    81.73 tokens per second)
llama_print_timings:        eval time =  1802.71 ms /    21 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2436.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's not a fault," Wang Miao said calmly. In front of such things, he had learned to control himself.



llama_print_timings:        load time =   613.12 ms
llama_print_timings:      sample time =    13.05 ms /    29 runs   (    0.45 ms per token,  2222.90 tokens per second)
llama_print_timings: prompt eval time =   613.06 ms /    54 tokens (   11.35 ms per token,    88.08 tokens per second)
llama_print_timings:        eval time =  2410.39 ms /    28 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3081.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"We will know soon," Sharashen said, and quickly typed in the real-time data from two other satellites, WMAP and Planck. A couple of curves soon appeared on the screens –



llama_print_timings:        load time =   647.68 ms
llama_print_timings:      sample time =    20.20 ms /    45 runs   (    0.45 ms per token,  2228.05 tokens per second)
llama_print_timings: prompt eval time =   647.64 ms /    81 tokens (    8.00 ms per token,   125.07 tokens per second)
llama_print_timings:        eval time =  3797.63 ms /    44 runs   (   86.31 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  4535.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The three curves synchronously oscillate with each other and they are exactly the same.



llama_print_timings:        load time =   579.66 ms
llama_print_timings:      sample time =     8.28 ms /    18 runs   (    0.46 ms per token,  2173.39 tokens per second)
llama_print_timings: prompt eval time =   579.62 ms /    40 tokens (   14.49 ms per token,    69.01 tokens per second)
llama_print_timings:        eval time =  1460.97 ms /    17 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2077.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shayi Shan, who had brought out a laptop computer, hurriedly restarted the system and plugged in the broadband cable. Then he picked up his phone, heard Sanya listening to it—he was talking with people at Urumqi's radio astronomy station—and waited for them to speak. Shayi didn't explain anything to Sanya but looked eagerly at the screen of his computer and a window with a coordinates chart appeared on it. A red curve joined three more curved waves, which were synchronized to each other in exact proportion.



llama_print_timings:        load time =   736.72 ms
llama_print_timings:      sample time =    54.78 ms /   120 runs   (    0.46 ms per token,  2190.74 tokens per second)
llama_print_timings: prompt eval time =   736.68 ms /   138 tokens (    5.34 ms per token,   187.33 tokens per second)
llama_print_timings:        eval time = 10281.70 ms /   119 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11270.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, three satellites and a ground-based observatory have verified the same thing: the universe is flickering!



llama_print_timings:        load time =   592.39 ms
llama_print_timings:      sample time =    11.96 ms /    27 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =   592.33 ms /    48 tokens (   12.34 ms per token,    81.04 tokens per second)
llama_print_timings:        eval time =  2194.19 ms /    26 runs   (   84.39 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  2840.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Can you print the curve on the front?" asks Wang Miao.



llama_print_timings:        load time =   589.93 ms
llama_print_timings:      sample time =    13.11 ms /    16 runs   (    0.82 ms per token,  1220.35 tokens per second)
llama_print_timings: prompt eval time =   589.89 ms /    46 tokens (   12.82 ms per token,    77.98 tokens per second)
llama_print_timings:        eval time =  1419.64 ms /    15 runs   (   94.64 ms per token,    10.57 tokens per second)
llama_print_timings:       total time =  2069.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shay Shan wiped his head with a cold sweat, nodded and then clicked on the print program. Wang Mu hurriedly grabbed the laser printer spewing out its first piece of paper, tracing it with a pencil to compare the distances of wave crests.



llama_print_timings:        load time =   693.55 ms
llama_print_timings:      sample time =    51.74 ms /    63 runs   (    0.82 ms per token,  1217.56 tokens per second)
llama_print_timings: prompt eval time =   693.49 ms /    96 tokens (    7.22 ms per token,   138.43 tokens per second)
llama_print_timings:        eval time =  6049.27 ms /    62 runs   (   97.57 ms per token,    10.25 tokens per second)
llama_print_timings:       total time =  6976.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is 1108 times 21, which equals 2318. Then subtract 37 from this number to get 2281, then multiply by 1108 to get the final result of 2561946.



llama_print_timings:        load time =   650.09 ms
llama_print_timings:      sample time =    27.53 ms /    60 runs   (    0.46 ms per token,  2179.68 tokens per second)
llama_print_timings: prompt eval time =   650.05 ms /    92 tokens (    7.07 ms per token,   141.53 tokens per second)
llama_print_timings:        eval time =  4919.73 ms /    59 runs   (   83.39 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  5696.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is 1108 short, long, and medium length syllables. There are also 21 short syllables, 36 syllables of varying length, as well as shorter syllables with the words "short" and "long". This can be written 1108:21:36.



llama_print_timings:        load time =   650.36 ms
llama_print_timings:      sample time =    32.31 ms /    73 runs   (    0.44 ms per token,  2259.15 tokens per second)
llama_print_timings: prompt eval time =   650.34 ms /    92 tokens (    7.07 ms per token,   141.47 tokens per second)
llama_print_timings:        eval time =  6135.91 ms /    72 runs   (   85.22 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6933.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is 1108 times 21 plus 35, which equals 2524.



llama_print_timings:        load time =   654.14 ms
llama_print_timings:      sample time =    11.15 ms /    25 runs   (    0.45 ms per token,  2242.55 tokens per second)
llama_print_timings: prompt eval time =   654.11 ms /    92 tokens (    7.11 ms per token,   140.65 tokens per second)
llama_print_timings:        eval time =  2055.44 ms /    24 runs   (   85.64 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  2760.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……



llama_print_timings:        load time =  1071.62 ms
llama_print_timings:      sample time =     1.31 ms /     3 runs   (    0.44 ms per token,  2283.11 tokens per second)
llama_print_timings: prompt eval time =  1071.58 ms /    31 tokens (   34.57 ms per token,    28.93 tokens per second)
llama_print_timings:        eval time =   175.18 ms /     2 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  1252.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The countdown continues on the cosmic scale, with 78 hours passed and 1108 more to go.



llama_print_timings:        load time =   585.04 ms
llama_print_timings:      sample time =    11.98 ms /    27 runs   (    0.44 ms per token,  2253.57 tokens per second)
llama_print_timings: prompt eval time =   585.00 ms /    51 tokens (   11.47 ms per token,    87.18 tokens per second)
llama_print_timings:        eval time =  2188.46 ms /    26 runs   (   84.17 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  2827.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shayimans烦躁地来回踱步，不时在WangMu的背后停下来看他正在输入的那些数字。 “你怎么可以隐瞒真相与我谈？！” 他愤怒地说道。



llama_print_timings:        load time =   637.62 ms
llama_print_timings:      sample time =    21.17 ms /    46 runs   (    0.46 ms per token,  2172.99 tokens per second)
llama_print_timings: prompt eval time =   637.58 ms /    74 tokens (    8.62 ms per token,   116.06 tokens per second)
llama_print_timings:        eval time =  3863.98 ms /    45 runs   (   85.87 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  4598.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Sand Doctor, believe me for now. Maybe the satellites and the ground observation point are both malfunctioning.”



llama_print_timings:        load time =   638.41 ms
llama_print_timings:      sample time =    12.44 ms /    27 runs   (    0.46 ms per token,  2171.29 tokens per second)
llama_print_timings: prompt eval time =   638.37 ms /    81 tokens (    7.88 ms per token,   126.89 tokens per second)
llama_print_timings:        eval time =  2246.57 ms /    26 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2940.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You know that's impossible!



llama_print_timings:        load time =   730.61 ms
llama_print_timings:      sample time =     8.24 ms /     8 runs   (    1.03 ms per token,   970.64 tokens per second)
llama_print_timings: prompt eval time =   730.57 ms /    35 tokens (   20.87 ms per token,    47.91 tokens per second)
llama_print_timings:        eval time =   695.11 ms /     7 runs   (   99.30 ms per token,    10.07 tokens per second)
llama_print_timings:       total time =  1463.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If someone intentionally destroys it?



llama_print_timings:        load time =   575.58 ms
llama_print_timings:      sample time =     4.52 ms /    10 runs   (    0.45 ms per token,  2210.92 tokens per second)
llama_print_timings: prompt eval time =   575.54 ms /    37 tokens (   15.56 ms per token,    64.29 tokens per second)
llama_print_timings:        eval time =   742.12 ms /     9 runs   (   82.46 ms per token,    12.13 tokens per second)
llama_print_timings:       total time =  1336.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's impossible! Changing the data of three satellites and a ground station at the same time? That would be supernatural.



llama_print_timings:        load time =   599.49 ms
llama_print_timings:      sample time =    13.55 ms /    30 runs   (    0.45 ms per token,  2213.70 tokens per second)
llama_print_timings: prompt eval time =   599.45 ms /    55 tokens (   10.90 ms per token,    91.75 tokens per second)
llama_print_timings:        eval time =  2550.26 ms /    29 runs   (   87.94 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  3214.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mop nodded, but he preferred the supernatural to the universe's glow. However, Sharshan took away his only lifesaving grain in an instant.



llama_print_timings:        load time =   618.72 ms
llama_print_timings:      sample time =    18.18 ms /    41 runs   (    0.44 ms per token,  2255.10 tokens per second)
llama_print_timings: prompt eval time =   618.68 ms /    66 tokens (    9.37 ms per token,   106.68 tokens per second)
llama_print_timings:        eval time =  3404.55 ms /    40 runs   (   85.11 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  4106.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If you want to prove this for good, it's actually very simple. The cosmic background radiation is so intense that we can see it with our own eyes.



llama_print_timings:        load time =   609.24 ms
llama_print_timings:      sample time =    15.96 ms /    36 runs   (    0.44 ms per token,  2255.92 tokens per second)
llama_print_timings: prompt eval time =   609.20 ms /    60 tokens (   10.15 ms per token,    98.49 tokens per second)
llama_print_timings:        eval time =  2999.87 ms /    35 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3680.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What are you talking about? You're violating common sense: The wavelength of background radiation is 7 cm, which is seven to eight orders of magnitude greater than visible light. How can it be seen?”



llama_print_timings:        load time =   610.41 ms
llama_print_timings:      sample time =    21.34 ms /    47 runs   (    0.45 ms per token,  2202.54 tokens per second)
llama_print_timings: prompt eval time =   610.37 ms /    63 tokens (    9.69 ms per token,   103.22 tokens per second)
llama_print_timings:        eval time =  3836.33 ms /    46 runs   (   83.40 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  4543.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“With 3K glasses.”



llama_print_timings:        load time =   576.38 ms
llama_print_timings:      sample time =     4.02 ms /     9 runs   (    0.45 ms per token,  2237.14 tokens per second)
llama_print_timings: prompt eval time =   576.35 ms /    36 tokens (   16.01 ms per token,    62.46 tokens per second)
llama_print_timings:        eval time =   709.30 ms /     8 runs   (   88.66 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1303.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Three thousand dollar glasses?"



llama_print_timings:        load time =   594.27 ms
llama_print_timings:      sample time =     4.03 ms /     9 runs   (    0.45 ms per token,  2233.25 tokens per second)
llama_print_timings: prompt eval time =   594.25 ms /    35 tokens (   16.98 ms per token,    58.90 tokens per second)
llama_print_timings:        eval time =   659.25 ms /     8 runs   (   82.41 ms per token,    12.14 tokens per second)
llama_print_timings:       total time =  1271.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We made this educational gadget for the National Museum of Astronomy in Beijing. Nowadays, with modern technology, we can make a twenty-foot sound antenna, like that used by Pengzi and Wilson forty years ago to detect 3K background radiation, into a pair of glasses with a conversion system, compressing seven orders of magnitude to turn the 7cm wavelength received into red light. When you wear this pair of glasses at night, you will see the universe twinkling in red light.



llama_print_timings:        load time =   734.60 ms
llama_print_timings:      sample time =    49.93 ms /   113 runs   (    0.44 ms per token,  2263.17 tokens per second)
llama_print_timings: prompt eval time =   734.55 ms /   136 tokens (    5.40 ms per token,   185.15 tokens per second)
llama_print_timings:        eval time =  9714.92 ms /   112 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10676.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Where is this now?



llama_print_timings:        load time =   724.28 ms
llama_print_timings:      sample time =     3.81 ms /     6 runs   (    0.63 ms per token,  1576.04 tokens per second)
llama_print_timings: prompt eval time =   724.21 ms /    34 tokens (   21.30 ms per token,    46.95 tokens per second)
llama_print_timings:        eval time =   440.95 ms /     5 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  1182.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In the astronomical museum, there are twenty displays.”



llama_print_timings:        load time =   580.22 ms
llama_print_timings:      sample time =     6.15 ms /    13 runs   (    0.47 ms per token,  2112.79 tokens per second)
llama_print_timings: prompt eval time =   580.18 ms /    40 tokens (   14.50 ms per token,    68.94 tokens per second)
llama_print_timings:        eval time =  1026.92 ms /    12 runs   (   85.58 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1633.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have to get it before five o'clock.



llama_print_timings:        load time =   580.34 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2252.25 tokens per second)
llama_print_timings: prompt eval time =   580.32 ms /    40 tokens (   14.51 ms per token,    68.93 tokens per second)
llama_print_timings:        eval time =   918.41 ms /    11 runs   (   83.49 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  1521.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shayin picked up the phone and called a number, but it took quite a long time for the person to answer. Shayin persisted in convincing the person to wait for Wankou at the Observatory an hour later.



llama_print_timings:        load time =   618.65 ms
llama_print_timings:      sample time =    22.13 ms /    50 runs   (    0.44 ms per token,  2259.38 tokens per second)
llama_print_timings: prompt eval time =   618.61 ms /    71 tokens (    8.71 ms per token,   114.77 tokens per second)
llama_print_timings:        eval time =  4114.19 ms /    49 runs   (   83.96 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  4833.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shay Shan said goodbye to me before he left. "I don't need any more proof than I have just seen, " he said. "If there is something significant that results from this, I won't forget you."



llama_print_timings:        load time =   633.61 ms
llama_print_timings:      sample time =    23.14 ms /    51 runs   (    0.45 ms per token,  2203.69 tokens per second)
llama_print_timings: prompt eval time =   633.57 ms /    77 tokens (    8.23 ms per token,   121.53 tokens per second)
llama_print_timings:        eval time =  4252.97 ms /    50 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  4992.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It will stop at 5 am and don’t investigate it further. Trust me, there won’t be any results.” Wang Miao holds the door.



llama_print_timings:        load time =   609.31 ms
llama_print_timings:      sample time =    15.66 ms /    35 runs   (    0.45 ms per token,  2234.42 tokens per second)
llama_print_timings: prompt eval time =   609.28 ms /    64 tokens (    9.52 ms per token,   105.04 tokens per second)
llama_print_timings:        eval time =  2885.24 ms /    34 runs   (   84.86 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3565.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shay Shan looked at Wang Mu for a long time, and nodded with a point: "I understand, there are something happening in the scientific community ……"



llama_print_timings:        load time =   604.03 ms
llama_print_timings:      sample time =    16.27 ms /    36 runs   (    0.45 ms per token,  2213.07 tokens per second)
llama_print_timings: prompt eval time =   603.98 ms /    58 tokens (   10.41 ms per token,    96.03 tokens per second)
llama_print_timings:        eval time =  2932.24 ms /    35 runs   (   83.78 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  3609.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes.” Wang Miao said, getting into the car and not wanting to go on talking about it.



llama_print_timings:        load time =   591.35 ms
llama_print_timings:      sample time =    10.18 ms /    23 runs   (    0.44 ms per token,  2260.00 tokens per second)
llama_print_timings: prompt eval time =   591.30 ms /    54 tokens (   10.95 ms per token,    91.32 tokens per second)
llama_print_timings:        eval time =  1928.45 ms /    22 runs   (   87.66 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  2565.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is it our turn?”



llama_print_timings:        load time =   591.34 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =   591.30 ms /    36 tokens (   16.42 ms per token,    60.88 tokens per second)
llama_print_timings:        eval time =   498.79 ms /     6 runs   (   83.13 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  1104.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"At least it's my turn," said Wang Miao as she started the car.



llama_print_timings:        load time =   581.02 ms
llama_print_timings:      sample time =     8.83 ms /    20 runs   (    0.44 ms per token,  2265.78 tokens per second)
llama_print_timings: prompt eval time =   581.00 ms /    47 tokens (   12.36 ms per token,    80.90 tokens per second)
llama_print_timings:        eval time =  1599.10 ms /    19 runs   (   84.16 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  2219.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao arrived in the city an hour later, he got out of his car in front of the new astronomical museum. The lights of the city at night shone through this enormous glass building and created a faint impression of its internal structure. Wang Miao now realized that if the architect of the new astronomical museum wanted to convey a feeling of the universe, then he succeeded—more transparent things are more mysterious; the universe itself is translucent and as far as your eyes can see, you can go further and look at how far you want, but it gets more mysterious.



llama_print_timings:        load time =   737.43 ms
llama_print_timings:      sample time =    55.96 ms /   124 runs   (    0.45 ms per token,  2215.83 tokens per second)
llama_print_timings: prompt eval time =   737.39 ms /   131 tokens (    5.63 ms per token,   177.65 tokens per second)
llama_print_timings:        eval time = 10591.79 ms /   123 runs   (   86.11 ms per token,    11.61 tokens per second)
llama_print_timings:       total time = 11583.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The groggy astronomer staff member handed a hand-carrying case to Wang Mo, “There are five 3K glasses in it. The left button is on and the right button adjusts light intensity. There are also more than ten pairs underneath. Please use them at your leisure, I’m going to go take a nap. This Shah Daobo, he’s really crazy.” He then walked back into the dark building.



llama_print_timings:        load time =   734.71 ms
llama_print_timings:      sample time =    44.28 ms /    99 runs   (    0.45 ms per token,  2235.72 tokens per second)
llama_print_timings: prompt eval time =   734.66 ms /   135 tokens (    5.44 ms per token,   183.76 tokens per second)
llama_print_timings:        eval time =  8451.70 ms /    98 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  9384.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Miao put the box on the car seat, took out a pair of 3K glasses which looked like the helmet displays in his v-suit. He picked up the glasses and donned them, seeing that the nightscape didn't change much through the lenses but only dimmed down, then he remembered to switch on the switch. Immediately, the city was transformed into a flickering halo of light, most of its brightness remaining unchanged while some of it was moving or blinking. He knew that these were converted microwave radiation in visible spectrum and could only see a faint glow with no shape discernible due to the wavelengths.



llama_print_timings:        load time =   764.08 ms
llama_print_timings:      sample time =    66.28 ms /   149 runs   (    0.44 ms per token,  2247.90 tokens per second)
llama_print_timings: prompt eval time =   764.03 ms /   149 tokens (    5.13 ms per token,   195.02 tokens per second)
llama_print_timings:        eval time = 12952.97 ms /   148 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 14019.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He lifted his head and saw a sky emitting a dim red light. This was the cosmic background radiation, which originated from 13.8 billion years ago, the legacy of the Big Bang. Stars, by default, should have been black dots in the visible light spectrum but diffracted rays from the millimeter waves of the CMB overwhelmed all shapes and details.



llama_print_timings:        load time =   683.61 ms
llama_print_timings:      sample time =    36.84 ms /    83 runs   (    0.44 ms per token,  2253.17 tokens per second)
llama_print_timings: prompt eval time =   683.58 ms /   107 tokens (    6.39 ms per token,   156.53 tokens per second)
llama_print_timings:        eval time =  6979.08 ms /    82 runs   (   85.11 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  7830.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao's eyes adjusted to this, he saw that the red background of the sky was flickering slightly, and the entire space was synchronizing in a flashing blink. It seemed like the entire universe was just an isolated lamp in the wind.



llama_print_timings:        load time =   620.58 ms
llama_print_timings:      sample time =    25.75 ms /    57 runs   (    0.45 ms per token,  2213.25 tokens per second)
llama_print_timings: prompt eval time =   620.54 ms /    72 tokens (    8.62 ms per token,   116.03 tokens per second)
llama_print_timings:        eval time =  4819.39 ms /    56 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  5557.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Standing under the twinkling sky, Xun suddenly felt that the universe was so small, so small as to confine him within it. The universe was like a tiny heart or womb, with a red glow radiating throughout its interior. He floated in the bloodlight and its irregular pulse seemed to mimic the irregular beating of a human heart. From within this, he felt something bizarre and perverse that he could never understand, despite his intelligence.



llama_print_timings:        load time =   731.04 ms
llama_print_timings:      sample time =    46.19 ms /   103 runs   (    0.45 ms per token,  2230.11 tokens per second)
llama_print_timings: prompt eval time =   731.00 ms /   130 tokens (    5.62 ms per token,   177.84 tokens per second)
llama_print_timings:        eval time =  8794.17 ms /   102 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  9735.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

汪森 was sitting on the ground, weakly holding his 3K glasses. He saw the city in visible light again. But his eyes roamed over the night - something else had caught his attention. In front of the zoo gate, there was a broken strip of neon light, flickering irregularly; near it, some leaves were shaking on a tree, reflecting street lights and flickering irregularly; far in the distance, the five stars on the Russian-style dome of the Beijing Exhibition Hall were reflecting car headlights from different streets and flickering irrationally... Wang Sen was trying to decode these flickers with morse code. He even felt that the creases in some banners floating in the wind, or ripples on the surface of a puddle, were also sending morse codes.



llama_print_timings:        load time =   901.69 ms
llama_print_timings:      sample time =    83.34 ms /   186 runs   (    0.45 ms per token,  2231.85 tokens per second)
llama_print_timings: prompt eval time =   901.64 ms /   231 tokens (    3.90 ms per token,   256.20 tokens per second)
llama_print_timings:        eval time = 16332.86 ms /   185 runs   (   88.29 ms per token,    11.33 tokens per second)
llama_print_timings:       total time = 17614.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He didn't know how long it had been since the astronomer came out of the planetarium, but when he saw him, his sleepiness vanished instantly. After collecting his 3K glasses box, he looked at Wang Miao for a few seconds and walked back with a brisk step.



llama_print_timings:        load time =   668.80 ms
llama_print_timings:      sample time =    29.25 ms /    65 runs   (    0.45 ms per token,  2222.30 tokens per second)
llama_print_timings: prompt eval time =   668.77 ms /    95 tokens (    7.04 ms per token,   142.05 tokens per second)
llama_print_timings:        eval time =  5452.91 ms /    64 runs   (   85.20 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6257.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Sen took out his phone and called Shen Yufei. She answered quickly, probably having also had a sleepless night.



llama_print_timings:        load time =   603.62 ms
llama_print_timings:      sample time =    13.71 ms /    31 runs   (    0.44 ms per token,  2261.78 tokens per second)
llama_print_timings: prompt eval time =   603.58 ms /    57 tokens (   10.59 ms per token,    94.44 tokens per second)
llama_print_timings:        eval time =  2605.69 ms /    30 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  3270.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What is the end of a countdown?” Wang Mu asked helplessly.



llama_print_timings:        load time =   580.70 ms
llama_print_timings:      sample time =     8.33 ms /    19 runs   (    0.44 ms per token,  2281.19 tokens per second)
llama_print_timings: prompt eval time =   580.66 ms /    46 tokens (   12.62 ms per token,    79.22 tokens per second)
llama_print_timings:        eval time =  1629.26 ms /    18 runs   (   90.51 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  2247.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't know.” said the woman after saying these three words. The phone then hung up.



llama_print_timings:        load time =   583.84 ms
llama_print_timings:      sample time =    10.26 ms /    23 runs   (    0.45 ms per token,  2241.72 tokens per second)
llama_print_timings: prompt eval time =   583.79 ms /    47 tokens (   12.42 ms per token,    80.51 tokens per second)
llama_print_timings:        eval time =  1818.27 ms /    22 runs   (   82.65 ms per token,    12.10 tokens per second)
llama_print_timings:       total time =  2448.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Whatever it is, maybe your own death like Yang Dong; maybe a big disaster like the tsunami in 2004; maybe global destruction. In this weird universe, this is all very relief for anyone…



llama_print_timings:        load time =   917.37 ms
llama_print_timings:      sample time =    22.29 ms /    49 runs   (    0.45 ms per token,  2198.59 tokens per second)
llama_print_timings: prompt eval time =   917.27 ms /   162 tokens (    5.66 ms per token,   176.61 tokens per second)
llama_print_timings:        eval time =  4156.16 ms /    48 runs   (   86.59 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  5176.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He drifted into his car and drove away from the observatory, wandering aimlessly in the city. It was still early in the morning when he stopped the car on a side street. Getting out to begin walking, he once again had no destination in mind. His consciousness was a blanket of emptiness, only the ticking clock was visible against the dark red background radiation, pulsating and making him feel like an innocuous clock that hadn't any purpose but simply ticking away until it stopped. As dawn arrived and he felt tired, he sat down on a long bench. Looking up at his destination, he shivered with shock.



llama_print_timings:        load time =   852.54 ms
llama_print_timings:      sample time =    71.47 ms /   139 runs   (    0.51 ms per token,  1944.82 tokens per second)
llama_print_timings: prompt eval time =   852.44 ms /   181 tokens (    4.71 ms per token,   212.33 tokens per second)
llama_print_timings:        eval time = 12287.77 ms /   138 runs   (   89.04 ms per token,    11.23 tokens per second)
llama_print_timings:       total time = 13466.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He is sitting in front of the Wangfujing Catholic Church. In the dim gray sky, the pointed tower of the church resembles three black fingers pointing at what lay hidden somewhere deep in the void of space.



llama_print_timings:        load time =   631.21 ms
llama_print_timings:      sample time =    20.47 ms /    46 runs   (    0.44 ms per token,  2247.30 tokens per second)
llama_print_timings: prompt eval time =   631.18 ms /    72 tokens (    8.77 ms per token,   114.07 tokens per second)
llama_print_timings:        eval time =  3872.97 ms /    45 runs   (   86.07 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  4597.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He rose to leave, but a chorus of sanctified music from the church stopped him. It was probably a rehearsal for Easter mass as they sang "Lux Aeterna," a melody usually played during the Holy Mass. In the solemn and majestic music, Mr. Wang felt his universe shrunk again, becoming a small ant crawling on the tiles of an empty church that vanished in a background glittering with red light. He felt his fragile heart being caressed by an unseen giant hand, and for a moment he experienced the helplessness of childhood as something soft like wax melted and collapsed. He covered his face and cried.



llama_print_timings:        load time =   845.12 ms
llama_print_timings:      sample time =    66.76 ms /   148 runs   (    0.45 ms per token,  2217.00 tokens per second)
llama_print_timings: prompt eval time =   845.07 ms /   195 tokens (    4.33 ms per token,   230.75 tokens per second)
llama_print_timings:        eval time = 12958.26 ms /   147 runs   (   88.15 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 14108.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Haha, another one down!” Wang Miao’s cry was interrupted by a wave of laughter. He turned around and saw that big Shi stood there with smoke spitting out from his mouth.



llama_print_timings:        load time =   621.51 ms
llama_print_timings:      sample time =    20.25 ms /    44 runs   (    0.46 ms per token,  2172.41 tokens per second)
llama_print_timings: prompt eval time =   621.47 ms /    70 tokens (    8.88 ms per token,   112.64 tokens per second)
llama_print_timings:        eval time =  3598.22 ms /    43 runs   (   83.68 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  4312.92 ms


translated 86.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

$$大史 sat down next to Wang Miao and handed him a car key. "Just find a parking spot near Dongshan Road, and I'll just get dragged away by the police if I arrive late."



llama_print_timings:        load time =   619.12 ms
llama_print_timings:      sample time =    22.30 ms /    49 runs   (    0.46 ms per token,  2197.21 tokens per second)
llama_print_timings: prompt eval time =   619.10 ms /    67 tokens (    9.24 ms per token,   108.22 tokens per second)
llama_print_timings:        eval time =  4154.44 ms /    48 runs   (   86.55 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  4871.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You know, I always follow you. At least it makes me feel better.



llama_print_timings:        load time =   807.32 ms
llama_print_timings:      sample time =    10.64 ms /    17 runs   (    0.63 ms per token,  1596.99 tokens per second)
llama_print_timings: prompt eval time =   807.23 ms /    88 tokens (    9.17 ms per token,   109.02 tokens per second)
llama_print_timings:        eval time =  1420.70 ms /    16 runs   (   88.79 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  2275.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How are you, brother? I said you couldn't handle it. You still have six fingers to spare.”



llama_print_timings:        load time =   750.54 ms
llama_print_timings:      sample time =    15.39 ms /    25 runs   (    0.62 ms per token,  1624.75 tokens per second)
llama_print_timings: prompt eval time =   750.47 ms /    53 tokens (   14.16 ms per token,    70.62 tokens per second)
llama_print_timings:        eval time =  2119.22 ms /    24 runs   (   88.30 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  2938.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You don’t understand, ” said Wang Meng as he took a few deep draws on his cigarette.



llama_print_timings:        load time =   734.08 ms
llama_print_timings:      sample time =    15.28 ms /    27 runs   (    0.57 ms per token,  1766.55 tokens per second)
llama_print_timings: prompt eval time =   733.99 ms /    47 tokens (   15.62 ms per token,    64.03 tokens per second)
llama_print_timings:        eval time =  2345.40 ms /    26 runs   (   90.21 ms per token,    11.09 tokens per second)
llama_print_timings:       total time =  3147.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I see. Alright, let's go eat.



llama_print_timings:        load time =   727.23 ms
llama_print_timings:      sample time =     9.43 ms /    13 runs   (    0.73 ms per token,  1378.58 tokens per second)
llama_print_timings: prompt eval time =   727.17 ms /    43 tokens (   16.91 ms per token,    59.13 tokens per second)
llama_print_timings:        eval time =  1110.66 ms /    12 runs   (   92.56 ms per token,    10.80 tokens per second)
llama_print_timings:       total time =  1879.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't want to eat.



llama_print_timings:        load time =   719.36 ms
llama_print_timings:      sample time =     5.11 ms /     9 runs   (    0.57 ms per token,  1762.98 tokens per second)
llama_print_timings: prompt eval time =   719.27 ms /    34 tokens (   21.16 ms per token,    47.27 tokens per second)
llama_print_timings:        eval time =   743.85 ms /     8 runs   (   92.98 ms per token,    10.75 tokens per second)
llama_print_timings:       total time =  1485.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Let's go drinking! I will treat you!



llama_print_timings:        load time =   723.27 ms
llama_print_timings:      sample time =     7.05 ms /    13 runs   (    0.54 ms per token,  1844.76 tokens per second)
llama_print_timings: prompt eval time =   723.19 ms /    38 tokens (   19.03 ms per token,    52.55 tokens per second)
llama_print_timings:        eval time =  1085.90 ms /    12 runs   (   90.49 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  1842.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao got into the car driven by Dash and they went to a small restaurant nearby. It was still early in the day, so there were not many people in the restaurant at this time.



llama_print_timings:        load time =   641.48 ms
llama_print_timings:      sample time =    19.13 ms /    43 runs   (    0.44 ms per token,  2247.43 tokens per second)
llama_print_timings: prompt eval time =   641.40 ms /    55 tokens (   11.66 ms per token,    85.75 tokens per second)
llama_print_timings:        eval time =  3505.69 ms /    42 runs   (   83.47 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  4231.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Two pork intestines, two bottles of Erguotou! " Dashan yelled, and didn't even look up. He was clearly familiar with this place.



llama_print_timings:        load time =   766.49 ms
llama_print_timings:      sample time =    22.13 ms /    41 runs   (    0.54 ms per token,  1853.02 tokens per second)
llama_print_timings: prompt eval time =   766.40 ms /    57 tokens (   13.45 ms per token,    74.37 tokens per second)
llama_print_timings:        eval time =  3503.62 ms /    40 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  4367.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$He saw the two large plates of black and foul-smelling food and felt his stomach turn over. He was about to vomit when Laoshi insisted on bringing him some milk and pancakes. He forced himself to eat a bit, and then he drank cup after cup with Laoshi. He felt light and loquacious, telling all that happened in these three days, though he knew Laoshi probably knew even more than him.



llama_print_timings:        load time =   850.81 ms
llama_print_timings:      sample time =    45.07 ms /    97 runs   (    0.46 ms per token,  2152.16 tokens per second)
llama_print_timings: prompt eval time =   850.72 ms /   124 tokens (    6.86 ms per token,   145.76 tokens per second)
llama_print_timings:        eval time =  8195.32 ms /    96 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  9246.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You mean, the universe is winking at you?" Da Shi asked with a full plate of pork dumplings in his mouth.



llama_print_timings:        load time =   613.78 ms
llama_print_timings:      sample time =    14.37 ms /    32 runs   (    0.45 ms per token,  2227.02 tokens per second)
llama_print_timings: prompt eval time =   613.74 ms /    58 tokens (   10.58 ms per token,    94.50 tokens per second)
llama_print_timings:        eval time =  2606.48 ms /    31 runs   (   84.08 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  3284.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This metaphor is very appropriate.



llama_print_timings:        load time =   577.97 ms
llama_print_timings:      sample time =     4.05 ms /     9 runs   (    0.45 ms per token,  2221.13 tokens per second)
llama_print_timings: prompt eval time =   577.93 ms /    36 tokens (   16.05 ms per token,    62.29 tokens per second)
llama_print_timings:        eval time =   674.47 ms /     8 runs   (   84.31 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1269.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Nonsense.”



llama_print_timings:        load time =   568.08 ms
llama_print_timings:      sample time =     2.66 ms /     6 runs   (    0.44 ms per token,  2253.94 tokens per second)
llama_print_timings: prompt eval time =   568.04 ms /    33 tokens (   17.21 ms per token,    58.09 tokens per second)
llama_print_timings:        eval time =   441.84 ms /     5 runs   (   88.37 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  1021.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Your courage comes from your ignorance.



llama_print_timings:        load time =   569.20 ms
llama_print_timings:      sample time =     3.94 ms /     9 runs   (    0.44 ms per token,  2284.84 tokens per second)
llama_print_timings: prompt eval time =   569.16 ms /    37 tokens (   15.38 ms per token,    65.01 tokens per second)
llama_print_timings:        eval time =   708.50 ms /     8 runs   (   88.56 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  1295.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Still nonsense, come on, let's do it!



llama_print_timings:        load time =   576.19 ms
llama_print_timings:      sample time =     7.04 ms /    16 runs   (    0.44 ms per token,  2271.44 tokens per second)
llama_print_timings: prompt eval time =   576.17 ms /    38 tokens (   15.16 ms per token,    65.95 tokens per second)
llama_print_timings:        eval time =  1239.25 ms /    15 runs   (   82.62 ms per token,    12.10 tokens per second)
llama_print_timings:       total time =  1847.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After drinking this cup, Wang Mogan felt that the world was spinning around her. Only big history seemed to be very stable as he said, "Big history, have you considered some ultimate philosophical questions? Oh, for example, where did humans come from and are going to go; what about the universe?"



llama_print_timings:        load time =   664.34 ms
llama_print_timings:      sample time =    30.35 ms /    68 runs   (    0.45 ms per token,  2240.53 tokens per second)
llama_print_timings: prompt eval time =   664.30 ms /    99 tokens (    6.71 ms per token,   149.03 tokens per second)
llama_print_timings:        eval time =  5772.69 ms /    67 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  6573.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No.”



llama_print_timings:        load time =   569.26 ms
llama_print_timings:      sample time =     1.76 ms /     4 runs   (    0.44 ms per token,  2275.31 tokens per second)
llama_print_timings: prompt eval time =   569.23 ms /    33 tokens (   17.25 ms per token,    57.97 tokens per second)
llama_print_timings:        eval time =   263.21 ms /     3 runs   (   87.74 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =   839.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Never?



llama_print_timings:        load time =   576.41 ms
llama_print_timings:      sample time =     1.75 ms /     4 runs   (    0.44 ms per token,  2284.41 tokens per second)
llama_print_timings: prompt eval time =   576.39 ms /    35 tokens (   16.47 ms per token,    60.72 tokens per second)
llama_print_timings:        eval time =   245.76 ms /     3 runs   (   81.92 ms per token,    12.21 tokens per second)
llama_print_timings:       total time =   830.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Never.”



llama_print_timings:        load time =   578.12 ms
llama_print_timings:      sample time =     2.21 ms /     5 runs   (    0.44 ms per token,  2260.40 tokens per second)
llama_print_timings: prompt eval time =   578.10 ms /    35 tokens (   16.52 ms per token,    60.54 tokens per second)
llama_print_timings:        eval time =   338.92 ms /     4 runs   (   84.73 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =   926.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have ever seen the stars, haven't you? Didn't it make you feel awe and wonder?



llama_print_timings:        load time =   578.28 ms
llama_print_timings:      sample time =    11.67 ms /    26 runs   (    0.45 ms per token,  2228.32 tokens per second)
llama_print_timings: prompt eval time =   578.24 ms /    48 tokens (   12.05 ms per token,    83.01 tokens per second)
llama_print_timings:        eval time =  2117.73 ms /    25 runs   (   84.71 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2747.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I never watch the sky at night.



llama_print_timings:        load time =   581.09 ms
llama_print_timings:      sample time =     4.37 ms /     9 runs   (    0.49 ms per token,  2061.38 tokens per second)
llama_print_timings: prompt eval time =   581.05 ms /    38 tokens (   15.29 ms per token,    65.40 tokens per second)
llama_print_timings:        eval time =   694.74 ms /     8 runs   (   86.84 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  1294.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why would you say that? Aren't you always on night shift?



llama_print_timings:        load time =   599.94 ms
llama_print_timings:      sample time =     7.53 ms /    17 runs   (    0.44 ms per token,  2256.74 tokens per second)
llama_print_timings: prompt eval time =   599.90 ms /    42 tokens (   14.28 ms per token,    70.01 tokens per second)
llama_print_timings:        eval time =  1371.72 ms /    16 runs   (   85.73 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  2005.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Brother, if I look up at the sky while patrolling at night, what will happen to the surveillance object?”



llama_print_timings:        load time =   599.75 ms
llama_print_timings:      sample time =    12.73 ms /    28 runs   (    0.45 ms per token,  2199.36 tokens per second)
llama_print_timings: prompt eval time =   599.71 ms /    52 tokens (   11.53 ms per token,    86.71 tokens per second)
llama_print_timings:        eval time =  2293.24 ms /    27 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2949.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We don't have anything to talk about, let's just go!



llama_print_timings:        load time =   579.04 ms
llama_print_timings:      sample time =     7.61 ms /    17 runs   (    0.45 ms per token,  2233.02 tokens per second)
llama_print_timings: prompt eval time =   579.00 ms /    39 tokens (   14.85 ms per token,    67.36 tokens per second)
llama_print_timings:        eval time =  1351.19 ms /    16 runs   (   84.45 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1964.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, I don't think about ultimate philosophy when looking at the stars in the sky. There are so many things to worry about - housing, tuition for my children, and an endless list of lawsuits... I am a direct-thinking person who can see your mouth all the way down to your ass. So it is no surprise that I am not very popular with the bosses, having worked as an army officer for many years and still retaining this bearish appearance. These thoughts are not enough. I also have time to think about stars and philosophy!”



llama_print_timings:        load time =   732.02 ms
llama_print_timings:      sample time =    54.91 ms /   122 runs   (    0.45 ms per token,  2221.62 tokens per second)
llama_print_timings: prompt eval time =   731.98 ms /   133 tokens (    5.50 ms per token,   181.70 tokens per second)
llama_print_timings:        eval time = 10466.54 ms /   121 runs   (   86.50 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 11447.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's okay, let's get to work!



llama_print_timings:        load time =   580.61 ms
llama_print_timings:      sample time =     6.03 ms /    13 runs   (    0.46 ms per token,  2156.24 tokens per second)
llama_print_timings: prompt eval time =   580.58 ms /    39 tokens (   14.89 ms per token,    67.17 tokens per second)
llama_print_timings:        eval time =  1046.70 ms /    12 runs   (   87.22 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1654.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, I actually invented a ultimate theorem.



llama_print_timings:        load time =   583.13 ms
llama_print_timings:      sample time =     6.05 ms /    12 runs   (    0.50 ms per token,  1983.14 tokens per second)
llama_print_timings: prompt eval time =   583.09 ms /    43 tokens (   13.56 ms per token,    73.75 tokens per second)
llama_print_timings:        eval time =   976.23 ms /    11 runs   (   88.75 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  1587.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Let's talk.



llama_print_timings:        load time =   572.90 ms
llama_print_timings:      sample time =     2.62 ms /     6 runs   (    0.44 ms per token,  2287.46 tokens per second)
llama_print_timings: prompt eval time =   572.89 ms /    33 tokens (   17.36 ms per token,    57.60 tokens per second)
llama_print_timings:        eval time =   431.19 ms /     5 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1015.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If there are ghosts in the house, they will come.



llama_print_timings:        load time =   570.34 ms
llama_print_timings:      sample time =     6.44 ms /    14 runs   (    0.46 ms per token,  2174.93 tokens per second)
llama_print_timings: prompt eval time =   570.32 ms /    39 tokens (   14.62 ms per token,    68.38 tokens per second)
llama_print_timings:        eval time =  1122.68 ms /    13 runs   (   86.36 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  1721.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What the hell is this dogshit theorem!"



llama_print_timings:        load time =   579.31 ms
llama_print_timings:      sample time =     5.77 ms /    12 runs   (    0.48 ms per token,  2078.28 tokens per second)
llama_print_timings: prompt eval time =   579.27 ms /    40 tokens (   14.48 ms per token,    69.05 tokens per second)
llama_print_timings:        eval time =   960.97 ms /    11 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  1566.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I said ‘no ghost’, but it means that someone is messing around.



llama_print_timings:        load time =   590.88 ms
llama_print_timings:      sample time =     8.60 ms /    19 runs   (    0.45 ms per token,  2210.59 tokens per second)
llama_print_timings: prompt eval time =   590.85 ms /    47 tokens (   12.57 ms per token,    79.55 tokens per second)
llama_print_timings:        eval time =  1506.16 ms /    18 runs   (   83.68 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  2136.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“If you have some basic scientific knowledge, it is impossible to imagine the force required to achieve these two things, especially the second one, on a scale much greater than our universe and which is beyond the understanding of both science and the supernatural. It’s something I just can’t fathom.”



llama_print_timings:        load time =   662.48 ms
llama_print_timings:      sample time =    28.51 ms /    64 runs   (    0.45 ms per token,  2244.51 tokens per second)
llama_print_timings: prompt eval time =   662.44 ms /    90 tokens (    7.36 ms per token,   135.86 tokens per second)
llama_print_timings:        eval time =  5407.14 ms /    63 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  6199.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Same old story: Nonsense! I've seen it all.



llama_print_timings:        load time =   579.64 ms
llama_print_timings:      sample time =     7.42 ms /    17 runs   (    0.44 ms per token,  2289.87 tokens per second)
llama_print_timings: prompt eval time =   579.60 ms /    43 tokens (   13.48 ms per token,    74.19 tokens per second)
llama_print_timings:        eval time =  1336.47 ms /    16 runs   (   83.53 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  1949.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Give me some advice, what should I do next?"



llama_print_timings:        load time =   576.38 ms
llama_print_timings:      sample time =     6.13 ms /    14 runs   (    0.44 ms per token,  2282.36 tokens per second)
llama_print_timings: prompt eval time =   576.34 ms /    38 tokens (   15.17 ms per token,    65.93 tokens per second)
llama_print_timings:        eval time =  1061.58 ms /    13 runs   (   81.66 ms per token,    12.25 tokens per second)
llama_print_timings:       total time =  1666.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Keep drinking until you fall asleep.”



llama_print_timings:        load time =   573.98 ms
llama_print_timings:      sample time =     5.37 ms /    12 runs   (    0.45 ms per token,  2235.89 tokens per second)
llama_print_timings: prompt eval time =   573.96 ms /    38 tokens (   15.10 ms per token,    66.21 tokens per second)
llama_print_timings:        eval time =   945.29 ms /    11 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1544.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Alright.



llama_print_timings:        load time =   573.71 ms
llama_print_timings:      sample time =     1.81 ms /     4 runs   (    0.45 ms per token,  2212.39 tokens per second)
llama_print_timings: prompt eval time =   573.67 ms /    33 tokens (   17.38 ms per token,    57.52 tokens per second)
llama_print_timings:        eval time =   265.82 ms /     3 runs   (   88.61 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =   846.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

……


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

He didn't know how he got back to his car, lying on the back seat and falling into a dreamless sleep. He felt that it was not long since he closed his eyes, but when he opened them again, the sun had set in the western part of the city. He walked out of the car and, although he still felt weak after drinking last night, he felt better now. He found himself in a corner of the Imperial Palace in Beijing, where the sunset reflected the gold on the guarded river. In his eyes, the world had returned to its classical and stable form. Wang Moxiang just enjoyed such a peaceful moment until the darkened sky, when a familiar black Santana came out from among the cars and stopped before him.



llama_print_timings:        load time =   828.13 ms
llama_print_timings:      sample time =    71.38 ms /   161 runs   (    0.44 ms per token,  2255.57 tokens per second)
llama_print_timings: prompt eval time =   828.08 ms /   185 tokens (    4.48 ms per token,   223.41 tokens per second)
llama_print_timings:        eval time = 13895.80 ms /   160 runs   (   86.85 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 15049.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Did you sleep well?" the big stone jar asked gravely.



llama_print_timings:        load time =   575.33 ms
llama_print_timings:      sample time =     6.78 ms /    15 runs   (    0.45 ms per token,  2211.08 tokens per second)
llama_print_timings: prompt eval time =   575.30 ms /    43 tokens (   13.38 ms per token,    74.74 tokens per second)
llama_print_timings:        eval time =  1184.70 ms /    14 runs   (   84.62 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1791.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, what should we do next?



llama_print_timings:        load time =   586.43 ms
llama_print_timings:      sample time =     4.03 ms /     9 runs   (    0.45 ms per token,  2232.70 tokens per second)
llama_print_timings: prompt eval time =   586.39 ms /    36 tokens (   16.29 ms per token,    61.39 tokens per second)
llama_print_timings:        eval time =   695.62 ms /     8 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  1299.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who, you? Go have dinner and drink some more, then go to sleep after.



llama_print_timings:        load time =   593.13 ms
llama_print_timings:      sample time =     8.65 ms /    19 runs   (    0.46 ms per token,  2196.28 tokens per second)
llama_print_timings: prompt eval time =   593.09 ms /    49 tokens (   12.10 ms per token,    82.62 tokens per second)
llama_print_timings:        eval time =  1556.80 ms /    18 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2188.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“And then?”



llama_print_timings:        load time =   570.57 ms
llama_print_timings:      sample time =     2.20 ms /     5 runs   (    0.44 ms per token,  2275.83 tokens per second)
llama_print_timings: prompt eval time =   570.54 ms /    34 tokens (   16.78 ms per token,    59.59 tokens per second)
llama_print_timings:        eval time =   329.50 ms /     4 runs   (   82.38 ms per token,    12.14 tokens per second)
llama_print_timings:       total time =   909.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“And then? You've got to go to work tomorrow.”



llama_print_timings:        load time =   578.43 ms
llama_print_timings:      sample time =     7.36 ms /    16 runs   (    0.46 ms per token,  2175.10 tokens per second)
llama_print_timings: prompt eval time =   578.39 ms /    41 tokens (   14.11 ms per token,    70.89 tokens per second)
llama_print_timings:        eval time =  1270.44 ms /    15 runs   (   84.70 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1883.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The countdown has been reduced to 1091 hours.



llama_print_timings:        load time =   585.60 ms
llama_print_timings:      sample time =     6.63 ms /    15 runs   (    0.44 ms per token,  2263.47 tokens per second)
llama_print_timings: prompt eval time =   585.56 ms /    46 tokens (   12.73 ms per token,    78.56 tokens per second)
llama_print_timings:        eval time =  1194.96 ms /    14 runs   (   85.35 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  1809.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Fuck the countdown, first you have to make sure that you're standing straight and not falling down before you can say anything.”



llama_print_timings:        load time =   587.85 ms
llama_print_timings:      sample time =    13.76 ms /    30 runs   (    0.46 ms per token,  2180.39 tokens per second)
llama_print_timings: prompt eval time =   587.81 ms /    53 tokens (   11.09 ms per token,    90.17 tokens per second)
llama_print_timings:        eval time =  2516.74 ms /    29 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  3167.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Dashi, can't you just tell me the truth?” Even if I beg you.



llama_print_timings:        load time =   584.90 ms
llama_print_timings:      sample time =     9.42 ms /    21 runs   (    0.45 ms per token,  2230.48 tokens per second)
llama_print_timings: prompt eval time =   584.86 ms /    47 tokens (   12.44 ms per token,    80.36 tokens per second)
llama_print_timings:        eval time =  1724.24 ms /    20 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2352.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Big Story looked at Wang Miao for a while, and then smiled upward. “I have also told Constant Wisdom this word several times. We are poor brothers in misery. Let me tell you the truth, I don’t know anything about it, and my status is low. They don’t tell me much about it. Sometimes it really feels like a nightmare.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“You know more than I do.”



llama_print_timings:        load time =   584.95 ms
llama_print_timings:      sample time =     4.30 ms /     9 runs   (    0.48 ms per token,  2094.00 tokens per second)
llama_print_timings: prompt eval time =   584.91 ms /    38 tokens (   15.39 ms per token,    64.97 tokens per second)
llama_print_timings:        eval time =   680.60 ms /     8 runs   (   85.08 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  1285.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay, I’ll tell you all about it now.” Da Shi pointed to the riverbank and they found a place to sit down. It had grown dark out of, with a car light flickering in the river behind them. They watched their own silhouettes on the water, changing from short to long as they moved around.



llama_print_timings:        load time =   732.15 ms
llama_print_timings:      sample time =    41.37 ms /    73 runs   (    0.57 ms per token,  1764.56 tokens per second)
llama_print_timings: prompt eval time =   732.06 ms /    90 tokens (    8.13 ms per token,   122.94 tokens per second)
llama_print_timings:        eval time =  6429.61 ms /    72 runs   (   89.30 ms per token,    11.20 tokens per second)
llama_print_timings:       total time =  7352.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In our line of work, it’s about stringing together a lot of seemingly unrelated things and making sense of them. There were a lot of strange occurrences recently – the explosion at Lei Zhang Accelerator Facility and the murder of a Nobel laureate scholar – for which the motives were bizarre, not money, no revenge, and with no political background either, it was just plain destruction. And besides that there are other things too – like ‘Science Boundaries’ and those scholars committing suicide…etc. Environmental activists have been really active recently, blocking construction of dams and nuclear power plants, as well as trying to recreate a primitive society in various ways…etc. And then there are these seemingly insignificant occurrences - did you watch any movies lately?”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I don't read it.



llama_print_timings:        load time =   598.53 ms
llama_print_timings:      sample time =     3.80 ms /     8 runs   (    0.47 ms per token,  2105.82 tokens per second)
llama_print_timings: prompt eval time =   598.51 ms /    35 tokens (   17.10 ms per token,    58.48 tokens per second)
llama_print_timings:        eval time =   601.48 ms /     7 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1217.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The recent blockbusters are all shit. The movie features beautiful green mountains and clear water, with handsome men and pretty women living a comfortable life that has been portrayed by directors as being before the era of technology. For example, 'Xiayao Garden', which was so ugly no one even wanted to see it, but still had some people throw several million dollars at it… There's a science fiction short-story competition with a prize of 50 million yuan, whoever portrays the future in the most disgusting way will win it, and then more than a few hundred million will be poured into making those stories into movies… Weird cults have also appeared. Each leader is extraordinarily wealthy.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“What has this to do with what you said before?”



llama_print_timings:        load time =   598.53 ms
llama_print_timings:      sample time =     5.80 ms /    13 runs   (    0.45 ms per token,  2242.54 tokens per second)
llama_print_timings: prompt eval time =   598.49 ms /    39 tokens (   15.35 ms per token,    65.16 tokens per second)
llama_print_timings:        eval time =  1067.40 ms /    12 runs   (   88.95 ms per token,    11.24 tokens per second)
llama_print_timings:       total time =  1691.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I can string them together, and that's what I do. It's my gift to be able to connect the dots even though I didn't need it when I worked at heavy crime division.



llama_print_timings:        load time =   644.01 ms
llama_print_timings:      sample time =    28.18 ms /    44 runs   (    0.64 ms per token,  1561.17 tokens per second)
llama_print_timings: prompt eval time =   643.97 ms /    85 tokens (    7.58 ms per token,   131.99 tokens per second)
llama_print_timings:        eval time =  3930.47 ms /    43 runs   (   91.41 ms per token,    10.94 tokens per second)
llama_print_timings:       total time =  4700.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What's the conclusion?



llama_print_timings:        load time =   568.86 ms
llama_print_timings:      sample time =     3.34 ms /     7 runs   (    0.48 ms per token,  2095.18 tokens per second)
llama_print_timings: prompt eval time =   568.82 ms /    36 tokens (   15.80 ms per token,    63.29 tokens per second)
llama_print_timings:        eval time =   490.46 ms /     6 runs   (   81.74 ms per token,    12.23 tokens per second)
llama_print_timings:       total time =  1074.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Everything has and only one background that wants to completely undermine scientific research.



llama_print_timings:        load time =   587.66 ms
llama_print_timings:      sample time =     8.38 ms /    18 runs   (    0.47 ms per token,  2147.97 tokens per second)
llama_print_timings: prompt eval time =   587.62 ms /    47 tokens (   12.50 ms per token,    79.98 tokens per second)
llama_print_timings:        eval time =  1487.39 ms /    17 runs   (   87.49 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  2113.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who?



llama_print_timings:        load time =   572.46 ms
llama_print_timings:      sample time =     1.35 ms /     3 runs   (    0.45 ms per token,  2214.02 tokens per second)
llama_print_timings: prompt eval time =   572.43 ms /    33 tokens (   17.35 ms per token,    57.65 tokens per second)
llama_print_timings:        eval time =   164.60 ms /     2 runs   (   82.30 ms per token,    12.15 tokens per second)
llama_print_timings:       total time =   742.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't know, but I can feel that there is a plan for it. It's a very grand and comprehensive plan: destroy research facilities and kill scientists; or get you to kill yourselves or go mad... But the main point is for you to think wrongly so that you become stupider than usual.”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

That was really insightful!



llama_print_timings:        load time =   581.23 ms
llama_print_timings:      sample time =     3.06 ms /     7 runs   (    0.44 ms per token,  2286.09 tokens per second)
llama_print_timings: prompt eval time =   581.19 ms /    38 tokens (   15.29 ms per token,    65.38 tokens per second)
llama_print_timings:        eval time =   499.92 ms /     6 runs   (   83.32 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  1094.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On the other hand, it is also necessary to discredit science in society, which has been done by some people before but this time it was definitely organized.



llama_print_timings:        load time =   595.07 ms
llama_print_timings:      sample time =    15.45 ms /    34 runs   (    0.45 ms per token,  2201.22 tokens per second)
llama_print_timings: prompt eval time =   595.02 ms /    56 tokens (   10.63 ms per token,    94.11 tokens per second)
llama_print_timings:        eval time =  2849.58 ms /    33 runs   (   86.35 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  3516.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I believe what you said.



llama_print_timings:        load time =   574.68 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2261.71 tokens per second)
llama_print_timings: prompt eval time =   574.64 ms /    34 tokens (   16.90 ms per token,    59.17 tokens per second)
llama_print_timings:        eval time =   486.18 ms /     6 runs   (   81.03 ms per token,    12.34 tokens per second)
llama_print_timings:       total time =  1074.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Huh, just now. You science and technology experts can't see this but I, a technical graduate, can. When I said this idea out loud, they didn't spare my laughter from the leaders and scholars.



llama_print_timings:        load time =   622.26 ms
llama_print_timings:      sample time =    23.43 ms /    52 runs   (    0.45 ms per token,  2219.76 tokens per second)
llama_print_timings: prompt eval time =   622.22 ms /    73 tokens (    8.52 ms per token,   117.32 tokens per second)
llama_print_timings:        eval time =  4327.18 ms /    51 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5056.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You know some pseudoscience, do you? Do you know what the people who practice pseudoscience are most afraid of?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Scientist.



llama_print_timings:        load time =   595.75 ms
llama_print_timings:      sample time =     2.20 ms /     5 runs   (    0.44 ms per token,  2270.66 tokens per second)
llama_print_timings: prompt eval time =   595.72 ms /    34 tokens (   17.52 ms per token,    57.07 tokens per second)
llama_print_timings:        eval time =   348.13 ms /     4 runs   (   87.03 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =   953.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wrong, there are many first-rate scholars who have been deceived by pseudoscience and ended up defending it. But the real enemy of pseudoscience is another kind of people: magicians. In fact, most major pseudoscience frauds have been exposed by magicians. Compared to science nerds, you with your years of police experience and social knowledge are far better equipped to detect this form of mass crime.



llama_print_timings:        load time =   681.75 ms
llama_print_timings:      sample time =    42.55 ms /    95 runs   (    0.45 ms per token,  2232.82 tokens per second)
llama_print_timings: prompt eval time =   681.71 ms /   107 tokens (    6.37 ms per token,   156.96 tokens per second)
llama_print_timings:        eval time =  8133.90 ms /    94 runs   (   86.53 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  9008.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Actually, there are people who are smarter than me. This has been noticed by the higher-ups before. At first, I was joked about finding the wrong place to work, but then I was recruited here and just does errand running … Well, that is all I know besides you.”



llama_print_timings:        load time =   669.12 ms
llama_print_timings:      sample time =    29.15 ms /    66 runs   (    0.44 ms per token,  2263.84 tokens per second)
llama_print_timings: prompt eval time =   669.08 ms /    88 tokens (    7.60 ms per token,   131.52 tokens per second)
llama_print_timings:        eval time =  5626.26 ms /    65 runs   (   86.56 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  6426.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I have a question: What does this have to do with the military?”



llama_print_timings:        load time =   578.02 ms
llama_print_timings:      sample time =     7.83 ms /    17 runs   (    0.46 ms per token,  2171.14 tokens per second)
llama_print_timings: prompt eval time =   578.00 ms /    41 tokens (   14.10 ms per token,    70.93 tokens per second)
llama_print_timings:        eval time =  1397.60 ms /    16 runs   (   87.35 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2011.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I was also puzzled, and asked them. They said that the war had broken out and it was the army's business. I began to think they were just saying that in a joke. But they were not kidding, and now the troops really are at a combat-ready state.”



llama_print_timings:        load time =   658.99 ms
llama_print_timings:      sample time =    28.95 ms /    64 runs   (    0.45 ms per token,  2210.86 tokens per second)
llama_print_timings: prompt eval time =   658.95 ms /    95 tokens (    6.94 ms per token,   144.17 tokens per second)
llama_print_timings:        eval time =  5388.51 ms /    63 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  6180.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who is the enemy?



llama_print_timings:        load time =   572.25 ms
llama_print_timings:      sample time =     2.68 ms /     6 runs   (    0.45 ms per token,  2235.47 tokens per second)
llama_print_timings: prompt eval time =   572.21 ms /    34 tokens (   16.83 ms per token,    59.42 tokens per second)
llama_print_timings:        eval time =   409.73 ms /     5 runs   (   81.95 ms per token,    12.20 tokens per second)
llama_print_timings:       total time =   993.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know. NATO officers have been stationed in the Command Center, and there are also a lot of PLA soldiers at Fort Meade, who knows which side is the enemy?


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

It's so unbelievable, you say all this is true?!



llama_print_timings:        load time =   614.24 ms
llama_print_timings:      sample time =     8.06 ms /    18 runs   (    0.45 ms per token,  2234.36 tokens per second)
llama_print_timings: prompt eval time =   614.15 ms /    43 tokens (   14.28 ms per token,    70.02 tokens per second)
llama_print_timings:        eval time =  1444.58 ms /    17 runs   (   84.98 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2094.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have a few old army comrades who are now generals, so I know some things.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Isn't the media supposed to report on important news?”



llama_print_timings:        load time =   599.02 ms
llama_print_timings:      sample time =     7.17 ms /    15 runs   (    0.48 ms per token,  2091.18 tokens per second)
llama_print_timings: prompt eval time =   598.98 ms /    42 tokens (   14.26 ms per token,    70.12 tokens per second)
llama_print_timings:        eval time =  1266.79 ms /    14 runs   (   90.48 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  1898.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a very strange phenomenon: all countries are keeping secrets and doing so with great caution. I can now say for certain that the enemy is a ruthless one; it's above that they're afraid! I know Chang Weisihan very well; he's someone who's never scared of anything, but now even he seems to be worried. They're so terrified that they don't have the confidence to defeat their enemy.



llama_print_timings:        load time =   687.08 ms
llama_print_timings:      sample time =    47.13 ms /   103 runs   (    0.46 ms per token,  2185.35 tokens per second)
llama_print_timings: prompt eval time =   687.04 ms /   109 tokens (    6.30 ms per token,   158.65 tokens per second)
llama_print_timings:        eval time =  8736.28 ms /   102 runs   (   85.65 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  9644.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“If this is the case, that would be terrible.”



llama_print_timings:        load time =   570.99 ms
llama_print_timings:      sample time =     5.78 ms /    13 runs   (    0.44 ms per token,  2249.52 tokens per second)
llama_print_timings: prompt eval time =   570.95 ms /    39 tokens (   14.64 ms per token,    68.31 tokens per second)
llama_print_timings:        eval time =   990.29 ms /    12 runs   (   82.52 ms per token,    12.12 tokens per second)
llama_print_timings:       total time =  1586.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, everyone has something they're afraid of. Even the bad guy has its fears. The more powerful the character, the more fatal its fear is to it.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

"What's it afraid of?"



llama_print_timings:        load time =   585.31 ms
llama_print_timings:      sample time =     4.10 ms /     9 runs   (    0.46 ms per token,  2195.12 tokens per second)
llama_print_timings: prompt eval time =   585.27 ms /    36 tokens (   16.26 ms per token,    61.51 tokens per second)
llama_print_timings:        eval time =   706.11 ms /     8 runs   (   88.26 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =  1311.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Fear you, fear scientists. And it's strange that the things you research are useless and far-fetched, like Yang Dong's group. It's more afraid of you than of the universe, so it can be this brutal. If it had a use in killing you, it would have killed you long ago. The most effective way to disturb your thought is to kill you off. But if you are disturbed in mind, science is over.”



llama_print_timings:        load time =   708.34 ms
llama_print_timings:      sample time =    54.60 ms /   100 runs   (    0.55 ms per token,  1831.64 tokens per second)
llama_print_timings: prompt eval time =   708.30 ms /   121 tokens (    5.85 ms per token,   170.83 tokens per second)
llama_print_timings:        eval time =  8793.34 ms /    99 runs   (   88.82 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  9751.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yeah, it's like he's afraid of science.



llama_print_timings:        load time =   580.24 ms
llama_print_timings:      sample time =     7.04 ms /    16 runs   (    0.44 ms per token,  2274.34 tokens per second)
llama_print_timings: prompt eval time =   580.20 ms /    38 tokens (   15.27 ms per token,    65.49 tokens per second)
llama_print_timings:        eval time =  1325.72 ms /    15 runs   (   88.38 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  1938.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, basic science.



llama_print_timings:        load time =   579.99 ms
llama_print_timings:      sample time =     2.65 ms /     6 runs   (    0.44 ms per token,  2265.86 tokens per second)
llama_print_timings: prompt eval time =   579.97 ms /    36 tokens (   16.11 ms per token,    62.07 tokens per second)
llama_print_timings:        eval time =   434.37 ms /     5 runs   (   86.87 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1026.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I and Yang Dong's research are very different. Nano materials is not basic science, it's just a high strength material that can threaten power?! ”



llama_print_timings:        load time =   600.15 ms
llama_print_timings:      sample time =    16.96 ms /    37 runs   (    0.46 ms per token,  2182.12 tokens per second)
llama_print_timings: prompt eval time =   600.11 ms /    56 tokens (   10.72 ms per token,    93.32 tokens per second)
llama_print_timings:        eval time =  3255.88 ms /    36 runs   (   90.44 ms per token,    11.06 tokens per second)
llama_print_timings:       total time =  3933.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are a special case. App researchers like you generally don't bother them, and maybe there is something in your materials that scares them.



llama_print_timings:        load time =   624.11 ms
llama_print_timings:      sample time =    24.29 ms /    32 runs   (    0.76 ms per token,  1317.25 tokens per second)
llama_print_timings: prompt eval time =   624.07 ms /    62 tokens (   10.07 ms per token,    99.35 tokens per second)
llama_print_timings:        eval time =  3120.16 ms /    31 runs   (  100.65 ms per token,     9.94 tokens per second)
llama_print_timings:       total time =  3854.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What should I do?"



llama_print_timings:        load time =   596.46 ms
llama_print_timings:      sample time =     7.61 ms /     7 runs   (    1.09 ms per token,   919.48 tokens per second)
llama_print_timings: prompt eval time =   596.42 ms /    34 tokens (   17.54 ms per token,    57.01 tokens per second)
llama_print_timings:        eval time =   628.17 ms /     6 runs   (  104.69 ms per token,     9.55 tokens per second)
llama_print_timings:       total time =  1260.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Go to work, study it, and that's the biggest blow. Don't care about what clock you have set for your gaming time if you need relaxation at the end of the day. If you can make it through the game, do so; otherwise, don't stress yourself out over a difficult level.



llama_print_timings:        load time =   635.20 ms
llama_print_timings:      sample time =    31.68 ms /    68 runs   (    0.47 ms per token,  2146.40 tokens per second)
llama_print_timings: prompt eval time =   635.15 ms /    68 tokens (    9.34 ms per token,   107.06 tokens per second)
llama_print_timings:        eval time =  5773.88 ms /    67 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  6552.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Games? The Three-Body Problem? Does it have something to do with all of this too?”



llama_print_timings:        load time =   582.19 ms
llama_print_timings:      sample time =    10.28 ms /    23 runs   (    0.45 ms per token,  2237.35 tokens per second)
llama_print_timings: prompt eval time =   582.15 ms /    45 tokens (   12.94 ms per token,    77.30 tokens per second)
llama_print_timings:        eval time =  1889.53 ms /    22 runs   (   85.89 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2517.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't know much about it, but I think it is really hard for people like me who are not experts in games to play. But since the game players include many experts from the Operations Center, it should be very challenging and exciting!”



llama_print_timings:        load time =   623.31 ms
llama_print_timings:      sample time =    25.27 ms /    57 runs   (    0.44 ms per token,  2255.37 tokens per second)
llama_print_timings: prompt eval time =   623.28 ms /    67 tokens (    9.30 ms per token,   107.50 tokens per second)
llama_print_timings:        eval time =  4778.72 ms /    56 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5515.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, nothing else?



llama_print_timings:        load time =   581.74 ms
llama_print_timings:      sample time =     2.65 ms /     6 runs   (    0.44 ms per token,  2266.72 tokens per second)
llama_print_timings: prompt eval time =   581.71 ms /    37 tokens (   15.72 ms per token,    63.61 tokens per second)
llama_print_timings:        eval time =   421.35 ms /     5 runs   (   84.27 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  1015.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, I'll tell you later. Just stand up straight! When you're afraid, just remember my ultimate theorem.”



llama_print_timings:        load time =   619.36 ms
llama_print_timings:      sample time =    13.27 ms /    29 runs   (    0.46 ms per token,  2184.89 tokens per second)
llama_print_timings: prompt eval time =   619.32 ms /    62 tokens (    9.99 ms per token,   100.11 tokens per second)
llama_print_timings:        eval time =  2388.31 ms /    28 runs   (   85.30 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  3068.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But Wang Minglian didn't say thank you and instead got into the car with Mr. Shi.



llama_print_timings:        load time =   582.06 ms
llama_print_timings:      sample time =    11.61 ms /    25 runs   (    0.46 ms per token,  2153.32 tokens per second)
llama_print_timings: prompt eval time =   582.03 ms /    47 tokens (   12.38 ms per token,    80.75 tokens per second)
llama_print_timings:        eval time =  2042.46 ms /    24 runs   (   85.10 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2677.32 ms


translated 89.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Wang Miao returned home and had forgotten to buy a V-suit set in the game store. His wife told him that people from his workplace were looking for him every day. Wang Miao replied that he would go back to work tomorrow after opening the phone calls from the nanocenter, which had been turned off all day long. During dinner, he drank quite a lot of wine but was unable to fall asleep at all. After his family fell asleep, he sat in front of the computer and re-logged into 3691 with the V-suit set he had just bought back.



llama_print_timings:        load time =   731.14 ms
llama_print_timings:      sample time =    67.87 ms /   130 runs   (    0.52 ms per token,  1915.40 tokens per second)
llama_print_timings: prompt eval time =   731.11 ms /   129 tokens (    5.67 ms per token,   176.45 tokens per second)
llama_print_timings:        eval time = 11560.89 ms /   129 runs   (   89.62 ms per token,    11.16 tokens per second)
llama_print_timings:       total time = 12605.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He stood before the ruined pyramid of King Xi, and the snow covering it had long since melted. The stone blocks that formed the tower were eroded in hollows, as if by wind. Far away was a few huge structures, which he guessed to be grain silos, but they were shaped entirely different from those he had seen before. All of this signaled that endless time had passed by.



llama_print_timings:        load time =   698.18 ms
llama_print_timings:      sample time =    42.90 ms /    93 runs   (    0.46 ms per token,  2167.63 tokens per second)
llama_print_timings: prompt eval time =   698.15 ms /   114 tokens (    6.12 ms per token,   163.29 tokens per second)
llama_print_timings:        eval time =  8000.37 ms /    92 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  8897.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He searched for the entrance of the pyramid under the sun, and he saw that it had been blocked by stones. But at the same time, he saw a long flight of stairs on one side, which led to the top of the pyramid. He looked up at the towering pyramid's peaks and saw that they had been shorn off into platforms with Aztec-style architecture instead of Egyptian-style architecture.



llama_print_timings:        load time =   703.58 ms
llama_print_timings:      sample time =    42.20 ms /    95 runs   (    0.44 ms per token,  2251.40 tokens per second)
llama_print_timings: prompt eval time =   703.53 ms /   125 tokens (    5.63 ms per token,   177.67 tokens per second)
llama_print_timings:        eval time =  8208.18 ms /    94 runs   (   87.32 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  9105.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Along the steps, Wang Miao climbed to the top of the pyramid and saw a place similar to an ancient observatory. At the corner of the platform was a telescope that was several meters tall, next to which were smaller telescopes. On the other side were a few strange instruments that looked like ancient China's dunwen instrument. What attracted Wang Miao's attention most was a large bronze ball, diameter two meters or so, placed on a complex machine and held up by many sizes of gears in slow rotation. Wang Miao noticed that its rotation direction and speed were constantly changing. Underneath the machine was a rectangular pit with dim firelight, where several slave-like people were pushing a disc to provide power for the machine below.



llama_print_timings:        load time =   821.12 ms
llama_print_timings:      sample time =    90.44 ms /   163 runs   (    0.55 ms per token,  1802.26 tokens per second)
llama_print_timings: prompt eval time =   821.08 ms /   184 tokens (    4.46 ms per token,   224.10 tokens per second)
llama_print_timings:        eval time = 14713.34 ms /   162 runs   (   90.82 ms per token,    11.01 tokens per second)
llama_print_timings:       total time = 15951.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

There was a man approaching Wang Mow, just like when he first encountered Zhou Wenwang. The man stood with his back towards the sunrise and only saw Wang Mow's shining eyes from the darkness. He was tall and slim with long black hair tied loosely atop his head.



llama_print_timings:        load time =   832.92 ms
llama_print_timings:      sample time =    48.66 ms /    68 runs   (    0.72 ms per token,  1397.31 tokens per second)
llama_print_timings: prompt eval time =   832.85 ms /    99 tokens (    8.41 ms per token,   118.87 tokens per second)
llama_print_timings:        eval time =  6373.91 ms /    67 runs   (   95.13 ms per token,    10.51 tokens per second)
llama_print_timings:       total time =  7428.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He introduced himself as "I am Moxie."



llama_print_timings:        load time =   592.28 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2250.14 tokens per second)
llama_print_timings: prompt eval time =   592.24 ms /    41 tokens (   14.44 ms per token,    69.23 tokens per second)
llama_print_timings:        eval time =   927.03 ms /    11 runs   (   84.28 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  1543.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I am Haoren, how are you?”



llama_print_timings:        load time =   576.48 ms
llama_print_timings:      sample time =     4.91 ms /    11 runs   (    0.45 ms per token,  2240.78 tokens per second)
llama_print_timings: prompt eval time =   576.44 ms /    37 tokens (   15.58 ms per token,    64.19 tokens per second)
llama_print_timings:        eval time =   902.76 ms /    10 runs   (   90.28 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =  1500.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ah, I know you! "Moxie said excitedly. "In civilization 137, you followed Zhou Wenwang."



llama_print_timings:        load time =   610.52 ms
llama_print_timings:      sample time =    15.11 ms /    33 runs   (    0.46 ms per token,  2184.13 tokens per second)
llama_print_timings: prompt eval time =   610.47 ms /    58 tokens (   10.53 ms per token,    95.01 tokens per second)
llama_print_timings:        eval time =  2731.62 ms /    32 runs   (   85.36 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  3410.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have been here with him, but I don't believe his theory.



llama_print_timings:        load time =   590.01 ms
llama_print_timings:      sample time =     7.57 ms /    17 runs   (    0.45 ms per token,  2246.60 tokens per second)
llama_print_timings: prompt eval time =   589.96 ms /    45 tokens (   13.11 ms per token,    76.28 tokens per second)
llama_print_timings:        eval time =  1341.76 ms /    16 runs   (   83.86 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  1964.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are right.” Moxi nodded gravely and moved closer to him. “Know that, in the 362 thousand years you have been away, civilization has restarted four times. During the intermittent and chaotic cycle of the Dissolved Period and Constant Periods, it struggled on painfully to develop. The shortest time only lasted for half of the Stone Age period, but the 139th Civilization created a record and got all the way to the Age of Steam!”



llama_print_timings:        load time =   692.01 ms
llama_print_timings:      sample time =    51.33 ms /   114 runs   (    0.45 ms per token,  2220.88 tokens per second)
llama_print_timings: prompt eval time =   691.96 ms /   121 tokens (    5.72 ms per token,   174.87 tokens per second)
llama_print_timings:        eval time =  9825.48 ms /   113 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 10746.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, did someone find the laws of the sun's motion in that civilization?



llama_print_timings:        load time =   585.47 ms
llama_print_timings:      sample time =     8.36 ms /    19 runs   (    0.44 ms per token,  2274.09 tokens per second)
llama_print_timings: prompt eval time =   585.43 ms /    44 tokens (   13.31 ms per token,    75.16 tokens per second)
llama_print_timings:        eval time =  1514.36 ms /    18 runs   (   84.13 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  2136.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, it was just a lucky escape.



llama_print_timings:        load time =   585.81 ms
llama_print_timings:      sample time =     4.95 ms /    11 runs   (    0.45 ms per token,  2222.22 tokens per second)
llama_print_timings: prompt eval time =   585.79 ms /    45 tokens (   13.02 ms per token,    76.82 tokens per second)
llama_print_timings:        eval time =   861.50 ms /    10 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  1468.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But people are always working hard, aren't they?



llama_print_timings:        load time =   579.00 ms
llama_print_timings:      sample time =     5.81 ms /    13 runs   (    0.45 ms per token,  2236.37 tokens per second)
llama_print_timings: prompt eval time =   578.96 ms /    37 tokens (   15.65 ms per token,    63.91 tokens per second)
llama_print_timings:        eval time =  1003.63 ms /    12 runs   (   83.64 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  1607.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, let me show you the last effort of civilization.



llama_print_timings:        load time =   865.23 ms
llama_print_timings:      sample time =     9.62 ms /    15 runs   (    0.64 ms per token,  1559.58 tokens per second)
llama_print_timings: prompt eval time =   865.13 ms /   182 tokens (    4.75 ms per token,   210.37 tokens per second)
llama_print_timings:        eval time =  1349.89 ms /    14 runs   (   96.42 ms per token,    10.37 tokens per second)
llama_print_timings:       total time =  2255.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He pointed to the direction and said, "That's Confucius. He believed that everything should conform to the rites, including the universe itself. So he created a system of celestial and solar calendar laws to predict the sun's movements."


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The result is obvious.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Yes, he calculated the time when the sun should rise and predicted a 5-year eon. You don't say. That prediction actually lasted for about one month.”



llama_print_timings:        load time =   619.39 ms
llama_print_timings:      sample time =    18.11 ms /    39 runs   (    0.46 ms per token,  2153.51 tokens per second)
llama_print_timings: prompt eval time =   619.35 ms /    66 tokens (    9.38 ms per token,   106.56 tokens per second)
llama_print_timings:        eval time =  3248.54 ms /    38 runs   (   85.49 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3947.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then, on a day the sun will never come out again?



llama_print_timings:        load time =   569.54 ms
llama_print_timings:      sample time =     6.28 ms /    14 runs   (    0.45 ms per token,  2230.01 tokens per second)
llama_print_timings: prompt eval time =   569.50 ms /    38 tokens (   14.99 ms per token,    66.73 tokens per second)
llama_print_timings:        eval time =  1126.44 ms /    13 runs   (   86.65 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1722.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, the sun came out and rose into the sky, but then it went out.



llama_print_timings:        load time =   583.42 ms
llama_print_timings:      sample time =     8.68 ms /    19 runs   (    0.46 ms per token,  2189.70 tokens per second)
llama_print_timings: prompt eval time =   583.38 ms /    48 tokens (   12.15 ms per token,    82.28 tokens per second)
llama_print_timings:        eval time =  1521.17 ms /    18 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  2143.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What? Extinguish?! ”



llama_print_timings:        load time =   582.02 ms
llama_print_timings:      sample time =     4.63 ms /    10 runs   (    0.46 ms per token,  2157.50 tokens per second)
llama_print_timings: prompt eval time =   582.00 ms /    37 tokens (   15.73 ms per token,    63.57 tokens per second)
llama_print_timings:        eval time =   749.96 ms /     9 runs   (   83.33 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  1352.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, it started to get dark slowly and then suddenly went out! The night comes, and the cold is so severe that Confucius froze into an ice pillar. He's still standing here today.



llama_print_timings:        load time =   629.89 ms
llama_print_timings:      sample time =    20.71 ms /    45 runs   (    0.46 ms per token,  2173.39 tokens per second)
llama_print_timings: prompt eval time =   629.85 ms /    71 tokens (    8.87 ms per token,   112.73 tokens per second)
llama_print_timings:        eval time =  3752.85 ms /    44 runs   (   85.29 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4473.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is there nothing left? I mean, the sun after it goes dark?"



llama_print_timings:        load time =   631.77 ms
llama_print_timings:      sample time =     7.66 ms /    17 runs   (    0.45 ms per token,  2219.32 tokens per second)
llama_print_timings: prompt eval time =   631.73 ms /    43 tokens (   14.69 ms per token,    68.07 tokens per second)
llama_print_timings:        eval time =  1421.36 ms /    16 runs   (   88.83 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  2088.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“At that place, a shooting star appeared. Like the soul of the sun after death.”



llama_print_timings:        load time =   577.85 ms
llama_print_timings:      sample time =     8.84 ms /    20 runs   (    0.44 ms per token,  2263.47 tokens per second)
llama_print_timings: prompt eval time =   577.81 ms /    46 tokens (   12.56 ms per token,    79.61 tokens per second)
llama_print_timings:        eval time =  1640.49 ms /    19 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  2256.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, you must think that the sun just went out and shooting stars came out of nowhere.



llama_print_timings:        load time =   602.75 ms
llama_print_timings:      sample time =     9.87 ms /    21 runs   (    0.47 ms per token,  2127.88 tokens per second)
llama_print_timings: prompt eval time =   602.73 ms /    48 tokens (   12.56 ms per token,    79.64 tokens per second)
llama_print_timings:        eval time =  1764.29 ms /    20 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  2410.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Yes. The light suddenly went out, and stars appeared in the sky. You can go check the log database; it's true what it records."



llama_print_timings:        load time =   589.60 ms
llama_print_timings:      sample time =    15.16 ms /    33 runs   (    0.46 ms per token,  2177.07 tokens per second)
llama_print_timings: prompt eval time =   589.56 ms /    53 tokens (   11.12 ms per token,    89.90 tokens per second)
llama_print_timings:        eval time =  2747.05 ms /    32 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3403.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh——Meng Yu muttered for a long time. Originally, regarding the mystery of the Three Body World, he had already formed a vague theory in his mind, but Mo Chi speaking of this made him rethink everything. "Why is it... so sudden?" He sighed miserably.



llama_print_timings:        load time =   666.28 ms
llama_print_timings:      sample time =    28.74 ms /    64 runs   (    0.45 ms per token,  2226.78 tokens per second)
llama_print_timings: prompt eval time =   666.26 ms /    89 tokens (    7.49 ms per token,   133.58 tokens per second)
llama_print_timings:        eval time =  5462.86 ms /    63 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  6254.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know if it is the Western Han Dynasty or the Eastern Han Dynasty.



llama_print_timings:        load time =   583.24 ms
llama_print_timings:      sample time =    10.42 ms /    23 runs   (    0.45 ms per token,  2206.45 tokens per second)
llama_print_timings: prompt eval time =   583.20 ms /    43 tokens (   13.56 ms per token,    73.73 tokens per second)
llama_print_timings:        eval time =  1869.37 ms /    22 runs   (   84.97 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2498.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Are you still alive?



llama_print_timings:        load time =   597.38 ms
llama_print_timings:      sample time =     2.63 ms /     6 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   597.34 ms /    37 tokens (   16.14 ms per token,    61.94 tokens per second)
llama_print_timings:        eval time =   427.65 ms /     5 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1036.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I have a mission to accurately observe the movement of the sun. Those wizards, diviners and sages are useless creatures, they can't do anything but languish in their own speculations. But I'm different; I can make things happen! ” He pointed to the numerous instruments on the stage.



llama_print_timings:        load time =   662.11 ms
llama_print_timings:      sample time =    31.77 ms /    71 runs   (    0.45 ms per token,  2235.09 tokens per second)
llama_print_timings: prompt eval time =   662.04 ms /    99 tokens (    6.69 ms per token,   149.54 tokens per second)
llama_print_timings:        eval time =  6141.03 ms /    70 runs   (   87.73 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  6941.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you sure this will achieve your goal?" asked Wang Miao, pointing at the instrument and especially the mysterious large bronze sphere.



llama_print_timings:        load time =   600.85 ms
llama_print_timings:      sample time =    13.15 ms /    29 runs   (    0.45 ms per token,  2204.82 tokens per second)
llama_print_timings: prompt eval time =   600.81 ms /    57 tokens (   10.54 ms per token,    94.87 tokens per second)
llama_print_timings:        eval time =  2419.24 ms /    28 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3078.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I also have my own theory, but it's not magic or superstition; it comes from extensive observation and summarization. First of all, do you know what the universe is? A machine.



llama_print_timings:        load time =   613.36 ms
llama_print_timings:      sample time =    19.16 ms /    43 runs   (    0.45 ms per token,  2244.38 tokens per second)
llama_print_timings: prompt eval time =   613.34 ms /    57 tokens (   10.76 ms per token,    92.93 tokens per second)
llama_print_timings:        eval time =  3538.17 ms /    42 runs   (   84.24 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  4236.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This equals nothing.



llama_print_timings:        load time =   586.86 ms
llama_print_timings:      sample time =     2.19 ms /     5 runs   (    0.44 ms per token,  2284.15 tokens per second)
llama_print_timings: prompt eval time =   586.82 ms /    36 tokens (   16.30 ms per token,    61.35 tokens per second)
llama_print_timings:        eval time =   329.61 ms /     4 runs   (   82.40 ms per token,    12.14 tokens per second)
llama_print_timings:       total time =   925.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Spoken more specifically, the universe is a suspended hollow sphere floating in fire, with many small holes and one big hole. The light of the fire emanates from these holes, which are stars, and a single, huge hole, the sun.



llama_print_timings:        load time =   621.24 ms
llama_print_timings:      sample time =    24.96 ms /    54 runs   (    0.46 ms per token,  2163.20 tokens per second)
llama_print_timings: prompt eval time =   621.19 ms /    78 tokens (    7.96 ms per token,   125.56 tokens per second)
llama_print_timings:        eval time =  4593.43 ms /    53 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  5324.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's a very interesting model, Wang Miao looks at the large bronze ball and now can guess what it is. But there is one big flaw in that model: The Sun appears to rise and fall relative to the stars, but the relative positions of the holes on the bronze sphere should be fixed during its movement around the sun.



llama_print_timings:        load time =   677.47 ms
llama_print_timings:      sample time =    31.25 ms /    70 runs   (    0.45 ms per token,  2239.93 tokens per second)
llama_print_timings: prompt eval time =   677.43 ms /    98 tokens (    6.91 ms per token,   144.66 tokens per second)
llama_print_timings:        eval time =  5991.30 ms /    69 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  6806.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So I released a modified model, where the universe is composed of two spheres. The sky we see is on the inner sphere, and there's a large hole on the outside sphere with lots of holes in it, the big hole in the outside sphere allows light from the inside to be reflected and scattered across the layer between the two spheres, making that layer bright and we can see stars from the small holes coming out.



llama_print_timings:        load time =   691.65 ms
llama_print_timings:      sample time =    39.97 ms /    88 runs   (    0.45 ms per token,  2201.76 tokens per second)
llama_print_timings: prompt eval time =   691.62 ms /   121 tokens (    5.72 ms per token,   174.95 tokens per second)
llama_print_timings:        eval time =  7568.56 ms /    87 runs   (   86.99 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  8439.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Where's the sun?"



llama_print_timings:        load time =   577.83 ms
llama_print_timings:      sample time =     3.58 ms /     8 runs   (    0.45 ms per token,  2235.26 tokens per second)
llama_print_timings: prompt eval time =   577.79 ms /    35 tokens (   16.51 ms per token,    60.58 tokens per second)
llama_print_timings:        eval time =   635.15 ms /     7 runs   (   90.74 ms per token,    11.02 tokens per second)
llama_print_timings:       total time =  1228.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun is a huge light spot projected on the inner shell from the outer shell. Its brightness is so high that it penetrates both shells, and we see the sun when its rays pierce the eggs. The scattered light surrounding the spots is strong enough to also pierce the inner shell, which is what we see as clear skies during the day.



llama_print_timings:        load time =   656.91 ms
llama_print_timings:      sample time =    35.92 ms /    79 runs   (    0.45 ms per token,  2199.21 tokens per second)
llama_print_timings: prompt eval time =   656.87 ms /    94 tokens (    6.99 ms per token,   143.10 tokens per second)
llama_print_timings:        eval time =  6754.90 ms /    78 runs   (   86.60 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  7571.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is the force that drives the irregular rotation of two shells?



llama_print_timings:        load time =   584.66 ms
llama_print_timings:      sample time =     6.88 ms /    15 runs   (    0.46 ms per token,  2181.82 tokens per second)
llama_print_timings: prompt eval time =   584.62 ms /    46 tokens (   12.71 ms per token,    78.68 tokens per second)
llama_print_timings:        eval time =  1230.39 ms /    14 runs   (   87.88 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  1845.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It is the power of fire beyond the universe.



llama_print_timings:        load time =   598.06 ms
llama_print_timings:      sample time =     5.22 ms /    11 runs   (    0.47 ms per token,  2108.90 tokens per second)
llama_print_timings: prompt eval time =   597.98 ms /    38 tokens (   15.74 ms per token,    63.55 tokens per second)
llama_print_timings:        eval time =   847.77 ms /    10 runs   (   84.78 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  1469.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In your double shell model, the size and brightness of the Sun should be constant. If it is unevenly distributed, at least its size should remain constant.



llama_print_timings:        load time =   735.44 ms
llama_print_timings:      sample time =    16.28 ms /    35 runs   (    0.47 ms per token,  2150.41 tokens per second)
llama_print_timings: prompt eval time =   735.35 ms /    72 tokens (   10.21 ms per token,    97.91 tokens per second)
llama_print_timings:        eval time =  2984.30 ms /    34 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  3792.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have oversimplified this model. With the changes in external conditions, the size of the outer shell of the universe can expand or contract, which results in changes to the sun's size and luminosity.



llama_print_timings:        load time =   675.63 ms
llama_print_timings:      sample time =    21.49 ms /    46 runs   (    0.47 ms per token,  2140.43 tokens per second)
llama_print_timings: prompt eval time =   675.54 ms /    68 tokens (    9.93 ms per token,   100.66 tokens per second)
llama_print_timings:        eval time =  4002.10 ms /    45 runs   (   88.94 ms per token,    11.24 tokens per second)
llama_print_timings:       total time =  4772.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What about the flying stars?



llama_print_timings:        load time =   588.33 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2255.15 tokens per second)
llama_print_timings: prompt eval time =   588.29 ms /    36 tokens (   16.34 ms per token,    61.19 tokens per second)
llama_print_timings:        eval time =   524.64 ms /     6 runs   (   87.44 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  1126.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Flying stars? Why do you keep mentioning flying stars? They are just dust particles floating around in the universe.”



llama_print_timings:        load time =   595.40 ms
llama_print_timings:      sample time =    11.44 ms /    26 runs   (    0.44 ms per token,  2271.93 tokens per second)
llama_print_timings: prompt eval time =   595.36 ms /    56 tokens (   10.63 ms per token,    94.06 tokens per second)
llama_print_timings:        eval time =  2084.27 ms /    25 runs   (   83.37 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  2729.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, I don't think it matters much. And how do you explain the sun going dark during Confucius's time?”



llama_print_timings:        load time =   599.45 ms
llama_print_timings:      sample time =    13.37 ms /    30 runs   (    0.45 ms per token,  2243.66 tokens per second)
llama_print_timings: prompt eval time =   599.42 ms /    53 tokens (   11.31 ms per token,    88.42 tokens per second)
llama_print_timings:        eval time =  2476.88 ms /    29 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  3134.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That was a rare exception, probably a dark blotch or cloud of gas passing through the large hole in the outer shell of the Sun.



llama_print_timings:        load time =   622.46 ms
llama_print_timings:      sample time =    14.77 ms /    30 runs   (    0.49 ms per token,  2030.59 tokens per second)
llama_print_timings: prompt eval time =   622.42 ms /    60 tokens (   10.37 ms per token,    96.40 tokens per second)
llama_print_timings:        eval time =  2509.03 ms /    29 runs   (   86.52 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3197.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He pointed to the large bronze ball and asked, "This must be your cosmic model, right?"



llama_print_timings:        load time =   608.95 ms
llama_print_timings:      sample time =     9.92 ms /    22 runs   (    0.45 ms per token,  2218.41 tokens per second)
llama_print_timings: prompt eval time =   608.91 ms /    50 tokens (   12.18 ms per token,    82.11 tokens per second)
llama_print_timings:        eval time =  1781.50 ms /    21 runs   (   84.83 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  2433.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, I have created the Cosmic Machine. The complicated gears that rotate to simulate the effects of the external fire on the ball are the ones which embody the rule for how flames are distributed and move in the fire sea. This rule is what I have summarized after observing it for hundreds of years.”



llama_print_timings:        load time =   657.71 ms
llama_print_timings:      sample time =    30.05 ms /    67 runs   (    0.45 ms per token,  2229.39 tokens per second)
llama_print_timings: prompt eval time =   657.67 ms /    84 tokens (    7.83 ms per token,   127.72 tokens per second)
llama_print_timings:        eval time =  5466.30 ms /    66 runs   (   82.82 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  6256.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Can this ball expand and contract?



llama_print_timings:        load time =   589.35 ms
llama_print_timings:      sample time =     3.54 ms /     8 runs   (    0.44 ms per token,  2256.70 tokens per second)
llama_print_timings: prompt eval time =   589.31 ms /    38 tokens (   15.51 ms per token,    64.48 tokens per second)
llama_print_timings:        eval time =   620.95 ms /     7 runs   (   88.71 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  1225.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, it is now contracting slowly.



llama_print_timings:        load time =   581.33 ms
llama_print_timings:      sample time =     5.16 ms /    11 runs   (    0.47 ms per token,  2131.78 tokens per second)
llama_print_timings: prompt eval time =   581.29 ms /    40 tokens (   14.53 ms per token,    68.81 tokens per second)
llama_print_timings:        eval time =   846.68 ms /    10 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1450.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mu looked at the railing on the platform as a fixed reference point and saw that what Moxiu said was true.



llama_print_timings:        load time =   591.04 ms
llama_print_timings:      sample time =    12.49 ms /    28 runs   (    0.45 ms per token,  2241.97 tokens per second)
llama_print_timings: prompt eval time =   591.00 ms /    52 tokens (   11.37 ms per token,    87.99 tokens per second)
llama_print_timings:        eval time =  2293.41 ms /    27 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2938.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Does this ball have an inner shell?



llama_print_timings:        load time =   731.80 ms
llama_print_timings:      sample time =     6.47 ms /     9 runs   (    0.72 ms per token,  1391.68 tokens per second)
llama_print_timings: prompt eval time =   731.72 ms /    39 tokens (   18.76 ms per token,    53.30 tokens per second)
llama_print_timings:        eval time =   765.38 ms /     8 runs   (   95.67 ms per token,    10.45 tokens per second)
llama_print_timings:       total time =  1525.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, it is through a complex mechanism of interlocking cages to transmit power from the engine to the driveshaft.



llama_print_timings:        load time =   583.27 ms
llama_print_timings:      sample time =    16.91 ms /    28 runs   (    0.60 ms per token,  1656.12 tokens per second)
llama_print_timings: prompt eval time =   583.23 ms /    42 tokens (   13.89 ms per token,    72.01 tokens per second)
llama_print_timings:        eval time =  2414.73 ms /    27 runs   (   89.43 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  3074.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What a clever machine! " Wang Mo praised sincerely, "But why is there no holes in the shell to project laser beams from inside the inner casing?"



llama_print_timings:        load time =   632.46 ms
llama_print_timings:      sample time =    18.14 ms /    39 runs   (    0.47 ms per token,  2150.42 tokens per second)
llama_print_timings: prompt eval time =   632.42 ms /    68 tokens (    9.30 ms per token,   107.52 tokens per second)
llama_print_timings:        eval time =  3249.62 ms /    38 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3963.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I have installed a light source on the inner wall of the shell, which simulates a large hole. The light source is made from millions of fireflies' fluorescent materials, which emits cold light because the semi-transparent gypsum ball layer in the shell is not very conducive to heat diffusion, so that the recorder can stay in there for long periods without overheating.”



llama_print_timings:        load time =   686.79 ms
llama_print_timings:      sample time =    37.97 ms /    86 runs   (    0.44 ms per token,  2264.89 tokens per second)
llama_print_timings: prompt eval time =   686.75 ms /   113 tokens (    6.08 ms per token,   164.54 tokens per second)
llama_print_timings:        eval time =  7392.84 ms /    85 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  8248.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Is there still someone inside the ball?



llama_print_timings:        load time =   582.83 ms
llama_print_timings:      sample time =     4.08 ms /     9 runs   (    0.45 ms per token,  2208.05 tokens per second)
llama_print_timings: prompt eval time =   582.80 ms /    36 tokens (   16.19 ms per token,    61.77 tokens per second)
llama_print_timings:        eval time =   703.82 ms /     8 runs   (   87.98 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  1304.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, the clerk stands on a rack with a pulley at its bottom. Position隔一段固定的时间后，模拟宇宙 will start to simulate the universe at present state again, it can also simulate the running of the sun. Then that clerk record down everything he sees, and make a very accurate calendar which the past hundreds of civilizations had been waiting for. You are here at exactly the right moment. The simulation just started, a four-year period known as the Constantia would start, and Emperor Hwang has made the announcement according to my prediction, so let's wait for sunrise.



llama_print_timings:        load time =   772.61 ms
llama_print_timings:      sample time =    58.00 ms /   130 runs   (    0.45 ms per token,  2241.34 tokens per second)
llama_print_timings: prompt eval time =   772.56 ms /   154 tokens (    5.02 ms per token,   199.34 tokens per second)
llama_print_timings:        eval time = 11220.65 ms /   129 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 12251.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mozi activated the game interface, slightly speeding up time flow. The red sun rose over the horizon and the frozen lakes on the ground began to thaw. These frozen lakes originally covered by ice have turned into clear, sparkling mirrors, as if the earth was opening its countless eyes. In this high place, we can't see in detail those people gradually gathering along the lake banks, like spring ants coming out of their dens. The world has come to life again.



llama_print_timings:        load time =   759.25 ms
llama_print_timings:      sample time =    77.47 ms /   108 runs   (    0.72 ms per token,  1394.07 tokens per second)
llama_print_timings: prompt eval time =   759.21 ms /   145 tokens (    5.24 ms per token,   190.99 tokens per second)
llama_print_timings:        eval time = 10185.15 ms /   107 runs   (   95.19 ms per token,    10.51 tokens per second)
llama_print_timings:       total time = 11292.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you going to dive into this wonderful life?" Moxie pointed to the revived land below and said, "You have no reason to stay here anymore. The game has come to an end, and I am the victorious one."



llama_print_timings:        load time =   647.04 ms
llama_print_timings:      sample time =    23.14 ms /    52 runs   (    0.45 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   647.00 ms /    87 tokens (    7.44 ms per token,   134.47 tokens per second)
llama_print_timings:        eval time =  4270.65 ms /    51 runs   (   83.74 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  5022.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Your simulated universe is indeed ingenious, but as for the predictions it makes... Oh, may I use your telescope to observe celestial bodies?



llama_print_timings:        load time =   644.85 ms
llama_print_timings:      sample time =    15.07 ms /    33 runs   (    0.46 ms per token,  2190.36 tokens per second)
llama_print_timings: prompt eval time =   644.82 ms /    68 tokens (    9.48 ms per token,   105.46 tokens per second)
llama_print_timings:        eval time =  2733.94 ms /    32 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3448.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sure, go ahead.



llama_print_timings:        load time =   594.22 ms
llama_print_timings:      sample time =     3.09 ms /     7 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   594.18 ms /    48 tokens (   12.38 ms per token,    80.78 tokens per second)
llama_print_timings:        eval time =   507.50 ms /     6 runs   (   84.58 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1114.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He went to the telescope and immediately noticed the problem: “How can I observe the sun?”



llama_print_timings:        load time =   598.98 ms
llama_print_timings:      sample time =     9.51 ms /    21 runs   (    0.45 ms per token,  2209.13 tokens per second)
llama_print_timings: prompt eval time =   598.93 ms /    52 tokens (   11.52 ms per token,    86.82 tokens per second)
llama_print_timings:        eval time =  1690.14 ms /    20 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  2332.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He took a black round slice from a wooden box and said, "Put this filter that has been smoked on to the eye piece of the telescope."



llama_print_timings:        load time =   627.27 ms
llama_print_timings:      sample time =    14.74 ms /    33 runs   (    0.45 ms per token,  2239.26 tokens per second)
llama_print_timings: prompt eval time =   627.23 ms /    67 tokens (    9.36 ms per token,   106.82 tokens per second)
llama_print_timings:        eval time =  2778.62 ms /    32 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  3472.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao looked through the telescope at the sun that had risen to a certain altitude, and marveled at the imagination of Mozi. The sun did indeed seem like an opening into boundless fire sea, a smaller part of a larger existence. But when he took a closer look, however, he found that the sun was somewhat different from the one in his own practical experience; it had a small core, which if treated as an eye, looked more like a pupil with its thick and illusory outer layer. The outer layer seemed to lack substance and be made of gas, and as a result, it scattered the light emitted by the core more than any other part of the sun.



llama_print_timings:        load time =   834.37 ms
llama_print_timings:      sample time =    68.97 ms /   149 runs   (    0.46 ms per token,  2160.42 tokens per second)
llama_print_timings: prompt eval time =   834.32 ms /   191 tokens (    4.37 ms per token,   228.93 tokens per second)
llama_print_timings:        eval time = 13218.19 ms /   148 runs   (   89.31 ms per token,    11.20 tokens per second)
llama_print_timings:       total time = 14364.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The real and detailed image of the sun shocked Wang Meng, who again confirmed that the author of the game was hiding a vast amount of details in the simple surface images. The author may have hidden them as little treasures for players to discover.



llama_print_timings:        load time =   617.05 ms
llama_print_timings:      sample time =    23.29 ms /    52 runs   (    0.45 ms per token,  2233.20 tokens per second)
llama_print_timings: prompt eval time =   617.03 ms /    67 tokens (    9.21 ms per token,   108.58 tokens per second)
llama_print_timings:        eval time =  4371.64 ms /    51 runs   (   85.72 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  5093.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Moxi stood up straight, thinking about the implications of the sun's structure. He immediately became excited and adjusted his telescope to keep watching it until its descent to the horizon. The night came and stars began to appear in densely on the sky. Wang Moxi took off the black filter from his telescope and continued observing the stars, particularly the flying stars. After just a few moments of observation, the sun rose again. He then put back the filter and kept watching the sun…Wang Moxi thus continued to observe stars for more than ten days, enjoying the pleasure of discovering them. In fact, time's passage is beneficial to astronomical observation. Because this speeds up the movements and changes of celestial bodies.



llama_print_timings:        load time =   901.27 ms
llama_print_timings:      sample time =    74.35 ms /   164 runs   (    0.45 ms per token,  2205.78 tokens per second)
llama_print_timings: prompt eval time =   901.22 ms /   211 tokens (    4.27 ms per token,   234.13 tokens per second)
llama_print_timings:        eval time = 14439.99 ms /   163 runs   (   88.59 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 15681.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On the 17th day after the beginning of the Heng-jui, the sun was still not up and the earth was still in darkness. In the gold pyramid, a sea of people filled it to the brim. And thousands of torch lights waved in the cold wind.



llama_print_timings:        load time =   640.20 ms
llama_print_timings:      sample time =    29.90 ms /    64 runs   (    0.47 ms per token,  2140.75 tokens per second)
llama_print_timings: prompt eval time =   640.16 ms /    77 tokens (    8.31 ms per token,   120.28 tokens per second)
llama_print_timings:        eval time =  5476.85 ms /    63 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  6250.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“The sun may not come out, just like the end of civilization 137. ” Wang Mo said to Moxie, who was working on compiling the first-ever ancient calendar in this world.



llama_print_timings:        load time =   689.10 ms
llama_print_timings:      sample time =    20.50 ms /    46 runs   (    0.45 ms per token,  2244.01 tokens per second)
llama_print_timings: prompt eval time =   689.06 ms /   110 tokens (    6.26 ms per token,   159.64 tokens per second)
llama_print_timings:        eval time =  4046.14 ms /    45 runs   (   89.91 ms per token,    11.12 tokens per second)
llama_print_timings:       total time =  4826.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It seems to confirm what Mozi said, that the sunlight appeared at the horizon. There was a burst of cheers among the people by the pyramid.



llama_print_timings:        load time =   610.75 ms
llama_print_timings:      sample time =    16.10 ms /    35 runs   (    0.46 ms per token,  2174.45 tokens per second)
llama_print_timings: prompt eval time =   610.71 ms /    55 tokens (   11.10 ms per token,    90.06 tokens per second)
llama_print_timings:        eval time =  2916.09 ms /    34 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3598.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The silver-white dawn spreads faster than usual, as if to make up for lost time. Soon the dawn covers half of the sky and the sun hasn't yet risen. The ground is just like during the day with an equal amount of light. Wang Moxiong looked in the direction where the light appeared and found that a strong glow came from the horizon, bending upward to form a perfect arc of the visual field; soon he realized that it was not the horizon but the edge of the sun rising. His eyes adapting to the bright light, the horizon is still visible. On the horizon, black smoke is rising and appears very clear against the background of the bright sun. A fast horse flies from the direction of the sunrise with dirt flying up in a clear gray line. The crowd make way for it. Wang Moxiong hears the horse people shouting loudly: “Dehydration! Dehydration!



llama_print_timings:        load time =  1010.19 ms
llama_print_timings:      sample time =    92.60 ms /   205 runs   (    0.45 ms per token,  2213.85 tokens per second)
llama_print_timings: prompt eval time =  1010.14 ms /   262 tokens (    3.86 ms per token,   259.37 tokens per second)
llama_print_timings:        eval time = 18484.34 ms /   204 runs   (   90.61 ms per token,    11.04 tokens per second)
llama_print_timings:       total time = 19921.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You follow the horse, and a great number of cattle and other animals come with you. Their bodies are covered in flames, like a moving blanket over the ground. The Great Day has risen from the horizon to cover half of the sky. The earth seems to be slowly descending along a shimmering wall of bright light. You can clearly see the details on the surface of the sun, such as the surging waves and eddies of the ocean of flame, and black corpuscles like ghosts drifting along irregular routes. The solar winds are like golden sleeves spread out lazily, while the corona seems to be draped in gold.



llama_print_timings:        load time =   764.51 ms
llama_print_timings:      sample time =    64.11 ms /   143 runs   (    0.45 ms per token,  2230.61 tokens per second)
llama_print_timings: prompt eval time =   764.48 ms /   143 tokens (    5.35 ms per token,   187.06 tokens per second)
llama_print_timings:        eval time = 12416.42 ms /   142 runs   (   87.44 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 13472.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On the earth, both dehydrated and non-dehydrated people are burning up like thousands of charcoal blocks thrown into the furnace. Their flames are brighter than those in the furnace but soon they just extinguish out.



llama_print_timings:        load time =   657.91 ms
llama_print_timings:      sample time =    35.24 ms /    58 runs   (    0.61 ms per token,  1645.81 tokens per second)
llama_print_timings: prompt eval time =   657.87 ms /    74 tokens (    8.89 ms per token,   112.48 tokens per second)
llama_print_timings:        eval time =  5198.10 ms /    57 runs   (   91.19 ms per token,    10.97 tokens per second)
llama_print_timings:       total time =  6028.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun quickly rose and soon covered most of the sky. Wang Mu looked up and felt a strange change in his perception: He had been looking upwards, but now he felt like he was being thrown into the bright inferno below!



llama_print_timings:        load time =   764.34 ms
llama_print_timings:      sample time =    40.50 ms /    50 runs   (    0.81 ms per token,  1234.60 tokens per second)
llama_print_timings: prompt eval time =   764.29 ms /    97 tokens (    7.88 ms per token,   126.91 tokens per second)
llama_print_timings:        eval time =  4776.66 ms /    49 runs   (   97.48 ms per token,    10.26 tokens per second)
llama_print_timings:       total time =  5721.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The lakes on the earth began to evaporate, and mushroom-shaped clouds of snowy white water vapour rose up high in the sky and then spread out to cover the ashes of the humans by the lake.



llama_print_timings:        load time =   633.80 ms
llama_print_timings:      sample time =    24.76 ms /    51 runs   (    0.49 ms per token,  2059.52 tokens per second)
llama_print_timings: prompt eval time =   633.75 ms /    68 tokens (    9.32 ms per token,   107.30 tokens per second)
llama_print_timings:        eval time =  4388.64 ms /    50 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  5133.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I created the universe, and it will continue.



llama_print_timings:        load time =   622.87 ms
llama_print_timings:      sample time =     4.95 ms /    11 runs   (    0.45 ms per token,  2221.32 tokens per second)
llama_print_timings: prompt eval time =   622.85 ms /    57 tokens (   10.93 ms per token,    91.52 tokens per second)
llama_print_timings:        eval time =   897.82 ms /    10 runs   (   89.78 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  1542.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao turned to look, he heard a voice coming from the burning moxa. His body was contained in a tall orange-yellow fire column and his skin was fraying and carbonizing but his eyes were still emitting a completely different light with those who were consuming it. He held a fading cotton dust in his hands, which was the first volume of a 10,000-year calendar. Wang Miao also seemed to be burning himself, raising his arms and seeing two torches.



llama_print_timings:        load time =   713.01 ms
llama_print_timings:      sample time =    49.75 ms /   111 runs   (    0.45 ms per token,  2231.02 tokens per second)
llama_print_timings: prompt eval time =   712.97 ms /   128 tokens (    5.57 ms per token,   179.53 tokens per second)
llama_print_timings:        eval time =  9784.84 ms /   110 runs   (   88.95 ms per token,    11.24 tokens per second)
llama_print_timings:       total time = 10723.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"The sun is setting."



llama_print_timings:        load time =   829.30 ms
llama_print_timings:      sample time =     3.07 ms /     7 runs   (    0.44 ms per token,  2281.62 tokens per second)
llama_print_timings: prompt eval time =   829.25 ms /   185 tokens (    4.48 ms per token,   223.09 tokens per second)
llama_print_timings:        eval time =   529.39 ms /     6 runs   (   88.23 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =  1371.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The 141st civilization was destroyed in flames. The civilization evolved to the Eastern Han level.



llama_print_timings:        load time =   596.77 ms
llama_print_timings:      sample time =    11.83 ms /    26 runs   (    0.46 ms per token,  2197.43 tokens per second)
llama_print_timings: prompt eval time =   596.73 ms /    50 tokens (   11.93 ms per token,    83.79 tokens per second)
llama_print_timings:        eval time =  2156.09 ms /    25 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2806.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The seeds of civilization are still alive. They will restart and evolve in the Three-Body Universe again, welcoming your return.



llama_print_timings:        load time =   601.53 ms
llama_print_timings:      sample time =    14.15 ms /    31 runs   (    0.46 ms per token,  2190.81 tokens per second)
llama_print_timings: prompt eval time =   601.49 ms /    56 tokens (   10.74 ms per token,    93.10 tokens per second)
llama_print_timings:        eval time =  2607.48 ms /    30 runs   (   86.92 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  3272.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mu took off her VR goggles, and the shock of the mental realm was slightly alleviated. Suddenly, he felt that familiar sense once more: Three Body is deliberately concealed as a false reality, but its depths are true and vast; whereas the world in front of him seemed like an elaborate mural, full of complex details yet thin in substance and depth.



llama_print_timings:        load time =   671.59 ms
llama_print_timings:      sample time =    39.27 ms /    87 runs   (    0.45 ms per token,  2215.71 tokens per second)
llama_print_timings: prompt eval time =   671.54 ms /    95 tokens (    7.07 ms per token,   141.47 tokens per second)
llama_print_timings:        eval time =  7597.97 ms /    86 runs   (   88.35 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  8449.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He went to work at the Nano Center on the second day, except for the small turmoil caused by his absence yesterday. Everything went as usual. He discovered that working is an effective form of sedation and indulged himself in it to avoid those nightmarish troubles temporarily. All day long, he deliberately kept himself busy to avoid leaving the laboratory until dusk.



llama_print_timings:        load time =   672.97 ms
llama_print_timings:      sample time =    36.07 ms /    80 runs   (    0.45 ms per token,  2218.16 tokens per second)
llama_print_timings: prompt eval time =   672.94 ms /    95 tokens (    7.08 ms per token,   141.17 tokens per second)
llama_print_timings:        eval time =  6843.28 ms /    79 runs   (   86.62 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  7682.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After he stepped out of the building of the Nanocenter, Wang Miao was caught up in the same nightmare that he had felt before. The night sky was full of stars like a magnifying glass that covered everything and made him feel like an exposed bug hiding nowhere. He had to find something else to do for himself so he could visit Yang Dong's mother again. In his car, he drove to Ye Wenjie's house.



llama_print_timings:        load time =   706.10 ms
llama_print_timings:      sample time =    44.62 ms /    97 runs   (    0.46 ms per token,  2173.67 tokens per second)
llama_print_timings: prompt eval time =   706.08 ms /   108 tokens (    6.54 ms per token,   152.96 tokens per second)
llama_print_timings:        eval time =  8577.38 ms /    96 runs   (   89.35 ms per token,    11.19 tokens per second)
llama_print_timings:       total time =  9489.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mother Yang was alone at home when Wang Miao entered. He noticed that she had old and farsighted eyes while reading books, so he saw her glasses only when watching close objects. Mother Yang was glad to see Wang Miao, saying his complexion looked much better than last time.



llama_print_timings:        load time =   661.26 ms
llama_print_timings:      sample time =    28.81 ms /    63 runs   (    0.46 ms per token,  2186.51 tokens per second)
llama_print_timings: prompt eval time =   661.22 ms /    87 tokens (    7.60 ms per token,   131.57 tokens per second)
llama_print_timings:        eval time =  5387.55 ms /    62 runs   (   86.90 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  6181.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It’s all because of your ginseng.” Wang Mo smiled and said.



llama_print_timings:        load time =   587.56 ms
llama_print_timings:      sample time =     8.93 ms /    19 runs   (    0.47 ms per token,  2127.18 tokens per second)
llama_print_timings: prompt eval time =   587.52 ms /    45 tokens (   13.06 ms per token,    76.59 tokens per second)
llama_print_timings:        eval time =  1546.86 ms /    18 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2173.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mother Yang shook her head. “That thing is not very good, back then there were lots of wild ginseng around the base that we could collect freely. I used to collect one so long… what happened? Now it’s said that no one lives there anymore.” She sighed sadly, “Lately, I have been reminiscing about the past more and more.”



llama_print_timings:        load time =   663.35 ms
llama_print_timings:      sample time =    36.34 ms /    82 runs   (    0.44 ms per token,  2256.47 tokens per second)
llama_print_timings: prompt eval time =   663.31 ms /    88 tokens (    7.54 ms per token,   132.67 tokens per second)
llama_print_timings:        eval time =  7063.91 ms /    81 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  7892.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I heard that you had a lot of hardships during the Cultural Revolution.



llama_print_timings:        load time =   587.04 ms
llama_print_timings:      sample time =     7.22 ms /    16 runs   (    0.45 ms per token,  2215.15 tokens per second)
llama_print_timings: prompt eval time =   587.00 ms /    43 tokens (   13.65 ms per token,    73.25 tokens per second)
llama_print_timings:        eval time =  1287.44 ms /    15 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1906.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Listen to what Xiaosha has to say?” Yang mother gently waved her hand, as if wiping away a spider web in front of her, “It’s over. Nothing remains but a blur from the past……Xiao Sha called me yesterday, urgently but I couldn’t understand what she was saying; only that you must have encountered something. Xiaohuang, actually, as an elder person, you will realize that those big events you thought would collapse the sky were really not that serious.”



llama_print_timings:        load time =   689.57 ms
llama_print_timings:      sample time =    52.16 ms /   116 runs   (    0.45 ms per token,  2223.97 tokens per second)
llama_print_timings: prompt eval time =   689.52 ms /   113 tokens (    6.10 ms per token,   163.88 tokens per second)
llama_print_timings:        eval time = 10063.46 ms /   115 runs   (   87.51 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 10992.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thank you, Wang Miao said. He felt a rare warmth. Now, this old man with a calm and serene attitude, and the naive young history, had become the two pillars of his fragile mental world.



llama_print_timings:        load time =   652.49 ms
llama_print_timings:      sample time =    22.00 ms /    49 runs   (    0.45 ms per token,  2226.97 tokens per second)
llama_print_timings: prompt eval time =   652.47 ms /    83 tokens (    7.86 ms per token,   127.21 tokens per second)
llama_print_timings:        eval time =  4198.65 ms /    48 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  4951.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“When I was in the Cultural Revolution, ” she continued, “I am lucky to have stumbled upon a place where I could survive.”



llama_print_timings:        load time =   609.57 ms
llama_print_timings:      sample time =    14.17 ms /    32 runs   (    0.44 ms per token,  2258.29 tokens per second)
llama_print_timings: prompt eval time =   609.53 ms /    60 tokens (   10.16 ms per token,    98.44 tokens per second)
llama_print_timings:        eval time =  2650.39 ms /    31 runs   (   85.50 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3324.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you talking about Red Bank base?"



llama_print_timings:        load time =   587.55 ms
llama_print_timings:      sample time =     4.44 ms /    10 runs   (    0.44 ms per token,  2250.73 tokens per second)
llama_print_timings: prompt eval time =   587.50 ms /    39 tokens (   15.06 ms per token,    66.38 tokens per second)
llama_print_timings:        eval time =   769.10 ms /     9 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1376.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mother Yang nodded.



llama_print_timings:        load time =   576.20 ms
llama_print_timings:      sample time =     3.12 ms /     7 runs   (    0.45 ms per token,  2245.03 tokens per second)
llama_print_timings: prompt eval time =   576.16 ms /    35 tokens (   16.46 ms per token,    60.75 tokens per second)
llama_print_timings:        eval time =   489.16 ms /     6 runs   (   81.53 ms per token,    12.27 tokens per second)
llama_print_timings:       total time =  1079.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's an incredible thing. I thought it was just a legend at first.



llama_print_timings:        load time =   589.27 ms
llama_print_timings:      sample time =     8.42 ms /    19 runs   (    0.44 ms per token,  2257.87 tokens per second)
llama_print_timings: prompt eval time =   589.23 ms /    46 tokens (   12.81 ms per token,    78.07 tokens per second)
llama_print_timings:        eval time =  1474.01 ms /    18 runs   (   81.89 ms per token,    12.21 tokens per second)
llama_print_timings:       total time =  2102.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's not a fairy tale, if you want to know, I will tell you the things that happened to me."



llama_print_timings:        load time =   590.86 ms
llama_print_timings:      sample time =    12.80 ms /    28 runs   (    0.46 ms per token,  2187.33 tokens per second)
llama_print_timings: prompt eval time =   590.84 ms /    46 tokens (   12.84 ms per token,    77.85 tokens per second)
llama_print_timings:        eval time =  2337.41 ms /    27 runs   (   86.57 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  2987.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Ms. Yang, I was just curious and it’s okay if you don’t want to discuss this with me.”



llama_print_timings:        load time =   604.11 ms
llama_print_timings:      sample time =    12.35 ms /    28 runs   (    0.44 ms per token,  2267.39 tokens per second)
llama_print_timings: prompt eval time =   604.06 ms /    55 tokens (   10.98 ms per token,    91.05 tokens per second)
llama_print_timings:        eval time =  2340.03 ms /    27 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  3000.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, nothing much. Just thought I'd find someone to chat with since lately I've been wanting to talk to someone too.



llama_print_timings:        load time =   596.73 ms
llama_print_timings:      sample time =    13.35 ms /    30 runs   (    0.45 ms per token,  2247.02 tokens per second)
llama_print_timings: prompt eval time =   596.71 ms /    55 tokens (   10.85 ms per token,    92.17 tokens per second)
llama_print_timings:        eval time =  2532.56 ms /    29 runs   (   87.33 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3191.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You can go to the senior citizen's room and sit there. It is better to move around more than be lonely.



llama_print_timings:        load time =   594.82 ms
llama_print_timings:      sample time =    12.57 ms /    28 runs   (    0.45 ms per token,  2227.70 tokens per second)
llama_print_timings: prompt eval time =   594.78 ms /    51 tokens (   11.66 ms per token,    85.75 tokens per second)
llama_print_timings:        eval time =  2295.65 ms /    27 runs   (   85.02 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2947.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Those retired guys are mostly my colleagues in university, but they don't mix with me. Everyone likes to reminisce about the past, but everyone hopes people listen to him, and gets tired of what other people say.”



llama_print_timings:        load time =   659.61 ms
llama_print_timings:      sample time =    23.05 ms /    52 runs   (    0.44 ms per token,  2256.16 tokens per second)
llama_print_timings: prompt eval time =   659.57 ms /    87 tokens (    7.58 ms per token,   131.90 tokens per second)
llama_print_timings:        eval time =  4523.76 ms /    51 runs   (   88.70 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  5286.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's still a bit inconvenient, isn't it?”



llama_print_timings:        load time =   584.38 ms
llama_print_timings:      sample time =     7.45 ms /    17 runs   (    0.44 ms per token,  2280.96 tokens per second)
llama_print_timings: prompt eval time =   584.35 ms /    39 tokens (   14.98 ms per token,    66.74 tokens per second)
llama_print_timings:        eval time =  1404.91 ms /    16 runs   (   87.81 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  2022.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, it is still a secret. However, many people who have experienced it say so after the book was published. The person who wrote the book is very irresponsible. Not only did his intentions come first, but also many of the contents in the book were greatly exaggerated.”



llama_print_timings:        load time =   663.35 ms
llama_print_timings:      sample time =    28.21 ms /    62 runs   (    0.46 ms per token,  2197.49 tokens per second)
llama_print_timings: prompt eval time =   663.28 ms /    89 tokens (    7.45 ms per token,   134.18 tokens per second)
llama_print_timings:        eval time =  5159.69 ms /    61 runs   (   84.59 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  5952.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, Yang's mother told Wang Miao about the past that had not been buried.



llama_print_timings:        load time =   586.23 ms
llama_print_timings:      sample time =     8.94 ms /    20 runs   (    0.45 ms per token,  2238.14 tokens per second)
llama_print_timings: prompt eval time =   586.18 ms /    46 tokens (   12.74 ms per token,    78.47 tokens per second)
llama_print_timings:        eval time =  1594.97 ms /    19 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2223.01 ms


translated 99.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

When she first entered Red Bank Base, Ye Wenjie was not assigned any fixed job and only carried out some technical tasks under the supervision of a security member.



llama_print_timings:        load time =   604.72 ms
llama_print_timings:      sample time =    16.67 ms /    36 runs   (    0.46 ms per token,  2159.18 tokens per second)
llama_print_timings: prompt eval time =   604.69 ms /    60 tokens (   10.08 ms per token,    99.22 tokens per second)
llama_print_timings:        eval time =  3051.58 ms /    35 runs   (   87.19 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  3729.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Already in her second year of college, Ye Wenjie became familiar with her future research mentor. He told her that studying astrophysics without a good understanding of experimental technology and observing ability was futile. At least that is how it worked in China. She tended to agree with his opinion as she thought her father was too theoretical. Her professor was one of the pioneers of radio astronomy, and under his influence, Ye Wenjie developed a strong interest in radio astronomy, learning both the electronic engineering and computer engineering fields (note: in most institutions at that time, these two disciplines were combined). During her time as a graduate student, she worked alongside her professor to debug China's first small telescope for radio astronomy, accumulating valuable experience in this field. She never would have guessed that her knowledge of experimental technology and observing capabilities would come into play at the Red Bamboo Base.



llama_print_timings:        load time =   885.96 ms
llama_print_timings:      sample time =    88.20 ms /   197 runs   (    0.45 ms per token,  2233.51 tokens per second)
llama_print_timings: prompt eval time =   885.91 ms /   210 tokens (    4.22 ms per token,   237.04 tokens per second)
llama_print_timings:        eval time = 17624.62 ms /   196 runs   (   89.92 ms per token,    11.12 tokens per second)
llama_print_timings:       total time = 18905.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie initially worked in the launch department as a device maintenance and inspection technician. Soon, she became an indispensable technical backbone of the launch department, which caused her some confusion. She was the only non-military personnel who wore civilian clothes at the base, due to her identity. As a result, everyone around her kept distance from her and she had to fully immerse herself in her work in order to relieve her loneliness. But this didn't completely explain why she must easily replace technical workers with no engineering background or experience.



llama_print_timings:        load time =   717.31 ms
llama_print_timings:      sample time =    56.18 ms /   124 runs   (    0.45 ms per token,  2207.15 tokens per second)
llama_print_timings: prompt eval time =   717.28 ms /   124 tokens (    5.78 ms per token,   172.88 tokens per second)
llama_print_timings:        eval time = 10608.45 ms /   123 runs   (   86.25 ms per token,    11.59 tokens per second)
llama_print_timings:       total time = 11579.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She soon discovered some reasons. Contrary to what was visible on the surface, the base was manned by the best technical officers of the 20th Group Army, outstanding computer and electronic engineers whose talents she could never hope to catch up with in a lifetime. But due to its remote location and substandard conditions, there were no opportunities for technological advancement. Most of the people working there reduced their abilities to an unnecessary level while still expecting to make good work, hoping that their superiors would think: "This person tried their best but has such little ability and aptitude"



llama_print_timings:        load time =   842.52 ms
llama_print_timings:      sample time =    56.25 ms /   126 runs   (    0.45 ms per token,  2239.96 tokens per second)
llama_print_timings: prompt eval time =   842.48 ms /   196 tokens (    4.30 ms per token,   232.65 tokens per second)
llama_print_timings:        eval time = 11033.30 ms /   125 runs   (   88.27 ms per token,    11.33 tokens per second)
llama_print_timings:       total time = 12126.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving aside the fact that she had become a major contributor to base technology, the other reason for her success was that Red Base didn't have anything that looked truly advanced.



llama_print_timings:        load time =   652.27 ms
llama_print_timings:      sample time =    16.80 ms /    38 runs   (    0.44 ms per token,  2261.64 tokens per second)
llama_print_timings: prompt eval time =   652.23 ms /    87 tokens (    7.50 ms per token,   133.39 tokens per second)
llama_print_timings:        eval time =  3131.38 ms /    37 runs   (   84.63 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  3859.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After entering the base, Ye Wenjie worked mainly in the launching department. With time passing, the restrictions on her were gradually relaxed, and the monitoring officer who was accompanying her was dismissed, allowing her to access the majority of Red Base's structure. She could also read the corresponding technical materials. Of course, there are still some things that she is not allowed to access, such as computer control parts. These parts have been strictly prohibited from being approached by her. But Ye Wenjie later found that these parts were not as important as they had previously imagined. For example, the computer part of the launching department is three machines behind DJS130. Magnetic hearts are used for storing data and paper tape input devices are used. The longest uninterrupted hour count is under 15 hours. She also saw the aiming section of the Red Base system, which has a very low precision and may not be better than the sighting equipment of an artillery piece.



llama_print_timings:        load time =   853.58 ms
llama_print_timings:      sample time =    94.80 ms /   213 runs   (    0.45 ms per token,  2246.88 tokens per second)
llama_print_timings: prompt eval time =   853.53 ms /   192 tokens (    4.45 ms per token,   224.95 tokens per second)
llama_print_timings:        eval time = 18636.84 ms /   212 runs   (   87.91 ms per token,    11.38 tokens per second)
llama_print_timings:       total time = 19923.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On this day, Lai Bingsheng once again找到了 Ye Wenjie for a talk. In her eyes, Yang Hewning and Lai Zhicheng changed positions. In that era, as the highest technology leader, Yang Hewning's political status was not very high, leaving technology had no authority at all. Towards his subordinates, even to the guards on duty, he had to be careful and polite otherwise it would be an attitude problem in terms of ideology combined with politics and thought reformation. Therefore, when something went wrong in her work, Ye Wenjie became his only outlet to vent his frustration. But as Ye Wenjie become increasingly important in technical terms, Lai Bingsheng gradually changed his initial gruffness and coldness towards her and became more cordial.



llama_print_timings:        load time =   807.11 ms
llama_print_timings:      sample time =    79.59 ms /   178 runs   (    0.45 ms per token,  2236.43 tokens per second)
llama_print_timings: prompt eval time =   807.08 ms /   165 tokens (    4.89 ms per token,   204.44 tokens per second)
llama_print_timings:        eval time = 15709.43 ms /   177 runs   (   88.75 ms per token,    11.27 tokens per second)
llama_print_timings:       total time = 16880.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Now, you have a good understanding of the launch system, which is the attack component of Red Bank's system and its main part. What is your overall impression of this system?” Lei政委 asked, sitting on the cliff face at the base where they had arrived atop a high peak in their helicopter. The straight, sheer wall seemed to have no end in sight, but it was one of her favorites here now since she had grown accustomed to it.



llama_print_timings:        load time =   712.19 ms
llama_print_timings:      sample time =    45.33 ms /   102 runs   (    0.44 ms per token,  2250.12 tokens per second)
llama_print_timings: prompt eval time =   712.16 ms /   118 tokens (    6.04 ms per token,   165.69 tokens per second)
llama_print_timings:        eval time =  8789.30 ms /   101 runs   (   87.02 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  9704.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei政委's question, Ye Wenjie was somewhat uncomfortable. She only had to take care of equipment maintenance and repair, she did not know about the entire situation of the red sea, including its functions, attack targets, etc., and she could not attend any launches. She thought for a moment but couldn't find the right words.



llama_print_timings:        load time =   662.57 ms
llama_print_timings:      sample time =    35.38 ms /    77 runs   (    0.46 ms per token,  2176.62 tokens per second)
llama_print_timings: prompt eval time =   662.53 ms /    96 tokens (    6.90 ms per token,   144.90 tokens per second)
llama_print_timings:        eval time =  6518.85 ms /    76 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  7342.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Go ahead and say it,” said Lei政委 as he picked up a grass blade and began to play with it. “No problem.”



llama_print_timings:        load time =   599.02 ms
llama_print_timings:      sample time =    13.73 ms /    31 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =   598.98 ms /    50 tokens (   11.98 ms per token,    83.47 tokens per second)
llama_print_timings:        eval time =  2653.96 ms /    30 runs   (   88.47 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  3313.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's just a radio transmitter, after all.



llama_print_timings:        load time =   597.21 ms
llama_print_timings:      sample time =     5.99 ms /    13 runs   (    0.46 ms per token,  2172.10 tokens per second)
llama_print_timings: prompt eval time =   597.17 ms /    42 tokens (   14.22 ms per token,    70.33 tokens per second)
llama_print_timings:        eval time =  1052.58 ms /    12 runs   (   87.72 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  1676.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, it is a radio transmitter.” Lei政委 satisfied nodded. “Do you know microwave ovens?”



llama_print_timings:        load time =   619.69 ms
llama_print_timings:      sample time =    12.38 ms /    28 runs   (    0.44 ms per token,  2262.63 tokens per second)
llama_print_timings: prompt eval time =   619.65 ms /    62 tokens (    9.99 ms per token,   100.06 tokens per second)
llama_print_timings:        eval time =  2358.29 ms /    27 runs   (   87.34 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3034.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The luxury of the West, a microwave absorbed by food heated by heat effect produced. We imported a microwave oven from abroad in the research instit te where I used to work. After work, we also used it to bake dumplings and potatoes, which was very interesting: the inside is hot while the outside is still cold.” “Lei政委说着站了起来，来回踱步，他走得如此靠近悬崖边缘，让叶文洁十分紧张，“Red Bottom System is a microwave oven. Its heating target is the spacecraft of the enemy in space. Just reach an energy radiation of 0.1 to 1 watt per square centimeter, and the satellite communication, radar, navigation and other systems can be made to fail or burn the micro-wave electronic devices.”



llama_print_timings:        load time =   830.32 ms
llama_print_timings:      sample time =    80.79 ms /   181 runs   (    0.45 ms per token,  2240.29 tokens per second)
llama_print_timings: prompt eval time =   830.28 ms /   181 tokens (    4.59 ms per token,   218.00 tokens per second)
llama_print_timings:        eval time = 15935.90 ms /   180 runs   (   88.53 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 17135.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Red Bank system is not just a common radio, but it is not equivalent to its power. The most surprising thing for her is that its emission power is twenty-five gigawatts! This is far greater than the emission power of all communications and even much greater than the emission power of all radars. Red Bank System receives power from a large group of capacitors, because of which its transmission circuit has significant differences compared with the conventional ones. Lei Wenjie now understood the purpose of such a huge emission power, but immediately thought about a problem.



llama_print_timings:        load time =   761.31 ms
llama_print_timings:      sample time =    52.62 ms /   117 runs   (    0.45 ms per token,  2223.28 tokens per second)
llama_print_timings: prompt eval time =   761.28 ms /   131 tokens (    5.81 ms per token,   172.08 tokens per second)
llama_print_timings:        eval time = 10021.74 ms /   116 runs   (   86.39 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11022.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The radio waves emitted by the system seem to be modulated.



llama_print_timings:        load time =   588.11 ms
llama_print_timings:      sample time =     6.58 ms /    15 runs   (    0.44 ms per token,  2281.02 tokens per second)
llama_print_timings: prompt eval time =   588.06 ms /    43 tokens (   13.68 ms per token,    73.12 tokens per second)
llama_print_timings:        eval time =  1189.12 ms /    14 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1807.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, but this modulation is completely different from conventional radio communication and does not involve loading information. It breaks through the enemy's shielding protection with changing frequency and amplitude. Of course, it's still in experimental stages.



llama_print_timings:        load time =   671.75 ms
llama_print_timings:      sample time =    21.97 ms /    48 runs   (    0.46 ms per token,  2184.50 tokens per second)
llama_print_timings: prompt eval time =   671.71 ms /    72 tokens (    9.33 ms per token,   107.19 tokens per second)
llama_print_timings:        eval time =  4186.53 ms /    47 runs   (   89.08 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  4956.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie nodded, and all the other questions in her mind have now been answered.



llama_print_timings:        load time =   595.45 ms
llama_print_timings:      sample time =    10.61 ms /    23 runs   (    0.46 ms per token,  2167.56 tokens per second)
llama_print_timings: prompt eval time =   595.41 ms /    46 tokens (   12.94 ms per token,    77.26 tokens per second)
llama_print_timings:        eval time =  1887.91 ms /    22 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2530.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Recently, two target satellites were launched from Jiuquan and the attack test of Red Bank system carried out successfully. The target was destroyed completely, and the internal temperature of the satellite reached nearly a thousand degrees Celsius. The instruments and photography equipment on board were all destroyed. In the future, the Red Bank system can effectively attack enemy communication and reconnaissance satellites. KH8 and KH9, the American mainstay recon satellites, as well as lower orbit Suhmer reconnaissance satellites are especially vulnerable. Additionally, necessary capabilities to destroy the Soviet Progress space station and the American Space Lab planned for next year can also be demonstrated.”



llama_print_timings:        load time =   774.17 ms
llama_print_timings:      sample time =    64.20 ms /   144 runs   (    0.45 ms per token,  2242.82 tokens per second)
llama_print_timings: prompt eval time =   774.13 ms /   151 tokens (    5.13 ms per token,   195.06 tokens per second)
llama_print_timings:        eval time = 12524.90 ms /   143 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 13589.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Commander, what are you saying to her?” someone asked behind Ye Wenjie. She turned around and saw Yang Weining looking at Lei Jicheng with a stern expression in his eyes.



llama_print_timings:        load time =   608.05 ms
llama_print_timings:      sample time =    19.86 ms /    44 runs   (    0.45 ms per token,  2215.84 tokens per second)
llama_print_timings: prompt eval time =   608.03 ms /    64 tokens (    9.50 ms per token,   105.26 tokens per second)
llama_print_timings:        eval time =  3590.29 ms /    43 runs   (   83.50 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  4287.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei政委 dropped a sentence and turned to go. Yang Weining looked at Yao Wenjie with no words, followed him out as well, leaving Lei in charge of their own thing.



llama_print_timings:        load time =   620.76 ms
llama_print_timings:      sample time =    19.16 ms /    43 runs   (    0.45 ms per token,  2244.02 tokens per second)
llama_print_timings: prompt eval time =   620.72 ms /    70 tokens (    8.87 ms per token,   112.77 tokens per second)
llama_print_timings:        eval time =  3593.58 ms /    42 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4301.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He brought me into the base.” Lei Wenjie thought with a sense of sorrow and concern for Lei Zhicheng. In the base, Lei Zhicheng has more power than Yang Weining. He decides the important affairs finally. But just now he hurriedly left with a serious look on his face, it is clear that he thinks in front of general engineer that something wrong happened, this may cause what kind of consequence for him? Watching Re Lei’s imposing back figure, Ye Wenjie felt a wave of gratitude in her heart toward him. For her, trust is an luxury good. Compared to Yang Weining, Lei Zhicheng is the real soldier in Ye Wenjie’s eyes, he has the qualities of a soldier- honest and straightforward, while Yang Weining is just an typical intellectual of this era who is timid and prudent. Although Ye Wenjie understands him, but their distance will become even more far apart with each passing day.



llama_print_timings:        load time =   925.44 ms
llama_print_timings:      sample time =   119.61 ms /   218 runs   (    0.55 ms per token,  1822.65 tokens per second)
llama_print_timings: prompt eval time =   925.39 ms /   233 tokens (    3.97 ms per token,   251.78 tokens per second)
llama_print_timings:        eval time = 19708.93 ms /   217 runs   (   90.82 ms per token,    11.01 tokens per second)
llama_print_timings:       total time = 21184.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The next day, Ye Wenjie was transferred from the launching department to the listening department. She had thought this was related to yesterday's incident, but when she got to the listening department, she realized that it resembled even more than Red Banks's core department. Although there were some overlaps in equipment systems between the two departments, like sharing the same antenna, the technology level of the listening department was a higher one than the launching department.



llama_print_timings:        load time =   701.52 ms
llama_print_timings:      sample time =    45.43 ms /   100 runs   (    0.45 ms per token,  2201.43 tokens per second)
llama_print_timings: prompt eval time =   701.49 ms /   114 tokens (    6.15 ms per token,   162.51 tokens per second)
llama_print_timings:        eval time =  8497.38 ms /    99 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  9407.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The listening station has an advanced electromagnetic wave sensitive receiver system, which amplifies the signals received from giant antennas using ruby-induced microwave gyroscope – for suppressing internal interference, it soaks the core of the receiver in liquid nitrogen at 269℃. This makes the system highly sensitive and can receive very weak signals. Ye Wenjie could not help but wonder how wonderful it would be to use this equipment to undertake astronomy research.



llama_print_timings:        load time =   759.68 ms
llama_print_timings:      sample time =    46.73 ms /   105 runs   (    0.45 ms per token,  2247.05 tokens per second)
llama_print_timings: prompt eval time =   759.63 ms /   144 tokens (    5.28 ms per token,   189.56 tokens per second)
llama_print_timings:        eval time =  9117.82 ms /   104 runs   (   87.67 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 10091.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the computer system of the monitor department was far more complex and sophisticated than that of the launching department, Ye Wenjie was surprised to find a row of cathode ray tube display screens, displaying code and running in real time. She had heard about this before from her colleagues, but now she actually saw it with her own eyes: programs could be edited and debugged directly on the screen using a keyboard. This made her realize that software technologies had advanced by leaps and bounds since she first started learning computers at university. In particular, she learned that there was a language called FORTRAN (note: one of the first generational computer programming languages) that allowed programmers to write programs using nearly natural language instead of machine code. Moreover, there was this thing called a database that allowed programmers to manipulate vast amounts of data as desired.



llama_print_timings:        load time =   885.15 ms
llama_print_timings:      sample time =    80.58 ms /   180 runs   (    0.45 ms per token,  2233.94 tokens per second)
llama_print_timings: prompt eval time =   885.10 ms /   214 tokens (    4.14 ms per token,   241.78 tokens per second)
llama_print_timings:        eval time = 15936.84 ms /   179 runs   (   89.03 ms per token,    11.23 tokens per second)
llama_print_timings:       total time = 17193.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Two days later, Lai Wenjie was asked to talk to Lei Bingmin in the listening section's computer room. In front of the green-lit computer monitors on this row, Yang Wening was sitting nearby but neither wanted to join their conversation and leave yet. It made her feel uncomfortable.



llama_print_timings:        load time =   683.54 ms
llama_print_timings:      sample time =    31.33 ms /    70 runs   (    0.45 ms per token,  2234.07 tokens per second)
llama_print_timings: prompt eval time =   683.51 ms /    95 tokens (    7.19 ms per token,   138.99 tokens per second)
llama_print_timings:        eval time =  5877.06 ms /    69 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6704.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lieutenant Ma said, “Now I’m going to explain the operation of the listening branch. In short, it is to monitor enemy activities in space including eavesdropping on communications between their satellites and ground control stations, tracking the orbits of the enemy satellites with my navigation branch, and providing information for Red Bunker’s operations.”



llama_print_timings:        load time =   686.04 ms
llama_print_timings:      sample time =    35.48 ms /    78 runs   (    0.45 ms per token,  2198.55 tokens per second)
llama_print_timings: prompt eval time =   685.99 ms /   107 tokens (    6.41 ms per token,   155.98 tokens per second)
llama_print_timings:        eval time =  6621.34 ms /    77 runs   (   85.99 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  7471.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Wenning butted in, saying, “Lei政委, I don’t think you are doing this right. It is not necessary to say these words.”



llama_print_timings:        load time =   593.33 ms
llama_print_timings:      sample time =    16.20 ms /    37 runs   (    0.44 ms per token,  2283.53 tokens per second)
llama_print_timings: prompt eval time =   593.29 ms /    52 tokens (   11.41 ms per token,    87.65 tokens per second)
llama_print_timings:        eval time =  3091.23 ms /    36 runs   (   85.87 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3757.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie looked at Yang Weining, who was sitting nearby. With a nervous tone, she said, “Commander. If it is not suitable for me to ask you anything about the plan, please let me know.”



llama_print_timings:        load time =   598.93 ms
llama_print_timings:      sample time =    23.52 ms /    51 runs   (    0.46 ms per token,  2168.00 tokens per second)
llama_print_timings: prompt eval time =   598.89 ms /    55 tokens (   10.89 ms per token,    91.84 tokens per second)
llama_print_timings:        eval time =  4292.45 ms /    50 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  5001.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, no,” said Liang政委 as he raised his hand to stop Yvonne from going on. Turning to Yang Huiyin, he continued, “June-Way, the words still stand, in order to work out Ye Wenjie’s full potential and make sure she understands what she needs to know.”



llama_print_timings:        load time =   671.26 ms
llama_print_timings:      sample time =    33.80 ms /    74 runs   (    0.46 ms per token,  2189.61 tokens per second)
llama_print_timings: prompt eval time =   671.22 ms /    82 tokens (    8.19 ms per token,   122.17 tokens per second)
llama_print_timings:        eval time =  6239.22 ms /    73 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  7066.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining stood up, “I need to report to my supervisor!”



llama_print_timings:        load time =   576.51 ms
llama_print_timings:      sample time =     7.91 ms /    18 runs   (    0.44 ms per token,  2275.60 tokens per second)
llama_print_timings: prompt eval time =   576.48 ms /    43 tokens (   13.41 ms per token,    74.59 tokens per second)
llama_print_timings:        eval time =  1446.29 ms /    17 runs   (   85.08 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2057.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is certainly your right, but please don't worry, Yang general. I will be fully responsible for this."



llama_print_timings:        load time =   594.38 ms
llama_print_timings:      sample time =    12.21 ms /    26 runs   (    0.47 ms per token,  2129.40 tokens per second)
llama_print_timings: prompt eval time =   594.34 ms /    57 tokens (   10.43 ms per token,    95.90 tokens per second)
llama_print_timings:        eval time =  2155.53 ms /    25 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2806.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining rose up with a slumped back.



llama_print_timings:        load time =   576.98 ms
llama_print_timings:      sample time =     6.32 ms /    14 runs   (    0.45 ms per token,  2214.14 tokens per second)
llama_print_timings: prompt eval time =   576.94 ms /    39 tokens (   14.79 ms per token,    67.60 tokens per second)
llama_print_timings:        eval time =  1112.23 ms /    13 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1717.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Don't worry, Yang is like this. He is very cautious and sometimes he can't work freely.” Re Lei smiled and shook his head. “To bring you here, the initial purpose was simple: The Red Beach Listening System is frequently influenced by electromagnetic radiation produced by solar flares and sunspots activities. We unexpectedly saw your paper on researching solar activities in China, which showed the most accurate prediction model. So, we decided to let you help solve this problem. But after you came here, you showed very strong working ability in technical work. Therefore, we decided that you would take on more and more important works. I am so planning: Let you first go to the launching section, then the listening section, have a comprehensive understanding of the red base system, and what kind of work you will be assigned, we can study later. Of course, you also saw that this has resistance, but I trust you personally. Ye Wenjie, here, I need to clarify something: Until now, it is just


llama_print_timings:        load time =  1049.59 ms
llama_print_timings:      sample time =   108.33 ms /   243 runs   (    0.45 ms per token,  2243.17 tokens per second)
llama_print_timings: prompt eval time =  1049.54 ms /   290 tokens (    3.62 ms per token,   276.31 tokens per second)
llama_print_timings:        eval time = 21359.11 ms /   242 runs   (   88.26 ms per token,    11.33 tokens per second)
llama_print_timings:       total time = 22911.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

LeiMingJun said, "I must leave now. Goodbye!" and stood up and walked away with his military steps. YueWenJie's eyes brimmed over with tears. Through the tears, the code on the screen became a swarm of flames. It was the first time she had wept since her father died.



llama_print_timings:        load time =   640.53 ms
llama_print_timings:      sample time =    34.84 ms /    76 runs   (    0.46 ms per token,  2181.59 tokens per second)
llama_print_timings: prompt eval time =   640.49 ms /    77 tokens (    8.32 ms per token,   120.22 tokens per second)
llama_print_timings:        eval time =  6296.02 ms /    75 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  7097.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As Yvonne Leung got familiar with the job of the listening department, she soon realized that it was not as smooth as it had been in the launching division. She was way behind on her computer knowledge and had to start learning most software technologies from scratch. Despite Rear Major Ren's trust, she was restricted from accessing databases but could view source code.



llama_print_timings:        load time =   661.78 ms
llama_print_timings:      sample time =    34.16 ms /    77 runs   (    0.44 ms per token,  2254.03 tokens per second)
llama_print_timings: prompt eval time =   661.75 ms /   100 tokens (    6.62 ms per token,   151.11 tokens per second)
llama_print_timings:        eval time =  6520.11 ms /    76 runs   (   85.79 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  7338.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a result, she began to feel that the project was more complex than she had originally thought.



llama_print_timings:        load time =   668.64 ms
llama_print_timings:      sample time =     9.62 ms /    21 runs   (    0.46 ms per token,  2183.41 tokens per second)
llama_print_timings: prompt eval time =   668.60 ms /   108 tokens (    6.19 ms per token,   161.53 tokens per second)
llama_print_timings:        eval time =  1738.69 ms /    20 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2451.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The listening system received a series of noteworthy information, which was interpreted by the computer and discovered is a few satellite photos, very blurry. Sent to the General Staff Map Bureau for interpretation. They discovered that several important targets in our country were shown in the photos, including the Qingdao naval port and several Three-line Key Enterprise in the country. After analyzing, they confirmed that these photos were taken by the HK9 reconnaissance satellite of the United States. The first KH9 was just completed for the test launch, which was mainly used to recycle the intel via film reel. But it also was trying out a more advanced radio digital transfer trial, due to the unperfect technology, the transmission frequency was relatively low, so much information was leaked and could be received by the Red Branch. Since it was a test, the encryption level was lower and could be cracked, which was the most important monitoring target for understanding the United Sta


llama_print_timings:        load time =   869.16 ms
llama_print_timings:      sample time =   142.87 ms /   244 runs   (    0.59 ms per token,  1707.82 tokens per second)
llama_print_timings: prompt eval time =   869.11 ms /   208 tokens (    4.18 ms per token,   239.32 tokens per second)
llama_print_timings:        eval time = 22055.26 ms /   243 runs   (   90.76 ms per token,    11.02 tokens per second)
llama_print_timings:       total time = 23594.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Another thing shocked her, though she was in the listening section. Despite being in the listening department, some things still required her to do. Once, by accident, she saw the frequency setups of future launches and discovered that while the confirmed launch frequencies for the next three launches (304, 318 and 325) were below microwave range, they would not produce any heat effect on their targets.



llama_print_timings:        load time =   688.67 ms
llama_print_timings:      sample time =    42.11 ms /    93 runs   (    0.45 ms per token,  2208.66 tokens per second)
llama_print_timings: prompt eval time =   688.64 ms /   103 tokens (    6.69 ms per token,   149.57 tokens per second)
llama_print_timings:        eval time =  7843.10 ms /    92 runs   (   85.25 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  8726.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This day, someone informed Ye Wenjie to go to the headquarters office of the base. From the officer's tone and facial expression, Ye Wenjie felt something was wrong.



llama_print_timings:        load time =   612.47 ms
llama_print_timings:      sample time =    19.51 ms /    42 runs   (    0.46 ms per token,  2152.30 tokens per second)
llama_print_timings: prompt eval time =   612.43 ms /    63 tokens (    9.72 ms per token,   102.87 tokens per second)
llama_print_timings:        eval time =  3519.06 ms /    41 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  4222.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As she entered the office, a familiar scene appeared: The base's main leaders were all present, along with two officers who seemed to be higher-ranking people from another department. She felt a sharp sensation in her back. The ice-cold gazes of everyone in the room focused on her, and although she had experienced many ups and downs over the years, her instinct told her that today's unlucky person wasn't her; at most, she would just be a sacrificial lamb. Seeing Li Rizheng政委 sitting in a corner with a grim expression, she thought to herself: He will definitely have to pay for my trust. This was the first thought that came to her mind when she saw the scene. In an instant, she decided not to involve Li Rizheng and to take responsibility for it herself, even if it meant lying. But she did not expect the first person to speak would be Li Rizheng himself, and his words completely taken her by surprise.



llama_print_timings:        load time =   800.28 ms
llama_print_timings:      sample time =    95.91 ms /   214 runs   (    0.45 ms per token,  2231.33 tokens per second)
llama_print_timings: prompt eval time =   800.23 ms /   176 tokens (    4.55 ms per token,   219.94 tokens per second)
llama_print_timings:        eval time = 18364.64 ms /   213 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 19614.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining nodded gravely, “To better fulfill your role at Red Bank Base, after repeated consultation with his superior, Yang Zongying asked me to let you know the true situation of the project.”



llama_print_timings:        load time =   764.05 ms
llama_print_timings:      sample time =    21.31 ms /    47 runs   (    0.45 ms per token,  2205.74 tokens per second)
llama_print_timings: prompt eval time =   764.02 ms /   145 tokens (    5.27 ms per token,   189.79 tokens per second)
llama_print_timings:        eval time =  3795.69 ms /    46 runs   (   82.52 ms per token,    12.12 tokens per second)
llama_print_timings:       total time =  4660.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a long time, Ye Wenjie finally understood the meaning of Lei Bing's words: he had been deceiving her!



llama_print_timings:        load time =   583.51 ms
llama_print_timings:      sample time =    13.92 ms /    31 runs   (    0.45 ms per token,  2227.17 tokens per second)
llama_print_timings: prompt eval time =   583.47 ms /    51 tokens (   11.44 ms per token,    87.41 tokens per second)
llama_print_timings:        eval time =  2524.90 ms /    30 runs   (   84.16 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  3173.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I hope you will cherish this opportunity,” said Lei政委 sharply. “Work hard and make an achievement. From now on, you are only allowed to behave yourself in the base. Any anti-government behaviors will be punished severely.”



llama_print_timings:        load time =   733.24 ms
llama_print_timings:      sample time =    25.92 ms /    57 runs   (    0.45 ms per token,  2199.24 tokens per second)
llama_print_timings: prompt eval time =   733.19 ms /   133 tokens (    5.51 ms per token,   181.40 tokens per second)
llama_print_timings:        eval time =  4881.35 ms /    56 runs   (   87.17 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  5733.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“If you disagree, it is not too late,” said Yang Weining. Ye Wenjie knew the weight of these words and understood why he had been so anxious to see her in recent days. In order for her to show off her talents at base, she must know the true nature of Red Bottom, but this also means that the last hope she has to stay at Radar Peak will disappear.



llama_print_timings:        load time =   680.71 ms
llama_print_timings:      sample time =    39.34 ms /    88 runs   (    0.45 ms per token,  2237.19 tokens per second)
llama_print_timings: prompt eval time =   680.66 ms /   113 tokens (    6.02 ms per token,   166.01 tokens per second)
llama_print_timings:        eval time =  7511.76 ms /    87 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  8373.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I agree.” Ye Wenjie said gently but firmly.



llama_print_timings:        load time =   576.04 ms
llama_print_timings:      sample time =     7.57 ms /    17 runs   (    0.45 ms per token,  2244.82 tokens per second)
llama_print_timings: prompt eval time =   576.00 ms /    44 tokens (   13.09 ms per token,    76.39 tokens per second)
llama_print_timings:        eval time =  1315.83 ms /    16 runs   (   82.24 ms per token,    12.16 tokens per second)
llama_print_timings:       total time =  1925.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, in the twilight of this early summer, amidst the roaring noise of huge antennas and distant pine trees, Yang Weining told Ye Wenjie about the true Hong Bang Project, an even more unbelievable era myth than Lei Zhicheng's lies.



llama_print_timings:        load time =   641.77 ms
llama_print_timings:      sample time =    28.50 ms /    64 runs   (    0.45 ms per token,  2245.30 tokens per second)
llama_print_timings: prompt eval time =   641.74 ms /    86 tokens (    7.46 ms per token,   134.01 tokens per second)
llama_print_timings:        eval time =  5346.08 ms /    63 runs   (   84.86 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6116.61 ms


translated 46.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Why does the Red Harbor project have such a high level of confidentiality?” asked Wang Meng, after listening to Ye Wenjie's explanation.



llama_print_timings:        load time =   623.21 ms
llama_print_timings:      sample time =    16.20 ms /    36 runs   (    0.45 ms per token,  2221.95 tokens per second)
llama_print_timings: prompt eval time =   623.18 ms /    80 tokens (    7.79 ms per token,   128.37 tokens per second)
llama_print_timings:        eval time =  3019.70 ms /    35 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3714.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In fact, this issue was raised in the early stages of Red Bank Project and has been continued to be discussed until the final stage. Now that you have an answer, we can only admire the superforecasting of the highest decision-makers of the project.



llama_print_timings:        load time =   620.81 ms
llama_print_timings:      sample time =    25.04 ms /    56 runs   (    0.45 ms per token,  2236.33 tokens per second)
llama_print_timings: prompt eval time =   620.77 ms /    74 tokens (    8.39 ms per token,   119.21 tokens per second)
llama_print_timings:        eval time =  4802.53 ms /    55 runs   (   87.32 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  5536.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Yes, very advanced." Wang Miao shook her head and pointed to the words with a deep sense of pride.



llama_print_timings:        load time =   579.63 ms
llama_print_timings:      sample time =    12.00 ms /    25 runs   (    0.48 ms per token,  2084.03 tokens per second)
llama_print_timings: prompt eval time =   579.59 ms /    48 tokens (   12.07 ms per token,    82.82 tokens per second)
llama_print_timings:        eval time =  2074.45 ms /    24 runs   (   86.44 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2707.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a serious topic, the impact of contact with extraterrestrial civilizations on human society has been systematically studied for only two years. But this research is rising sharply and gives astonishing conclusions. Previous innocent idealistic hopes have collapsed as scholars discover that the effect of contact on human culture will not be integration but disintegration, and conflicts between different cultures will not be eliminated but intensified. In short, once contact takes place, the gap between internal differences within earth civilizations will become much wider, with potentially catastrophic effects. The most astonishing conclusion is that this effect has nothing to do with contact's degree and form (one-way or two-way) or the form and evolution of the extraterrestrial civilization in question!



llama_print_timings:        load time =   829.62 ms
llama_print_timings:      sample time =    73.98 ms /   165 runs   (    0.45 ms per token,  2230.18 tokens per second)
llama_print_timings: prompt eval time =   829.57 ms /   189 tokens (    4.39 ms per token,   227.83 tokens per second)
llama_print_timings:        eval time = 14309.73 ms /   164 runs   (   87.25 ms per token,    11.46 tokens per second)
llama_print_timings:       total time = 15478.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$This is the theory of contact symbols proposed by social science researcher Bill Matthew in his book “The Ten Thousand Light Years Iron Curtain: SETI Sociology.” Matthew suggests that a symbolic or metaphorical contact with alien civilizations will have an equal impact on human society as any substantial content. In other words, a contact, which is simply a sign of the existence of alien civilizations but lacks any tangible meaning, would have the same effect as the actual content. If such a metaphorical contact were to be monopolized by a country or a political force, its significance would be equal to that of economic and military power.



llama_print_timings:        load time =   779.24 ms
llama_print_timings:      sample time =    62.38 ms /   140 runs   (    0.45 ms per token,  2244.17 tokens per second)
llama_print_timings: prompt eval time =   779.21 ms /   158 tokens (    4.93 ms per token,   202.77 tokens per second)
llama_print_timings:        eval time = 12115.43 ms /   139 runs   (   87.16 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 13176.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“And what about the Red Bank project?” Wang Mu asked.



llama_print_timings:        load time =   579.70 ms
llama_print_timings:      sample time =     6.21 ms /    14 runs   (    0.44 ms per token,  2254.79 tokens per second)
llama_print_timings: prompt eval time =   579.66 ms /    45 tokens (   12.88 ms per token,    77.63 tokens per second)
llama_print_timings:        eval time =  1117.33 ms /    13 runs   (   85.95 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  1724.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You should be able to think of it.



llama_print_timings:        load time =   576.95 ms
llama_print_timings:      sample time =     4.48 ms /    10 runs   (    0.45 ms per token,  2234.14 tokens per second)
llama_print_timings: prompt eval time =   576.91 ms /    36 tokens (   16.03 ms per token,    62.40 tokens per second)
llama_print_timings:        eval time =   771.11 ms /     9 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1367.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Moxue nodded, of course he knew that if the Red Cliff succeeded, the world would not be as it is today. But he still said comforting words: “It's hard to say whether the Red Cliff will succeed or not now; the signal sent by the Red Cliff hasn't travelled very far in the universe yet.”



llama_print_timings:        load time =   675.58 ms
llama_print_timings:      sample time =    36.13 ms /    78 runs   (    0.46 ms per token,  2158.93 tokens per second)
llama_print_timings: prompt eval time =   675.54 ms /    89 tokens (    7.59 ms per token,   131.75 tokens per second)
llama_print_timings:        eval time =  6789.36 ms /    77 runs   (   88.17 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  7629.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$叶文洁 shakes her head: "The signal strength of the electromagnetic waves weakens with distance, and there are too many interferences in space. It's very unlikely for any extraterrestrial civilization to receive our radio signals."



llama_print_timings:        load time =  1034.15 ms
llama_print_timings:      sample time =    24.71 ms /    54 runs   (    0.46 ms per token,  2185.53 tokens per second)
llama_print_timings: prompt eval time =  1034.11 ms /   288 tokens (    3.59 ms per token,   278.50 tokens per second)
llama_print_timings:        eval time =  4620.69 ms /    53 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  5769.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But if the Soviet man had thought of a civilization of type II and type III, we would have been able to hear their voices.



llama_print_timings:        load time =   597.18 ms
llama_print_timings:      sample time =    12.85 ms /    29 runs   (    0.44 ms per token,  2256.46 tokens per second)
llama_print_timings: prompt eval time =   597.15 ms /    58 tokens (   10.30 ms per token,    97.13 tokens per second)
llama_print_timings:        eval time =  2301.23 ms /    28 runs   (   82.19 ms per token,    12.17 tokens per second)
llama_print_timings:       total time =  2956.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

For the past 20 years, we have never heard of anything about Red Bank.



llama_print_timings:        load time =   574.70 ms
llama_print_timings:      sample time =     8.49 ms /    19 runs   (    0.45 ms per token,  2237.14 tokens per second)
llama_print_timings: prompt eval time =   574.66 ms /    43 tokens (   13.36 ms per token,    74.83 tokens per second)
llama_print_timings:        eval time =  1555.08 ms /    18 runs   (   86.39 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2167.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Yes, it's like that with Red Bank and SETI. But what if everything we've been doing ultimately proves to be a waste of time and energy? Maybe the only intelligent life in the universe is us.”



llama_print_timings:        load time =   601.73 ms
llama_print_timings:      sample time =    21.56 ms /    48 runs   (    0.45 ms per token,  2226.55 tokens per second)
llama_print_timings: prompt eval time =   601.70 ms /    58 tokens (   10.37 ms per token,    96.39 tokens per second)
llama_print_timings:        eval time =  3975.77 ms /    47 runs   (   84.59 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  4675.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In theory, this may be an unsolved mystery,” she sighed softly. “But in terms of feeling, I, as well as everyone who has experienced Red Coast, recognizes this.”



llama_print_timings:        load time =   625.76 ms
llama_print_timings:      sample time =    19.40 ms /    43 runs   (    0.45 ms per token,  2216.15 tokens per second)
llama_print_timings: prompt eval time =   625.72 ms /    70 tokens (    8.94 ms per token,   111.87 tokens per second)
llama_print_timings:        eval time =  3615.07 ms /    42 runs   (   86.07 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  4329.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's a real shame that the Hongba Project has been cancelled. It would have been a great project to operate, it was such a noble undertaking!



llama_print_timings:        load time =   598.20 ms
llama_print_timings:      sample time =    15.72 ms /    35 runs   (    0.45 ms per token,  2226.18 tokens per second)
llama_print_timings: prompt eval time =   598.16 ms /    54 tokens (   11.08 ms per token,    90.28 tokens per second)
llama_print_timings:        eval time =  2923.76 ms /    34 runs   (   85.99 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3594.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Red Bottom is gradually declining. In the early 1980s, it had undergone a large-scale reformation, mainly upgrading computer systems for launch and monitor. The launch system implemented automatic operation. Monitoring systems introduced two IBM medium computers, data processing capacity increased greatly. It can monitor up to 40,000 channels simultaneously. But later, as people's vision widened, it became clear that the exploration of extra-terrestrial civilizations was difficult, and the upper command gradually lost interest in the Red Bottom project. The first changes were seen in the reduction of the base's confidential level, which was generally considered excessive at a high level. At this time, the base's garrison from a regiment reduced to a company, and then to one five-man guard team, and these changes began in the reform after that.”



llama_print_timings:        load time =   882.83 ms
llama_print_timings:      sample time =    85.16 ms /   190 runs   (    0.45 ms per token,  2231.15 tokens per second)
llama_print_timings: prompt eval time =   882.76 ms /   216 tokens (    4.09 ms per token,   244.69 tokens per second)
llama_print_timings:        eval time = 16659.30 ms /   189 runs   (   88.14 ms per token,    11.35 tokens per second)
llama_print_timings:       total time = 17933.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Many of your achievements were made during that time.”



llama_print_timings:        load time =   586.68 ms
llama_print_timings:      sample time =     5.73 ms /    13 runs   (    0.44 ms per token,  2269.16 tokens per second)
llama_print_timings: prompt eval time =   586.65 ms /    40 tokens (   14.67 ms per token,    68.18 tokens per second)
llama_print_timings:        eval time =  1042.64 ms /    12 runs   (   86.89 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1654.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Red Bank System originally served as the base for some astronomical observation projects. At that time, it was the largest radio telescope in China. Later, with the establishment of other astronomical observatory bases, Red Bank's research mainly focused on observing and analyzing solar electromagnetic activities, and it also added a solar telescope to achieve this. We built a leading mathematical model for studying solar electromagnetic activities at that time. With these studies and results, Red Bank made considerable investment. Indeed, much of the return is due to Li Rongbang. Of course, he has his own motives. At that time, he discovered that working in technical services was not promising, as he had once been a student of astronomy. So he wanted to go back to science and research. Red Bank introduced projects on exploring extrasolar civilizations outside of the project, which were mainly achieved under Li Rongbang's efforts.



llama_print_timings:        load time =   839.89 ms
llama_print_timings:      sample time =    88.55 ms /   198 runs   (    0.45 ms per token,  2236.10 tokens per second)
llama_print_timings: prompt eval time =   839.85 ms /   198 tokens (    4.24 ms per token,   235.76 tokens per second)
llama_print_timings:        eval time = 17452.52 ms /   197 runs   (   88.59 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 18702.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It’s not that easy to go back to professionalism? At that time, I thought he was more likely to take credit for your achievements than you were?”



llama_print_timings:        load time =   621.05 ms
llama_print_timings:      sample time =    15.84 ms /    36 runs   (    0.44 ms per token,  2272.87 tokens per second)
llama_print_timings: prompt eval time =   621.00 ms /    58 tokens (   10.71 ms per token,    93.40 tokens per second)
llama_print_timings:        eval time =  3019.46 ms /    35 runs   (   86.27 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3712.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie smiled broadly with a tolerant attitude. “Without Lao Le, Red Bank base would have been closed a long time ago. After the base was moved from military control to civilian control, the military simply gave up on it and the Chinese Academy of Sciences couldn't maintain its operation expenses, so everything ended there.”



llama_print_timings:        load time =   646.78 ms
llama_print_timings:      sample time =    33.68 ms /    75 runs   (    0.45 ms per token,  2227.11 tokens per second)
llama_print_timings: prompt eval time =   646.74 ms /    81 tokens (    7.98 ms per token,   125.24 tokens per second)
llama_print_timings:        eval time =  6215.57 ms /    74 runs   (   83.99 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  7014.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie didn't talk much about her life at Hongbaishan base. Wang Mu didn't ask. After entering the base for four years, she formed a family with Yang Weining and everything happened naturally. Later on in an accident of the base, both Yang Weining and Lei Zhicheng perished, but Yang Dong as the only child was born subsequently. They mother and daughter left Hongbaishan base until the late 1980s when the base finally closed down. She later became a professor at her alma mater teaching astrophysics, and retiring. Everything about Lei Wenjie's life in Hongbaishan base was told to Wang Mu by Shang Shari Mountain at Mimianyanbase.



llama_print_timings:        load time =   771.72 ms
llama_print_timings:      sample time =    73.35 ms /   163 runs   (    0.45 ms per token,  2222.13 tokens per second)
llama_print_timings: prompt eval time =   771.69 ms /   157 tokens (    4.92 ms per token,   203.45 tokens per second)
llama_print_timings:        eval time = 14017.84 ms /   162 runs   (   86.53 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 15127.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Exploring extraterrestrial civilizations is a very special discipline and it has a great influence on the researcher’s life,” said Ye Wenjie, who sounded like she was telling stories to children. “Lying in bed at night listening to the sound of the universe without any living creature—that faint noise, like those stars are still alive but also eternal: sometimes it feels like that sound is the endless cold wind in winter in Dahongshan Mountain and I feel so cold!”



llama_print_timings:        load time =   721.04 ms
llama_print_timings:      sample time =    48.67 ms /   109 runs   (    0.45 ms per token,  2239.43 tokens per second)
llama_print_timings: prompt eval time =   721.00 ms /   129 tokens (    5.59 ms per token,   178.92 tokens per second)
llama_print_timings:        eval time =  9496.08 ms /   108 runs   (   87.93 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 10438.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Sometimes working the night shift, I look up at the stars and feel like they're glowing deserts. It feels like I am a poor child left alone in the desert...I have that feeling: human life is really just an accident of the universe. The universe is just an empty palace, and humans are only a tiny ant in it. This thought makes me have a very contradictory mentality for the latter half of my life: sometimes I think life is so precious; sometimes again, I feel that humans are so insignificant, nothing matters.”



llama_print_timings:        load time =   769.81 ms
llama_print_timings:      sample time =    53.54 ms /   120 runs   (    0.45 ms per token,  2241.52 tokens per second)
llama_print_timings: prompt eval time =   769.77 ms /   152 tokens (    5.06 ms per token,   197.46 tokens per second)
llama_print_timings:        eval time = 10177.05 ms /   119 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time = 11195.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

For the lonely and great cause of his lifetime, Wang Moxiang wanted to comfort the old man but the last word made him fall into the same sad mood, so he didn't say anything but "Ye Teacher, let me accompany you to go see Red Bank Base once more."



llama_print_timings:        load time =   661.52 ms
llama_print_timings:      sample time =    29.24 ms /    65 runs   (    0.45 ms per token,  2223.21 tokens per second)
llama_print_timings: prompt eval time =   661.49 ms /    96 tokens (    6.89 ms per token,   145.13 tokens per second)
llama_print_timings:        eval time =  5450.74 ms /    64 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6247.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie slowly shakes her head. “Little Wang, we are different from each other. I'm older and my health is not good either. Anything unexpected could happen tomorrow, let's just live for today.”



llama_print_timings:        load time =   621.99 ms
llama_print_timings:      sample time =    24.26 ms /    54 runs   (    0.45 ms per token,  2226.25 tokens per second)
llama_print_timings: prompt eval time =   621.95 ms /    68 tokens (    9.15 ms per token,   109.33 tokens per second)
llama_print_timings:        eval time =  4509.69 ms /    53 runs   (   85.09 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  5241.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As he looked at the silver hair on Yue Wenjie's head, Wang Mo knew that he thought about his daughter again.



llama_print_timings:        load time =   585.11 ms
llama_print_timings:      sample time =    13.37 ms /    30 runs   (    0.45 ms per token,  2243.33 tokens per second)
llama_print_timings: prompt eval time =   585.07 ms /    51 tokens (   11.47 ms per token,    87.17 tokens per second)
llama_print_timings:        eval time =  2462.48 ms /    29 runs   (   84.91 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3109.33 ms


translated 25.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After coming out of Yue Wenjie's house, Wang Moxiang was very uneasy. The incidents these days and the story about Red Bank have been tied together in his mind. These things, which were not connected at all, have made the world feel strange to him in one night.



llama_print_timings:        load time =   620.21 ms
llama_print_timings:      sample time =    29.37 ms /    66 runs   (    0.45 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   620.18 ms /    72 tokens (    8.61 ms per token,   116.10 tokens per second)
llama_print_timings:        eval time =  5619.52 ms /    65 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  6373.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He returned home to get rid of the feeling, and opened his computer. He put on his VR gear for the third time, and log in as the character named Galileo again. His mentality had been successfully adjusted, and he felt extremely excited when he saw the login screen. This time, with a mission in mind, Wang Meng registered a corresponding ID: Galileo.



llama_print_timings:        load time =   711.99 ms
llama_print_timings:      sample time =    36.87 ms /    83 runs   (    0.44 ms per token,  2251.40 tokens per second)
llama_print_timings: prompt eval time =   711.96 ms /   121 tokens (    5.88 ms per token,   169.95 tokens per second)
llama_print_timings:        eval time =  7069.30 ms /    82 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  7951.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After logging into the Three-Body universe, Wang Moxiong stood on that vast plain facing the strange dawn of the alien world. A huge pyramid appeared in the east, but he immediately saw that it was not the ones built by King Xi and Mozi; instead, it had a Gothic spire reaching high into the sky, making him think of the Romanesque church he had seen at Wangjingjie yesterday morning. Additionally, many buildings clearly designed as grain silos seemed to also be in the form of Gothic architecture with pointed towers and slender spires, like countless roots protruding from the earth.



llama_print_timings:        load time =   790.26 ms
llama_print_timings:      sample time =    61.30 ms /   139 runs   (    0.44 ms per token,  2267.65 tokens per second)
llama_print_timings: prompt eval time =   790.22 ms /   167 tokens (    4.73 ms per token,   211.33 tokens per second)
llama_print_timings:        eval time = 11979.40 ms /   138 runs   (   86.81 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 13049.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Vivi: Hello!



llama_print_timings:        load time =   964.31 ms
llama_print_timings:      sample time =     2.71 ms /     6 runs   (    0.45 ms per token,  2210.76 tokens per second)
llama_print_timings: prompt eval time =   964.26 ms /   253 tokens (    3.81 ms per token,   262.38 tokens per second)
llama_print_timings:        eval time =   467.23 ms /     5 runs   (   93.45 ms per token,    10.70 tokens per second)
llama_print_timings:       total time =  1443.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this point, Wang Mao had determined his judgment: This game is designed for every player to open a process individually. Currently the European Middle Ages Instance in software chooses according to the ID of the player.



llama_print_timings:        load time =   613.41 ms
llama_print_timings:      sample time =    20.43 ms /    46 runs   (    0.44 ms per token,  2251.48 tokens per second)
llama_print_timings: prompt eval time =   613.38 ms /    65 tokens (    9.44 ms per token,   105.97 tokens per second)
llama_print_timings:        eval time =  3926.91 ms /    45 runs   (   87.26 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  4633.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are late, the meeting has been going on for a long time,” said the man wearing gold-trimmed robe and red cape. “I am Pope Gregory.”



llama_print_timings:        load time =   614.20 ms
llama_print_timings:      sample time =    18.10 ms /    39 runs   (    0.46 ms per token,  2154.82 tokens per second)
llama_print_timings: prompt eval time =   614.16 ms /    59 tokens (   10.41 ms per token,    96.07 tokens per second)
llama_print_timings:        eval time =  3159.33 ms /    38 runs   (   83.14 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  3857.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He tried to recall the medieval history of Europe, trying to infer the level of civilization from its name, but thought it was pointless in light of the chaos of historical records on the Trisolarian World.



llama_print_timings:        load time =   627.17 ms
llama_print_timings:      sample time =    20.44 ms /    46 runs   (    0.44 ms per token,  2250.16 tokens per second)
llama_print_timings: prompt eval time =   627.13 ms /    72 tokens (    8.71 ms per token,   114.81 tokens per second)
llama_print_timings:        eval time =  3744.72 ms /    45 runs   (   83.22 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  4465.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You changed your ID, but we all know you. In the two previous civilizations, it seems that you traveled to the East. Oh, I am Aristotle." said the person in Ancient Greek attire, with a head of white curly hair.



llama_print_timings:        load time =   626.72 ms
llama_print_timings:      sample time =    25.90 ms /    58 runs   (    0.45 ms per token,  2239.56 tokens per second)
llama_print_timings: prompt eval time =   626.69 ms /    80 tokens (    7.83 ms per token,   127.65 tokens per second)
llama_print_timings:        eval time =  4891.02 ms /    57 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  5636.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, ” Wang Mu nodded. “I witnessed the ruin of two civilizations, once from frost and once from heat. I also saw the great efforts by the scholars in the east to master the laws of solar motion.”



llama_print_timings:        load time =   650.83 ms
llama_print_timings:      sample time =    23.41 ms /    53 runs   (    0.44 ms per token,  2263.80 tokens per second)
llama_print_timings: prompt eval time =   650.79 ms /    82 tokens (    7.94 ms per token,   126.00 tokens per second)
llama_print_timings:        eval time =  4382.60 ms /    52 runs   (   84.28 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  5140.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A man with a peaked mountain goat haircut, who is even thinner than the Pope, uttered an exclamation in the shadows. “Laughable attempt to discover the mystery of sun's motion from meditation, enlightenment or even dream visions, Eastern sage!”



llama_print_timings:        load time =   633.71 ms
llama_print_timings:      sample time =    29.85 ms /    66 runs   (    0.45 ms per token,  2210.98 tokens per second)
llama_print_timings: prompt eval time =   633.69 ms /    80 tokens (    7.92 ms per token,   126.24 tokens per second)
llama_print_timings:        eval time =  5676.69 ms /    65 runs   (   87.33 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  6448.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Alexandre said, “This is Galileo. He argued that knowledge of the world should come from experiments and observations rather than dogma, a craftsman-like thinker who achieved nonetheless things we have to take note of.”



llama_print_timings:        load time =   625.08 ms
llama_print_timings:      sample time =    22.93 ms /    52 runs   (    0.44 ms per token,  2268.17 tokens per second)
llama_print_timings: prompt eval time =   625.03 ms /    73 tokens (    8.56 ms per token,   116.79 tokens per second)
llama_print_timings:        eval time =  4450.26 ms /    51 runs   (   87.26 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  5179.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Mozi also conducted experiments and observations.



llama_print_timings:        load time =   580.21 ms
llama_print_timings:      sample time =     4.45 ms /    10 runs   (    0.44 ms per token,  2248.71 tokens per second)
llama_print_timings: prompt eval time =   580.17 ms /    45 tokens (   12.89 ms per token,    77.56 tokens per second)
llama_print_timings:        eval time =   805.32 ms /     9 runs   (   89.48 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  1405.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Galileo snorted. “Mozi's ideas are still oriental, he is only a philosopher disguised as a scientist and never took his own observations seriously, but proceeded to construct an entire simulation model of the universe based on his subjective assumptions. How amusing! Unfortunately for those sophisticated instruments.”



llama_print_timings:        load time =   686.80 ms
llama_print_timings:      sample time =    32.76 ms /    73 runs   (    0.45 ms per token,  2228.19 tokens per second)
llama_print_timings: prompt eval time =   686.76 ms /   121 tokens (    5.68 ms per token,   176.19 tokens per second)
llama_print_timings:        eval time =  6139.73 ms /    72 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6977.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is correct, Wang Miao nodded. "That's exactly how I think."



llama_print_timings:        load time =   606.47 ms
llama_print_timings:      sample time =     8.96 ms /    20 runs   (    0.45 ms per token,  2233.14 tokens per second)
llama_print_timings: prompt eval time =   606.43 ms /    50 tokens (   12.13 ms per token,    82.45 tokens per second)
llama_print_timings:        eval time =  1605.54 ms /    19 runs   (   84.50 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  2251.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Are you also bringing a copy of the Calendar?" The Pope said with irony.



llama_print_timings:        load time =   592.23 ms
llama_print_timings:      sample time =     8.43 ms /    19 runs   (    0.44 ms per token,  2255.19 tokens per second)
llama_print_timings: prompt eval time =   592.20 ms /    47 tokens (   12.60 ms per token,    79.37 tokens per second)
llama_print_timings:        eval time =  1524.44 ms /    18 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2155.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't have a calendar, but I brought with me the cosmological model based on observations that we have. However, to say the least, even if this model is correct, it wouldn't necessarily help us master the precise laws of the sun's movement to compose calendars. But this must be the first step to do anyway.



llama_print_timings:        load time =   649.50 ms
llama_print_timings:      sample time =    32.81 ms /    73 runs   (    0.45 ms per token,  2225.00 tokens per second)
llama_print_timings: prompt eval time =   649.46 ms /    82 tokens (    7.92 ms per token,   126.26 tokens per second)
llama_print_timings:        eval time =  6135.97 ms /    72 runs   (   85.22 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6934.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Galileo was greeted with a few moments of silence, which echoed in the cold space. These were the applause of Galileo. "Very good, Copernicus. Very good. Your idea that is based on reality and scientific thought is something most scholars don't possess, so your theory also deserves to be heard."



llama_print_timings:        load time =   657.04 ms
llama_print_timings:      sample time =    34.77 ms /    78 runs   (    0.45 ms per token,  2243.12 tokens per second)
llama_print_timings: prompt eval time =   657.00 ms /    89 tokens (    7.38 ms per token,   135.46 tokens per second)
llama_print_timings:        eval time =  6507.66 ms /    77 runs   (   84.52 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  7322.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Pope nodded to Wang Miao, “Speak up.”



llama_print_timings:        load time =   585.34 ms
llama_print_timings:      sample time =     7.11 ms /    15 runs   (    0.47 ms per token,  2108.52 tokens per second)
llama_print_timings: prompt eval time =   585.30 ms /    46 tokens (   12.72 ms per token,    78.59 tokens per second)
llama_print_timings:        eval time =  1149.23 ms /    14 runs   (   82.09 ms per token,    12.18 tokens per second)
llama_print_timings:       total time =  1767.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   827.34 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2270.15 tokens per second)
llama_print_timings: prompt eval time =   827.29 ms /   192 tokens (    4.31 ms per token,   232.08 tokens per second)
llama_print_timings:        eval time =    84.88 ms /     1 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =   915.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dimly lit cathedral echoed with a few dry laughs. “Burn him,” the Pope said, without expression, as the two rusty-armed soldiers wearing full suits of armor stepped forward like clumsy robots toward Wang Meng.



llama_print_timings:        load time =   647.52 ms
llama_print_timings:      sample time =    27.38 ms /    58 runs   (    0.47 ms per token,  2118.03 tokens per second)
llama_print_timings: prompt eval time =   647.48 ms /    84 tokens (    7.71 ms per token,   129.73 tokens per second)
llama_print_timings:        eval time =  4896.14 ms /    57 runs   (   85.90 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  5670.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Burn it, ” Galileo sighed, “I had hoped you were someone more than just an astrologer or a sorcerer.”



llama_print_timings:        load time =   621.66 ms
llama_print_timings:      sample time =    15.18 ms /    34 runs   (    0.45 ms per token,  2240.38 tokens per second)
llama_print_timings: prompt eval time =   621.62 ms /    61 tokens (   10.19 ms per token,    98.13 tokens per second)
llama_print_timings:        eval time =  2881.36 ms /    33 runs   (   87.31 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3571.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He nodded in agreement.



llama_print_timings:        load time =   591.87 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2257.34 tokens per second)
llama_print_timings: prompt eval time =   591.84 ms /    47 tokens (   12.59 ms per token,    79.41 tokens per second)
llama_print_timings:        eval time =   505.75 ms /     6 runs   (   84.29 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1111.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"I have to say my piece now!” Wang Mo pushed aside the two soldiers’ iron gloves.



llama_print_timings:        load time =   592.62 ms
llama_print_timings:      sample time =    10.19 ms /    22 runs   (    0.46 ms per token,  2159.62 tokens per second)
llama_print_timings: prompt eval time =   592.58 ms /    54 tokens (   10.97 ms per token,    91.13 tokens per second)
llama_print_timings:        eval time =  1727.00 ms /    21 runs   (   82.24 ms per token,    12.16 tokens per second)
llama_print_timings:       total time =  2365.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Have you ever seen three suns?” Galileo asked, turning his head.



llama_print_timings:        load time =   590.02 ms
llama_print_timings:      sample time =     8.44 ms /    19 runs   (    0.44 ms per token,  2250.65 tokens per second)
llama_print_timings: prompt eval time =   589.99 ms /    51 tokens (   11.57 ms per token,    86.44 tokens per second)
llama_print_timings:        eval time =  1567.43 ms /    18 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  2195.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Everyone has seen it.



llama_print_timings:        load time =   590.16 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2248.63 tokens per second)
llama_print_timings: prompt eval time =   590.14 ms /    34 tokens (   17.36 ms per token,    57.61 tokens per second)
llama_print_timings:        eval time =   547.79 ms /     6 runs   (   91.30 ms per token,    10.95 tokens per second)
llama_print_timings:       total time =  1151.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“And where are the other two suns?”



llama_print_timings:        load time =   589.98 ms
llama_print_timings:      sample time =     4.86 ms /    11 runs   (    0.44 ms per token,  2264.77 tokens per second)
llama_print_timings: prompt eval time =   589.95 ms /    52 tokens (   11.35 ms per token,    88.14 tokens per second)
llama_print_timings:        eval time =   870.35 ms /    10 runs   (   87.03 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1482.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

First of all, we may not be seeing the same sun at different times. It could be one of three suns. The other two are stars that appear like stars when they travel a long distance from us.



llama_print_timings:        load time =   620.54 ms
llama_print_timings:      sample time =    19.80 ms /    44 runs   (    0.45 ms per token,  2222.00 tokens per second)
llama_print_timings: prompt eval time =   620.50 ms /    70 tokens (    8.86 ms per token,   112.81 tokens per second)
llama_print_timings:        eval time =  3750.86 ms /    43 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  4461.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Galileo disagreed. "You lack the most basic scientific training," he shrugged, shaking his head. "The sun is continuously moving away from us, so there cannot be a third option as suggested by your hypothesis: that it is smaller than normal and bigger than a comet but has never been seen to become one."



llama_print_timings:        load time =   688.08 ms
llama_print_timings:      sample time =    33.11 ms /    74 runs   (    0.45 ms per token,  2234.77 tokens per second)
llama_print_timings: prompt eval time =   688.03 ms /   108 tokens (    6.37 ms per token,   156.97 tokens per second)
llama_print_timings:        eval time =  6222.38 ms /    73 runs   (   85.24 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  7065.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As you have been trained in science, you should have some understanding of the structure of the sun.



llama_print_timings:        load time =   578.90 ms
llama_print_timings:      sample time =     9.26 ms /    21 runs   (    0.44 ms per token,  2269.04 tokens per second)
llama_print_timings: prompt eval time =   578.86 ms /    48 tokens (   12.06 ms per token,    82.92 tokens per second)
llama_print_timings:        eval time =  1659.49 ms /    20 runs   (   82.97 ms per token,    12.05 tokens per second)
llama_print_timings:       total time =  2280.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the most remarkable discovery I have ever made: the sun is composed of a deep and thin gas layer and a dense, hot inner core.



llama_print_timings:        load time =   609.65 ms
llama_print_timings:      sample time =    14.51 ms /    31 runs   (    0.47 ms per token,  2136.90 tokens per second)
llama_print_timings: prompt eval time =   609.61 ms /    61 tokens (    9.99 ms per token,   100.06 tokens per second)
llama_print_timings:        eval time =  2562.66 ms /    30 runs   (   85.42 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  3240.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, but you obviously haven't noticed the peculiar optical action of the gas layer in the sun's atmosphere. This is a phenomenon similar to polarization, which makes the gaseous outer layer of the sun disappear when it goes beyond a certain distance from our planetary atmosphere, leaving only its luminescent core visible. At this time, the sun becomes shrouded in our field of view and becomes a flying star. It is this phenomenon that confounded historians researchers who couldn't see three stars at once because they were far away.”



llama_print_timings:        load time =   779.61 ms
llama_print_timings:      sample time =    52.94 ms /   118 runs   (    0.45 ms per token,  2228.90 tokens per second)
llama_print_timings: prompt eval time =   779.54 ms /   164 tokens (    4.75 ms per token,   210.38 tokens per second)
llama_print_timings:        eval time = 10321.93 ms /   117 runs   (   88.22 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 11346.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Alexandros responded, "You lack basic logical training. Indeed, we might have seen three stars flying in the sky and they would be accompanied by fierce coldness but, according to your theory, we should also have been able to see three normal sized suns - which has never happened in any record of civilizations past or present!"



llama_print_timings:        load time =   678.70 ms
llama_print_timings:      sample time =    32.11 ms /    72 runs   (    0.45 ms per token,  2242.36 tokens per second)
llama_print_timings: prompt eval time =   678.66 ms /   114 tokens (    5.95 ms per token,   167.98 tokens per second)
llama_print_timings:        eval time =  5964.27 ms /    71 runs   (   84.00 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  6789.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Wait a minute!" exclaimed the man with the strange-shaped hat and long beard, rising to speak for the first time. "History suggests there was a civilization that witnessed the appearance of two suns, only it instantly perished in an intense fire on both days. However, this record is fuzzy. Oh, I am Da Vinci."



llama_print_timings:        load time =   652.77 ms
llama_print_timings:      sample time =    35.53 ms /    78 runs   (    0.46 ms per token,  2195.58 tokens per second)
llama_print_timings: prompt eval time =   652.75 ms /    89 tokens (    7.33 ms per token,   136.35 tokens per second)
llama_print_timings:        eval time =  6593.65 ms /    77 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  7411.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We are talking about three suns, not two! "Galileo shouted. "According to his theory, three suns would surely appear, just like three flying stars!"



llama_print_timings:        load time =   627.14 ms
llama_print_timings:      sample time =    18.00 ms /    40 runs   (    0.45 ms per token,  2222.84 tokens per second)
llama_print_timings: prompt eval time =   627.10 ms /    66 tokens (    9.50 ms per token,   105.25 tokens per second)
llama_print_timings:        eval time =  3333.71 ms /    39 runs   (   85.48 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4042.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Three suns have appeared, ” said Wang Moxian calmly. “Someone has seen them before, but those who saw the great sight were unable to spread information about it because they only had a few seconds to survive and escape. ‘The three days in the sky’ is the most terrifying disaster in the Three Body World. At that time, the surface of the planet will be turned into an iron furnace, and high temperatures can even melt rock. The destroyed worlds after ‘the three days in the sky’ take a long time to reappear life and civilization; this is why there are no historical records.”



llama_print_timings:        load time =   752.59 ms
llama_print_timings:      sample time =    60.86 ms /   137 runs   (    0.44 ms per token,  2251.18 tokens per second)
llama_print_timings: prompt eval time =   752.55 ms /   144 tokens (    5.23 ms per token,   191.35 tokens per second)
llama_print_timings:        eval time = 11844.64 ms /   136 runs   (   87.09 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 12879.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Silence. Everyone looked at the Pope.



llama_print_timings:        load time =   579.26 ms
llama_print_timings:      sample time =     6.24 ms /    11 runs   (    0.57 ms per token,  1762.54 tokens per second)
llama_print_timings: prompt eval time =   579.22 ms /    42 tokens (   13.79 ms per token,    72.51 tokens per second)
llama_print_timings:        eval time =   844.94 ms /    10 runs   (   84.49 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1454.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Burn him,” the Pope said calmly, and his smile seemed familiar—the same smile as Xu Zhuang.



llama_print_timings:        load time =   602.21 ms
llama_print_timings:      sample time =    12.66 ms /    28 runs   (    0.45 ms per token,  2211.87 tokens per second)
llama_print_timings: prompt eval time =   602.17 ms /    57 tokens (   10.56 ms per token,    94.66 tokens per second)
llama_print_timings:        eval time =  2288.30 ms /    27 runs   (   84.75 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  2949.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The great hall immediately became lively. Everyone seemed to have encountered something joyful. Galileo and the others were in high spirits as they moved a crossfire stake from the dark corner with enthusiasm and laid it up vertically. While others were excitedly piling wood on it, Da?Vinci remained seated at his table and contemplating without moving an inch. From time to time, he would use his pen to calculate something on the desk.



llama_print_timings:        load time =   704.54 ms
llama_print_timings:      sample time =    44.93 ms /   101 runs   (    0.44 ms per token,  2248.14 tokens per second)
llama_print_timings: prompt eval time =   704.51 ms /   127 tokens (    5.55 ms per token,   180.27 tokens per second)
llama_print_timings:        eval time =  8769.38 ms /   100 runs   (   87.69 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  9679.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Brouno, " said Aristotle, pointing to the charred body, "has been talking just as incoherently here as you are."



llama_print_timings:        load time =   604.42 ms
llama_print_timings:      sample time =    14.68 ms /    33 runs   (    0.44 ms per token,  2248.11 tokens per second)
llama_print_timings: prompt eval time =   604.38 ms /    58 tokens (   10.42 ms per token,    95.97 tokens per second)
llama_print_timings:        eval time =  2782.84 ms /    32 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3453.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Pope was powerless to speak.



llama_print_timings:        load time =   580.42 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2251.13 tokens per second)
llama_print_timings: prompt eval time =   580.38 ms /    40 tokens (   14.51 ms per token,    68.92 tokens per second)
llama_print_timings:        eval time =   698.33 ms /     8 runs   (   87.29 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1296.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Two soldiers tied Wang Mo to a stake with fireproof linen rope, Wang Mo pointed at the Pope with his still moving finger and said, "You are definitely just a program, whether you or anyone else is not a programmer, I will return as long as possible!"



llama_print_timings:        load time =   653.36 ms
llama_print_timings:      sample time =    25.99 ms /    58 runs   (    0.45 ms per token,  2231.54 tokens per second)
llama_print_timings: prompt eval time =   653.32 ms /    88 tokens (    7.42 ms per token,   134.70 tokens per second)
llama_print_timings:        eval time =  4867.63 ms /    57 runs   (   85.40 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  5639.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You can't come back here, you will always disappear in the Three Body World.” Galileo said with a grin.



llama_print_timings:        load time =   591.05 ms
llama_print_timings:      sample time =    13.05 ms /    29 runs   (    0.45 ms per token,  2222.73 tokens per second)
llama_print_timings: prompt eval time =   591.01 ms /    53 tokens (   11.15 ms per token,    89.68 tokens per second)
llama_print_timings:        eval time =  2413.94 ms /    28 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  3065.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You must be a programmer as well, a normal person can't have no knowledge of this kind of thing. Just remember my MAC address and switch to a new computer with a new ID soon after then I will announce who I am.”



llama_print_timings:        load time =   628.05 ms
llama_print_timings:      sample time =    22.98 ms /    51 runs   (    0.45 ms per token,  2219.61 tokens per second)
llama_print_timings: prompt eval time =   628.01 ms /    72 tokens (    8.72 ms per token,   114.65 tokens per second)
llama_print_timings:        eval time =  4187.38 ms /    50 runs   (   83.75 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  4919.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"The system has recorded your retinal features," said Da Vinci, looking at Wang Meng with a glance. Then he continued his calculations.



llama_print_timings:        load time =   618.03 ms
llama_print_timings:      sample time =    13.83 ms /    31 runs   (    0.45 ms per token,  2241.18 tokens per second)
llama_print_timings: prompt eval time =   617.99 ms /    66 tokens (    9.36 ms per token,   106.80 tokens per second)
llama_print_timings:        eval time =  2529.84 ms /    30 runs   (   84.33 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  3209.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Miao suddenly felt a sense of fear and shouted, "Don't do that! Let me down! I am speaking the truth!"



llama_print_timings:        load time =   606.60 ms
llama_print_timings:      sample time =    14.42 ms /    32 runs   (    0.45 ms per token,  2218.83 tokens per second)
llama_print_timings: prompt eval time =   606.56 ms /    59 tokens (   10.28 ms per token,    97.27 tokens per second)
llama_print_timings:        eval time =  2636.69 ms /    31 runs   (   85.05 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  3309.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If you are telling the truth, you will not be burned alive. The game will grant free pass to the person who chooses the correct path.



llama_print_timings:        load time =   643.45 ms
llama_print_timings:      sample time =    14.46 ms /    32 runs   (    0.45 ms per token,  2213.15 tokens per second)
llama_print_timings: prompt eval time =   643.42 ms /    85 tokens (    7.57 ms per token,   132.11 tokens per second)
llama_print_timings:        eval time =  2633.45 ms /    31 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3343.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

就在他伸手在柴堆上点火时，一道红色的强光从门洞射入，接着涌入一股挟带着烟尘的热浪，一匹马穿透强光跑进大殿，马的躯体在熊熊燃烧，已成了一团火球，奔跑时火焰呼呼作响。马上骑着一个人，是一位穿着重铠的中世纪骑士，他的盔甲已被烧得通红，奔跑时拖着一股白烟。



llama_print_timings:        load time =   723.49 ms
llama_print_timings:      sample time =    56.95 ms /   101 runs   (    0.56 ms per token,  1773.33 tokens per second)
llama_print_timings: prompt eval time =   723.41 ms /   129 tokens (    5.61 ms per token,   178.32 tokens per second)
llama_print_timings:        eval time =  8912.43 ms /   100 runs   (   89.12 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  9900.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The world has just been destroyed! ! The world has just been destroyed! ! Thirsty! ! Thirsty! !  Knight screamed, and the burning horse plummeted to the ground, rolling into a mass of flames. The knight was thrown out far away, finally stopping at the stake which was now spreading with thick white smoke. Human oil oozed out from the armor, as if it had spread wings made of fire.



llama_print_timings:        load time =   704.31 ms
llama_print_timings:      sample time =    44.27 ms /    99 runs   (    0.45 ms per token,  2236.18 tokens per second)
llama_print_timings: prompt eval time =   704.27 ms /   127 tokens (    5.55 ms per token,   180.33 tokens per second)
llama_print_timings:        eval time =  8409.98 ms /    98 runs   (   85.82 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  9320.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They all rushed towards the door and soon disappeared in the red light that was shooting from it. Wang Meng struggled to break free of his ropes,繞e rovse the burning knight and horse, and ran out of the hall into the hot wind.



llama_print_timings:        load time =   650.29 ms
llama_print_timings:      sample time =    25.52 ms /    57 runs   (    0.45 ms per token,  2233.98 tokens per second)
llama_print_timings: prompt eval time =   650.24 ms /    93 tokens (    6.99 ms per token,   143.02 tokens per second)
llama_print_timings:        eval time =  4880.19 ms /    56 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  5646.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$大地已经像一块炉中的铁板一样被烧得通红，发出暗红色光的地面上流淌着一条条明亮的岩浆小溪，织成一张伸向天边的亮丽的火网。Red-hot Earth has numerous slender fire pikes high in the air, which are from dry silos and those dried silo dwellers' blue flames is transparent. Their faces and bodies can be seen slowly changing. They all raise their arms in the sky with the same gesture, sing with a singing voice to praise.



llama_print_timings:        load time =   891.46 ms
llama_print_timings:      sample time =    53.56 ms /   119 runs   (    0.45 ms per token,  2221.64 tokens per second)
llama_print_timings: prompt eval time =   891.42 ms /   226 tokens (    3.94 ms per token,   253.53 tokens per second)
llama_print_timings:        eval time = 10301.60 ms /   118 runs   (   87.30 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 11440.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three days in the air.



llama_print_timings:        load time =   576.19 ms
llama_print_timings:      sample time =     3.09 ms /     7 runs   (    0.44 ms per token,  2266.84 tokens per second)
llama_print_timings: prompt eval time =   576.17 ms /    37 tokens (   15.57 ms per token,    64.22 tokens per second)
llama_print_timings:        eval time =   484.58 ms /     6 runs   (   80.76 ms per token,    12.38 tokens per second)
llama_print_timings:       total time =  1074.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The end of the world is not a tragedy, but a necessity.



llama_print_timings:        load time =   896.97 ms
llama_print_timings:      sample time =     7.79 ms /    17 runs   (    0.46 ms per token,  2182.00 tokens per second)
llama_print_timings: prompt eval time =   896.92 ms /   226 tokens (    3.97 ms per token,   251.97 tokens per second)
llama_print_timings:        eval time =  1340.97 ms /    16 runs   (   83.81 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  2274.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The civilization known as Civilization $183 destroyed itself in the "Three Days of Unbroken Sky", reaching a medieval level of development.



llama_print_timings:        load time =   592.01 ms
llama_print_timings:      sample time =    14.29 ms /    32 runs   (    0.45 ms per token,  2238.86 tokens per second)
llama_print_timings: prompt eval time =   591.97 ms /    53 tokens (   11.17 ms per token,    89.53 tokens per second)
llama_print_timings:        eval time =  2659.02 ms /    31 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3317.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a long time, life and civilization will restart and resume their destiny-changing evolution in the Three Body Universe.



llama_print_timings:        load time =   599.99 ms
llama_print_timings:      sample time =    13.13 ms /    29 runs   (    0.45 ms per token,  2207.84 tokens per second)
llama_print_timings: prompt eval time =   599.94 ms /    52 tokens (   11.54 ms per token,    86.68 tokens per second)
llama_print_timings:        eval time =  2378.37 ms /    28 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3037.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In this civilization, Copernicus successfully unveiled the basic structure of the universe. The Three Body Civilization will produce its first leap and the game enters Level 2.



llama_print_timings:        load time =   605.04 ms
llama_print_timings:      sample time =    17.43 ms /    39 runs   (    0.45 ms per token,  2237.14 tokens per second)
llama_print_timings: prompt eval time =   605.00 ms /    58 tokens (   10.43 ms per token,    95.87 tokens per second)
llama_print_timings:        eval time =  3205.34 ms /    38 runs   (   84.35 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  3889.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Welcome to level 2 of "The Three-Body Problem".



llama_print_timings:        load time =   582.33 ms
llama_print_timings:      sample time =     6.88 ms /    15 runs   (    0.46 ms per token,  2180.55 tokens per second)
llama_print_timings: prompt eval time =   582.29 ms /    38 tokens (   15.32 ms per token,    65.26 tokens per second)
llama_print_timings:        eval time =  1182.70 ms /    14 runs   (   84.48 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1796.85 ms


translated 56.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Mike just left the game, his phone rang and it was David calling him. He said there was an urgent matter that needed to be taken care of and asked Mike to come to their headquarters as soon as possible. When he looked at his watch, it was already three in the morning.



llama_print_timings:        load time =   626.66 ms
llama_print_timings:      sample time =    26.97 ms /    61 runs   (    0.44 ms per token,  2261.52 tokens per second)
llama_print_timings: prompt eval time =   626.63 ms /    76 tokens (    8.25 ms per token,   121.28 tokens per second)
llama_print_timings:        eval time =  5090.77 ms /    60 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5835.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao arrived at the chaotic office, he saw that it had been fanned by smoke. The young female policeman kept faning her nose with a notebook in front of her as a result. Big Shi introduced her to be called Xu Bingbing who was a computer expert and was working for the Information Security Department. The third person who surprised Wang Miao was Wei Cheng, Shen Yufei's husband, who had a messy haircut. He looked up at Wang Miao. It seemed that he had forgotten meeting him before.



llama_print_timings:        load time =   730.12 ms
llama_print_timings:      sample time =    71.79 ms /   121 runs   (    0.59 ms per token,  1685.49 tokens per second)
llama_print_timings: prompt eval time =   730.08 ms /   132 tokens (    5.53 ms per token,   180.80 tokens per second)
llama_print_timings:        eval time = 10903.15 ms /   120 runs   (   90.86 ms per token,    11.01 tokens per second)
llama_print_timings:       total time = 11953.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Excuse me, but I don't think you have slept yet," said Dashi. "We have something to report at the battle command center and it needs your opinion."



llama_print_timings:        load time =   624.76 ms
llama_print_timings:      sample time =    17.73 ms /    40 runs   (    0.44 ms per token,  2255.55 tokens per second)
llama_print_timings: prompt eval time =   624.72 ms /    75 tokens (    8.33 ms per token,   120.05 tokens per second)
llama_print_timings:        eval time =  3305.68 ms /    39 runs   (   84.76 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  4010.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have said that my life is threatened.



llama_print_timings:        load time =   593.43 ms
llama_print_timings:      sample time =     4.47 ms /    10 runs   (    0.45 ms per token,  2235.64 tokens per second)
llama_print_timings: prompt eval time =   593.39 ms /    50 tokens (   11.87 ms per token,    84.26 tokens per second)
llama_print_timings:        eval time =   780.38 ms /     9 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  1393.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Let's start from the beginning.



llama_print_timings:        load time =   573.76 ms
llama_print_timings:      sample time =     3.97 ms /     9 runs   (    0.44 ms per token,  2269.86 tokens per second)
llama_print_timings: prompt eval time =   573.72 ms /    36 tokens (   15.94 ms per token,    62.75 tokens per second)
llama_print_timings:        eval time =   673.01 ms /     8 runs   (   84.13 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  1263.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Alright, let me start from the beginning. I’m really looking for someone to chat with.” Wei Cheng looked at Xu Bingbing and said, “No need for written accounts, okay?”



llama_print_timings:        load time =   631.19 ms
llama_print_timings:      sample time =    20.85 ms /    45 runs   (    0.46 ms per token,  2158.48 tokens per second)
llama_print_timings: prompt eval time =   631.15 ms /    70 tokens (    9.02 ms per token,   110.91 tokens per second)
llama_print_timings:        eval time =  3715.40 ms /    44 runs   (   84.44 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  4441.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Not now, but nobody spoke to you before?" Dashisun asked timely.



llama_print_timings:        load time =   588.14 ms
llama_print_timings:      sample time =     8.96 ms /    20 runs   (    0.45 ms per token,  2232.64 tokens per second)
llama_print_timings: prompt eval time =   588.10 ms /    46 tokens (   12.78 ms per token,    78.22 tokens per second)
llama_print_timings:        eval time =  1622.21 ms /    19 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2250.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Not really. I'm just a lazy person.



llama_print_timings:        load time =   601.42 ms
llama_print_timings:      sample time =     5.55 ms /    12 runs   (    0.46 ms per token,  2160.99 tokens per second)
llama_print_timings: prompt eval time =   601.37 ms /    42 tokens (   14.32 ms per token,    69.84 tokens per second)
llama_print_timings:        eval time =   947.00 ms /    11 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1572.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a translation of Wei Cheng's description.



llama_print_timings:        load time =   565.90 ms
llama_print_timings:      sample time =     6.19 ms /    13 runs   (    0.48 ms per token,  2101.86 tokens per second)
llama_print_timings: prompt eval time =   565.88 ms /    35 tokens (   16.17 ms per token,    61.85 tokens per second)
llama_print_timings:        eval time =  1072.93 ms /    12 runs   (   89.41 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  1665.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm a lazy person, from childhood. While living at school, I never washed the bowl and didn't fold the blanket. I didn't have interest in anything and didn't like to study or even play, so I spent most of the day in a daze. But I know that I possess some talents that surpass those of others, such as if you draw a line, I can accurately locate it at the 0.618 golden section on the online chart. My classmates said I was suitable for being a carpenter, but I think this is a higher-order talent, which involves intuition about numbers and shapes. Actually, my maths grades are as bad as those of other subjects, I can't deduce anything correctly during exams, so I always write the answers that I think will pass, even though I only get correct scores about 80%.



llama_print_timings:        load time =   800.04 ms
llama_print_timings:      sample time =   116.56 ms /   197 runs   (    0.59 ms per token,  1690.07 tokens per second)
llama_print_timings: prompt eval time =   800.00 ms /   174 tokens (    4.60 ms per token,   217.50 tokens per second)
llama_print_timings:        eval time = 17881.49 ms /   196 runs   (   91.23 ms per token,    10.96 tokens per second)
llama_print_timings:       total time = 19202.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

During my sophomore year, a math teacher noticed me. Many talented people were hired to teach middle school because of the Cultural Revolution. He was such a person. One day, he detained me after class and wrote numbers series on his blackboard. I quickly solved a few of them but had trouble with the others since they all looked like divergent series. The teacher took out a book and turned to an article called “The Study of Blood Letter”. He read to me: “Dr. Watson saw someone who was dressed as an ordinary person delivering letters, pointed it at Holmes, and asked if that was the ex-navy seaman whose hands he had seen.” Dr. Watson felt surprised that how Holmes could deduce his identity, and Holmes himself didn’t understand how he did it either. He said that it was not strange for other people to explain how they calculated 2+2=4 as well.



llama_print_timings:        load time =   957.36 ms
llama_print_timings:      sample time =    89.11 ms /   198 runs   (    0.45 ms per token,  2222.10 tokens per second)
llama_print_timings: prompt eval time =   957.31 ms /   246 tokens (    3.89 ms per token,   256.97 tokens per second)
llama_print_timings:        eval time = 17333.70 ms /   197 runs   (   87.99 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 18690.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said to me, "You are like this. Your deduction is too fast and it comes naturally so you don't notice it." He then asked, "What do you think of the number sequence?". I replied, "For me any number sequence is a geometric form." He kept asking,"What about geometric shapes?" I said, "The opposite. In my head there are no geometric forms anymore, and everything becomes numbers instead like if you look at the photo in the newspaper very closely."



llama_print_timings:        load time =   779.13 ms
llama_print_timings:      sample time =    45.87 ms /   103 runs   (    0.45 ms per token,  2245.72 tokens per second)
llama_print_timings: prompt eval time =   779.10 ms /   151 tokens (    5.16 ms per token,   193.81 tokens per second)
llama_print_timings:        eval time =  8837.15 ms /   102 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  9822.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The teacher said you have a great talent in math. But, but... He talked for a long time and seemed to be dealing with an difficult matter as I don't know how to handle it. But you are not willing to cherish your gift of mathematics, he said. Thinking for a while, he gave up and said: "Okay, then you should attend the next round of math competition." And I did take part in that competition and keep on competing until the Budapest Olympic Math Competition. After coming back, I was accepted by a top university's department of mathematics without taking an exam…



llama_print_timings:        load time =   795.28 ms
llama_print_timings:      sample time =    57.16 ms /   128 runs   (    0.45 ms per token,  2239.17 tokens per second)
llama_print_timings: prompt eval time =   795.24 ms /   169 tokens (    4.71 ms per token,   212.51 tokens per second)
llama_print_timings:        eval time = 10974.68 ms /   127 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 12026.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I said these things you don't mind, ah, actually to say the后面的事儿, these are still must say. That high school teacher is right, I won't cherish myself, I have been college and masters, but somehow came through. Once into society, it turned out that I am a commoner, not just mathematics but anything else. In the complicated human relationship, I am in semi-sleeping state, then become more and less; later to university teaching, also can't hold on. Teaching on the blackboard, write a sentence, "easily provable", students underneath need to ponder for a while, later get eliminated. At this point I am tired of everything, just lucky enough to leave with simple luggage to a temple in the southern deep mountains.



llama_print_timings:        load time =   821.41 ms
llama_print_timings:      sample time =    76.81 ms /   172 runs   (    0.45 ms per token,  2239.41 tokens per second)
llama_print_timings: prompt eval time =   821.37 ms /   181 tokens (    4.54 ms per token,   220.36 tokens per second)
llama_print_timings:        eval time = 15014.68 ms /   171 runs   (   87.81 ms per token,    11.39 tokens per second)
llama_print_timings:       total time = 16183.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, I'm not going to be a monk. I just want to find a truly peaceful place to stay for a while. The abbot of this temple is an old friend of my father. He's very learned but in his later years has become a hermit and as per my father says, only one path remains for him at this stage. The abbot took me in and I told him that I wanted to find a peaceful way to end this life instead of going into retreat like he did. The abbot said the temple was not really peaceful. It's a tourist spot and there are many offerings brought here. He added that the monks at the temple had more attachments than me, who is just an empty cloud, as far as fame goes. I found this suggestion quite inspiring. Later on, I thought that this wasn't really a Buddhist concept but it sounded more like a theory from modern physics instead. The abbot also said that he wouldn't discuss Buddhism with me, as did my middle school teacher. Reason: to him I am no use.



llama_print_timings:        load time =   954.12 ms
llama_print_timings:      sample time =   117.89 ms /   235 runs   (    0.50 ms per token,  1993.43 tokens per second)
llama_print_timings: prompt eval time =   954.08 ms /   252 tokens (    3.79 ms per token,   264.13 tokens per second)
llama_print_timings:        eval time = 20950.92 ms /   234 runs   (   89.53 ms per token,    11.17 tokens per second)
llama_print_timings:       total time = 22440.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On the first night, I couldn't sleep in the hut of the temple. It was surprisingly uncomfortable and my bedding became damp from the mountain mist. In order to lull myself into sleep, I tried to follow the instructions of the head monk by creating “empty” space in my consciousness. The first empty space that I created was a vast and void universe without light or anything else at all. Soon however, I felt this empty universe couldn't make me feel peaceful and I found myself feeling restless with an urgent desire to catch something.



llama_print_timings:        load time =   794.85 ms
llama_print_timings:      sample time =    55.98 ms /   124 runs   (    0.45 ms per token,  2215.12 tokens per second)
llama_print_timings: prompt eval time =   794.78 ms /   159 tokens (    5.00 ms per token,   200.05 tokens per second)
llama_print_timings:        eval time = 10544.88 ms /   123 runs   (   85.73 ms per token,    11.66 tokens per second)
llama_print_timings:       total time = 11590.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So I created a small, massless sphere for myself in this boundless space. It hung in the center of “nothing”, that is to say, any part of space is centered at the same level, and nothing affects it and neither does it affect anything. The sphere just hangs there without moving or changing in any way; it's a perfect interpretation of death.



llama_print_timings:        load time =   694.63 ms
llama_print_timings:      sample time =    35.11 ms /    78 runs   (    0.45 ms per token,  2221.78 tokens per second)
llama_print_timings: prompt eval time =   694.60 ms /   117 tokens (    5.94 ms per token,   168.44 tokens per second)
llama_print_timings:        eval time =  6573.15 ms /    77 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  7422.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I created a second ball of the same size and mass as the original, with the same completely reflective surface. They are in perfect mirror-image and can only see each other but nothing else in the universe. But the situation is not very different: If there is no initial motion on either side (first push), they will quickly be attracted to one another and hang still together as a dead sign, or remain still if there's no initial motion at all. If there is an initial motion and collisions are avoided, they will revolve around each other under their respective gravity, whether you start with any initial motion. Eventually the revolution will stop and remain fixed for eternity, marking a dead sign.



llama_print_timings:        load time =   762.15 ms
llama_print_timings:      sample time =    68.05 ms /   148 runs   (    0.46 ms per token,  2174.78 tokens per second)
llama_print_timings: prompt eval time =   762.10 ms /   153 tokens (    4.98 ms per token,   200.76 tokens per second)
llama_print_timings:        eval time = 12636.47 ms /   147 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time = 13705.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I introduced a third sphere, and the situation changed dramatically. As I have mentioned before, any figure in my consciousness is digitized, and the one-ball, two-ball and non-sphere universes have presented themselves as a few or several descriptions of their equations like leaves in the autumn. But this third sphere was a dragon with "Empty" eyes, which suddenly made the three spheres complicated, with three spheres with given initial motions conducting complex movements in space and described equations like rain. I just fell into sleep because of this dance, and the dance continued even without any rule, as if it were infinite. However, in my consciousness, this dance is rhythmic, although it has an endless period. This fascinated me, and I want to describe only a part or all of this period.



llama_print_timings:        load time =   844.25 ms
llama_print_timings:      sample time =    79.55 ms /   177 runs   (    0.45 ms per token,  2225.10 tokens per second)
llama_print_timings: prompt eval time =   844.21 ms /   199 tokens (    4.24 ms per token,   235.72 tokens per second)
llama_print_timings:        eval time = 15611.82 ms /   176 runs   (   88.70 ms per token,    11.27 tokens per second)
llama_print_timings:       total time = 16810.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The second day, I kept thinking about the three balls dancing in “Empty” with all my power. My thoughts never turned like this before and there was a monk who asked elder if something was wrong with my mind. The elder smiled and said nothing is wrong; he has found emptiness. Yes, I have found emptiness and now I can hide myself in the bustling crowd just as if I am standing still, even in my interior I feel peaceful calm. For the first time, I enjoyed the fun of mathematics. The physics principles of the three-body problem are very simple; it's a mathematical problem actually. At this moment, I felt like a free-wheeling wanderer who suddenly experienced love after wandering for ages.



llama_print_timings:        load time =   923.73 ms
llama_print_timings:      sample time =    71.54 ms /   160 runs   (    0.45 ms per token,  2236.57 tokens per second)
llama_print_timings: prompt eval time =   923.70 ms /   227 tokens (    4.07 ms per token,   245.75 tokens per second)
llama_print_timings:        eval time = 13818.66 ms /   159 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 15060.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You don't know Poincaré?（Note:The French mathematician Poincaré, who proved that the three-body problem is mathematically unsolvable and created a new mathematical method for solving differential equation problems based on it.”）


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

At that time, I didn't know that Poncelet is wrong in studying mathematics, but I don't admire the master. Neither do I want to be a master. So I wasn't aware of this. But even if I knew Poncelet at that time, I would still pursue my research on the three-body problem. The whole world thought that Poncelet proved the three-body problem is unsolvable, but I think it may be a misunderstanding. He just proved that the initial conditions of the system are sensitive and that the system is an unsolvable one, but sensitivity does not mean complete uncertainty. Only this kind of certainty contains more diverse forms, and now what we need to do is find a new algorithm. At that time, I immediately thought of something: Have you heard of Monte Carlo method? Oh, it's an algorithm for calculating the area of irregular shapes on computer software by using lots of small balls randomly hitting the shaped object and not hit again. When the ball hits a certain amount of times, all parts of the shape wil


llama_print_timings:        load time =   917.54 ms
llama_print_timings:      sample time =   118.60 ms /   266 runs   (    0.45 ms per token,  2242.80 tokens per second)
llama_print_timings: prompt eval time =   917.47 ms /   235 tokens (    3.90 ms per token,   256.14 tokens per second)
llama_print_timings:        eval time = 23457.44 ms /   265 runs   (   88.52 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 24909.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This method is simple, but it demonstrates a thinking approach in mathematics of using brute force against exact logic. It's an idea to use the quantity to obtain quality of computation. This is my strategy for solving the problem of three-body problem at any time slice. In this discontinuity surface, there are an infinite combination of each ball's movement vector, and I consider every kind of combination to be like a creature; the key is to determine which kind of combination is 'healthy' and 'beneficial', which one is 'injurious' and 'detrimental', so that only those with survival advantages can exist, while those with survival difficulties will vanish in the process of computation. In the end, only the prediction about movement at the next time slice is correct.



llama_print_timings:        load time =   833.06 ms
llama_print_timings:      sample time =    77.24 ms /   170 runs   (    0.45 ms per token,  2200.90 tokens per second)
llama_print_timings: prompt eval time =   833.02 ms /   175 tokens (    4.76 ms per token,   210.08 tokens per second)
llama_print_timings:        eval time = 14820.89 ms /   169 runs   (   87.70 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 15999.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The term "evolutionary algorithm" was coined by John Holland, who developed a model to explain how an organism evolves. This model is based on the principle that natural selection takes place within populations of different traits.



llama_print_timings:        load time =   580.56 ms
llama_print_timings:      sample time =    21.39 ms /    48 runs   (    0.45 ms per token,  2244.25 tokens per second)
llama_print_timings: prompt eval time =   580.53 ms /    40 tokens (   14.51 ms per token,    68.90 tokens per second)
llama_print_timings:        eval time =  3942.99 ms /    47 runs   (   83.89 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  4617.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You are welcome, " said Dash with a nod.



llama_print_timings:        load time =   587.41 ms
llama_print_timings:      sample time =     6.29 ms /    14 runs   (    0.45 ms per token,  2223.99 tokens per second)
llama_print_timings: prompt eval time =   587.36 ms /    48 tokens (   12.24 ms per token,    81.72 tokens per second)
llama_print_timings:        eval time =  1144.84 ms /    13 runs   (   88.06 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  1759.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, I learned about this term later. The characteristics of this algorithm are that it is massive computation and the computational amount is extremely huge for the three-body problem, which current computers are not able to do. When I was in the temple at the time, I didn't have a calculator but only an empty account book from the accounts office and a pencil: I started building mathematical models on paper, which required a lot of work. Soon, over a dozen empty account books were used up, making the accountant angry. However, under the demand of the abbot, they still gave me more paper and pens. After that, I would write down the results of my calculations on pillows under my head and throw away the discarded ones in the temple's censer.



llama_print_timings:        load time =   775.60 ms
llama_print_timings:      sample time =    73.05 ms /   164 runs   (    0.45 ms per token,  2244.95 tokens per second)
llama_print_timings: prompt eval time =   775.56 ms /   161 tokens (    4.82 ms per token,   207.59 tokens per second)
llama_print_timings:        eval time = 14193.53 ms /   163 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 15300.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This evening, a young woman suddenly rushed into my room. This was the first time any woman had entered this house. She held in her hand several charred pieces of paper, which were my scraps.



llama_print_timings:        load time =   618.33 ms
llama_print_timings:      sample time =    19.53 ms /    44 runs   (    0.44 ms per token,  2252.60 tokens per second)
llama_print_timings: prompt eval time =   618.29 ms /    68 tokens (    9.09 ms per token,   109.98 tokens per second)
llama_print_timings:        eval time =  3789.53 ms /    43 runs   (   88.13 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  4495.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They said this is yours, are you studying the Three Body Problem?



llama_print_timings:        load time =   609.23 ms
llama_print_timings:      sample time =     6.65 ms /    15 runs   (    0.44 ms per token,  2257.00 tokens per second)
llama_print_timings: prompt eval time =   609.19 ms /    59 tokens (   10.33 ms per token,    96.85 tokens per second)
llama_print_timings:        eval time =  1198.87 ms /    14 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  1836.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That person really shocked me. I used unusual mathematical methods and made a lot of jumps in my deduction, but she could detect the research object from those discarded calculations papers! That was just astonishing. She also shared my keen interest in the Three-Body problem. However, I had no good impression of those tourists and pilgrims; they didn't know why they were there and only took pictures all the time; moreover, most of them seemed to be in a kind of mental suppression due to poverty. But she was different. She had a scholarly atmosphere which made me realize that she is from the same group of Japanese tourists.



llama_print_timings:        load time =   766.97 ms
llama_print_timings:      sample time =    61.80 ms /   138 runs   (    0.45 ms per token,  2233.15 tokens per second)
llama_print_timings: prompt eval time =   766.92 ms /   156 tokens (    4.92 ms per token,   203.41 tokens per second)
llama_print_timings:        eval time = 11876.50 ms /   137 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 12918.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She added, “Your ideas are so clever. We’ve been looking for ways to reduce the difficulty of solving the Three-Body Problem into a large computational workload. But this would need very powerful computers.”



llama_print_timings:        load time =   630.62 ms
llama_print_timings:      sample time =    20.06 ms /    44 runs   (    0.46 ms per token,  2192.98 tokens per second)
llama_print_timings: prompt eval time =   630.59 ms /    68 tokens (    9.27 ms per token,   107.84 tokens per second)
llama_print_timings:        eval time =  3621.52 ms /    43 runs   (   84.22 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  4340.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Even if you put all the world's computers together, it still won't be enough.” I told her the truth.



llama_print_timings:        load time =   578.09 ms
llama_print_timings:      sample time =    12.94 ms /    29 runs   (    0.45 ms per token,  2240.77 tokens per second)
llama_print_timings: prompt eval time =   578.05 ms /    46 tokens (   12.57 ms per token,    79.58 tokens per second)
llama_print_timings:        eval time =  2380.98 ms /    28 runs   (   85.04 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  3015.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But you need a good research environment, and there's nothing here. I can give you an opportunity to use a giant computer, and I'll send you a small one for tomorrow. We will go down together early in the morning.



llama_print_timings:        load time =   626.82 ms
llama_print_timings:      sample time =    22.61 ms /    51 runs   (    0.44 ms per token,  2255.44 tokens per second)
llama_print_timings: prompt eval time =   626.77 ms /    69 tokens (    9.08 ms per token,   110.09 tokens per second)
llama_print_timings:        eval time =  4222.62 ms /    50 runs   (   84.45 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  4949.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She was Xin Yufei, just as she is now, simple and domineering. But she was more attractive than I expected. Since I'm usually very cold-blooded, not interested in women at all, but she is so special that her most unladylike womanliness attracted me. After all, I am just a lazy person who quickly agreed to her.



llama_print_timings:        load time =   657.93 ms
llama_print_timings:      sample time =    37.43 ms /    83 runs   (    0.45 ms per token,  2217.41 tokens per second)
llama_print_timings: prompt eval time =   657.89 ms /    95 tokens (    6.93 ms per token,   144.40 tokens per second)
llama_print_timings:        eval time =  7026.99 ms /    82 runs   (   85.70 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  7851.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the middle of the night, I couldn't sleep and went to the temple. From a distance, I saw Xin Yufei's figure in the dimly lit hall. She was offering incense before a Buddha statue, and her movements were very ceremonial. When I got close enough to hear what she was praying, I heard her whisper:



llama_print_timings:        load time =   675.33 ms
llama_print_timings:      sample time =    35.99 ms /    79 runs   (    0.46 ms per token,  2194.81 tokens per second)
llama_print_timings: prompt eval time =   675.29 ms /    99 tokens (    6.82 ms per token,   146.60 tokens per second)
llama_print_timings:        eval time =  6578.64 ms /    78 runs   (   84.34 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  7420.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I thought I heard her wrongly, but she recited it again: “Fuxu baohuo womian kaihe. "



llama_print_timings:        load time =   611.39 ms
llama_print_timings:      sample time =    14.08 ms /    31 runs   (    0.45 ms per token,  2201.24 tokens per second)
llama_print_timings: prompt eval time =   611.36 ms /    63 tokens (    9.70 ms per token,   103.05 tokens per second)
llama_print_timings:        eval time =  2632.00 ms /    30 runs   (   87.73 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  3304.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know any religion and I am not interested, but it is hard for me to imagine a more absurd prayer than this one. I simply blurted out: "What are you saying?!"



llama_print_timings:        load time =   609.08 ms
llama_print_timings:      sample time =    19.81 ms /    45 runs   (    0.44 ms per token,  2272.04 tokens per second)
llama_print_timings: prompt eval time =   609.04 ms /    61 tokens (    9.98 ms per token,   100.16 tokens per second)
llama_print_timings:        eval time =  3764.29 ms /    44 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4462.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She didn't pay attention to my existence at all, still closed her eyes and folded her hands, as if watching her prayers ascending to the Buddha. After a while, she opened her eyes and turned toward me.



llama_print_timings:        load time =   633.18 ms
llama_print_timings:      sample time =    23.56 ms /    50 runs   (    0.47 ms per token,  2122.15 tokens per second)
llama_print_timings: prompt eval time =   633.14 ms /    78 tokens (    8.12 ms per token,   123.20 tokens per second)
llama_print_timings:        eval time =  4212.16 ms /    49 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  4953.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She said, "Go to sleep. We'll leave early tomorrow." She didn't even look at me.



llama_print_timings:        load time =   588.60 ms
llama_print_timings:      sample time =    11.72 ms /    26 runs   (    0.45 ms per token,  2218.24 tokens per second)
llama_print_timings: prompt eval time =   588.56 ms /    46 tokens (   12.79 ms per token,    78.16 tokens per second)
llama_print_timings:        eval time =  2136.97 ms /    25 runs   (   85.48 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  2777.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I asked, “Is that the term ‘my self’ that you mentioned?”



llama_print_timings:        load time =   585.54 ms
llama_print_timings:      sample time =     7.54 ms /    17 runs   (    0.44 ms per token,  2255.54 tokens per second)
llama_print_timings: prompt eval time =   585.47 ms /    47 tokens (   12.46 ms per token,    80.28 tokens per second)
llama_print_timings:        eval time =  1388.82 ms /    16 runs   (   86.80 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  2008.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Not there.



llama_print_timings:        load time =   587.57 ms
llama_print_timings:      sample time =     1.80 ms /     4 runs   (    0.45 ms per token,  2223.46 tokens per second)
llama_print_timings: prompt eval time =   587.53 ms /    33 tokens (   17.80 ms per token,    56.17 tokens per second)
llama_print_timings:        eval time =   261.38 ms /     3 runs   (   87.13 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =   856.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Than...



llama_print_timings:        load time =   568.49 ms
llama_print_timings:      sample time =     1.75 ms /     4 runs   (    0.44 ms per token,  2283.11 tokens per second)
llama_print_timings: prompt eval time =   568.45 ms /    34 tokens (   16.72 ms per token,    59.81 tokens per second)
llama_print_timings:        eval time =   222.03 ms /     3 runs   (   74.01 ms per token,    13.51 tokens per second)
llama_print_timings:       total time =   797.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shen Yufei remained silent and hastily left, without waiting for me to ask her any more questions. I repeated the prayer in my head repeatedly, feeling eerier each time, so I hurried to the eldest's residence and knocked on his door.



llama_print_timings:        load time =   654.33 ms
llama_print_timings:      sample time =    25.61 ms /    56 runs   (    0.46 ms per token,  2186.39 tokens per second)
llama_print_timings: prompt eval time =   654.28 ms /    88 tokens (    7.44 ms per token,   134.50 tokens per second)
llama_print_timings:        eval time =  4929.45 ms /    55 runs   (   89.63 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  5699.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If someone prays to the Buddha for another person, what is this?



llama_print_timings:        load time =   631.16 ms
llama_print_timings:      sample time =     8.00 ms /    18 runs   (    0.44 ms per token,  2249.16 tokens per second)
llama_print_timings: prompt eval time =   631.04 ms /    59 tokens (   10.70 ms per token,    93.50 tokens per second)
llama_print_timings:        eval time =  1449.27 ms /    17 runs   (   85.25 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  2115.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Long-bearded old man silently looked at the book in his hand, but apparently was not reading it. Instead, he was thinking of what I had said and then asked me to go away for a moment so that he could think. I turned around and went out quietly, knowing this is unusual. The elderly man was learned, and he could immediately give an answer to almost all questions about religion, history, and culture. After waiting for a little while outside the door, the elderly man called me back.



llama_print_timings:        load time =   683.86 ms
llama_print_timings:      sample time =    67.93 ms /   107 runs   (    0.63 ms per token,  1575.20 tokens per second)
llama_print_timings: prompt eval time =   683.81 ms /   113 tokens (    6.05 ms per token,   165.25 tokens per second)
llama_print_timings:        eval time =  9808.02 ms /   106 runs   (   92.53 ms per token,    10.81 tokens per second)
llama_print_timings:       total time = 10797.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I feel there is only one possibility.



llama_print_timings:        load time =   572.54 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2248.88 tokens per second)
llama_print_timings: prompt eval time =   572.51 ms /    42 tokens (   13.63 ms per token,    73.36 tokens per second)
llama_print_timings:        eval time =   708.05 ms /     8 runs   (   88.51 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  1298.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What? What could it be?” “Is this a religion where the deity wants his believers to pray to other religions’ gods to save them?”



llama_print_timings:        load time =   589.48 ms
llama_print_timings:      sample time =    16.10 ms /    35 runs   (    0.46 ms per token,  2174.45 tokens per second)
llama_print_timings: prompt eval time =   589.44 ms /    56 tokens (   10.53 ms per token,    95.01 tokens per second)
llama_print_timings:        eval time =  2956.76 ms /    34 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3617.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Her lord is a real being.



llama_print_timings:        load time =   571.22 ms
llama_print_timings:      sample time =     3.55 ms /     8 runs   (    0.44 ms per token,  2254.79 tokens per second)
llama_print_timings: prompt eval time =   571.17 ms /    39 tokens (   14.65 ms per token,    68.28 tokens per second)
llama_print_timings:        eval time =   594.11 ms /     7 runs   (   84.87 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1180.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm a little confused by your words: "So, Buddha doesn't exist?" I immediately realized my mistake and apologized.



llama_print_timings:        load time =   612.59 ms
llama_print_timings:      sample time =    14.00 ms /    31 runs   (    0.45 ms per token,  2213.81 tokens per second)
llama_print_timings: prompt eval time =   612.55 ms /    57 tokens (   10.75 ms per token,    93.05 tokens per second)
llama_print_timings:        eval time =  2594.78 ms /    30 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3271.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Elder shrugged his hands and said, "I have already told you that I cannot talk about Buddhism with you. The existence of the Buddha is beyond your understanding; on the other hand, what she refers to as 'main', exists in a manner that you can understand. As for this matter, I am unable to say anything more. Just listen to my advice and do not follow her."



llama_print_timings:        load time =   674.10 ms
llama_print_timings:      sample time =    38.58 ms /    87 runs   (    0.44 ms per token,  2255.29 tokens per second)
llama_print_timings: prompt eval time =   674.05 ms /    90 tokens (    7.49 ms per token,   133.52 tokens per second)
llama_print_timings:        eval time =  7337.70 ms /    86 runs   (   85.32 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  8185.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why?



llama_print_timings:        load time =   572.25 ms
llama_print_timings:      sample time =     1.34 ms /     3 runs   (    0.45 ms per token,  2240.48 tokens per second)
llama_print_timings: prompt eval time =   572.21 ms /    33 tokens (   17.34 ms per token,    57.67 tokens per second)
llama_print_timings:        eval time =   164.88 ms /     2 runs   (   82.44 ms per token,    12.13 tokens per second)
llama_print_timings:       total time =   742.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I just feel like there are something about her that I can't imagine.



llama_print_timings:        load time =   582.39 ms
llama_print_timings:      sample time =     7.68 ms /    17 runs   (    0.45 ms per token,  2212.97 tokens per second)
llama_print_timings: prompt eval time =   582.35 ms /    46 tokens (   12.66 ms per token,    78.99 tokens per second)
llama_print_timings:        eval time =  1357.65 ms /    16 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  1974.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As I walked out of the temple and made my way to my residence, the night was full of a moon that looked at me with a bizarre silver eye. The light from the moon was chilly, as if it were staring at me in horror.



llama_print_timings:        load time =   626.93 ms
llama_print_timings:      sample time =    25.13 ms /    55 runs   (    0.46 ms per token,  2188.45 tokens per second)
llama_print_timings: prompt eval time =   626.88 ms /    77 tokens (    8.14 ms per token,   122.83 tokens per second)
llama_print_timings:        eval time =  4599.35 ms /    54 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  5339.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The next day, I still went out with Xin Yufen — but I never expected the next few years to be like a dream. Xin Yufen kept her promise and had made enough money. We married each other simply because it was more convenient for us to live together instead of having too many expectations in our lives. In those peaceful years, my days can be summed up with just one word: quiet. At the villa where we were living, I had everything I needed to focus on my research on the trilogy problem. Xin Yufen never interfered with my life at all. She didn't mind me bringing a woman home as long as she could learn about my research. We only exchanged words about this problem every day.



llama_print_timings:        load time =   933.84 ms
llama_print_timings:      sample time =    96.69 ms /   158 runs   (    0.61 ms per token,  1634.16 tokens per second)
llama_print_timings: prompt eval time =   933.81 ms /   237 tokens (    3.94 ms per token,   253.80 tokens per second)
llama_print_timings:        eval time = 14705.51 ms /   157 runs   (   93.67 ms per token,    10.68 tokens per second)
llama_print_timings:       total time = 16078.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Do you know what else Xin Yufei does?" asked Dashisi.



llama_print_timings:        load time =   588.53 ms
llama_print_timings:      sample time =     8.88 ms /    19 runs   (    0.47 ms per token,  2140.60 tokens per second)
llama_print_timings: prompt eval time =   588.49 ms /    46 tokens (   12.79 ms per token,    78.17 tokens per second)
llama_print_timings:        eval time =  1532.54 ms /    18 runs   (   85.14 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2159.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Just that ‘scientific boundary’, she is always busy with it and people come and go in her home every day.



llama_print_timings:        load time =   595.33 ms
llama_print_timings:      sample time =    12.28 ms /    27 runs   (    0.45 ms per token,  2198.16 tokens per second)
llama_print_timings: prompt eval time =   595.29 ms /    53 tokens (   11.23 ms per token,    89.03 tokens per second)
llama_print_timings:        eval time =  2216.66 ms /    26 runs   (   85.26 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  2868.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Did she invite you to join the club?”



llama_print_timings:        load time =   573.99 ms
llama_print_timings:      sample time =     5.27 ms /    12 runs   (    0.44 ms per token,  2275.31 tokens per second)
llama_print_timings: prompt eval time =   573.95 ms /    39 tokens (   14.72 ms per token,    67.95 tokens per second)
llama_print_timings:        eval time =   943.83 ms /    11 runs   (   85.80 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1541.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

From never, she even did not talk to me about these things, and I do not care about them. I am just like this person, who is unwilling to concern more than things. She deeply knows this, saying that I am an inadequate person with no mission and would be disturbed by such things, where they are not suitable for my research.



llama_print_timings:        load time =   631.91 ms
llama_print_timings:      sample time =    33.56 ms /    76 runs   (    0.44 ms per token,  2264.47 tokens per second)
llama_print_timings: prompt eval time =   631.88 ms /    80 tokens (    7.90 ms per token,   126.61 tokens per second)
llama_print_timings:        eval time =  6447.06 ms /    75 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  7229.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Any progress on the Three-Body research?" asked Wang Miao.



llama_print_timings:        load time =   580.47 ms
llama_print_timings:      sample time =     7.11 ms /    16 runs   (    0.44 ms per token,  2250.04 tokens per second)
llama_print_timings: prompt eval time =   580.43 ms /    44 tokens (   13.19 ms per token,    75.81 tokens per second)
llama_print_timings:        eval time =  1284.53 ms /    15 runs   (   85.64 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  1898.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As for the general situation of this research field, progress is breakthrough-like. In recent years, Richard Montgomery and Santa Cruz from Paris 7th University, as well as researchers at France's Institutions des Hautes Études Numériques (IHÉN), have found a possible stable configuration for the three-body problem using an algorithm called "approximation method": under appropriate initial conditions, the trajectory of the three bodies will form an 8 shape. Later, people were fascinated by finding this special stable state and only found about three or four such states so far. In fact, I've already found more than one hundred types of stable states using evolutionary algorithms, which would make a postmodern art exhibition if I put them together. But that is not my goal; the true solution to the three-body problem is to establish a mathematical model that can predict all movement states of the system in the future with just knowing its initial motion vector at any time slice. That is w


llama_print_timings:        load time =   910.67 ms
llama_print_timings:      sample time =   129.78 ms /   222 runs   (    0.58 ms per token,  1710.55 tokens per second)
llama_print_timings: prompt eval time =   910.63 ms /   233 tokens (    3.91 ms per token,   255.87 tokens per second)
llama_print_timings:        eval time = 20343.74 ms /   221 runs   (   92.05 ms per token,    10.86 tokens per second)
llama_print_timings:       total time = 21841.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, my calm life ended yesterday as I encountered some problems.



llama_print_timings:        load time =   586.36 ms
llama_print_timings:      sample time =     6.66 ms /    14 runs   (    0.48 ms per token,  2102.73 tokens per second)
llama_print_timings: prompt eval time =   586.33 ms /    42 tokens (   13.96 ms per token,    71.63 tokens per second)
llama_print_timings:        eval time =  1133.33 ms /    13 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  1749.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is this the case you want to report?” asked Dashizhi.



llama_print_timings:        load time =   585.40 ms
llama_print_timings:      sample time =     7.45 ms /    17 runs   (    0.44 ms per token,  2282.19 tokens per second)
llama_print_timings: prompt eval time =   585.38 ms /    42 tokens (   13.94 ms per token,    71.75 tokens per second)
llama_print_timings:        eval time =  1385.82 ms /    16 runs   (   86.61 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  2003.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, a man called yesterday and said that if I didn’t stop my research on the Three-Body Problem immediately, he would kill me.”



llama_print_timings:        load time =   586.43 ms
llama_print_timings:      sample time =    14.84 ms /    32 runs   (    0.46 ms per token,  2155.75 tokens per second)
llama_print_timings: prompt eval time =   586.39 ms /    53 tokens (   11.06 ms per token,    90.38 tokens per second)
llama_print_timings:        eval time =  2654.14 ms /    31 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3305.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who is that person?



llama_print_timings:        load time =   573.31 ms
llama_print_timings:      sample time =     2.69 ms /     6 runs   (    0.45 ms per token,  2232.14 tokens per second)
llama_print_timings: prompt eval time =   573.27 ms /    35 tokens (   16.38 ms per token,    61.05 tokens per second)
llama_print_timings:        eval time =   419.96 ms /     5 runs   (   83.99 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1004.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know.



llama_print_timings:        load time =   571.76 ms
llama_print_timings:      sample time =     3.12 ms /     7 runs   (    0.45 ms per token,  2244.31 tokens per second)
llama_print_timings: prompt eval time =   571.73 ms /    33 tokens (   17.33 ms per token,    57.72 tokens per second)
llama_print_timings:        eval time =   506.68 ms /     6 runs   (   84.45 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1091.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What's your phone number?”



llama_print_timings:        load time =   566.54 ms
llama_print_timings:      sample time =     4.01 ms /     9 runs   (    0.45 ms per token,  2243.27 tokens per second)
llama_print_timings: prompt eval time =   566.52 ms /    33 tokens (   17.17 ms per token,    58.25 tokens per second)
llama_print_timings:        eval time =   675.58 ms /     8 runs   (   84.45 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1259.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know, my phone doesn't have an incoming call indicator.



llama_print_timings:        load time =   585.60 ms
llama_print_timings:      sample time =     7.99 ms /    18 runs   (    0.44 ms per token,  2253.66 tokens per second)
llama_print_timings: prompt eval time =   585.56 ms /    41 tokens (   14.28 ms per token,    70.02 tokens per second)
llama_print_timings:        eval time =  1414.23 ms /    17 runs   (   83.19 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  2036.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Other matters?



llama_print_timings:        load time =   571.93 ms
llama_print_timings:      sample time =     1.78 ms /     4 runs   (    0.44 ms per token,  2253.52 tokens per second)
llama_print_timings: prompt eval time =   571.89 ms /    36 tokens (   15.89 ms per token,    62.95 tokens per second)
llama_print_timings:        eval time =   235.08 ms /     3 runs   (   78.36 ms per token,    12.76 tokens per second)
llama_print_timings:       total time =   814.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know.



llama_print_timings:        load time =   564.38 ms
llama_print_timings:      sample time =     3.07 ms /     7 runs   (    0.44 ms per token,  2279.39 tokens per second)
llama_print_timings: prompt eval time =   564.36 ms /    33 tokens (   17.10 ms per token,    58.47 tokens per second)
llama_print_timings:        eval time =   510.34 ms /     6 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  1087.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Laughing with a broad smile, he said, "After all that, just this sentence and a few others?"



llama_print_timings:        load time =   599.01 ms
llama_print_timings:      sample time =    11.84 ms /    25 runs   (    0.47 ms per token,  2112.20 tokens per second)
llama_print_timings: prompt eval time =   598.97 ms /    58 tokens (   10.33 ms per token,    96.83 tokens per second)
llama_print_timings:        eval time =  2002.09 ms /    24 runs   (   83.42 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  2655.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I didn't say anything, but if this is all you understand I won't come here anymore. I am lazy after all. Late tonight - oh, yes, at the time it was midnight - I don't know if it was yesterday or today because I fell asleep and woke up confused about my face feeling a cold thing moving, but when I opened my eyes I saw Shen Yufei, which really frightened me.”



llama_print_timings:        load time =   676.94 ms
llama_print_timings:      sample time =    69.45 ms /    96 runs   (    0.72 ms per token,  1382.25 tokens per second)
llama_print_timings: prompt eval time =   676.90 ms /   107 tokens (    6.33 ms per token,   158.07 tokens per second)
llama_print_timings:        eval time =  8777.25 ms /    95 runs   (   92.39 ms per token,    10.82 tokens per second)
llama_print_timings:       total time =  9766.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What's the most horrible thing you saw in bed at midnight?



llama_print_timings:        load time =   584.77 ms
llama_print_timings:      sample time =     7.56 ms /    17 runs   (    0.44 ms per token,  2247.49 tokens per second)
llama_print_timings: prompt eval time =   584.75 ms /    40 tokens (   14.62 ms per token,    68.41 tokens per second)
llama_print_timings:        eval time =  1426.27 ms /    16 runs   (   89.14 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  2043.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She looked at me with a look I had never seen before. Outside the garden, the light was shining on her face, making her look like a ghost. She held something in her hands. It was a gun! She scraped my face with the end of it and said that I must continue the study of Three-Body Problem, or she would kill me too.



llama_print_timings:        load time =   655.38 ms
llama_print_timings:      sample time =    35.75 ms /    80 runs   (    0.45 ms per token,  2237.64 tokens per second)
llama_print_timings: prompt eval time =   655.33 ms /    89 tokens (    7.36 ms per token,   135.81 tokens per second)
llama_print_timings:        eval time =  6701.57 ms /    79 runs   (   84.83 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  7516.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, it’s starting to make sense.” Big History nodded with satisfaction.



llama_print_timings:        load time =   583.01 ms
llama_print_timings:      sample time =     8.51 ms /    19 runs   (    0.45 ms per token,  2232.93 tokens per second)
llama_print_timings: prompt eval time =   582.97 ms /    52 tokens (   11.21 ms per token,    89.20 tokens per second)
llama_print_timings:        eval time =  1529.05 ms /    18 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2149.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What does 'meaningful' mean? I have nowhere to go and I came to see you."



llama_print_timings:        load time =   578.73 ms
llama_print_timings:      sample time =    10.69 ms /    24 runs   (    0.45 ms per token,  2245.30 tokens per second)
llama_print_timings: prompt eval time =   578.69 ms /    47 tokens (   12.31 ms per token,    81.22 tokens per second)
llama_print_timings:        eval time =  2071.86 ms /    23 runs   (   90.08 ms per token,    11.10 tokens per second)
llama_print_timings:       total time =  2697.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Tell her what you said exactly as she told it to you.



llama_print_timings:        load time =   580.67 ms
llama_print_timings:      sample time =     7.11 ms /    15 runs   (    0.47 ms per token,  2109.70 tokens per second)
llama_print_timings: prompt eval time =   580.65 ms /    40 tokens (   14.52 ms per token,    68.89 tokens per second)
llama_print_timings:        eval time =  1175.69 ms /    14 runs   (   83.98 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1788.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“She said to me, ‘If the Three Body Problem is solved successfully, you will be a savior; if it stops now, you will be guilty of humanity’s death. If there is anyone who saves or destroys humanity, his good deeds and bad deeds would both be equal to yours.’”Chairman Dai spat out thick smoke and stared at Wei Cheng for a while before pulling out a notebook from the messy table and picking up a pen.



llama_print_timings:        load time =   710.95 ms
llama_print_timings:      sample time =    48.46 ms /   107 runs   (    0.45 ms per token,  2208.10 tokens per second)
llama_print_timings: prompt eval time =   710.90 ms /   120 tokens (    5.92 ms per token,   168.80 tokens per second)
llama_print_timings:        eval time =  9088.73 ms /   106 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time = 10019.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You said, "I don't want to do the transcript."  Repeat it once more.



llama_print_timings:        load time =   582.21 ms
llama_print_timings:      sample time =    10.45 ms /    23 runs   (    0.45 ms per token,  2202.01 tokens per second)
llama_print_timings: prompt eval time =   582.18 ms /    44 tokens (   13.23 ms per token,    75.58 tokens per second)
llama_print_timings:        eval time =  1899.11 ms /    22 runs   (   86.32 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  2527.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wei Cheng repeated what Wang Miao had said and asked, "Why exactly is it twice as much?"



llama_print_timings:        load time =   592.60 ms
llama_print_timings:      sample time =    10.76 ms /    24 runs   (    0.45 ms per token,  2230.90 tokens per second)
llama_print_timings: prompt eval time =   592.56 ms /    55 tokens (   10.77 ms per token,    92.82 tokens per second)
llama_print_timings:        eval time =  2005.48 ms /    23 runs   (   87.19 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2645.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It seems to be quite serious, " Wei Cheng blinked his eyes and said to Dashu. "The nurse who was on duty when I arrived apparently called for you."



llama_print_timings:        load time =   619.19 ms
llama_print_timings:      sample time =    18.33 ms /    41 runs   (    0.45 ms per token,  2237.01 tokens per second)
llama_print_timings: prompt eval time =   619.13 ms /    70 tokens (    8.84 ms per token,   113.06 tokens per second)
llama_print_timings:        eval time =  3409.68 ms /    40 runs   (   85.24 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  4111.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He nodded, “Another question: Do you think your wife's gun is real?” He couldn't answer, so he added, “Is it oiled?”



llama_print_timings:        load time =   616.11 ms
llama_print_timings:      sample time =    16.36 ms /    37 runs   (    0.44 ms per token,  2261.61 tokens per second)
llama_print_timings: prompt eval time =   616.06 ms /    68 tokens (    9.06 ms per token,   110.38 tokens per second)
llama_print_timings:        eval time =  3131.13 ms /    36 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3820.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, there is definitely oil smell!


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Okay, ” said the large Shi sitting at the desk as he jumped down, “it’s a chance to search illegally holding firearms. That makes a poor justification, but we can do the paperwork later. Let’s go right away!” He turned to Wang Meng and said, "You'll have to go along with us for consultation." Then he looked at I Hsu without speaking for a while before finally saying, “Xu Xin, you are a specialist in information management, but today you’re going to join our outing.”



llama_print_timings:        load time =   797.32 ms
llama_print_timings:      sample time =    57.39 ms /   127 runs   (    0.45 ms per token,  2213.04 tokens per second)
llama_print_timings: prompt eval time =   797.27 ms /   167 tokens (    4.77 ms per token,   209.46 tokens per second)
llama_print_timings:        eval time = 11021.97 ms /   126 runs   (   87.48 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 12080.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   574.07 ms
llama_print_timings:      sample time =     0.44 ms /     1 runs   (    0.44 ms per token,  2262.44 tokens per second)
llama_print_timings: prompt eval time =   574.02 ms /    36 tokens (   15.95 ms per token,    62.72 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   575.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Six people, including Dash and Xiao Xu, who were chosen to perform the search mission, got into two patrol cars and drove through the darkest hours before dawn on the outskirts of the city.



llama_print_timings:        load time =   640.44 ms
llama_print_timings:      sample time =    21.25 ms /    48 runs   (    0.44 ms per token,  2258.40 tokens per second)
llama_print_timings: prompt eval time =   640.40 ms /    84 tokens (    7.62 ms per token,   131.17 tokens per second)
llama_print_timings:        eval time =  3994.83 ms /    47 runs   (   85.00 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  4731.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as the car started, Xu Bingbing and Wang Moxue sat in the rear seats, Xu whispered to Wang, "Wang teacher, you are very popular in the Three Body Problem."



llama_print_timings:        load time =   615.66 ms
llama_print_timings:      sample time =    20.78 ms /    46 runs   (    0.45 ms per token,  2214.20 tokens per second)
llama_print_timings: prompt eval time =   615.64 ms /    70 tokens (    8.79 ms per token,   113.70 tokens per second)
llama_print_timings:        eval time =  3919.57 ms /    45 runs   (   87.10 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  4628.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yet another person mentioned Three-Body Problem, which excited Wang Miao. Suddenly he felt that the distance between him and the girl in police uniform had been shortened.



llama_print_timings:        load time =   606.13 ms
llama_print_timings:      sample time =    17.77 ms /    38 runs   (    0.47 ms per token,  2138.07 tokens per second)
llama_print_timings: prompt eval time =   606.10 ms /    59 tokens (   10.27 ms per token,    97.34 tokens per second)
llama_print_timings:        eval time =  3119.90 ms /    37 runs   (   84.32 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  3806.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do you play, too?



llama_print_timings:        load time =   573.96 ms
llama_print_timings:      sample time =     3.08 ms /     7 runs   (    0.44 ms per token,  2274.94 tokens per second)
llama_print_timings: prompt eval time =   573.92 ms /    34 tokens (   16.88 ms per token,    59.24 tokens per second)
llama_print_timings:        eval time =   516.79 ms /     6 runs   (   86.13 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  1104.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm in charge of monitoring and tracking it, a tough job.



llama_print_timings:        load time =   578.43 ms
llama_print_timings:      sample time =     7.53 ms /    17 runs   (    0.44 ms per token,  2258.54 tokens per second)
llama_print_timings: prompt eval time =   578.39 ms /    43 tokens (   13.45 ms per token,    74.34 tokens per second)
llama_print_timings:        eval time =  1363.65 ms /    16 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1975.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I can't wait to hear more about it.



llama_print_timings:        load time =   592.27 ms
llama_print_timings:      sample time =     5.32 ms /    12 runs   (    0.44 ms per token,  2255.64 tokens per second)
llama_print_timings: prompt eval time =   592.23 ms /    50 tokens (   11.84 ms per token,    84.43 tokens per second)
llama_print_timings:        eval time =   966.06 ms /    11 runs   (   87.82 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  1581.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Through the faint light outside the window, Wang Miao saw Xu Bingbing smile mysteriously.



llama_print_timings:        load time =   592.75 ms
llama_print_timings:      sample time =    10.63 ms /    24 runs   (    0.44 ms per token,  2257.97 tokens per second)
llama_print_timings: prompt eval time =   592.71 ms /    52 tokens (   11.40 ms per token,    87.73 tokens per second)
llama_print_timings:        eval time =  1912.46 ms /    23 runs   (   83.15 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  2553.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We also want to know. But its servers are located in a foreign country, and the system and firewall are very secure. It's hard to get into it.” 



llama_print_timings:        load time =   661.35 ms
llama_print_timings:      sample time =    17.30 ms /    37 runs   (    0.47 ms per token,  2138.23 tokens per second)
llama_print_timings: prompt eval time =   661.30 ms /    94 tokens (    7.04 ms per token,   142.14 tokens per second)
llama_print_timings:        eval time =  3086.53 ms /    36 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3827.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

There, is there any...  Wang Mo deliberately weighed his words. "It seems to be a sign of the supernatural."



llama_print_timings:        load time =   595.05 ms
llama_print_timings:      sample time =    13.23 ms /    29 runs   (    0.46 ms per token,  2191.66 tokens per second)
llama_print_timings: prompt eval time =   595.01 ms /    56 tokens (   10.63 ms per token,    94.12 tokens per second)
llama_print_timings:        eval time =  2376.44 ms /    28 runs   (   84.87 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3030.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We don't think so, many people have joined this game-programming event. It is widely held all over the world, and its development mode resembles Linux from several years ago. However, this time, we can be sure that they are using a very advanced development toolset. As for those information, no one knows where they came from, and it is really something … We still believe in the famous phrase of Professor Shu, everything must have been done by human hands.”



llama_print_timings:        load time =   694.85 ms
llama_print_timings:      sample time =    44.54 ms /   101 runs   (    0.44 ms per token,  2267.88 tokens per second)
llama_print_timings: prompt eval time =   694.81 ms /   120 tokens (    5.79 ms per token,   172.71 tokens per second)
llama_print_timings:        eval time =  8617.54 ms /   100 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  9515.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But the girl was still unpracticed. The last sentence made Wang Mo realize that she had been hiding a lot from herself. “That is what he really means?” Wang Mo looked at the car driving in front of them and said.



llama_print_timings:        load time =   622.50 ms
llama_print_timings:      sample time =    22.66 ms /    50 runs   (    0.45 ms per token,  2206.82 tokens per second)
llama_print_timings: prompt eval time =   622.46 ms /    70 tokens (    8.89 ms per token,   112.46 tokens per second)
llama_print_timings:        eval time =  4119.37 ms /    49 runs   (   84.07 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  4843.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   949.29 ms
llama_print_timings:      sample time =     0.94 ms /     2 runs   (    0.47 ms per token,  2129.93 tokens per second)
llama_print_timings: prompt eval time =   949.25 ms /   252 tokens (    3.77 ms per token,   265.47 tokens per second)
llama_print_timings:        eval time =    84.46 ms /     1 runs   (   84.46 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1037.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao came in, he happened to meet a male colleague and the police officer running out, heading into a room on the opposite side with its lights on but windows closed. She heard car engines starting outside. A male police officer started making phone calls while another female colleague stood nearby nervously. He quickly returned to his post, sliding his gun back into his belt, and told that male colleague who was holding a phone: “Black Santana with just one person in it; let them target the entrance of the fifth ring.” He looked around as if he had seen several holes on the wall and some shells scattered on the ground. He said, "The other guy shot five times, missed three times; she fired two shots, missed both." Then, sitting down with that male colleague to inspect the body. Xiao Xing remained far away from him, secretly looking at Weicheng who stood beside her. Da Shi also looked over and saw Xiao Xing and Weicheng.



llama_print_timings:        load time =   939.18 ms
llama_print_timings:      sample time =   126.59 ms /   214 runs   (    0.59 ms per token,  1690.56 tokens per second)
llama_print_timings: prompt eval time =   939.14 ms /   246 tokens (    3.82 ms per token,   261.94 tokens per second)
llama_print_timings:        eval time = 19747.18 ms /   213 runs   (   92.71 ms per token,    10.79 tokens per second)
llama_print_timings:       total time = 21264.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He was a little shocked and sad, but only to the extent of his nature, which hadn't been broken yet. In comparison with Wang Meng, he looked calmer.



llama_print_timings:        load time =   638.70 ms
llama_print_timings:      sample time =    17.22 ms /    39 runs   (    0.44 ms per token,  2264.41 tokens per second)
llama_print_timings: prompt eval time =   638.66 ms /    68 tokens (    9.39 ms per token,   106.47 tokens per second)
llama_print_timings:        eval time =  3251.80 ms /    38 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3967.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You seem to be indifferent. Perhaps the person who came here is going to kill you.



llama_print_timings:        load time =   589.46 ms
llama_print_timings:      sample time =     8.87 ms /    20 runs   (    0.44 ms per token,  2256.06 tokens per second)
llama_print_timings: prompt eval time =   589.42 ms /    50 tokens (   11.79 ms per token,    84.83 tokens per second)
llama_print_timings:        eval time =  1603.48 ms /    19 runs   (   84.39 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  2232.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wei Cheng actually smiled. "How can I do anything? At this point, I'm actually quite ignorant about her. Not only have I advised her to live simply several times in the past, but… thinking back to what the long-bearded elderly person told me that night."



llama_print_timings:        load time =   647.68 ms
llama_print_timings:      sample time =    28.83 ms /    64 runs   (    0.45 ms per token,  2220.14 tokens per second)
llama_print_timings: prompt eval time =   647.64 ms /    81 tokens (    8.00 ms per token,   125.07 tokens per second)
llama_print_timings:        eval time =  5376.78 ms /    63 runs   (   85.35 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  6156.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stand up, walk to Wei Cheng's front and light a cigarette,” Dashi said. “You must have something else you haven't told us?”



llama_print_timings:        load time =   605.86 ms
llama_print_timings:      sample time =    16.66 ms /    37 runs   (    0.45 ms per token,  2220.76 tokens per second)
llama_print_timings: prompt eval time =   605.81 ms /    57 tokens (   10.63 ms per token,    94.09 tokens per second)
llama_print_timings:        eval time =  3080.53 ms /    36 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3762.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Some things, I don't want to talk about.



llama_print_timings:        load time =   583.17 ms
llama_print_timings:      sample time =     5.80 ms /    13 runs   (    0.45 ms per token,  2239.83 tokens per second)
llama_print_timings: prompt eval time =   583.14 ms /    38 tokens (   15.35 ms per token,    65.16 tokens per second)
llama_print_timings:        eval time =  1006.05 ms /    12 runs   (   83.84 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1615.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You need to work harder now!”



llama_print_timings:        load time =   580.59 ms
llama_print_timings:      sample time =     4.10 ms /     9 runs   (    0.46 ms per token,  2195.12 tokens per second)
llama_print_timings: prompt eval time =   580.55 ms /    40 tokens (   14.51 ms per token,    68.90 tokens per second)
llama_print_timings:        eval time =   691.35 ms /     8 runs   (   86.42 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  1291.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Well, ” thought Wei Cheng, “she was quarreling with a man in the living room this afternoon. That man is Pan Han, who is famous as an environmentalist.”



llama_print_timings:        load time =   651.73 ms
llama_print_timings:      sample time =    18.42 ms /    40 runs   (    0.46 ms per token,  2171.08 tokens per second)
llama_print_timings: prompt eval time =   651.69 ms /    91 tokens (    7.16 ms per token,   139.64 tokens per second)
llama_print_timings:        eval time =  3357.78 ms /    39 runs   (   86.10 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  4093.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Say it in the original way.”



llama_print_timings:        load time =   571.89 ms
llama_print_timings:      sample time =     4.49 ms /    10 runs   (    0.45 ms per token,  2227.67 tokens per second)
llama_print_timings: prompt eval time =   571.85 ms /    37 tokens (   15.46 ms per token,    64.70 tokens per second)
llama_print_timings:        eval time =   791.32 ms /     9 runs   (   87.92 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  1382.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay. Pan Bans says: We people who appear to have come together on the surface are actually two enemies at war! Shen Yufei agrees, saying: Yes, you human beings resist us by using the power of God. Pan Bans replies: That is not entirely without reason; we need God's降临世界 and punish those who deserve to be punished, whereas you are blocking this from happening; so, we find ourselves in opposition and will never stop until you do! Shen Yufei says: Let those demons enter the organization? I think your director has gone blind! Pan Bans stops speaking loudly after that.



llama_print_timings:        load time =   829.90 ms
llama_print_timings:      sample time =   118.95 ms /   141 runs   (    0.84 ms per token,  1185.40 tokens per second)
llama_print_timings: prompt eval time =   829.86 ms /   188 tokens (    4.41 ms per token,   226.55 tokens per second)
llama_print_timings:        eval time = 13836.28 ms /   140 runs   (   98.83 ms per token,    10.12 tokens per second)
llama_print_timings:       total time = 15205.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who does his voice remind you of?



llama_print_timings:        load time =   582.69 ms
llama_print_timings:      sample time =     4.42 ms /    10 runs   (    0.44 ms per token,  2264.49 tokens per second)
llama_print_timings: prompt eval time =   582.65 ms /    42 tokens (   13.87 ms per token,    72.08 tokens per second)
llama_print_timings:        eval time =   759.85 ms /     9 runs   (   84.43 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1362.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know, I couldn't hear it clearly because the voice was very soft.



llama_print_timings:        load time =   590.73 ms
llama_print_timings:      sample time =     9.59 ms /    21 runs   (    0.46 ms per token,  2190.47 tokens per second)
llama_print_timings: prompt eval time =   590.69 ms /    48 tokens (   12.31 ms per token,    81.26 tokens per second)
llama_print_timings:        eval time =  1701.82 ms /    20 runs   (   85.09 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2336.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

And more police cars honked outside, and a group of police wearing white gloves and holding cameras walked up the stairs. Dashi made Wang Miao go back to rest, but Wang Miao found Wei Cheng in the room where there was a small machine.



llama_print_timings:        load time =   657.18 ms
llama_print_timings:      sample time =    26.15 ms /    58 runs   (    0.45 ms per token,  2217.97 tokens per second)
llama_print_timings: prompt eval time =   657.14 ms /    87 tokens (    7.55 ms per token,   132.39 tokens per second)
llama_print_timings:        eval time =  4928.11 ms /    57 runs   (   86.46 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  5704.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Could you please give me a summary of the model for the Three-Body Problem evolution algorithm? I want to introduce it in an event. This is kind of abrupt and if possible, just ignore it.



llama_print_timings:        load time =   613.12 ms
llama_print_timings:      sample time =    19.11 ms /    43 runs   (    0.44 ms per token,  2250.25 tokens per second)
llama_print_timings: prompt eval time =   613.07 ms /    65 tokens (    9.43 ms per token,   106.02 tokens per second)
llama_print_timings:        eval time =  3550.11 ms /    42 runs   (   84.53 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  4249.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wei Cheng hands a 3-inch CD to Wang Miao. "Everything is in here, the models and all the supporting documents. If you want to help me out, just use your name and publish this. It would really help me."



llama_print_timings:        load time =   621.58 ms
llama_print_timings:      sample time =    24.19 ms /    54 runs   (    0.45 ms per token,  2232.70 tokens per second)
llama_print_timings: prompt eval time =   621.55 ms /    78 tokens (    7.97 ms per token,   125.49 tokens per second)
llama_print_timings:        eval time =  4547.20 ms /    53 runs   (   85.80 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  5278.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```noinput



llama_print_timings:        load time =   577.76 ms
llama_print_timings:      sample time =     1.82 ms /     4 runs   (    0.45 ms per token,  2201.43 tokens per second)
llama_print_timings: prompt eval time =   577.72 ms /    37 tokens (   15.61 ms per token,    64.04 tokens per second)
llama_print_timings:        eval time =   234.60 ms /     3 runs   (   78.20 ms per token,    12.79 tokens per second)
llama_print_timings:       total time =   820.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wei Cheng pointed at the CD in Wang Miao's hands and said, "Wang Professor, I had noticed you even before you arrived. You are a good person with a sense of responsibility, so, I am still urging you to stay away from this thing. The world is about to undergo a tremendous change, and it is important that everyone can live out their lives peacefully and comfortably, without worrying about anything else."



llama_print_timings:        load time =   683.55 ms
llama_print_timings:      sample time =    63.92 ms /    96 runs   (    0.67 ms per token,  1501.90 tokens per second)
llama_print_timings: prompt eval time =   683.51 ms /   107 tokens (    6.39 ms per token,   156.55 tokens per second)
llama_print_timings:        eval time =  8752.38 ms /    95 runs   (   92.13 ms per token,    10.85 tokens per second)
llama_print_timings:       total time =  9727.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You seem to know more about it.



llama_print_timings:        load time =   591.07 ms
llama_print_timings:      sample time =     4.23 ms /     9 runs   (    0.47 ms per token,  2129.17 tokens per second)
llama_print_timings: prompt eval time =   591.03 ms /    38 tokens (   15.55 ms per token,    64.29 tokens per second)
llama_print_timings:        eval time =   676.80 ms /     8 runs   (   84.60 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1288.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I can't help but know everything about her.”



llama_print_timings:        load time =   618.11 ms
llama_print_timings:      sample time =     5.87 ms /    13 runs   (    0.45 ms per token,  2215.03 tokens per second)
llama_print_timings: prompt eval time =   618.07 ms /    39 tokens (   15.85 ms per token,    63.10 tokens per second)
llama_print_timings:        eval time =  1029.26 ms /    12 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1673.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Why not tell the police?”



llama_print_timings:        load time =   575.72 ms
llama_print_timings:      sample time =     3.61 ms /     8 runs   (    0.45 ms per token,  2214.84 tokens per second)
llama_print_timings: prompt eval time =   575.70 ms /    37 tokens (   15.56 ms per token,    64.27 tokens per second)
llama_print_timings:        eval time =   595.45 ms /     7 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  1186.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Steve shrugs his shoulders and says, “Nah, the police are nothing more than a joke. Even if God comes back, it won't be enough. At this point, humans have already reached ‘no answer from Heaven, no response on earth’.”



llama_print_timings:        load time =   616.02 ms
llama_print_timings:      sample time =    26.15 ms /    58 runs   (    0.45 ms per token,  2218.31 tokens per second)
llama_print_timings: prompt eval time =   615.97 ms /    71 tokens (    8.68 ms per token,   115.27 tokens per second)
llama_print_timings:        eval time =  4874.94 ms /    57 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  5609.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Steven stands by the window facing east, and a strange dawn starts to reveal in the sky behind the high buildings of the city. He doesn't know why, but it makes him think about that peculiar sunrise he once saw when entering 'The Three-Body Problem'.



llama_print_timings:        load time =   616.43 ms
llama_print_timings:      sample time =    26.40 ms /    59 runs   (    0.45 ms per token,  2234.59 tokens per second)
llama_print_timings: prompt eval time =   616.39 ms /    72 tokens (    8.56 ms per token,   116.81 tokens per second)
llama_print_timings:        eval time =  5032.70 ms /    58 runs   (   86.77 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  5769.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm not as detached as I seem. These past few nights I haven't been able to sleep, and when I wake up in the morning and look at this beautiful sunrise, I can only see it as dusk.



llama_print_timings:        load time =   651.14 ms
llama_print_timings:      sample time =    24.12 ms /    53 runs   (    0.46 ms per token,  2197.62 tokens per second)
llama_print_timings: prompt eval time =   651.11 ms /    91 tokens (    7.16 ms per token,   139.76 tokens per second)
llama_print_timings:        eval time =  4390.15 ms /    52 runs   (   84.43 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  5151.61 ms


translated 118.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The second level of the plot starts with a scene that is very similar to before, but this time the shape of the golden pyramid is restored to its traditional Chinese form.



llama_print_timings:        load time =   623.73 ms
llama_print_timings:      sample time =    16.66 ms /    37 runs   (    0.45 ms per token,  2221.02 tokens per second)
llama_print_timings: prompt eval time =   623.70 ms /    70 tokens (    8.91 ms per token,   112.23 tokens per second)
llama_print_timings:        eval time =  3096.63 ms /    36 runs   (   86.02 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3793.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stop it! Stop it!”



llama_print_timings:        load time =   871.07 ms
llama_print_timings:      sample time =     3.56 ms /     8 runs   (    0.44 ms per token,  2249.72 tokens per second)
llama_print_timings: prompt eval time =   871.02 ms /   213 tokens (    4.09 ms per token,   244.54 tokens per second)
llama_print_timings:        eval time =   615.20 ms /     7 runs   (   87.89 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =  1501.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Stop it! You two boring people! Don't you have any sense of responsibility at all? If the world doesn't have a future, what honor do you have!



llama_print_timings:        load time =   610.45 ms
llama_print_timings:      sample time =    17.46 ms /    38 runs   (    0.46 ms per token,  2176.28 tokens per second)
llama_print_timings: prompt eval time =   610.41 ms /    59 tokens (   10.35 ms per token,    96.66 tokens per second)
llama_print_timings:        eval time =  3199.69 ms /    37 runs   (   86.48 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3886.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The two swordsmen paid him no attention and concentrated on their fight. The taller one suddenly cried out in pain, and his sword fell to the ground with a "dongling" sound. Then, the other one ran towards the unsuccessful one and spat at his back.



llama_print_timings:        load time =   655.06 ms
llama_print_timings:      sample time =    27.46 ms /    62 runs   (    0.44 ms per token,  2257.99 tokens per second)
llama_print_timings: prompt eval time =   655.03 ms /    82 tokens (    7.99 ms per token,   125.19 tokens per second)
llama_print_timings:        eval time =  5188.75 ms /    61 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  5967.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“呸，无耻之徒！” he bent over to pick up his wig, looking at Wang Meng and pointing at the escapee’s direction with his sword. $“He竟然说是微积分是由他发明的! ”



llama_print_timings:        load time =   679.31 ms
llama_print_timings:      sample time =    22.81 ms /    51 runs   (    0.45 ms per token,  2235.57 tokens per second)
llama_print_timings: prompt eval time =   679.23 ms /   111 tokens (    6.12 ms per token,   163.42 tokens per second)
llama_print_timings:        eval time =  4315.91 ms /    50 runs   (   86.32 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  5095.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is that the one who ran away?” Wang Miao asked.



llama_print_timings:        load time =   577.82 ms
llama_print_timings:      sample time =     7.05 ms /    15 runs   (    0.47 ms per token,  2127.36 tokens per second)
llama_print_timings: prompt eval time =   577.78 ms /    48 tokens (   12.04 ms per token,    83.08 tokens per second)
llama_print_timings:        eval time =  1184.49 ms /    14 runs   (   84.61 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  1793.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“He, the scoundrel! I don't care about this title. It has already made me a person second only to God by discovering the three laws of motion and the scientific tools such as calculus that rely on these laws. Now that we have calculus which can be used to understand the laws of solar orbit and cell division, mastering the orbits of three suns is within our reach.”



llama_print_timings:        load time =   683.54 ms
llama_print_timings:      sample time =    37.65 ms /    84 runs   (    0.45 ms per token,  2231.02 tokens per second)
llama_print_timings: prompt eval time =   683.49 ms /   110 tokens (    6.21 ms per token,   160.94 tokens per second)
llama_print_timings:        eval time =  6968.75 ms /    83 runs   (   83.96 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  7821.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That's not so simple,” the mediator said. “Have you considered the computational cost? I have looked at your list of differential equations and it seems impossible to obtain a formal solution, only a numerical one, with such huge computation requirements that even if the whole world's mathematician worked day and night for all eternity would be not enough. Of course, unless we can learn the rules of sunrise, the end of the world may not come soon.”



llama_print_timings:        load time =   756.28 ms
llama_print_timings:      sample time =    44.39 ms /   100 runs   (    0.44 ms per token,  2252.66 tokens per second)
llama_print_timings: prompt eval time =   756.24 ms /   144 tokens (    5.25 ms per token,   190.42 tokens per second)
llama_print_timings:        eval time =  8452.11 ms /    99 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  9406.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You traveled all the way from the East to solve these equations. Didn't you say this is what you came for?



llama_print_timings:        load time =   682.04 ms
llama_print_timings:      sample time =    12.84 ms /    29 runs   (    0.44 ms per token,  2258.57 tokens per second)
llama_print_timings: prompt eval time =   682.00 ms /   109 tokens (    6.26 ms per token,   159.82 tokens per second)
llama_print_timings:        eval time =  2406.41 ms /    28 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  3147.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Does the computer have to come from the East?" Wang Mo asked F. Nietzsche.



llama_print_timings:        load time =   595.91 ms
llama_print_timings:      sample time =     9.76 ms /    22 runs   (    0.44 ms per token,  2255.25 tokens per second)
llama_print_timings: prompt eval time =   595.88 ms /    52 tokens (   11.46 ms per token,    87.27 tokens per second)
llama_print_timings:        eval time =  1833.58 ms /    21 runs   (   87.31 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2472.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noi



llama_print_timings:        load time =   593.87 ms
llama_print_timings:      sample time =     2.19 ms /     5 runs   (    0.44 ms per token,  2285.19 tokens per second)
llama_print_timings: prompt eval time =   593.83 ms /    53 tokens (   11.20 ms per token,    89.25 tokens per second)
llama_print_timings:        eval time =   360.33 ms /     4 runs   (   90.08 ms per token,    11.10 tokens per second)
llama_print_timings:       total time =   963.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You don't know computers? Well, what are you going to use to do all those calculations then?



llama_print_timings:        load time =   586.09 ms
llama_print_timings:      sample time =    10.16 ms /    23 runs   (    0.44 ms per token,  2263.33 tokens per second)
llama_print_timings: prompt eval time =   586.06 ms /    48 tokens (   12.21 ms per token,    81.90 tokens per second)
llama_print_timings:        eval time =  1910.98 ms /    22 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  2542.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noi



llama_print_timings:        load time =   622.89 ms
llama_print_timings:      sample time =     2.18 ms /     5 runs   (    0.44 ms per token,  2289.38 tokens per second)
llama_print_timings: prompt eval time =   622.85 ms /    72 tokens (    8.65 ms per token,   115.60 tokens per second)
llama_print_timings:        eval time =   319.75 ms /     4 runs   (   79.94 ms per token,    12.51 tokens per second)
llama_print_timings:       total time =   952.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You said that mathematicians are not enough.



llama_print_timings:        load time =   577.27 ms
llama_print_timings:      sample time =     4.65 ms /    10 runs   (    0.47 ms per token,  2150.54 tokens per second)
llama_print_timings: prompt eval time =   577.24 ms /    43 tokens (   13.42 ms per token,    74.49 tokens per second)
llama_print_timings:        eval time =   786.16 ms /     9 runs   (   87.35 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  1383.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We don't need mathematicians, we need ordinary people. But the number of them we need is huge! At least 30 million! This is the strategy of the army of mathematics.



llama_print_timings:        load time =   598.63 ms
llama_print_timings:      sample time =    20.14 ms /    41 runs   (    0.49 ms per token,  2035.95 tokens per second)
llama_print_timings: prompt eval time =   598.59 ms /    61 tokens (    9.81 ms per token,   101.91 tokens per second)
llama_print_timings:        eval time =  3427.42 ms /    40 runs   (   85.69 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  4116.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Are you kidding me?” Wang Mo was surprised. “I thought we were living in a society where only 10% of the population had college degrees. I would have believed you if you told me there are thirty million mathematicians.”



llama_print_timings:        load time =   630.40 ms
llama_print_timings:      sample time =    23.11 ms /    52 runs   (    0.44 ms per token,  2249.91 tokens per second)
llama_print_timings: prompt eval time =   630.36 ms /    75 tokens (    8.40 ms per token,   118.98 tokens per second)
llama_print_timings:        eval time =  4330.87 ms /    51 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5065.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Have you heard a joke about the Chung army?” asked F.N. Fritzsch, taking out his cigar and lighting it up. “The soldiers practised marches because they were so illiterate that even the officers couldn’t understand two one to command, so the officer thought of a solution: every soldier put their left foot into a straw sandal and their right foot into a leather sandal when walking in formation, so we needed those kind of soldiers. But we had to pay them three thousand golden marks.”



llama_print_timings:        load time =   729.73 ms
llama_print_timings:      sample time =    50.70 ms /   114 runs   (    0.44 ms per token,  2248.65 tokens per second)
llama_print_timings: prompt eval time =   729.68 ms /   133 tokens (    5.49 ms per token,   182.27 tokens per second)
llama_print_timings:        eval time =  9624.05 ms /   113 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time = 10581.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Hearing this modern joke, Wang Mo knew that the person in front of him was not a computer program but a human being, and it could be almost certainly Chinese.



llama_print_timings:        load time =   623.83 ms
llama_print_timings:      sample time =    16.51 ms /    37 runs   (    0.45 ms per token,  2240.66 tokens per second)
llama_print_timings: prompt eval time =   623.79 ms /    54 tokens (   11.55 ms per token,    86.57 tokens per second)
llama_print_timings:        eval time =  3111.27 ms /    36 runs   (   86.42 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3808.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Such a large army, it is hard to imagine,” said Wang Mo with a shake of his head.



llama_print_timings:        load time =   602.66 ms
llama_print_timings:      sample time =    11.76 ms /    25 runs   (    0.47 ms per token,  2125.67 tokens per second)
llama_print_timings: prompt eval time =   602.62 ms /    47 tokens (   12.82 ms per token,    77.99 tokens per second)
llama_print_timings:        eval time =  2057.18 ms /    24 runs   (   85.72 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2713.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“So let’s find Emperor Qin,” Newton pointed at the pyramid.



llama_print_timings:        load time =   588.22 ms
llama_print_timings:      sample time =     8.47 ms /    19 runs   (    0.45 ms per token,  2242.42 tokens per second)
llama_print_timings: prompt eval time =   588.20 ms /    47 tokens (   12.51 ms per token,    79.91 tokens per second)
llama_print_timings:        eval time =  1561.17 ms /    18 runs   (   86.73 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2187.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is he still in power?” Wang Moxiang looked around and asked, noting that the soldiers guarding the entrance to the pyramid were wearing simple bronze plate armors, carrying halberds. In the course of reading Liu Cixin’s Three-Body Problem, Wang Moxiang had long become used to the chaotic history of this universe.



llama_print_timings:        load time =   667.76 ms
llama_print_timings:      sample time =    47.05 ms /    81 runs   (    0.58 ms per token,  1721.43 tokens per second)
llama_print_timings: prompt eval time =   667.72 ms /    96 tokens (    6.96 ms per token,   143.77 tokens per second)
llama_print_timings:        eval time =  7004.03 ms /    80 runs   (   87.55 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  7883.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They both followed him to the pyramid entrance. "The world will now be ruled by him, with a large army of more than three million ready to invade Europe. Let us go see him."



llama_print_timings:        load time =   653.44 ms
llama_print_timings:      sample time =    20.32 ms /    44 runs   (    0.46 ms per token,  2165.57 tokens per second)
llama_print_timings: prompt eval time =   653.39 ms /    91 tokens (    7.18 ms per token,   139.27 tokens per second)
llama_print_timings:        eval time =  3663.71 ms /    43 runs   (   85.20 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  4409.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They entered the entrance, walked to the end of the courtyard and were about to enter the main hall. A guard insisted that they remove their clothes, but Newton objected saying we are famous scholars, without dark tools! Both sides stood still when a low male voice came from inside the main hall: "Is it these Westerners who discovered three laws of motion? Let them in." The three scholars entered the main hall and saw Qin Gongzi was walking inside. Long dresses and the famous sword were dragged on the ground. He turned to look at the scholars, and Wang Meng immediately noticed that the eyes of Emperor Xi Zhi and Pope Gregory III were there.



llama_print_timings:        load time =   774.90 ms
llama_print_timings:      sample time =    67.59 ms /   149 runs   (    0.45 ms per token,  2204.44 tokens per second)
llama_print_timings: prompt eval time =   774.86 ms /   161 tokens (    4.81 ms per token,   207.78 tokens per second)
llama_print_timings:        eval time = 12738.92 ms /   148 runs   (   86.07 ms per token,    11.62 tokens per second)
llama_print_timings:       total time = 13825.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I know your intentions. Why don't you go find Caesar? He rules over a vast empire and should be able to gather three million men.



llama_print_timings:        load time =   633.28 ms
llama_print_timings:      sample time =    15.38 ms /    34 runs   (    0.45 ms per token,  2210.38 tokens per second)
llama_print_timings: prompt eval time =   633.24 ms /    66 tokens (    9.59 ms per token,   104.23 tokens per second)
llama_print_timings:        eval time =  2749.61 ms /    33 runs   (   83.32 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  3453.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But, respected emperor, do you know what kind of army that is? Do you know how the empire looks like now? In the great city of Rome, the rivers are heavily polluted passing through it. What is the cause of this pollution?



llama_print_timings:        load time =   622.25 ms
llama_print_timings:      sample time =    26.56 ms /    55 runs   (    0.48 ms per token,  2071.10 tokens per second)
llama_print_timings: prompt eval time =   622.21 ms /    74 tokens (    8.41 ms per token,   118.93 tokens per second)
llama_print_timings:        eval time =  4672.83 ms /    54 runs   (   86.53 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  5413.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

**“Military enterprises?”**



llama_print_timings:        load time =   569.52 ms
llama_print_timings:      sample time =     5.07 ms /    11 runs   (    0.46 ms per token,  2170.48 tokens per second)
llama_print_timings: prompt eval time =   569.50 ms /    34 tokens (   16.75 ms per token,    59.70 tokens per second)
llama_print_timings:        eval time =   875.37 ms /    10 runs   (   87.54 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  1468.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, not the great emperor, but the vomit of a gluttonous Roman! The nobles dined at the table and had cots carried to take their leisure time after meals. The entire empire was plunged into debauchery. It was impossible for them to possess such great talent and strength to do this calculation.”



llama_print_timings:        load time =   678.99 ms
llama_print_timings:      sample time =    33.56 ms /    74 runs   (    0.45 ms per token,  2204.87 tokens per second)
llama_print_timings: prompt eval time =   678.95 ms /   108 tokens (    6.29 ms per token,   159.07 tokens per second)
llama_print_timings:        eval time =  6193.47 ms /    73 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  7024.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I know this, ” said the Emperor of China, “but Caesar is coming to, and he is restoring his armies. The wisdom of the West can also be very dreadful, you are not more intelligent than the Easterners, but can think in ways we cannot. For instance, he sees three suns; you could figure out these three laws, which are remarkable, but we can’t for now. And, while I can't go to Western lands by sea, my boats won’t work, and it would be too long for our supply lines.”



llama_print_timings:        load time =   698.99 ms
llama_print_timings:      sample time =    54.69 ms /   122 runs   (    0.45 ms per token,  2230.88 tokens per second)
llama_print_timings: prompt eval time =   698.94 ms /   125 tokens (    5.59 ms per token,   178.84 tokens per second)
llama_print_timings:        eval time = 10496.11 ms /   121 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 11441.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“So, Your Majesty, your empire will continue to develop!” Von Neumann timely said, “If you can master the laws of the sun’s movement, you can use every epoch in an efficient manner and avoid losses caused by chaotic epochs. Please trust us. We are scholars; we can accurately predict the movements of the sun with three laws and calculus without caring who rules over the world.”



llama_print_timings:        load time =   687.40 ms
llama_print_timings:      sample time =    40.12 ms /    90 runs   (    0.45 ms per token,  2243.49 tokens per second)
llama_print_timings: prompt eval time =   687.36 ms /   113 tokens (    6.08 ms per token,   164.40 tokens per second)
llama_print_timings:        eval time =  7723.72 ms /    89 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  8594.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I certainly need to predict the movement of the sun, but you have me gather three million troops first and show me how this calculation is done.



llama_print_timings:        load time =   599.01 ms
llama_print_timings:      sample time =    13.27 ms /    30 runs   (    0.44 ms per token,  2260.23 tokens per second)
llama_print_timings: prompt eval time =   598.98 ms /    61 tokens (    9.82 ms per token,   101.84 tokens per second)
llama_print_timings:        eval time =  2484.38 ms /    29 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3144.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Your Majesty, please give me three soldiers and I will demonstrate for you.” Fritz von Neumann excitedly.



llama_print_timings:        load time =   587.46 ms
llama_print_timings:      sample time =    10.73 ms /    24 runs   (    0.45 ms per token,  2236.09 tokens per second)
llama_print_timings: prompt eval time =   587.43 ms /    52 tokens (   11.30 ms per token,    88.52 tokens per second)
llama_print_timings:        eval time =  1959.90 ms /    23 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  2594.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three? Just three? I can easily give you 3,000. 



llama_print_timings:        load time =   602.37 ms
llama_print_timings:      sample time =     8.42 ms /    19 runs   (    0.44 ms per token,  2256.80 tokens per second)
llama_print_timings: prompt eval time =   602.35 ms /    61 tokens (    9.87 ms per token,   101.27 tokens per second)
llama_print_timings:        eval time =  1579.63 ms /    18 runs   (   87.76 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  2221.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The great emperor, you mentioned earlier that the Orientals are defective in scientific thinking because they did not realize that complex things in the universe are actually made up of the simplest units. I just need three to please you, your majesty.



llama_print_timings:        load time =   623.65 ms
llama_print_timings:      sample time =    22.73 ms /    51 runs   (    0.45 ms per token,  2244.22 tokens per second)
llama_print_timings: prompt eval time =   623.61 ms /    68 tokens (    9.17 ms per token,   109.04 tokens per second)
llama_print_timings:        eval time =  4166.79 ms /    50 runs   (   83.34 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  4891.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Qin Shi Huang waved his hand and called three soldiers. They were all young, as the other soldiers of Qin did, and they acted like machines that obeyed their orders exactly.



llama_print_timings:        load time =   609.92 ms
llama_print_timings:      sample time =    18.95 ms /    42 runs   (    0.45 ms per token,  2216.01 tokens per second)
llama_print_timings: prompt eval time =   609.90 ms /    61 tokens (   10.00 ms per token,   100.02 tokens per second)
llama_print_timings:        eval time =  3516.50 ms /    41 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4213.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know your names.



llama_print_timings:        load time =   698.72 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2252.25 tokens per second)
llama_print_timings: prompt eval time =   698.67 ms /   120 tokens (    5.82 ms per token,   171.75 tokens per second)
llama_print_timings:        eval time =   722.02 ms /     8 runs   (   90.25 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =  1438.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Huh, you can make them form a paraboloid attack formation, ” said Qin Shi Huang contemptuously. Newton took out six white flags and gave one each to three soldiers. The whites represented 0 and the blacks represented 1. “Okay, listen up, if both enter l and enter 2 are white flag, raise your hands; otherwise, raise white flag.”



llama_print_timings:        load time =   788.44 ms
llama_print_timings:      sample time =    38.81 ms /    86 runs   (    0.45 ms per token,  2215.64 tokens per second)
llama_print_timings: prompt eval time =   788.40 ms /   168 tokens (    4.69 ms per token,   213.09 tokens per second)
llama_print_timings:        eval time =  7352.40 ms /    85 runs   (   86.50 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  8318.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I think you should change the color, white flag represents surrender.



llama_print_timings:        load time =   578.28 ms
llama_print_timings:      sample time =     6.36 ms /    14 runs   (    0.45 ms per token,  2200.22 tokens per second)
llama_print_timings: prompt eval time =   578.24 ms /    47 tokens (   12.30 ms per token,    81.28 tokens per second)
llama_print_timings:        eval time =  1131.56 ms /    13 runs   (   87.04 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1739.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ferdinand von Ney, who was excitedly, did not respond to the emperor, but shouted loudly at three soldiers: "Start running now! Into number 1 into number 2, each of you raise a flag, well, raise! Well, raise again! Raising!"



llama_print_timings:        load time =   633.17 ms
llama_print_timings:      sample time =    43.81 ms /    65 runs   (    0.67 ms per token,  1483.82 tokens per second)
llama_print_timings: prompt eval time =   633.13 ms /    75 tokens (    8.44 ms per token,   118.46 tokens per second)
llama_print_timings:        eval time =  5695.91 ms /    64 runs   (   89.00 ms per token,    11.24 tokens per second)
llama_print_timings:       total time =  6529.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the first round, both In 1 and In 2 raised their flags three times. The first time was black-black, the second time was white-black, and the third time was black-white. Both reacted appropriately to out and lifted up a flag once.



llama_print_timings:        load time =   620.94 ms
llama_print_timings:      sample time =    26.30 ms /    59 runs   (    0.45 ms per token,  2242.92 tokens per second)
llama_print_timings: prompt eval time =   620.91 ms /    69 tokens (    9.00 ms per token,   111.13 tokens per second)
llama_print_timings:        eval time =  4904.09 ms /    58 runs   (   84.55 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  5647.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Excellent, running correctly, Your Majesty, your soldiers are very clever!"



llama_print_timings:        load time =   594.45 ms
llama_print_timings:      sample time =     8.28 ms /    18 runs   (    0.46 ms per token,  2173.39 tokens per second)
llama_print_timings: prompt eval time =   594.41 ms /    44 tokens (   13.51 ms per token,    74.02 tokens per second)
llama_print_timings:        eval time =  1457.00 ms /    17 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2089.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is something even an idiot can do, what are they up to?” Qin Shi Huang asked with confusion.



llama_print_timings:        load time =   584.40 ms
llama_print_timings:      sample time =    12.33 ms /    28 runs   (    0.44 ms per token,  2270.15 tokens per second)
llama_print_timings: prompt eval time =   584.36 ms /    53 tokens (   11.03 ms per token,    90.70 tokens per second)
llama_print_timings:        eval time =  2306.90 ms /    27 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  2946.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“These three people make up a component of the computer system, which is a kind of ‘gates’.” F.N. said and then stopped for a moment so that the Emperor could understand.



llama_print_timings:        load time =   618.86 ms
llama_print_timings:      sample time =    19.23 ms /    43 runs   (    0.45 ms per token,  2236.67 tokens per second)
llama_print_timings: prompt eval time =   618.82 ms /    66 tokens (    9.38 ms per token,   106.65 tokens per second)
llama_print_timings:        eval time =  3556.97 ms /    42 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  4263.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Qin Shi Huang said with a stoic expression, “That’s terrible. Continue.”



llama_print_timings:        load time =   582.16 ms
llama_print_timings:      sample time =    10.17 ms /    23 runs   (    0.44 ms per token,  2261.78 tokens per second)
llama_print_timings: prompt eval time =   582.12 ms /    49 tokens (   11.88 ms per token,    84.17 tokens per second)
llama_print_timings:        eval time =  1851.70 ms /    22 runs   (   84.17 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  2478.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

冯·诺伊曼 turned to the three soldiers in a triangular formation and said, “We construct the next component. You, out, just watch if there is anyone raising either Flag 1 or Flag 2 - the situation can have three combinations - black-black, white-black, black-white, and the last combination - white-white. If so, you raise the flag of color that appears more often; if not, you raise the white flag. Understand? Good boy, you are very clever. The correct operation of this gate component is key, do a good job, and the emperor will award you! Let's start running: Raise! Good, raise again! Excellent, the operation runs normally. Your Majesty, this gate component is called either Gate or Or Gate.”



llama_print_timings:        load time =   775.74 ms
llama_print_timings:      sample time =   100.06 ms /   168 runs   (    0.60 ms per token,  1678.93 tokens per second)
llama_print_timings: prompt eval time =   775.69 ms /   157 tokens (    4.94 ms per token,   202.40 tokens per second)
llama_print_timings:        eval time = 15106.10 ms /   167 runs   (   90.46 ms per token,    11.06 tokens per second)
llama_print_timings:       total time = 16343.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then von Neumann built three gates (and-gate, or-gate and xor-gate), the simplest of which used only two soldiers.



llama_print_timings:        load time =   648.51 ms
llama_print_timings:      sample time =    13.83 ms /    31 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   648.48 ms /    82 tokens (    7.91 ms per token,   126.45 tokens per second)
llama_print_timings:        eval time =  2592.65 ms /    30 runs   (   86.42 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3304.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noiemann bowed to the emperor and said, “Now, Your Majesty, all of the door components have been demonstrated, which is very simple. After just an hour’s training, any three soldiers can master it.”



llama_print_timings:        load time =   622.89 ms
llama_print_timings:      sample time =    22.46 ms /    49 runs   (    0.46 ms per token,  2181.85 tokens per second)
llama_print_timings: prompt eval time =   622.85 ms /    69 tokens (    9.03 ms per token,   110.78 tokens per second)
llama_print_timings:        eval time =  4087.88 ms /    48 runs   (   85.16 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  4814.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Do they need to learn anything more?" Emperor Qin asked.



llama_print_timings:        load time =   577.75 ms
llama_print_timings:      sample time =     6.94 ms /    15 runs   (    0.46 ms per token,  2161.69 tokens per second)
llama_print_timings: prompt eval time =   577.71 ms /    43 tokens (   13.44 ms per token,    74.43 tokens per second)
llama_print_timings:        eval time =  1192.58 ms /    14 runs   (   85.18 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  1801.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We don’t need to, we assemble a million of these gate pieces and combine them into a system that can carry out the operations needed to solve the differential equations for predicting solar motion. And we call this system … uh, let me think a moment … “



llama_print_timings:        load time =   643.09 ms
llama_print_timings:      sample time =    45.77 ms /    57 runs   (    0.80 ms per token,  1245.33 tokens per second)
llama_print_timings: prompt eval time =   643.04 ms /    84 tokens (    7.66 ms per token,   130.63 tokens per second)
llama_print_timings:        eval time =  5381.71 ms /    56 runs   (   96.10 ms per token,    10.41 tokens per second)
llama_print_timings:       total time =  6230.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The computer, Wang Miao said.



llama_print_timings:        load time =   570.97 ms
llama_print_timings:      sample time =     4.06 ms /     9 runs   (    0.45 ms per token,  2218.93 tokens per second)
llama_print_timings: prompt eval time =   570.93 ms /    39 tokens (   14.64 ms per token,    68.31 tokens per second)
llama_print_timings:        eval time =   738.23 ms /     8 runs   (   92.28 ms per token,    10.84 tokens per second)
llama_print_timings:       total time =  1327.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Yes, " replied von Neumann, holding up his finger. "Computer, what a great name! The whole system is actually just a huge machine, the most complicated ever built!".



llama_print_timings:        load time =   621.40 ms
llama_print_timings:      sample time =    18.63 ms /    41 runs   (    0.45 ms per token,  2201.22 tokens per second)
llama_print_timings: prompt eval time =   621.38 ms /    74 tokens (    8.40 ms per token,   119.09 tokens per second)
llama_print_timings:        eval time =  3495.21 ms /    40 runs   (   87.38 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  4201.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The game is speeding up. It has been three months since the last time I played.



llama_print_timings:        load time =   569.63 ms
llama_print_timings:      sample time =     9.41 ms /    20 runs   (    0.47 ms per token,  2124.72 tokens per second)
llama_print_timings: prompt eval time =   569.59 ms /    36 tokens (   15.82 ms per token,    63.20 tokens per second)
llama_print_timings:        eval time =  1614.73 ms /    19 runs   (   84.99 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2226.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the top of the pyramid, Qin Shi Huang, Newton, Noyaman and Wang Mu stood together. There were a lot of astronomical observatory instruments for them, some of which are European modern equipment. Under them, the grand formations of 3 million soldiers spread over the vast land. It seemed like a giant carpet composed of three million soldiers' mannequins on which birds mistook and ran away in panic or circled Wang Mu. In his mind, if the entire human race stood up in such a formation, the area would be only as big as Pudong New Area in Shanghai, but the power of this formation showed how fragile civilization was.



llama_print_timings:        load time =   893.64 ms
llama_print_timings:      sample time =    66.56 ms /   148 runs   (    0.45 ms per token,  2223.49 tokens per second)
llama_print_timings: prompt eval time =   893.59 ms /   226 tokens (    3.95 ms per token,   252.91 tokens per second)
llama_print_timings:        eval time = 12832.51 ms /   147 runs   (   87.30 ms per token,    11.46 tokens per second)
llama_print_timings:       total time = 14033.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Your army is truly unparalleled, Your Majesty. In such a short timeframe, you managed to complete such a complex training program.”



llama_print_timings:        load time =   607.63 ms
llama_print_timings:      sample time =    13.99 ms /    31 runs   (    0.45 ms per token,  2215.24 tokens per second)
llama_print_timings: prompt eval time =   607.60 ms /    64 tokens (    9.49 ms per token,   105.33 tokens per second)
llama_print_timings:        eval time =  2443.88 ms /    30 runs   (   81.46 ms per token,    12.28 tokens per second)
llama_print_timings:       total time =  3115.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Although it is complex as a whole, but each soldier should do something very simple compared to the training of breaking up the Macedonian phalanx.



llama_print_timings:        load time =   627.90 ms
llama_print_timings:      sample time =    15.91 ms /    34 runs   (    0.47 ms per token,  2137.02 tokens per second)
llama_print_timings: prompt eval time =   627.86 ms /    73 tokens (    8.60 ms per token,   116.27 tokens per second)
llama_print_timings:        eval time =  2842.30 ms /    33 runs   (   86.13 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3542.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

God bless us, that this may be a long epoch.



llama_print_timings:        load time =   584.87 ms
llama_print_timings:      sample time =     7.37 ms /    15 runs   (    0.49 ms per token,  2034.17 tokens per second)
llama_print_timings: prompt eval time =   584.83 ms /    49 tokens (   11.94 ms per token,    83.79 tokens per second)
llama_print_timings:        eval time =  1181.12 ms /    14 runs   (   84.37 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  1801.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Even in the Age of Chaos, my army still trains,” said Emperor Qin proudly, glancing at his soldiers. “In the future, they will complete your calculations for you.”



llama_print_timings:        load time =   643.76 ms
llama_print_timings:      sample time =    18.75 ms /    42 runs   (    0.45 ms per token,  2240.60 tokens per second)
llama_print_timings: prompt eval time =   643.72 ms /    68 tokens (    9.47 ms per token,   105.64 tokens per second)
llama_print_timings:        eval time =  3380.86 ms /    41 runs   (   82.46 ms per token,    12.13 tokens per second)
llama_print_timings:       total time =  4109.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“So, please give the great order of your Majesty!” said Von Neumann excitedly.



llama_print_timings:        load time =   602.46 ms
llama_print_timings:      sample time =     8.86 ms /    20 runs   (    0.44 ms per token,  2258.36 tokens per second)
llama_print_timings: prompt eval time =   602.42 ms /    56 tokens (   10.76 ms per token,    92.96 tokens per second)
llama_print_timings:        eval time =  1577.73 ms /    19 runs   (   83.04 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  2220.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"I will rule the world with this sword. I will rule the world."



llama_print_timings:        load time =   669.53 ms
llama_print_timings:      sample time =     7.50 ms /    17 runs   (    0.44 ms per token,  2266.36 tokens per second)
llama_print_timings: prompt eval time =   669.49 ms /    94 tokens (    7.12 ms per token,   140.41 tokens per second)
llama_print_timings:        eval time =  1397.31 ms /    16 runs   (   87.33 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2101.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Go to the computer queue!"



llama_print_timings:        load time =   578.03 ms
llama_print_timings:      sample time =     3.61 ms /     8 runs   (    0.45 ms per token,  2218.52 tokens per second)
llama_print_timings: prompt eval time =   577.99 ms /    36 tokens (   16.06 ms per token,    62.29 tokens per second)
llama_print_timings:        eval time =   566.82 ms /     7 runs   (   80.97 ms per token,    12.35 tokens per second)
llama_print_timings:       total time =  1162.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The bronze dishes of the four corners of the pyramid simultaneously ignite, and soldiers standing on the slope face to a square formation sing the Emperor's commands in chorus.



llama_print_timings:        load time =   626.02 ms
llama_print_timings:      sample time =    17.78 ms /    39 runs   (    0.46 ms per token,  2194.09 tokens per second)
llama_print_timings: prompt eval time =   626.00 ms /    74 tokens (    8.46 ms per token,   118.21 tokens per second)
llama_print_timings:        eval time =  3295.79 ms /    38 runs   (   86.73 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  4003.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A Computer Queue



llama_print_timings:        load time =   572.96 ms
llama_print_timings:      sample time =     2.25 ms /     5 runs   (    0.45 ms per token,  2225.19 tokens per second)
llama_print_timings: prompt eval time =   572.92 ms /    37 tokens (   15.48 ms per token,    64.58 tokens per second)
llama_print_timings:        eval time =   334.62 ms /     4 runs   (   83.65 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =   917.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The ground below, the color pattern began to appear disorderly disturbances, complex and refined circuit structure emerged and gradually filled the whole pattern. After ten minutes, a 36 square kilometers-sized computer motherboard appeared on the ground.



llama_print_timings:        load time =   627.24 ms
llama_print_timings:      sample time =    25.45 ms /    55 runs   (    0.46 ms per token,  2161.02 tokens per second)
llama_print_timings: prompt eval time =   627.22 ms /    74 tokens (    8.48 ms per token,   117.98 tokens per second)
llama_print_timings:        eval time =  4592.88 ms /    54 runs   (   85.05 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  5337.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Now, let me introduce the main components of this computer, ‘Guo Er Yi’. Here is the CPU, the core unit which processes instructions according to a set program written in our programming language. There are five battalion generals inside it who will carry out their tasks. And the part in the periphery represents the memory and is composed of small squares where each soldier can handle multiple flags simultaneously without being able to see what they do, which makes it possible for us to achieve the minimum requirement for RAM as we have trained soldiers to work together with different colors of flags, combining them into one single unit, and thus completing twenty times more operations in a single turn. Additionally, there is a bus that runs through the array connecting all components within the system. It transfers information from one point to another.”



llama_print_timings:        load time =   874.01 ms
llama_print_timings:      sample time =    99.81 ms /   169 runs   (    0.59 ms per token,  1693.17 tokens per second)
llama_print_timings: prompt eval time =   873.97 ms /   213 tokens (    4.10 ms per token,   243.72 tokens per second)
llama_print_timings:        eval time = 15512.55 ms /   168 runs   (   92.34 ms per token,    10.83 tokens per second)
llama_print_timings:       total time = 16849.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The bus structure is a great invention. New plug-ins can be attached to the bus very quickly and easily, which makes hardware expansion and upgrades of ‘Chin’ system很方便. Look further on the far side. Maybe you need a telescope to see clearly, but that's the external memory. We called it ‘hard disk’ because it was composed of 30 million highly educated people. They all have personal notebooks and pens, responsible for recording the operation results. Of course, their biggest work is as virtual memory, storing intermediate operation results, operating speed bottleneck in them. Here, the nearest place to us is the display array, which can display the main status parameters of computer operation.”



llama_print_timings:        load time =   841.75 ms
llama_print_timings:      sample time =    67.88 ms /   152 runs   (    0.45 ms per token,  2239.41 tokens per second)
llama_print_timings: prompt eval time =   841.72 ms /   187 tokens (    4.50 ms per token,   222.17 tokens per second)
llama_print_timings:        eval time = 13124.50 ms /   151 runs   (   86.92 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 14282.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noway and Newton came to the table with a paper roll, and when they opened it, Wang Mu felt his head tightened and was expecting something different than he imagined, but there were only symbols smaller than an apple in his face. He was so dizzy that he could not concentrate on what he saw.



llama_print_timings:        load time =   677.00 ms
llama_print_timings:      sample time =    31.20 ms /    69 runs   (    0.45 ms per token,  2211.25 tokens per second)
llama_print_timings: prompt eval time =   676.96 ms /   112 tokens (    6.04 ms per token,   165.44 tokens per second)
llama_print_timings:        eval time =  5799.06 ms /    68 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6618.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Your Majesty, this is the ‘Qin 1.0’ operating system we have developed, and our software will run on it.” Fan Noi-man pointed to the rank of computer equipment underneath him. “This is hardware, and what is written on the piece of paper here is software. It is similar to a piano and its sheet music in that sense.”



llama_print_timings:        load time =   831.20 ms
llama_print_timings:      sample time =    35.51 ms /    80 runs   (    0.44 ms per token,  2253.20 tokens per second)
llama_print_timings: prompt eval time =   831.16 ms /   192 tokens (    4.33 ms per token,   231.00 tokens per second)
llama_print_timings:        eval time =  6709.41 ms /    79 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  7705.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhēn Shǐhuáng nodded and said, “Okay, let's get started.”



llama_print_timings:        load time =   599.27 ms
llama_print_timings:      sample time =    11.09 ms /    25 runs   (    0.44 ms per token,  2253.88 tokens per second)
llama_print_timings: prompt eval time =   599.25 ms /    42 tokens (   14.27 ms per token,    70.09 tokens per second)
llama_print_timings:        eval time =  2027.82 ms /    24 runs   (   84.49 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  2678.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Werner Noiszmann, hands up to his head in a dignified fashion, proclaimed, "In the name of the Holy One, may this machine be started! The system self-test shall commence!"



llama_print_timings:        load time =   608.34 ms
llama_print_timings:      sample time =    20.20 ms /    45 runs   (    0.45 ms per token,  2227.94 tokens per second)
llama_print_timings: prompt eval time =   608.30 ms /    59 tokens (   10.31 ms per token,    96.99 tokens per second)
llama_print_timings:        eval time =  3787.60 ms /    44 runs   (   86.08 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  4487.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The middle of the pyramid, a flaghand is giving orders with flag codes. At this time, the giant main board under the three million people below it looks like being molten and full of fine ripples. That's thousands of thousands of small flags moving back and forth. In the display array close to the bottom of the pyramid, a green progress bar is extending which indicates the self-examination's progress. Ten minutes later, the progress bar reached its end.



llama_print_timings:        load time =   700.54 ms
llama_print_timings:      sample time =    47.37 ms /   104 runs   (    0.46 ms per token,  2195.48 tokens per second)
llama_print_timings: prompt eval time =   700.50 ms /   126 tokens (    5.56 ms per token,   179.87 tokens per second)
llama_print_timings:        eval time =  8809.40 ms /   103 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  9727.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The self-diagnosis is complete! The boot loader has been loaded.



llama_print_timings:        load time =   576.09 ms
llama_print_timings:      sample time =     7.57 ms /    17 runs   (    0.45 ms per token,  2245.11 tokens per second)
llama_print_timings: prompt eval time =   576.05 ms /    43 tokens (   13.40 ms per token,    74.65 tokens per second)
llama_print_timings:        eval time =  1358.40 ms /    16 runs   (   84.90 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1968.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the following, light soldiers move quickly on the system bus that stretches across human computers. The bus immediately becomes a turbulent river. This river splits into countless smaller streams as it enters the modules. Soon, the white and black flag waves evolve into turbulent surges in the whole motherboard. The center of CPU zone is the most turbulent, like burning powder. Suddenly, the powder appears to have burnt out and the disturbances on the CPU area become calm. Finally, they spread out rapidly with the motion of a frozen sea, leaving only some sporadic white dots flashing in the display array without any life.



llama_print_timings:        load time =   858.35 ms
llama_print_timings:      sample time =    82.15 ms /   145 runs   (    0.57 ms per token,  1765.06 tokens per second)
llama_print_timings: prompt eval time =   858.30 ms /   203 tokens (    4.23 ms per token,   236.51 tokens per second)
llama_print_timings:        eval time = 12926.52 ms /   144 runs   (   89.77 ms per token,    11.14 tokens per second)
llama_print_timings:       total time = 14169.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The system is frozen!” a signalman shouted. The cause of the failure was quickly determined, it was a gate circuit running erroneously in the CPU state register.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Fred. Noway.



llama_print_timings:        load time =   583.71 ms
llama_print_timings:      sample time =     3.16 ms /     7 runs   (    0.45 ms per token,  2214.49 tokens per second)
llama_print_timings: prompt eval time =   583.66 ms /    49 tokens (   11.91 ms per token,    83.95 tokens per second)
llama_print_timings:        eval time =   525.14 ms /     6 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1122.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Slow down!’ Newton waved his hand to stop the signal officer, turned around with a look of malice and said to Emperor Qinshi Huangdi, ‘Your Majesty, some measures should be taken for the faulty parts with high failure rate in order to maintain stable system operation.



llama_print_timings:        load time =   622.87 ms
llama_print_timings:      sample time =    28.13 ms /    63 runs   (    0.45 ms per token,  2239.52 tokens per second)
llama_print_timings: prompt eval time =   622.82 ms /    74 tokens (    8.42 ms per token,   118.81 tokens per second)
llama_print_timings:        eval time =  5248.36 ms /    62 runs   (   84.65 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  6000.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Qin Shi Huang said, "Replace faulty parts. All soldiers of the missing parts will be executed for causing trouble."



llama_print_timings:        load time =   604.91 ms
llama_print_timings:      sample time =    12.50 ms /    28 runs   (    0.45 ms per token,  2240.54 tokens per second)
llama_print_timings: prompt eval time =   604.87 ms /    59 tokens (   10.25 ms per token,    97.54 tokens per second)
llama_print_timings:        eval time =  2307.18 ms /    27 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  2969.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noi



llama_print_timings:        load time =   674.70 ms
llama_print_timings:      sample time =     2.26 ms /     5 runs   (    0.45 ms per token,  2211.41 tokens per second)
llama_print_timings: prompt eval time =   674.66 ms /   100 tokens (    6.75 ms per token,   148.22 tokens per second)
llama_print_timings:        eval time =   334.92 ms /     4 runs   (   83.73 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  1020.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Start the solar orbit simulation software 'ThreeBody l.0'! "Nelson exclaimed desperately, "Start the main control program! Load the differential module! Load the finite element module! … Initiate with the initial conditions parameters! Calculate start! !"



llama_print_timings:        load time =   650.82 ms
llama_print_timings:      sample time =    52.20 ms /    59 runs   (    0.88 ms per token,  1130.18 tokens per second)
llama_print_timings: prompt eval time =   650.77 ms /    87 tokens (    7.48 ms per token,   133.69 tokens per second)
llama_print_timings:        eval time =  5671.01 ms /    58 runs   (   97.78 ms per token,    10.23 tokens per second)
llama_print_timings:       total time =  6565.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The motherboard gleams with a shimmer of light, displaying array of flags in various colors that flicker in quick succession. The human row of computers begin their endless computation.



llama_print_timings:        load time =   600.64 ms
llama_print_timings:      sample time =    39.57 ms /    39 runs   (    1.01 ms per token,   985.64 tokens per second)
llama_print_timings: prompt eval time =   600.62 ms /    59 tokens (   10.18 ms per token,    98.23 tokens per second)
llama_print_timings:        eval time =  3907.78 ms /    38 runs   (  102.84 ms per token,     9.72 tokens per second)
llama_print_timings:       total time =  4693.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's really interesting, " said Qin Shi Huang, pointing at the magnificent computer. "Such simple actions by so many people result in such a complex creation! Europeans scream about how authoritarianism and totalitarianism have killed social creativity, but with strict discipline and a mass of people working together as one, great wisdom can also be produced."



llama_print_timings:        load time =   650.37 ms
llama_print_timings:      sample time =    46.19 ms /    82 runs   (    0.56 ms per token,  1775.47 tokens per second)
llama_print_timings: prompt eval time =   650.32 ms /    91 tokens (    7.15 ms per token,   139.93 tokens per second)
llama_print_timings:        eval time =  7202.58 ms /    81 runs   (   88.92 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  8067.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Great Emperor, this is the mechanical operation of a machine, not wisdom. These common and ignoble individuals are only 0s when put together with you as an 1; their overall meaning would not exist without it.



llama_print_timings:        load time =   619.13 ms
llama_print_timings:      sample time =    21.32 ms /    48 runs   (    0.44 ms per token,  2251.41 tokens per second)
llama_print_timings: prompt eval time =   619.10 ms /    76 tokens (    8.15 ms per token,   122.76 tokens per second)
llama_print_timings:        eval time =  3941.52 ms /    47 runs   (   83.86 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  4658.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Frau von Neumann looked at Newton and said, “If you predict something wrong according to your theories and mathematical models, we might as well all disappear into nothing.”



llama_print_timings:        load time =   622.86 ms
llama_print_timings:      sample time =    15.88 ms /    36 runs   (    0.44 ms per token,  2267.57 tokens per second)
llama_print_timings: prompt eval time =   622.79 ms /    73 tokens (    8.53 ms per token,   117.21 tokens per second)
llama_print_timings:        eval time =  3054.10 ms /    35 runs   (   87.26 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  3749.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yeah, that's right! We were nothing then.



llama_print_timings:        load time =   591.61 ms
llama_print_timings:      sample time =     7.04 ms /    15 runs   (    0.47 ms per token,  2130.38 tokens per second)
llama_print_timings: prompt eval time =   591.57 ms /    52 tokens (   11.38 ms per token,    87.90 tokens per second)
llama_print_timings:        eval time =  1159.39 ms /    14 runs   (   82.81 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =  1783.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As time passed, the computers ran for a total of one year and four months. After accounting for the time spent on debugging, the actual computing time was about a year and two months. During that time, only the wretched climate interrupted the computer's operation twice, but the storage device saved all the interrupt data from these points and successfully recovered operations from each interruption. When Emperor Qin Shi Huang and European scholars retook control of the pyramid's summit for the first time, the first stage of calculations had been completed. The results of this stage accurately described the path of the sun over the next two years.



llama_print_timings:        load time =   724.99 ms
llama_print_timings:      sample time =    61.34 ms /   138 runs   (    0.44 ms per token,  2249.79 tokens per second)
llama_print_timings: prompt eval time =   724.95 ms /   130 tokens (    5.58 ms per token,   179.32 tokens per second)
llama_print_timings:        eval time = 11785.42 ms /   137 runs   (   86.02 ms per token,    11.62 tokens per second)
llama_print_timings:       total time = 12795.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's a cold dawn, the countless torches shining on the vast motherboards have been extinguished, and "Qin 1.0" enters standby mode after completion, while the waves raging over the surface of the motherboards have become calm ripples.



llama_print_timings:        load time =   631.97 ms
llama_print_timings:      sample time =    27.77 ms /    62 runs   (    0.45 ms per token,  2232.87 tokens per second)
llama_print_timings: prompt eval time =   631.93 ms /    75 tokens (    8.43 ms per token,   118.68 tokens per second)
llama_print_timings:        eval time =  5259.02 ms /    61 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  6019.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fritz Noiemann and Newton presented a long scroll of results recording the operation results to Qin Shi Huangdi. Newton said, “Great Qin Shi Huangdi, ascalculation on day three had been finished, it is because according to calculation results that tonight this endless cold night will come to an end, we will usher in a Long-Duration Era for the first time, from the solar orbit parameters viewpoint, this Long-Duration Era would have a pleasant climate, so please let your kingdom rise again from your water and death.”



llama_print_timings:        load time =   734.24 ms
llama_print_timings:      sample time =    79.47 ms /   118 runs   (    0.67 ms per token,  1484.87 tokens per second)
llama_print_timings: prompt eval time =   734.20 ms /   132 tokens (    5.56 ms per token,   179.79 tokens per second)
llama_print_timings:        eval time = 10896.23 ms /   117 runs   (   93.13 ms per token,    10.74 tokens per second)
llama_print_timings:       total time = 12002.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“My country has never experienced water shortage since the calculation started!” said Emperor Qin. “I have spent all my country’s resources to maintain computer operation, which has cost countless lives through starvation, frozen, or overwork.” Pointing at the distance, he added, “From the edge of the mainboard, there are dozens of white lines radiating out into the sky and disappearing far away. That’s the route of goods being transported from each province to the mainboard,” said Emperor Qin.



llama_print_timings:        load time =   757.68 ms
llama_print_timings:      sample time =    51.84 ms /   114 runs   (    0.45 ms per token,  2199.07 tokens per second)
llama_print_timings: prompt eval time =   757.64 ms /   148 tokens (    5.12 ms per token,   195.34 tokens per second)
llama_print_timings:        eval time =  9643.42 ms /   113 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time = 10641.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Your Majesty, you will find that this is worth it. After mastering the laws of solar motion, Qin will develop rapidly and soon become many times more powerful than when the calculation started.” Max Plank said.



llama_print_timings:        load time =   625.98 ms
llama_print_timings:      sample time =    20.47 ms /    46 runs   (    0.44 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   625.94 ms /    74 tokens (    8.46 ms per token,   118.22 tokens per second)
llama_print_timings:        eval time =  3858.47 ms /    45 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4577.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As per calculation, the sun will rise soon. Enjoy your glory, Your Majesty!



llama_print_timings:        load time =   579.81 ms
llama_print_timings:      sample time =     9.13 ms /    20 runs   (    0.46 ms per token,  2189.38 tokens per second)
llama_print_timings: prompt eval time =   579.77 ms /    47 tokens (   12.34 ms per token,    81.07 tokens per second)
llama_print_timings:        eval time =  1597.05 ms /    19 runs   (   84.06 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  2219.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In response to Newton's words, the red sun rose from the horizon and illuminated the pyramid and computer array with golden light. The motherboard was engulfed in a wave of applause like a sea of waves.



llama_print_timings:        load time =   619.69 ms
llama_print_timings:      sample time =    22.92 ms /    51 runs   (    0.45 ms per token,  2224.94 tokens per second)
llama_print_timings: prompt eval time =   619.65 ms /    72 tokens (    8.61 ms per token,   116.19 tokens per second)
llama_print_timings:        eval time =  4258.03 ms /    50 runs   (   85.16 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  4983.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, a person ran here hurriedly. It is possible that he ran so fast that he fell to the ground when kneeling down and then lay on his belly. This was Shi Cheng, the astronomer of Qin state.



llama_print_timings:        load time =   608.98 ms
llama_print_timings:      sample time =    23.80 ms /    53 runs   (    0.45 ms per token,  2227.17 tokens per second)
llama_print_timings: prompt eval time =   608.94 ms /    63 tokens (    9.67 ms per token,   103.46 tokens per second)
llama_print_timings:        eval time =  4528.49 ms /    52 runs   (   87.09 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  5249.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“My Majesty, something is wrong with the calculation! A disaster is approaching!” He cried out.



llama_print_timings:        load time =   598.72 ms
llama_print_timings:      sample time =    11.39 ms /    22 runs   (    0.52 ms per token,  1930.67 tokens per second)
llama_print_timings: prompt eval time =   598.69 ms /    52 tokens (   11.51 ms per token,    86.86 tokens per second)
llama_print_timings:        eval time =  1779.29 ms /    21 runs   (   84.73 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  2431.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What are you talking about?” didn’t wait for Emperor Qin Shi Huang to answer, Newton kicked the astronomical minister in his foot. “Have you not noticed that the sun rose exactly as expected at the calculated time?”



llama_print_timings:        load time =   618.45 ms
llama_print_timings:      sample time =    28.29 ms /    51 runs   (    0.55 ms per token,  1802.76 tokens per second)
llama_print_timings: prompt eval time =   618.41 ms /    69 tokens (    8.96 ms per token,   111.58 tokens per second)
llama_print_timings:        eval time =  4414.07 ms /    50 runs   (   88.28 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =  5160.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Can……” the prime minister half-stood up, pointing at the sun with his hand, “Is that a few suns?”



llama_print_timings:        load time =   601.62 ms
llama_print_timings:      sample time =    13.52 ms /    30 runs   (    0.45 ms per token,  2219.59 tokens per second)
llama_print_timings: prompt eval time =   601.58 ms /    53 tokens (   11.35 ms per token,    88.10 tokens per second)
llama_print_timings:        eval time =  2446.22 ms /    29 runs   (   84.35 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  3109.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“All people looking at the rising sun are bewildered,” F.Norman said. “Surely you, a Cambridge-educated doctor of philosophy and official minister, won’t be stupid enough not to understand the basics of maths – that the sun is indeed one thing and the temperature is comfortable.”



llama_print_timings:        load time =   639.23 ms
llama_print_timings:      sample time =    37.31 ms /    69 runs   (    0.54 ms per token,  1849.27 tokens per second)
llama_print_timings: prompt eval time =   639.19 ms /    79 tokens (    8.09 ms per token,   123.59 tokens per second)
llama_print_timings:        eval time =  5918.70 ms /    68 runs   (   87.04 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  6732.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, three! There are two more behind it!” exclaimed the minister as he choked.



llama_print_timings:        load time =   597.73 ms
llama_print_timings:      sample time =     9.47 ms /    21 runs   (    0.45 ms per token,  2218.47 tokens per second)
llama_print_timings: prompt eval time =   597.69 ms /    55 tokens (   10.87 ms per token,    92.02 tokens per second)
llama_print_timings:        eval time =  1719.32 ms /    20 runs   (   85.97 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  2361.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The people looked at the sun again and felt very perplexed by minister's words.



llama_print_timings:        load time =   577.43 ms
llama_print_timings:      sample time =     9.13 ms /    20 runs   (    0.46 ms per token,  2190.10 tokens per second)
llama_print_timings: prompt eval time =   577.39 ms /    43 tokens (   13.43 ms per token,    74.47 tokens per second)
llama_print_timings:        eval time =  1627.12 ms /    19 runs   (   85.64 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  2245.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The Imperial Observatory's observations indicate that an unprecedented ‘three-day conjoined’ has occurred, in which three suns move around our planet with the same angular speed. In this way, our planet and the three suns remain on a single line! Our world remains at the tip of that line forever!”



llama_print_timings:        load time =   660.27 ms
llama_print_timings:      sample time =    31.05 ms /    70 runs   (    0.44 ms per token,  2254.36 tokens per second)
llama_print_timings: prompt eval time =   660.23 ms /    94 tokens (    7.02 ms per token,   142.37 tokens per second)
llama_print_timings:        eval time =  5931.86 ms /    69 runs   (   85.97 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  6734.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You must have made a mistake, sir.



llama_print_timings:        load time =   578.08 ms
llama_print_timings:      sample time =     4.46 ms /    10 runs   (    0.45 ms per token,  2241.15 tokens per second)
llama_print_timings: prompt eval time =   578.04 ms /    46 tokens (   12.57 ms per token,    79.58 tokens per second)
llama_print_timings:        eval time =   839.90 ms /     9 runs   (   93.32 ms per token,    10.72 tokens per second)
llama_print_timings:       total time =  1438.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course! The observations were made by astronomers from the Imperial Observatory, among them Kepler and Hevelius. They used the world's largest telescope which had been imported from Europe.



llama_print_timings:        load time =   617.92 ms
llama_print_timings:      sample time =    19.94 ms /    42 runs   (    0.47 ms per token,  2106.64 tokens per second)
llama_print_timings: prompt eval time =   617.88 ms /    68 tokens (    9.09 ms per token,   110.05 tokens per second)
llama_print_timings:        eval time =  3565.82 ms /    41 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  4276.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Newton relaxed his astronomy minister and stood up with a delighted expression, Wang Mo found him white-faced. But he said happily to Qin Shihuangdi, "The most noble, the most respectable emperor, this is an auspicious sign of auspicious signs! Now that three suns are orbiting around our planet, your empire has become the center of the universe! This is God's reward for our efforts!" Then, he slipped away while everyone else was still bewildered. Soon after, someone reported that Mr. Newton had stolen a fast horse and gone missing.



llama_print_timings:        load time =   784.18 ms
llama_print_timings:      sample time =    60.49 ms /   134 runs   (    0.45 ms per token,  2215.13 tokens per second)
llama_print_timings: prompt eval time =   784.15 ms /   163 tokens (    4.81 ms per token,   207.87 tokens per second)
llama_print_timings:        eval time = 11466.64 ms /   133 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 12534.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a moment of tense silence, Wang Miao suddenly said, "Your Majesty, please draw your sword."



llama_print_timings:        load time =   595.87 ms
llama_print_timings:      sample time =    10.93 ms /    24 runs   (    0.46 ms per token,  2196.60 tokens per second)
llama_print_timings: prompt eval time =   595.82 ms /    54 tokens (   11.03 ms per token,    90.63 tokens per second)
llama_print_timings:        eval time =  1917.96 ms /    23 runs   (   83.39 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  2564.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What are you doing? asked Qin Shi Huang, but he still gestured to his sword-wielding soldier who immediately drew out a longsword.



llama_print_timings:        load time =   598.81 ms
llama_print_timings:      sample time =    20.45 ms /    36 runs   (    0.57 ms per token,  1760.31 tokens per second)
llama_print_timings: prompt eval time =   598.77 ms /    62 tokens (    9.66 ms per token,   103.54 tokens per second)
llama_print_timings:        eval time =  3065.34 ms /    35 runs   (   87.58 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  3758.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said, "Wave to me."



llama_print_timings:        load time =   576.93 ms
llama_print_timings:      sample time =     4.47 ms /    10 runs   (    0.45 ms per token,  2239.14 tokens per second)
llama_print_timings: prompt eval time =   576.89 ms /    42 tokens (   13.74 ms per token,    72.80 tokens per second)
llama_print_timings:        eval time =   779.08 ms /     9 runs   (   86.56 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  1375.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he received the sword, Qin Shi Huang waved it a few times and was surprised by its lightness: “Hey, why is this so light?”



llama_print_timings:        load time =   594.73 ms
llama_print_timings:      sample time =    16.67 ms /    37 runs   (    0.45 ms per token,  2219.16 tokens per second)
llama_print_timings: prompt eval time =   594.69 ms /    56 tokens (   10.62 ms per token,    94.17 tokens per second)
llama_print_timings:        eval time =  3031.32 ms /    36 runs   (   84.20 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  3703.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The game's VR headset can't simulate the feeling of weightlessness, otherwise we would also feel very light.



llama_print_timings:        load time =   585.10 ms
llama_print_timings:      sample time =    12.76 ms /    28 runs   (    0.46 ms per token,  2195.05 tokens per second)
llama_print_timings: prompt eval time =   585.06 ms /    50 tokens (   11.70 ms per token,    85.46 tokens per second)
llama_print_timings:        eval time =  2358.60 ms /    27 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3001.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Look below!” someone cried, and everyone looked down. They saw a team of mounted troops approaching at the foot of the pyramid; their horses seemed to float in mid-air, reaching about 3 feet from the ground only once every several yards. They also saw a few runners, who could easily jump 10 meters with one stride but landed slowly upon each jump. On the top of the pyramid, a guard tried to jump and successfully reached 3 feet high.



llama_print_timings:        load time =   717.53 ms
llama_print_timings:      sample time =    71.20 ms /   104 runs   (    0.68 ms per token,  1460.59 tokens per second)
llama_print_timings: prompt eval time =   717.48 ms /   130 tokens (    5.52 ms per token,   181.19 tokens per second)
llama_print_timings:        eval time =  9625.14 ms /   103 runs   (   93.45 ms per token,    10.70 tokens per second)
llama_print_timings:       total time = 10674.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What’s going on?” Qin Shi Huang looked at the person who had just jumped to halfway in the air with horror.



llama_print_timings:        load time =   601.94 ms
llama_print_timings:      sample time =    17.59 ms /    32 runs   (    0.55 ms per token,  1819.73 tokens per second)
llama_print_timings: prompt eval time =   601.91 ms /    54 tokens (   11.15 ms per token,    89.71 tokens per second)
llama_print_timings:        eval time =  2782.90 ms /    31 runs   (   89.77 ms per token,    11.14 tokens per second)
llama_print_timings:       total time =  3463.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Holy One, three suns converge into a single line in the sky toward our planet. Their gravitational forces overlap here…” The astronomical minister explained and found herself hovering halfway in mid-air, others also leaned at different angles while their feet had left the ground and were struggling to stay steady by flapping their limbs like drowning people. Meanwhile, the ground they just left cracked open, expanding with spider web cracks and a loud crash as if the entire planet was falling apart. Through the gap between the floating stones in the ruined palace, Wang Meng saw the transformed great hall and the boiling cauldron he had been chained to sitting at its center.



llama_print_timings:        load time =   864.27 ms
llama_print_timings:      sample time =    70.57 ms /   150 runs   (    0.47 ms per token,  2125.43 tokens per second)
llama_print_timings: prompt eval time =   864.22 ms /   211 tokens (    4.10 ms per token,   244.15 tokens per second)
llama_print_timings:        eval time = 12947.29 ms /   149 runs   (   86.89 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 14140.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the sun rose in the sky, everything—people, boulders, astronomical instruments, bronze c



llama_print_timings:        load time =   930.04 ms
llama_print_timings:      sample time =    10.21 ms /    23 runs   (    0.44 ms per token,  2253.58 tokens per second)
llama_print_timings: prompt eval time =   930.00 ms /   239 tokens (    3.89 ms per token,   256.99 tokens per second)
llama_print_timings:        eval time =  1917.95 ms /    22 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2895.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

All things on the surface of the $三体$ world are attracted to the sun.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Fen Noyaman looked around and saw Qin Shi Huangdi. Fen Noyaman was floating and shouting something to him, but the sound didn't come out and only an image of a small paragraph appeared on the screen: "I got it. I suppose we can use electric components to build gate circuits and make computers with them! That way computer speed would be many times faster and size will be much smaller!" Fen Noyaman said.



llama_print_timings:        load time =   741.86 ms
llama_print_timings:      sample time =    43.36 ms /    97 runs   (    0.45 ms per token,  2237.34 tokens per second)
llama_print_timings: prompt eval time =   741.82 ms /   133 tokens (    5.58 ms per token,   179.29 tokens per second)
llama_print_timings:        eval time =  8345.69 ms /    96 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  9292.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Feng Nai Man kicked a floating rock to escape from the sword slashed by Qin Shi Huang. The sword slashed on the rock, sparking up some fragments that landed all over Feng's body and blood vessels. But Wang Mo did not hear the rock collision sound, because there was no sound around him anymore due to air loss. Floating bodies were frozen in a fluid state with transparent body fluids before turning into ice crystals surrounded by shapeless shapes. Because of the disappearing atmosphere, it became darker in space and everything reflected solar light formed an elegant spiral cloud that flowed towards its final destination - the sun.



llama_print_timings:        load time =   883.41 ms
llama_print_timings:      sample time =    93.22 ms /   141 runs   (    0.66 ms per token,  1512.62 tokens per second)
llama_print_timings: prompt eval time =   883.38 ms /   222 tokens (    3.98 ms per token,   251.31 tokens per second)
llama_print_timings:        eval time = 13165.60 ms /   140 runs   (   94.04 ms per token,    10.63 tokens per second)
llama_print_timings:       total time = 14480.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the beginning of a beautiful dream.



llama_print_timings:        load time =   669.84 ms
llama_print_timings:      sample time =     4.79 ms /    10 runs   (    0.48 ms per token,  2089.86 tokens per second)
llama_print_timings: prompt eval time =   669.79 ms /    96 tokens (    6.98 ms per token,   143.33 tokens per second)
llama_print_timings:        eval time =   769.60 ms /     9 runs   (   85.51 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1462.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The 184th civilization was destroyed in the gravitational superposition of the "three-day continuous string", and evolved to the scientific revolution and industrial revolution.



llama_print_timings:        load time =   614.33 ms
llama_print_timings:      sample time =    17.44 ms /    37 runs   (    0.47 ms per token,  2121.19 tokens per second)
llama_print_timings: prompt eval time =   614.29 ms /    59 tokens (   10.41 ms per token,    96.05 tokens per second)
llama_print_timings:        eval time =  3123.04 ms /    36 runs   (   86.75 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  3821.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In this time, Newton established the classical mechanics system at low speed. At the same time, thanks to the invention of calculus and von Neumann's structure computer, the basis for quantitative mathematical analysis on trilogy motion was laid down.



llama_print_timings:        load time =   626.86 ms
llama_print_timings:      sample time =    24.25 ms /    53 runs   (    0.46 ms per token,  2186.02 tokens per second)
llama_print_timings: prompt eval time =   626.82 ms /    72 tokens (    8.71 ms per token,   114.87 tokens per second)
llama_print_timings:        eval time =  4452.97 ms /    52 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  5190.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a long time, life and civilization will restart and resume their evolution within the Three Body Universe.



llama_print_timings:        load time =   584.51 ms
llama_print_timings:      sample time =    10.61 ms /    24 runs   (    0.44 ms per token,  2263.08 tokens per second)
llama_print_timings: prompt eval time =   584.47 ms /    52 tokens (   11.24 ms per token,    88.97 tokens per second)
llama_print_timings:        eval time =  1983.42 ms /    23 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2615.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Welcome back.



llama_print_timings:        load time =   568.28 ms
llama_print_timings:      sample time =     2.19 ms /     5 runs   (    0.44 ms per token,  2278.94 tokens per second)
llama_print_timings: prompt eval time =   568.24 ms /    33 tokens (   17.22 ms per token,    58.07 tokens per second)
llama_print_timings:        eval time =   349.12 ms /     4 runs   (   87.28 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =   927.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao just left the game and received a strange phone call from a voice with magnetism. "Hello, first of all, thank you for leaving a real number, I'm the administrator of The Three-Body Problem game."



llama_print_timings:        load time =   617.82 ms
llama_print_timings:      sample time =    22.98 ms /    50 runs   (    0.46 ms per token,  2175.62 tokens per second)
llama_print_timings: prompt eval time =   617.78 ms /    71 tokens (    8.70 ms per token,   114.93 tokens per second)
llama_print_timings:        eval time =  4136.29 ms /    49 runs   (   84.41 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  4859.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A wave of excitement and nervousness washed over Wang Miao.



llama_print_timings:        load time =   577.63 ms
llama_print_timings:      sample time =     7.43 ms /    16 runs   (    0.46 ms per token,  2152.27 tokens per second)
llama_print_timings: prompt eval time =   577.59 ms /    38 tokens (   15.20 ms per token,    65.79 tokens per second)
llama_print_timings:        eval time =  1273.93 ms /    15 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1886.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The administrator says, "Please fill in your age, educational background, work department and position when you register."



llama_print_timings:        load time =   592.02 ms
llama_print_timings:      sample time =    10.27 ms /    23 runs   (    0.45 ms per token,  2240.41 tokens per second)
llama_print_timings: prompt eval time =   592.00 ms /    53 tokens (   11.17 ms per token,    89.53 tokens per second)
llama_print_timings:        eval time =  1870.95 ms /    22 runs   (   85.04 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2508.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“These are related to games?”



llama_print_timings:        load time =   579.30 ms
llama_print_timings:      sample time =     4.05 ms /     9 runs   (    0.45 ms per token,  2219.48 tokens per second)
llama_print_timings: prompt eval time =   579.26 ms /    37 tokens (   15.66 ms per token,    63.87 tokens per second)
llama_print_timings:        eval time =   646.79 ms /     8 runs   (   80.85 ms per token,    12.37 tokens per second)
llama_print_timings:       total time =  1245.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You must provide this information if you reach this level. If you refuse, ‘The Three-Body Problem’ will permanently shut you down.”



llama_print_timings:        load time =   592.81 ms
llama_print_timings:      sample time =    14.41 ms /    31 runs   (    0.46 ms per token,  2150.84 tokens per second)
llama_print_timings: prompt eval time =   592.77 ms /    54 tokens (   10.98 ms per token,    91.10 tokens per second)
llama_print_timings:        eval time =  2513.38 ms /    30 runs   (   83.78 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  3175.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Moli answered the manager's question honestly.



llama_print_timings:        load time =   586.86 ms
llama_print_timings:      sample time =     6.32 ms /    14 runs   (    0.45 ms per token,  2216.59 tokens per second)
llama_print_timings: prompt eval time =   586.82 ms /    40 tokens (   14.67 ms per token,    68.16 tokens per second)
llama_print_timings:        eval time =  1108.87 ms /    13 runs   (   85.30 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  1724.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's great, Professor Wang. You meet the requirements to continue into The Three-Body Problem."



llama_print_timings:        load time =   584.99 ms
llama_print_timings:      sample time =    10.43 ms /    23 runs   (    0.45 ms per token,  2205.81 tokens per second)
llama_print_timings: prompt eval time =   584.95 ms /    46 tokens (   12.72 ms per token,    78.64 tokens per second)
llama_print_timings:        eval time =  1855.16 ms /    22 runs   (   84.33 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  2488.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thank you. Can I ask you some questions?



llama_print_timings:        load time =   592.60 ms
llama_print_timings:      sample time =     4.84 ms /    11 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   592.56 ms /    47 tokens (   12.61 ms per token,    79.32 tokens per second)
llama_print_timings:        eval time =   887.15 ms /    10 runs   (   88.72 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  1501.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"No, but there's a Three-Body online event tomorrow night. You're welcome to join."



llama_print_timings:        load time =   588.69 ms
llama_print_timings:      sample time =    11.52 ms /    25 runs   (    0.46 ms per token,  2169.95 tokens per second)
llama_print_timings: prompt eval time =   588.65 ms /    56 tokens (   10.51 ms per token,    95.13 tokens per second)
llama_print_timings:        eval time =  2027.27 ms /    24 runs   (   84.47 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  2669.31 ms


translated 126.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The gathering place of the Netizens of Three Body is a quiet little coffee shop in Wang Miao's impression. According to Wang Miao, game netizens at this era gathered like large-scale, noisy parties, but this time there are only seven people coming with him, and the six others look like nothing more than game enthusiasts either. There are two younger ones among them, while the rest, including a lady in her twenties, are all middle-aged people, and an elderly man who looks to be sixty years old or so.



llama_print_timings:        load time =   705.31 ms
llama_print_timings:      sample time =    74.79 ms /   121 runs   (    0.62 ms per token,  1617.91 tokens per second)
llama_print_timings: prompt eval time =   705.28 ms /   121 tokens (    5.83 ms per token,   171.56 tokens per second)
llama_print_timings:        eval time = 11048.36 ms /   120 runs   (   92.07 ms per token,    10.86 tokens per second)
llama_print_timings:       total time = 12094.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   743.63 ms
llama_print_timings:      sample time =     0.93 ms /     2 runs   (    0.46 ms per token,  2159.83 tokens per second)
llama_print_timings: prompt eval time =   743.59 ms /   145 tokens (    5.13 ms per token,   195.00 tokens per second)
llama_print_timings:        eval time =    92.58 ms /     1 runs   (   92.58 ms per token,    10.80 tokens per second)
llama_print_timings:       total time =   839.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The six people in the room are all strangers to Wang Moxue, except for two of them. The old scholar with white hair and an ethereal smile is a renowned scholar who has given modern scientific meaning to eastern philosophy. The lady with an eccentric style is a well-known author with many readers. The other four people are two middle-aged men (who dress simple but give no clue as to their professional backgrounds), a vice president of China's largest software company, and a senior official in the state power corporation. Two young people are in attendance: a journalist from a major media outlet and a graduate student studying science. Wang Moxue now realizes that many players of the game “The Three-Body Problem” may be these kinds of social elite.



llama_print_timings:        load time =   796.59 ms
llama_print_timings:      sample time =    74.91 ms /   168 runs   (    0.45 ms per token,  2242.75 tokens per second)
llama_print_timings: prompt eval time =   796.54 ms /   176 tokens (    4.53 ms per token,   220.96 tokens per second)
llama_print_timings:        eval time = 14565.02 ms /   167 runs   (   87.22 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 15717.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The organizer of the party came quickly, and Wang Moxi saw him. Her heart quickened suddenly as he was Pan Han, the first suspect in killing Shen Yufei. She silently took out her phone under the table and sent a text message to Dashi from underneath.



llama_print_timings:        load time =   630.71 ms
llama_print_timings:      sample time =    28.19 ms /    63 runs   (    0.45 ms per token,  2235.15 tokens per second)
llama_print_timings: prompt eval time =   630.68 ms /    77 tokens (    8.19 ms per token,   122.09 tokens per second)
llama_print_timings:        eval time =  5278.96 ms /    62 runs   (   85.14 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6035.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Haha, everyone is here so early!" Pan Han relaxed and greeted with a smile. It seemed that nothing had happened. He changed his habit of being disheveled like a vagabond in the media to be well-groomed and formal in suits and tie. "You are all the elites I imagined, 《The Three-Body Problem》 is designed for people like you who possess such levels of knowledge and attainment. It requires a high level of expertise and profound insights that common people would not be able to comprehend."



llama_print_timings:        load time =   726.27 ms
llama_print_timings:      sample time =    55.24 ms /   123 runs   (    0.45 ms per token,  2226.81 tokens per second)
llama_print_timings: prompt eval time =   726.23 ms /   133 tokens (    5.46 ms per token,   183.14 tokens per second)
llama_print_timings:        eval time = 10657.75 ms /   122 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 11637.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Miao's text message has been sent: 'I have seen Pan Han.' In the Yunhe Coffee Shop in Xicheng District.



llama_print_timings:        load time =   589.05 ms
llama_print_timings:      sample time =    15.94 ms /    36 runs   (    0.44 ms per token,  2258.04 tokens per second)
llama_print_timings: prompt eval time =   589.00 ms /    49 tokens (   12.02 ms per token,    83.19 tokens per second)
llama_print_timings:        eval time =  2972.75 ms /    35 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3632.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Han continues, “All of you are excellent players in The Three-Body Problem and have made a great contribution to the game. I believe that it has become a part of your lives.”



llama_print_timings:        load time =   624.04 ms
llama_print_timings:      sample time =    18.25 ms /    40 runs   (    0.46 ms per token,  2191.18 tokens per second)
llama_print_timings: prompt eval time =   624.01 ms /    66 tokens (    9.45 ms per token,   105.77 tokens per second)
llama_print_timings:        eval time =  3374.91 ms /    39 runs   (   86.54 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  4082.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said, “It is a part of life.”



llama_print_timings:        load time =   576.21 ms
llama_print_timings:      sample time =     5.29 ms /    12 runs   (    0.44 ms per token,  2268.86 tokens per second)
llama_print_timings: prompt eval time =   576.17 ms /    42 tokens (   13.72 ms per token,    72.90 tokens per second)
llama_print_timings:        eval time =   953.66 ms /    11 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  1552.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I saw it by chance from Grandson’s computer,” the old philosopher said with his pipe, “the young man played around for a few moments before giving up saying it was too deep. But I was fascinated; the profound meanings hidden within simplicity, intricate world-setting, and abundant information and precise details in simple appearance, all enraptured us.” Including Wang Meng among others nodded affirmatively. At this time, Wang Meng received a text message from Dashui: “We’ve seen him too; don’t worry, just follow his lead.” Pay attention, though, when you are in their presence, you should try to act as bizarrely as possible but not so much that it does not look like it.



llama_print_timings:        load time =   786.61 ms
llama_print_timings:      sample time =    85.37 ms /   166 runs   (    0.51 ms per token,  1944.50 tokens per second)
llama_print_timings: prompt eval time =   786.56 ms /   163 tokens (    4.83 ms per token,   207.23 tokens per second)
llama_print_timings:        eval time = 14431.29 ms /   165 runs   (   87.46 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 15612.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, ” the female writer nodded in agreement. “From a literary perspective, ‘The Three-Body Problem’ is also excellent. The rise and fall of the two hundred and three rounds of civilization really makes for some wonderful epic poems.” She mentioned the two hundred and three rounds of civilization while Wang Mo experienced the one hundred and ninety-one rounds, which made Wang Mo reassured again: “Each player has an independent process in ‘The Three-Body Problem’.”



llama_print_timings:        load time =   685.29 ms
llama_print_timings:      sample time =    48.32 ms /   107 runs   (    0.45 ms per token,  2214.22 tokens per second)
llama_print_timings: prompt eval time =   685.25 ms /   114 tokens (    6.01 ms per token,   166.36 tokens per second)
llama_print_timings:        eval time =  9234.86 ms /   106 runs   (   87.12 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 10140.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm so tired of the real world that "The Three-Body Problem" has become my second reality.



llama_print_timings:        load time =   588.12 ms
llama_print_timings:      sample time =    10.55 ms /    24 runs   (    0.44 ms per token,  2274.88 tokens per second)
llama_print_timings: prompt eval time =   588.08 ms /    51 tokens (   11.53 ms per token,    86.72 tokens per second)
llama_print_timings:        eval time =  1982.84 ms /    23 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2617.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Peng Han was very interested in asking a question.



llama_print_timings:        load time =   573.59 ms
llama_print_timings:      sample time =     5.65 ms /    12 runs   (    0.47 ms per token,  2122.39 tokens per second)
llama_print_timings: prompt eval time =   573.56 ms /    43 tokens (   13.34 ms per token,    74.97 tokens per second)
llama_print_timings:        eval time =   921.71 ms /    11 runs   (   83.79 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1520.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I think so too. The reality is so mediocre and vulgar compared to the imaginative world of ‘The Three-Body Problem’.”



llama_print_timings:        load time =   586.52 ms
llama_print_timings:      sample time =    15.33 ms /    33 runs   (    0.46 ms per token,  2153.20 tokens per second)
llama_print_timings: prompt eval time =   586.48 ms /    51 tokens (   11.50 ms per token,    86.96 tokens per second)
llama_print_timings:        eval time =  2714.97 ms /    32 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  3372.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Sorry, it's just a game." said the leaders of Guodian Corporation.



llama_print_timings:        load time =   580.72 ms
llama_print_timings:      sample time =     9.32 ms /    21 runs   (    0.44 ms per token,  2252.74 tokens per second)
llama_print_timings: prompt eval time =   580.70 ms /    43 tokens (   13.50 ms per token,    74.05 tokens per second)
llama_print_timings:        eval time =  1735.36 ms /    20 runs   (   86.77 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  2357.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's great," Pan Han nodded, and Wang Mu noticed the excitement in his eyes.



llama_print_timings:        load time =   597.22 ms
llama_print_timings:      sample time =    10.03 ms /    22 runs   (    0.46 ms per token,  2193.64 tokens per second)
llama_print_timings: prompt eval time =   597.18 ms /    52 tokens (   11.48 ms per token,    87.08 tokens per second)
llama_print_timings:        eval time =  1777.89 ms /    21 runs   (   84.66 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2420.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“There is a question that I think we all want to know.” Wang Miao said.



llama_print_timings:        load time =   587.05 ms
llama_print_timings:      sample time =     9.62 ms /    20 runs   (    0.48 ms per token,  2079.00 tokens per second)
llama_print_timings: prompt eval time =   587.00 ms /    47 tokens (   12.49 ms per token,    80.07 tokens per second)
llama_print_timings:        eval time =  1630.47 ms /    19 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2261.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I know what it is, but you asked.”



llama_print_timings:        load time =   576.38 ms
llama_print_timings:      sample time =     5.56 ms /    12 runs   (    0.46 ms per token,  2157.89 tokens per second)
llama_print_timings: prompt eval time =   576.36 ms /    43 tokens (   13.40 ms per token,    74.61 tokens per second)
llama_print_timings:        eval time =   959.61 ms /    11 runs   (   87.24 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1561.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But then, a few people shook their heads. "I don't think so," one said, "I think it is more than just a game."



llama_print_timings:        load time =   740.77 ms
llama_print_timings:      sample time =    17.28 ms /    33 runs   (    0.52 ms per token,  1909.83 tokens per second)
llama_print_timings: prompt eval time =   740.73 ms /    52 tokens (   14.24 ms per token,    70.20 tokens per second)
llama_print_timings:        eval time =  2797.55 ms /    32 runs   (   87.42 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  3616.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Han stood up and said gravely, “The Three Body World is a real place.”



llama_print_timings:        load time =   585.73 ms
llama_print_timings:      sample time =     8.87 ms /    20 runs   (    0.44 ms per token,  2255.30 tokens per second)
llama_print_timings: prompt eval time =   585.69 ms /    45 tokens (   13.02 ms per token,    76.83 tokens per second)
llama_print_timings:        eval time =  1679.07 ms /    19 runs   (   88.37 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  2305.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Where are you?



llama_print_timings:        load time =   587.66 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2245.17 tokens per second)
llama_print_timings: prompt eval time =   587.62 ms /    42 tokens (   13.99 ms per token,    71.47 tokens per second)
llama_print_timings:        eval time =   354.12 ms /     4 runs   (   88.53 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =   951.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Xiaosu sat down and remained silent for a long time before speaking. "I can answer some questions, but not all of them. If you have a chance to encounter the Three Body world in the future, there will always be an answer to all your questions."



llama_print_timings:        load time =   618.91 ms
llama_print_timings:      sample time =    28.88 ms /    57 runs   (    0.51 ms per token,  1973.55 tokens per second)
llama_print_timings: prompt eval time =   618.88 ms /    67 tokens (    9.24 ms per token,   108.26 tokens per second)
llama_print_timings:        eval time =  4835.76 ms /    56 runs   (   86.35 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  5585.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“So, did the game represent any realistic components of the Three-Body universe?”



llama_print_timings:        load time =   776.45 ms
llama_print_timings:      sample time =    10.14 ms /    19 runs   (    0.53 ms per token,  1873.95 tokens per second)
llama_print_timings: prompt eval time =   776.35 ms /    48 tokens (   16.17 ms per token,    61.83 tokens per second)
llama_print_timings:        eval time =  1611.78 ms /    18 runs   (   89.54 ms per token,    11.17 tokens per second)
llama_print_timings:       total time =  2434.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

First, in many rounds of civilization, the dehydration function of the Three Body people is true. In order to adapt to the unpredictable natural environment, they can immediately remove all the water inside their body and turn themselves into dry fiber objects to escape from harsh weather conditions that are not suitable for survival.



llama_print_timings:        load time =   649.25 ms
llama_print_timings:      sample time =    43.31 ms /    71 runs   (    0.61 ms per token,  1639.50 tokens per second)
llama_print_timings: prompt eval time =   649.21 ms /    85 tokens (    7.64 ms per token,   130.93 tokens per second)
llama_print_timings:        eval time =  6193.81 ms /    70 runs   (   88.48 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  7042.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What does the alien look like?”



llama_print_timings:        load time =   573.83 ms
llama_print_timings:      sample time =     4.56 ms /    10 runs   (    0.46 ms per token,  2194.43 tokens per second)
llama_print_timings: prompt eval time =   573.81 ms /    36 tokens (   15.94 ms per token,    62.74 tokens per second)
llama_print_timings:        eval time =   777.16 ms /     9 runs   (   86.35 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  1371.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Hanxiao shook his head. “I don’t know, I really don't know. In every round of civilization, the appearance of the Three Body problem is very different. In addition, in the game there is also a reflection of an existing real-life version of the Three Kingdoms: human-level computer.”



llama_print_timings:        load time =   633.43 ms
llama_print_timings:      sample time =    30.95 ms /    70 runs   (    0.44 ms per token,  2262.00 tokens per second)
llama_print_timings: prompt eval time =   633.39 ms /    69 tokens (    9.18 ms per token,   108.94 tokens per second)
llama_print_timings:        eval time =  5777.85 ms /    69 runs   (   83.74 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  6551.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Heh, I think that's the least realistic thing! "The IT vice president said. "I conducted a simple test with hundreds of employees from our company and even if that idea was to be achieved, it would probably be slower than a human manually calculating numbers."



llama_print_timings:        load time =   624.02 ms
llama_print_timings:      sample time =    25.49 ms /    57 runs   (    0.45 ms per token,  2236.26 tokens per second)
llama_print_timings: prompt eval time =   623.98 ms /    80 tokens (    7.80 ms per token,   128.21 tokens per second)
llama_print_timings:        eval time =  4782.00 ms /    56 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  5522.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, but if the 30 million soldiers that make up a computer move their black and white flags 100,000 times per second, and they are running at a speed much faster than sound velocity, then the results will be different,” said Pan Huan with a mysterious smile. “You asked about the appearance of the Trisquelans. Based on some clues, it is possible that the outer casing of the computer made up by Trisquelans would cover a reflective surface, which evolved to survive in harsh sunlight conditions. The reflective surface can change into various shapes and they communicate using light beams focused by this reflective surface, which is very fast in information transmission. This is the foundation for the efficiency of the computer that the Trisquelans created. Of course, it is still a low-efficiency machine, but it does perform tasks that humans cannot complete.”



llama_print_timings:        load time =   871.58 ms
llama_print_timings:      sample time =   100.78 ms /   195 runs   (    0.52 ms per token,  1934.97 tokens per second)
llama_print_timings: prompt eval time =   871.53 ms /   211 tokens (    4.13 ms per token,   242.10 tokens per second)
llama_print_timings:        eval time = 17346.89 ms /   194 runs   (   89.42 ms per token,    11.18 tokens per second)
llama_print_timings:       total time = 18689.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Han stood up and paced around the backs of his followers. "All I can say at this point is that, as a game, The Three-Body Problem only uses humanity's background to simulate the evolution of Trisolaris. It's just a way for gamers to familiarize themselves with a given setting, but there are significant differences between the actual trisolar system and the one presented in the game. Nevertheless, it is a fact that three suns exist, which is based on the natural structure of this world."



llama_print_timings:        load time =   671.84 ms
llama_print_timings:      sample time =    75.88 ms /   115 runs   (    0.66 ms per token,  1515.51 tokens per second)
llama_print_timings: prompt eval time =   671.79 ms /   101 tokens (    6.65 ms per token,   150.34 tokens per second)
llama_print_timings:        eval time = 10447.13 ms /   114 runs   (   91.64 ms per token,    10.91 tokens per second)
llama_print_timings:       total time = 11465.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The IT vice president said, "Developing this game definitely took a lot of effort, but its clear purpose is not to make money."



llama_print_timings:        load time =   596.72 ms
llama_print_timings:      sample time =    13.09 ms /    29 runs   (    0.45 ms per token,  2216.28 tokens per second)
llama_print_timings: prompt eval time =   596.70 ms /    50 tokens (   11.93 ms per token,    83.79 tokens per second)
llama_print_timings:        eval time =  2340.48 ms /    28 runs   (   83.59 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  2994.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The purpose of the game is simple, just to gather up people like us who are in agreement.



llama_print_timings:        load time =   588.86 ms
llama_print_timings:      sample time =     9.29 ms /    21 runs   (    0.44 ms per token,  2261.23 tokens per second)
llama_print_timings: prompt eval time =   588.82 ms /    54 tokens (   10.90 ms per token,    91.71 tokens per second)
llama_print_timings:        eval time =  1698.66 ms /    20 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2328.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What is your aspiration and what is your way?" asked Wang Moxi, but immediately regretted it, thinking whether her question revealed any animosity towards the others. However, Pan Han didn't reply immediately. He looked at everyone in the room before speaking softly: "If the Three Body Civilization wishes to enter the human world, what would be your attitude?"



llama_print_timings:        load time =   674.84 ms
llama_print_timings:      sample time =    35.96 ms /    81 runs   (    0.44 ms per token,  2252.69 tokens per second)
llama_print_timings: prompt eval time =   674.80 ms /   103 tokens (    6.55 ms per token,   152.64 tokens per second)
llama_print_timings:        eval time =  6716.08 ms /    80 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  7555.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I am very happy,” the young reporter broke the silence first, “What I have seen over these years has disappointed me so much. Human society is no longer capable of self-improvement and requires external intervention. ”



llama_print_timings:        load time =   616.83 ms
llama_print_timings:      sample time =    22.25 ms /    50 runs   (    0.44 ms per token,  2247.60 tokens per second)
llama_print_timings: prompt eval time =   616.79 ms /    70 tokens (    8.81 ms per token,   113.49 tokens per second)
llama_print_timings:        eval time =  4262.80 ms /    49 runs   (   87.00 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  4978.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The female writer exclaimed in a loud voice, excited that she had finally found an opportunity to express something, “Human beings? What nasty things they are! For years, I’ve used my literary scalpel to expose their ugliness. Now even that has become tedious for me. I long for the Three Body Civilization to bring true beauty to this world.” Peng Han didn't respond, but his eyes lit up with a new kind of excitement.



llama_print_timings:        load time =   693.66 ms
llama_print_timings:      sample time =    44.84 ms /   100 runs   (    0.45 ms per token,  2230.15 tokens per second)
llama_print_timings: prompt eval time =   693.62 ms /   115 tokens (    6.03 ms per token,   165.80 tokens per second)
llama_print_timings:        eval time =  8731.17 ms /    99 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  9629.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The old philosopher waved his extinguished pipe and said gravely, "Let's explore this matter further: What are your impressions of the Aztec civilization?"



llama_print_timings:        load time =   613.66 ms
llama_print_timings:      sample time =    18.65 ms /    41 runs   (    0.45 ms per token,  2198.63 tokens per second)
llama_print_timings: prompt eval time =   613.63 ms /    66 tokens (    9.30 ms per token,   107.56 tokens per second)
llama_print_timings:        eval time =  3489.08 ms /    40 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  4188.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The female writer said, “Dark and bloody, a pyramid of gore radiated by the ominous firelight from among the trees. That was my impression.”



llama_print_timings:        load time =   613.62 ms
llama_print_timings:      sample time =    17.23 ms /    39 runs   (    0.44 ms per token,  2262.84 tokens per second)
llama_print_timings: prompt eval time =   613.60 ms /    64 tokens (    9.59 ms per token,   104.30 tokens per second)
llama_print_timings:        eval time =  3228.19 ms /    38 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3921.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The philosopher nodded and said, “Now imagine if there was no Spanish intervention; what impact would this civilization have had on human history?”



llama_print_timings:        load time =   599.39 ms
llama_print_timings:      sample time =    14.73 ms /    33 runs   (    0.45 ms per token,  2240.02 tokens per second)
llama_print_timings: prompt eval time =   599.35 ms /    61 tokens (    9.83 ms per token,   101.78 tokens per second)
llama_print_timings:        eval time =  2825.87 ms /    32 runs   (   88.31 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  3492.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are turning black and white upside down,” the IT manager pointed at the philosopher. “The Spanish invaders of America were nothing but robbers and murderers!”



llama_print_timings:        load time =   599.34 ms
llama_print_timings:      sample time =    17.41 ms /    39 runs   (    0.45 ms per token,  2240.35 tokens per second)
llama_print_timings: prompt eval time =   599.30 ms /    60 tokens (    9.99 ms per token,   100.12 tokens per second)
llama_print_timings:        eval time =  3406.35 ms /    38 runs   (   89.64 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =  4084.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Even if so, they stopped the following from happening: unlimited expansion of Aztecs and turning America into a bloody and dark vast empire. At that time, when America and mankind are more democratic and civilized, it would be later than now or never occur at all.”



llama_print_timings:        load time =   696.73 ms
llama_print_timings:      sample time =    28.42 ms /    64 runs   (    0.44 ms per token,  2252.09 tokens per second)
llama_print_timings: prompt eval time =   696.69 ms /   111 tokens (    6.28 ms per token,   159.33 tokens per second)
llama_print_timings:        eval time =  5556.10 ms /    63 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  6385.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He asked, “Have you thought about that? The Aztec civilization was destroyed by Western invaders.”



llama_print_timings:        load time =   687.42 ms
llama_print_timings:      sample time =    10.62 ms /    24 runs   (    0.44 ms per token,  2260.95 tokens per second)
llama_print_timings: prompt eval time =   687.37 ms /    76 tokens (    9.04 ms per token,   110.57 tokens per second)
llama_print_timings:        eval time =  2008.89 ms /    23 runs   (   87.34 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2745.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Doctor Xi said, "I have the same opinion as you." He pointed to Professor Fang and nodded. 



llama_print_timings:        load time =   666.37 ms
llama_print_timings:      sample time =    12.47 ms /    27 runs   (    0.46 ms per token,  2165.54 tokens per second)
llama_print_timings: prompt eval time =   666.33 ms /    92 tokens (    7.24 ms per token,   138.07 tokens per second)
llama_print_timings:        eval time =  2249.15 ms /    26 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2972.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I stand with them.



llama_print_timings:        load time =   601.87 ms
llama_print_timings:      sample time =     2.66 ms /     6 runs   (    0.44 ms per token,  2259.04 tokens per second)
llama_print_timings: prompt eval time =   601.83 ms /    62 tokens (    9.71 ms per token,   103.02 tokens per second)
llama_print_timings:        eval time =   409.88 ms /     5 runs   (   81.98 ms per token,    12.20 tokens per second)
llama_print_timings:       total time =  1023.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That’s fine, ” Pan Han said to the IT manager and the national power company executives. “You two are no longer suitable for this party and not suitable for playing ‘The Three Body Problem’. Your ID will be cancelled and you will leave now. Thank you for your attendance and please!” The two stood up and looked at each other confusedly, then turned to leave the room.



llama_print_timings:        load time =   682.15 ms
llama_print_timings:      sample time =    54.32 ms /    86 runs   (    0.63 ms per token,  1583.33 tokens per second)
llama_print_timings: prompt eval time =   682.11 ms /   108 tokens (    6.32 ms per token,   158.33 tokens per second)
llama_print_timings:        eval time =  7761.12 ms /    85 runs   (   91.31 ms per token,    10.95 tokens per second)
llama_print_timings:       total time =  8692.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Huan extended his hand to the remaining five people and shook hands with each of them solemnly. Finally, he said solemnly, "We are comrades."
translated 43.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

When Wang Moxue entered the world for the fifth time, she found that the world had completely changed. The large pyramid that was always present in previous visions of dawn no longer existed, and instead, a tall modern building with a black facade stood where it once was. This building was familiar to Wang Moxue, as it resembled the United Nations headquarters. In addition, numerous large buildings with clear, polished surfaces grown like giant crystals in the morning light were scattered across the land.



llama_print_timings:        load time =  1234.96 ms
llama_print_timings:      sample time =    48.98 ms /   108 runs   (    0.45 ms per token,  2204.98 tokens per second)
llama_print_timings: prompt eval time =  1234.93 ms /   139 tokens (    8.88 ms per token,   112.56 tokens per second)
llama_print_timings:        eval time =  9161.98 ms /   107 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time = 10617.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He heard a violin playing, it was Mozart's music. Though not very proficient, there was a special flavor that seemed to be telling him something—it felt like he was listening to the music for himself and enjoyed it. The old man played with his silver hair swaying in the wind. He put a straw hat on his head inside which some coins had already been added by someone.



llama_print_timings:        load time =   693.51 ms
llama_print_timings:      sample time =    39.90 ms /    88 runs   (    0.45 ms per token,  2205.68 tokens per second)
llama_print_timings: prompt eval time =   693.47 ms /   123 tokens (    5.64 ms per token,   177.37 tokens per second)
llama_print_timings:        eval time =  7498.86 ms /    87 runs   (   86.19 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  8367.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao suddenly realized that the sun had risen, but it was still pitch black in the sky where the horizon rose. There were no dawnings before the rising of the sun. The huge sun. The half-risen sun filled with a third of the horizon. Wang Miao's heart beats faster, only this can mean another great disaster. But when he turns back to see that the old man sits still and plays the lute without any care, his silver hair burned like it is lit by the sunlight.



llama_print_timings:        load time =   749.66 ms
llama_print_timings:      sample time =    54.55 ms /   114 runs   (    0.48 ms per token,  2089.75 tokens per second)
llama_print_timings: prompt eval time =   749.62 ms /   141 tokens (    5.32 ms per token,   188.09 tokens per second)
llama_print_timings:        eval time =  9738.78 ms /   113 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 10730.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The sun is silver like the hair of an old man. It emits a silver light over the earth, but Wang Meng feels no warmth from it.$He looks at the full sun that has risen above the horizon and sees on the plates that reflect its light a pattern made of wood grain. He understands that it is not the real sun that is shining but another huge moon which is running fast through the sky, gradually becoming smaller and then larger again. The elderly man's music is playing in the cold morning wind, as if the universe itself were transformed into sound. When the huge crescent moon drops to the ground with increased brightness, it becomes a pair of sharp corners that remain above the horizon. Wang Meng suddenly imagines them as two horns on an ancient and majestic bull running towards the sun.



llama_print_timings:        load time =  1001.11 ms
llama_print_timings:      sample time =    92.95 ms /   178 runs   (    0.52 ms per token,  1914.99 tokens per second)
llama_print_timings: prompt eval time =  1001.08 ms /   276 tokens (    3.63 ms per token,   275.70 tokens per second)
llama_print_timings:        eval time = 15824.79 ms /   177 runs   (   89.41 ms per token,    11.18 tokens per second)
llama_print_timings:       total time = 17243.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Dear Copernicus, please stop your hasty steps. It would be better if we both could have a meal while enjoying a piece of Mozart.” The elder raised his head to speak as the huge moon had completely set down.



llama_print_timings:        load time =   651.13 ms
llama_print_timings:      sample time =    23.15 ms /    52 runs   (    0.45 ms per token,  2246.22 tokens per second)
llama_print_timings: prompt eval time =   651.09 ms /    72 tokens (    9.04 ms per token,   110.58 tokens per second)
llama_print_timings:        eval time =  4430.89 ms /    51 runs   (   86.88 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  5184.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If I'm not mistaken, Wang Miao looked at the wrinkled face and said, Those wrinkles are very long and the curves are very smooth, trying to create harmony.



llama_print_timings:        load time =   621.83 ms
llama_print_timings:      sample time =    19.01 ms /    41 runs   (    0.46 ms per token,  2156.87 tokens per second)
llama_print_timings: prompt eval time =   621.79 ms /    69 tokens (    9.01 ms per token,   110.97 tokens per second)
llama_print_timings:        eval time =  3331.48 ms /    40 runs   (   83.29 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  4038.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You didn't make a mistake, I am Albert Einstein, an unfortunate man who was abandoned by God.



llama_print_timings:        load time =   585.52 ms
llama_print_timings:      sample time =    11.79 ms /    26 runs   (    0.45 ms per token,  2205.63 tokens per second)
llama_print_timings: prompt eval time =   585.50 ms /    53 tokens (   11.05 ms per token,    90.52 tokens per second)
llama_print_timings:        eval time =  2172.17 ms /    25 runs   (   86.89 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  2809.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I've never seen that big moon before. What happened just now?



llama_print_timings:        load time =   590.02 ms
llama_print_timings:      sample time =     7.10 ms /    16 runs   (    0.44 ms per token,  2254.47 tokens per second)
llama_print_timings: prompt eval time =   589.98 ms /    47 tokens (   12.55 ms per token,    79.66 tokens per second)
llama_print_timings:        eval time =  1302.02 ms /    15 runs   (   86.80 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  1922.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It has cooled down.



llama_print_timings:        load time =   574.88 ms
llama_print_timings:      sample time =     3.89 ms /     8 runs   (    0.49 ms per token,  2054.97 tokens per second)
llama_print_timings: prompt eval time =   574.85 ms /    37 tokens (   15.54 ms per token,    64.37 tokens per second)
llama_print_timings:        eval time =   583.51 ms /     7 runs   (   83.36 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  1176.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who?



llama_print_timings:        load time =   583.58 ms
llama_print_timings:      sample time =     1.36 ms /     3 runs   (    0.45 ms per token,  2209.13 tokens per second)
llama_print_timings: prompt eval time =   583.55 ms /    33 tokens (   17.68 ms per token,    56.55 tokens per second)
llama_print_timings:        eval time =   179.26 ms /     2 runs   (   89.63 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =   768.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The big moon, when I was a child it still glowed warmly. At its mid-height, you could see the red light on the core plain; now it's cold... have you heard about the great tearing?



llama_print_timings:        load time =   613.85 ms
llama_print_timings:      sample time =    22.56 ms /    51 runs   (    0.44 ms per token,  2260.94 tokens per second)
llama_print_timings: prompt eval time =   613.81 ms /    68 tokens (    9.03 ms per token,   110.78 tokens per second)
llama_print_timings:        eval time =  4198.47 ms /    50 runs   (   83.97 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  4911.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, what's the matter?



llama_print_timings:        load time =   572.77 ms
llama_print_timings:      sample time =     4.01 ms /     9 runs   (    0.45 ms per token,  2244.95 tokens per second)
llama_print_timings: prompt eval time =   572.75 ms /    37 tokens (   15.48 ms per token,    64.60 tokens per second)
llama_print_timings:        eval time =   686.68 ms /     8 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1277.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Einstein sighed and shook his head. "I can't even bear to think about it, my past, the past of civilization, the universe's past, all gone with the wind!"



llama_print_timings:        load time =   612.92 ms
llama_print_timings:      sample time =    19.84 ms /    44 runs   (    0.45 ms per token,  2217.74 tokens per second)
llama_print_timings: prompt eval time =   612.88 ms /    67 tokens (    9.15 ms per token,   109.32 tokens per second)
llama_print_timings:        eval time =  3618.73 ms /    43 runs   (   84.16 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  4318.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How did you get to this point?” Wang Mo nodded, reaching into his pocket and actually putting some coins in. He bent down and put the money inside his hat.



llama_print_timings:        load time =   612.41 ms
llama_print_timings:      sample time =    17.02 ms /    38 runs   (    0.45 ms per token,  2232.27 tokens per second)
llama_print_timings: prompt eval time =   612.34 ms /    62 tokens (    9.88 ms per token,   101.25 tokens per second)
llama_print_timings:        eval time =  3151.00 ms /    37 runs   (   85.16 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3838.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thanks to Mr. Guth, but I don't have much faith in this. It seems that you and Newton and their group in the East with that model of calculation, is very close to the right one, but there is still so little difference that, for Newton or others, it is a never-to-be-crossed great wall. I always believed that if it were not for me, someone else would discover General Relativity, but Newtons missing link, which describes the gravitational perturbation of planetary orbits and causing the error in calculation, is fatal to results. Adding the correction of the gravitational perturbation to the classic equation yields the correct mathematical model. Its computational workload is much greater than what you did in the East, but it's not an issue for modern computers.



llama_print_timings:        load time =   860.24 ms
llama_print_timings:      sample time =    75.91 ms /   167 runs   (    0.45 ms per token,  2200.03 tokens per second)
llama_print_timings: prompt eval time =   860.20 ms /   188 tokens (    4.58 ms per token,   218.55 tokens per second)
llama_print_timings:        eval time = 14385.01 ms /   166 runs   (   86.66 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 15585.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Have the results of this calculation been confirmed by astronomical observations?”



llama_print_timings:        load time =   580.57 ms
llama_print_timings:      sample time =     6.77 ms /    15 runs   (    0.45 ms per token,  2216.31 tokens per second)
llama_print_timings: prompt eval time =   580.54 ms /    40 tokens (   14.51 ms per token,    68.90 tokens per second)
llama_print_timings:        eval time =  1200.10 ms /    14 runs   (   85.72 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1810.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I would be here if I were to think in that way, but aesthetically speaking, I am not wrong. The universe is the one that’s wrong since it rejected me first and then everyone else followed suit. Nobody wants me, not even Princeton, they took away my professorship. Neither UNESCO nor any other science organization offers me a scientific advisory position. Before I wouldn't even beg for them to kiss my ass; but now I am thinking of applying as president of Israel, only to find out that they say they changed their minds and consider me just a fraud, oh —”



llama_print_timings:        load time =   713.65 ms
llama_print_timings:      sample time =    59.32 ms /   132 runs   (    0.45 ms per token,  2225.37 tokens per second)
llama_print_timings: prompt eval time =   713.61 ms /   128 tokens (    5.58 ms per token,   179.37 tokens per second)
llama_print_timings:        eval time = 11274.84 ms /   131 runs   (   86.07 ms per token,    11.62 tokens per second)
llama_print_timings:       total time = 12256.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Einstein finished speaking and then pulled out his violin to resume playing exactly where he left off. After listening for a while, Wang Mo walked towards the main door of the building.



llama_print_timings:        load time =   620.91 ms
llama_print_timings:      sample time =    18.23 ms /    40 runs   (    0.46 ms per token,  2194.07 tokens per second)
llama_print_timings: prompt eval time =   620.87 ms /    66 tokens (    9.41 ms per token,   106.30 tokens per second)
llama_print_timings:        eval time =  3348.50 ms /    39 runs   (   85.86 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  4051.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"There is no one inside, everyone who attended this UN conference is attending the solo piano start ceremony in the back of the building," Einstein said while he played on the piano.



llama_print_timings:        load time =   603.07 ms
llama_print_timings:      sample time =    17.58 ms /    39 runs   (    0.45 ms per token,  2218.05 tokens per second)
llama_print_timings: prompt eval time =   603.03 ms /    57 tokens (   10.58 ms per token,    94.52 tokens per second)
llama_print_timings:        eval time =  3170.47 ms /    38 runs   (   83.43 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  3853.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He circled the building and saw an incredible thing immediately: a giant single pendulum perched on top of it. In fact, the upper part of the pendulum could be seen from outside, but Wang Mu did not know what it was at that time. This is the giant pendulum Wang Mu saw for the first time in his journey into the Three Body when he was on the ground during the Warring States Period. It was built by Fuxi to put sun-god asleep.



llama_print_timings:        load time =   844.32 ms
llama_print_timings:      sample time =    49.48 ms /   109 runs   (    0.45 ms per token,  2202.95 tokens per second)
llama_print_timings: prompt eval time =   844.28 ms /   199 tokens (    4.24 ms per token,   235.70 tokens per second)
llama_print_timings:        eval time =  9336.47 ms /   108 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 10408.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They are waiting for something, talking quietly among themselves in groups of three or five. They may be the heads of state who are attending a summit meeting.



llama_print_timings:        load time =   638.59 ms
llama_print_timings:      sample time =    15.11 ms /    34 runs   (    0.44 ms per token,  2249.72 tokens per second)
llama_print_timings: prompt eval time =   638.56 ms /    68 tokens (    9.39 ms per token,   106.49 tokens per second)
llama_print_timings:        eval time =  2845.74 ms /    33 runs   (   86.23 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  3554.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Oh, Copernicus, the man who spanned five centuries!" exclaimed someone, and others cheered in welcome.



llama_print_timings:        load time =   594.70 ms
llama_print_timings:      sample time =    11.90 ms /    26 runs   (    0.46 ms per token,  2184.51 tokens per second)
llama_print_timings: prompt eval time =   594.66 ms /    54 tokens (   11.01 ms per token,    90.81 tokens per second)
llama_print_timings:        eval time =  2148.93 ms /    25 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  2797.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

And you, my friend, have seen the single pendulum in person during the War of the Five Kings! A well-disposed African man holds Wang Mu's hand as he is introduced to be this year's Secretary General of the United Nations.



llama_print_timings:        load time =   624.68 ms
llama_print_timings:      sample time =    24.90 ms /    55 runs   (    0.45 ms per token,  2208.66 tokens per second)
llama_print_timings: prompt eval time =   624.66 ms /    68 tokens (    9.19 ms per token,   108.86 tokens per second)
llama_print_timings:        eval time =  4641.98 ms /    54 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  5378.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, I have seen it. But why is it being built now?



llama_print_timings:        load time =   585.62 ms
llama_print_timings:      sample time =     7.48 ms /    16 runs   (    0.47 ms per token,  2139.90 tokens per second)
llama_print_timings: prompt eval time =   585.59 ms /    50 tokens (   11.71 ms per token,    85.38 tokens per second)
llama_print_timings:        eval time =  1288.93 ms /    15 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1907.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It is a monument to Three Body, but it is also a gravestone. The Secretary looked up at the pendulum in the air and said, looking at it from here, it looks like about the size of a submarine.



llama_print_timings:        load time =   614.40 ms
llama_print_timings:      sample time =    22.14 ms /    50 runs   (    0.44 ms per token,  2257.95 tokens per second)
llama_print_timings: prompt eval time =   614.37 ms /    63 tokens (    9.75 ms per token,   102.54 tokens per second)
llama_print_timings:        eval time =  4202.37 ms /    49 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4914.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Who's tomb is this?”



llama_print_timings:        load time =   585.27 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2252.25 tokens per second)
llama_print_timings: prompt eval time =   585.23 ms /    37 tokens (   15.82 ms per token,    63.22 tokens per second)
llama_print_timings:        eval time =   693.32 ms /     8 runs   (   86.66 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1296.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The effort of a hundred and ninety-nine civilizations, the pursuit of a solution to the Three Body Problem, and the search for the sun's orbital path.



llama_print_timings:        load time =   620.46 ms
llama_print_timings:      sample time =    17.71 ms /    39 runs   (    0.45 ms per token,  2202.39 tokens per second)
llama_print_timings: prompt eval time =   620.41 ms /    58 tokens (   10.70 ms per token,    93.49 tokens per second)
llama_print_timings:        eval time =  3264.99 ms /    38 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  3965.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Does this effort end here?



llama_print_timings:        load time =   566.64 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2248.63 tokens per second)
llama_print_timings: prompt eval time =   566.60 ms /    36 tokens (   15.74 ms per token,    63.54 tokens per second)
llama_print_timings:        eval time =   545.94 ms /     6 runs   (   90.99 ms per token,    10.99 tokens per second)
llama_print_timings:       total time =  1126.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Until now, it has been completely over.”



llama_print_timings:        load time =   577.58 ms
llama_print_timings:      sample time =     5.38 ms /    12 runs   (    0.45 ms per token,  2229.65 tokens per second)
llama_print_timings: prompt eval time =   577.54 ms /    38 tokens (   15.20 ms per token,    65.80 tokens per second)
llama_print_timings:        eval time =   955.58 ms /    11 runs   (   86.87 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1557.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I hesitated for a moment and took out a stack of materials. These are the links to Wei Cheng's mathematical model for the Three Body Problem, which I believe is very likely successful in solving the problem.



llama_print_timings:        load time =   636.72 ms
llama_print_timings:      sample time =    20.11 ms /    45 runs   (    0.45 ms per token,  2237.92 tokens per second)
llama_print_timings: prompt eval time =   636.68 ms /    79 tokens (    8.06 ms per token,   124.08 tokens per second)
llama_print_timings:        eval time =  3878.99 ms /    44 runs   (   88.16 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  4605.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as Wang Mu spoke, he noticed that everyone around him had lost interest and went back to their small circles. Some of them even left with a laugh and shaking head. The secretary passed the data sheet to the thin tall man next to him and said in respect and admiration for his lofty reputation: "Based on my respect for you, please let our science advisor have a look." In fact, everyone around him has expressed this respect and would immediately ridicule anyone else.



llama_print_timings:        load time =   702.87 ms
llama_print_timings:      sample time =    46.53 ms /   103 runs   (    0.45 ms per token,  2213.77 tokens per second)
llama_print_timings: prompt eval time =   702.82 ms /   128 tokens (    5.49 ms per token,   182.12 tokens per second)
llama_print_timings:        eval time =  8762.17 ms /   102 runs   (   85.90 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  9676.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Scientific consultant takes the data and turns it over in his hand: “Evolutionary algorithms? Göebelli, you are a genius. Anyone who can develop such algorithms must be a genius; besides superior mathematical skills, imagination is also required.”



llama_print_timings:        load time =   620.99 ms
llama_print_timings:      sample time =    23.95 ms /    53 runs   (    0.45 ms per token,  2213.22 tokens per second)
llama_print_timings: prompt eval time =   620.96 ms /    71 tokens (    8.75 ms per token,   114.34 tokens per second)
llama_print_timings:        eval time =  4519.10 ms /    52 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  5246.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I hear what you're saying. Have someone already created this mathematical model?



llama_print_timings:        load time =   573.41 ms
llama_print_timings:      sample time =     8.14 ms /    17 runs   (    0.48 ms per token,  2087.43 tokens per second)
llama_print_timings: prompt eval time =   573.35 ms /    42 tokens (   13.65 ms per token,    73.25 tokens per second)
llama_print_timings:        eval time =  1363.44 ms /    16 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  1972.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, there are dozens of other mathematical models that are far more sophisticated than yours and have been created and implemented on computers. This has been the center activity of the world for centuries as people wait impatiently for the results to come in.



llama_print_timings:        load time =   651.63 ms
llama_print_timings:      sample time =    24.98 ms /    56 runs   (    0.45 ms per token,  2241.70 tokens per second)
llama_print_timings: prompt eval time =   651.58 ms /    88 tokens (    7.40 ms per token,   135.06 tokens per second)
llama_print_timings:        eval time =  4720.94 ms /    55 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  5486.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What about the result?”



llama_print_timings:        load time =   576.63 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2253.70 tokens per second)
llama_print_timings: prompt eval time =   576.59 ms /    34 tokens (   16.96 ms per token,    58.97 tokens per second)
llama_print_timings:        eval time =   497.19 ms /     6 runs   (   82.87 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  1087.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Already has been proved definitively that the three-body problem is unsolvable.



llama_print_timings:        load time =   590.34 ms
llama_print_timings:      sample time =     8.88 ms /    20 runs   (    0.44 ms per token,  2251.24 tokens per second)
llama_print_timings: prompt eval time =   590.30 ms /    41 tokens (   14.40 ms per token,    69.46 tokens per second)
llama_print_timings:        eval time =  1686.13 ms /    19 runs   (   88.74 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  2316.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The huge pendulum shines brightly in the morning light. It reflects everything around it, as if it were a mirror of the world. In that distant age which is separated by many civilizations, he and Zhou Wenwang had walked through the trees towards the palace of Shi Huangdi. History has written an extended circle and returned to where it all began.



llama_print_timings:        load time =   696.05 ms
llama_print_timings:      sample time =    37.32 ms /    82 runs   (    0.46 ms per token,  2196.98 tokens per second)
llama_print_timings: prompt eval time =   696.01 ms /   118 tokens (    5.90 ms per token,   169.54 tokens per second)
llama_print_timings:        eval time =  6984.25 ms /    81 runs   (   86.23 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  7850.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As we had predicted, the Three-Body Problem is a chaotic system that will magnify even small perturbations infinitely and its mathematical rules cannot be predictable.



llama_print_timings:        load time =   621.54 ms
llama_print_timings:      sample time =    15.24 ms /    34 runs   (    0.45 ms per token,  2231.56 tokens per second)
llama_print_timings: prompt eval time =   621.50 ms /    72 tokens (    8.63 ms per token,   115.85 tokens per second)
llama_print_timings:        eval time =  2859.34 ms /    33 runs   (   86.65 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  3549.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Wang Mu felt his entire knowledge of science and thought system blurred in an instant, replaced by a new sense of uncertainty: “If even such a simple system as the Three Body can remain unpredictable chaos, how can we have confidence in exploring the laws of the complex universe?”



llama_print_timings:        load time =   648.82 ms
llama_print_timings:      sample time =    28.99 ms /    63 runs   (    0.46 ms per token,  2173.16 tokens per second)
llama_print_timings: prompt eval time =   648.79 ms /    87 tokens (    7.46 ms per token,   134.10 tokens per second)
llama_print_timings:        eval time =  5252.33 ms /    62 runs   (   84.71 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  6035.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

God is a shameless gambler, he abandoned us! Einstein came up at some time and waved his violin saying.



llama_print_timings:        load time =   611.11 ms
llama_print_timings:      sample time =    13.64 ms /    31 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   611.07 ms /    58 tokens (   10.54 ms per token,    94.92 tokens per second)
llama_print_timings:        eval time =  2534.56 ms /    30 runs   (   84.49 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  3208.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Secretary General nodded slowly: "Yes, God is a gambler. The only hope for the Three-Body Problem's solution, is that He also gambled."



llama_print_timings:        load time =   601.29 ms
llama_print_timings:      sample time =    16.82 ms /    38 runs   (    0.44 ms per token,  2259.35 tokens per second)
llama_print_timings: prompt eval time =   601.27 ms /    58 tokens (   10.37 ms per token,    96.46 tokens per second)
llama_print_timings:        eval time =  3129.35 ms /    37 runs   (   84.58 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  3806.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, the giant moon rose from the horizon of the dark night, and its silvery image was reflected on the smooth surface of the pendulum, lighting and winking as if it had developed a mysterious connection with the pendulum.



llama_print_timings:        load time =   621.93 ms
llama_print_timings:      sample time =    25.54 ms /    56 runs   (    0.46 ms per token,  2192.38 tokens per second)
llama_print_timings: prompt eval time =   621.89 ms /    78 tokens (    7.97 ms per token,   125.42 tokens per second)
llama_print_timings:        eval time =  4669.84 ms /    55 runs   (   84.91 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5406.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You said civilization, and it seems that this civilization has developed to a considerable extent.



llama_print_timings:        load time =   605.84 ms
llama_print_timings:      sample time =     8.95 ms /    20 runs   (    0.45 ms per token,  2234.39 tokens per second)
llama_print_timings: prompt eval time =   605.80 ms /    52 tokens (   11.65 ms per token,    85.84 tokens per second)
llama_print_timings:        eval time =  1607.49 ms /    19 runs   (   84.60 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  2252.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, mastering nuclear power is necessary for entering the information age,” said the secretary-general. But it seemed to him that it was not so much an achievement as a burden.



llama_print_timings:        load time =   600.36 ms
llama_print_timings:      sample time =    18.70 ms /    41 runs   (    0.46 ms per token,  2192.40 tokens per second)
llama_print_timings: prompt eval time =   600.32 ms /    54 tokens (   11.12 ms per token,    89.95 tokens per second)
llama_print_timings:        eval time =  3410.66 ms /    40 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  4095.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

There is a hope that civilization continues to develop and reaches another level, although the solar orbit of the sun cannot be known, but can survive in this chaotic era and resist those disastrous cataclysms caused by the abnormalities in the past.



llama_print_timings:        load time =   627.36 ms
llama_print_timings:      sample time =    26.68 ms /    58 runs   (    0.46 ms per token,  2173.83 tokens per second)
llama_print_timings: prompt eval time =   627.32 ms /    76 tokens (    8.25 ms per token,   121.15 tokens per second)
llama_print_timings:        eval time =  4903.50 ms /    57 runs   (   86.03 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  5654.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They used to think that, and it was one of the driving forces behind the triology civilization's survival, but they realized how naive this idea really is when they saw what happened to this giant moon rising in the sky. It had become larger than the planet upon which it orbited and had torn itself apart as a result.



llama_print_timings:        load time =   697.18 ms
llama_print_timings:      sample time =    50.88 ms /    71 runs   (    0.72 ms per token,  1395.44 tokens per second)
llama_print_timings: prompt eval time =   697.13 ms /   118 tokens (    5.91 ms per token,   169.26 tokens per second)
llama_print_timings:        eval time =  6639.34 ms /    70 runs   (   94.85 ms per token,    10.54 tokens per second)
llama_print_timings:       total time =  7570.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What does "big tear" mean?



llama_print_timings:        load time =   690.59 ms
llama_print_timings:      sample time =     6.86 ms /    10 runs   (    0.69 ms per token,  1458.36 tokens per second)
llama_print_timings: prompt eval time =   690.56 ms /    35 tokens (   19.73 ms per token,    50.68 tokens per second)
llama_print_timings:        eval time =   911.66 ms /     9 runs   (  101.30 ms per token,     9.87 tokens per second)
llama_print_timings:       total time =  1634.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   668.41 ms
llama_print_timings:      sample time =    21.08 ms /    47 runs   (    0.45 ms per token,  2229.71 tokens per second)
llama_print_timings: prompt eval time =   668.37 ms /    78 tokens (    8.57 ms per token,   116.70 tokens per second)
llama_print_timings:        eval time =  3952.10 ms /    46 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  4716.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he heard the last four words, Wang Miao tightened. “Flying stars stay still” is the biggest omen in the Three Body World. The “stars”, also called the distant sun, stops moving on the background of space when observed from the ground and only means that the sun and planet move in a straight line with the same speed or the sun moves away from the planet or moves towards the planet. Before civilization No. 191, it was just an imaginary disaster that never happened but fear and vigilance of people did not loosen at all, making “stars stay still” become a most unlucky oath in several Three Body civilizations. Even if only one star stops moving, it is also terrifying.



llama_print_timings:        load time =   963.43 ms
llama_print_timings:      sample time =    92.94 ms /   160 runs   (    0.58 ms per token,  1721.54 tokens per second)
llama_print_timings: prompt eval time =   963.33 ms /   185 tokens (    5.21 ms per token,   192.04 tokens per second)
llama_print_timings:        eval time = 14451.27 ms /   159 runs   (   90.89 ms per token,    11.00 tokens per second)
llama_print_timings:       total time = 15837.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“At that time, three stars simultaneously stopped in the air. 191 civilizations stood on Earth helplessly watching these three stars hanging still in the sky. A few days later, a solar eclipse occured with the sun at the horizon, and then two other planets came into sight. This is not an ordinary ‘three-days suspension’ as the last star turned into the sun, while the first sun passed from its near position. Then, the other two planets followed one by one.”



llama_print_timings:        load time =  1049.61 ms
llama_print_timings:      sample time =    49.60 ms /   108 runs   (    0.46 ms per token,  2177.24 tokens per second)
llama_print_timings: prompt eval time =  1049.56 ms /   295 tokens (    3.56 ms per token,   281.07 tokens per second)
llama_print_timings:        eval time =  9509.39 ms /   107 runs   (   88.87 ms per token,    11.25 tokens per second)
llama_print_timings:       total time = 10783.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Secretary pointed to the orbital body that had been raised to a near-vacuum state, “This is just the smaller half of what once was called Halo-2, where civilization 191 left its ruins behind. But it’s now a world without life.”



llama_print_timings:        load time =  1119.37 ms
llama_print_timings:      sample time =    28.21 ms /    62 runs   (    0.45 ms per token,  2198.04 tokens per second)
llama_print_timings: prompt eval time =  1119.33 ms /   330 tokens (    3.39 ms per token,   294.82 tokens per second)
llama_print_timings:        eval time =  5347.67 ms /    61 runs   (   87.67 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  6597.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The universe in which the Three-Body series takes place is colder than we might imagine. What would happen if our planet no longer passes close to the sun’s surface during a flyby but instead dives into the sun’s fires? This likelihood increases over time.”



llama_print_timings:        load time =   660.49 ms
llama_print_timings:      sample time =    26.97 ms /    59 runs   (    0.46 ms per token,  2187.86 tokens per second)
llama_print_timings: prompt eval time =   660.45 ms /    88 tokens (    7.51 ms per token,   133.24 tokens per second)
llama_print_timings:        eval time =  4914.49 ms /    58 runs   (   84.73 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  5697.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This originally was just a terrible supposition, but recent astronomical discovery made us utterly despair for the fate of the Trisolarian world. The study aimed to reconstruct the history of stars and planets within this galaxy based on some residues found in it. By accidentally discovering that twelve planets existed in the distant past in this galaxy, we now realize that our world is merely a remnant of the big hunt, and the fact that our civilization has reborn 192 times only is just lucky. In addition to further research, we have also discovered the respiratory phenomena of these three suns.



llama_print_timings:        load time =   773.85 ms
llama_print_timings:      sample time =    62.37 ms /   138 runs   (    0.45 ms per token,  2212.57 tokens per second)
llama_print_timings: prompt eval time =   773.80 ms /   164 tokens (    4.72 ms per token,   211.94 tokens per second)
llama_print_timings:        eval time = 11974.61 ms /   137 runs   (   87.41 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 13034.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What do you mean by “star breathing?”



llama_print_timings:        load time =   578.94 ms
llama_print_timings:      sample time =     5.26 ms /    11 runs   (    0.48 ms per token,  2092.85 tokens per second)
llama_print_timings: prompt eval time =   578.90 ms /    34 tokens (   17.03 ms per token,    58.73 tokens per second)
llama_print_timings:        eval time =   844.58 ms /    10 runs   (   84.46 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1446.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Just a metaphor, you have discovered the outer gaseous layer of the star but you don't know that it expands and contracts constantly like breathing. When the gas is expanded, its thickness can increase up to ten times, which makes the star's diameter significantly increase and easier to capture planets. When a planet passes close to the sun and experiences severe friction, it will fall into the sun's firepit like a meteor. It has been documented that in the long history of three-bodies systems, when the solar gas layer expands once, it can consume one to two planets at most, which are the eleven planets that have subsequently fallen into the sun's fire pit after passing close to the sun during its last expansion. Now, the gaseous layers of all three stars are contracting, otherwise our planet would have fallen into the sun during the last rubbing pass. According to scholars' predictions, the next expansion will occur within one hundred and fifty thousand to two million years from now.



llama_print_timings:        load time =   925.51 ms
llama_print_timings:      sample time =    98.50 ms /   219 runs   (    0.45 ms per token,  2223.37 tokens per second)
llama_print_timings: prompt eval time =   925.46 ms /   241 tokens (    3.84 ms per token,   260.41 tokens per second)
llama_print_timings:        eval time = 19377.88 ms /   218 runs   (   88.89 ms per token,    11.25 tokens per second)
llama_print_timings:       total time = 20759.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This place is so annoying, I can’t stay here any longer.” Einstein held his violin in the position of an old beggar and sat on the ground.



llama_print_timings:        load time =   605.34 ms
llama_print_timings:      sample time =    16.84 ms /    38 runs   (    0.44 ms per token,  2256.00 tokens per second)
llama_print_timings: prompt eval time =   605.31 ms /    55 tokens (   11.01 ms per token,    90.86 tokens per second)
llama_print_timings:        eval time =  3172.88 ms /    37 runs   (   85.75 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3854.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Secretary-General nodded and said, "I can't stay here any longer. There is only one way for the Three Civilizations to survive, which is to bet on the universe."



llama_print_timings:        load time =   603.88 ms
llama_print_timings:      sample time =    19.16 ms /    42 runs   (    0.46 ms per token,  2192.64 tokens per second)
llama_print_timings: prompt eval time =   603.83 ms /    59 tokens (   10.23 ms per token,    97.71 tokens per second)
llama_print_timings:        eval time =  3507.52 ms /    41 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4197.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What's the bet?” asked Miao.



llama_print_timings:        load time =   614.39 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2250.56 tokens per second)
llama_print_timings: prompt eval time =   614.35 ms /    40 tokens (   15.36 ms per token,    65.11 tokens per second)
llama_print_timings:        eval time =   900.53 ms /    11 runs   (   81.87 ms per token,    12.22 tokens per second)
llama_print_timings:       total time =  1538.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fly out of the Three-Body System, fly towards the boundless star sea and search for new lands to migrate to in the Milky Way!



llama_print_timings:        load time =   587.38 ms
llama_print_timings:      sample time =    15.19 ms /    33 runs   (    0.46 ms per token,  2172.20 tokens per second)
llama_print_timings: prompt eval time =   587.34 ms /    53 tokens (   11.08 ms per token,    90.24 tokens per second)
llama_print_timings:        eval time =  2758.85 ms /    32 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  3414.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao heard a "rroll" sound and saw that the huge pendulum was being lifted by a thin cable of an overhead crane in a nearby area. The crescent moon on the background behind it is lowering gradually, slowly.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The Secretary-General solemnly proclaimed, “Single pendulum swinging!”



llama_print_timings:        load time =   580.20 ms
llama_print_timings:      sample time =     8.43 ms /    19 runs   (    0.44 ms per token,  2254.12 tokens per second)
llama_print_timings: prompt eval time =   580.18 ms /    40 tokens (   14.50 ms per token,    68.94 tokens per second)
llama_print_timings:        eval time =  1518.79 ms /    18 runs   (   84.38 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  2137.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The crane released the heavy pendulum, which made a slow but rapid swinging downward. The huge pendulum moved along a smooth arc-shaped curve with maximum speed and soundless crash when the wind was heard after the noise disappeared at the lowest point. The pendulum then resumed its swings up to the same height as before, halting for a moment and began another round of swings as the period of movement changed due to the gravitational fluctuations caused by the large moon revolving around the mother star: when the large moon was at one side of the mother star, it neutralized the gravitational pull with the mother star; but when it moved to the other side of the mother star, the pull nearly reached its previous intensity.



llama_print_timings:        load time =   913.38 ms
llama_print_timings:      sample time =    72.55 ms /   163 runs   (    0.45 ms per token,  2246.79 tokens per second)
llama_print_timings: prompt eval time =   913.35 ms /   234 tokens (    3.90 ms per token,   256.20 tokens per second)
llama_print_timings:        eval time = 14090.89 ms /   162 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 15340.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   710.00 ms
llama_print_timings:      sample time =     0.44 ms /     1 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   709.96 ms /   127 tokens (    5.59 ms per token,   178.88 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   711.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Four hundred and fifty-one years later, civilization number 192 was destroyed in the blazing fire of double sunrise. It evolved to the atomic and information age.



llama_print_timings:        load time =   604.44 ms
llama_print_timings:      sample time =    18.44 ms /    41 runs   (    0.45 ms per token,  2223.31 tokens per second)
llama_print_timings: prompt eval time =   604.40 ms /    59 tokens (   10.24 ms per token,    97.62 tokens per second)
llama_print_timings:        eval time =  3362.63 ms /    40 runs   (   84.07 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  4051.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   649.33 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2285.71 tokens per second)
llama_print_timings: prompt eval time =   649.29 ms /    85 tokens (    7.64 ms per token,   130.91 tokens per second)
llama_print_timings:        eval time =    82.61 ms /     1 runs   (   82.61 ms per token,    12.11 tokens per second)
llama_print_timings:       total time =   735.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Fly to the universe, search for a new home.



llama_print_timings:        load time =   572.86 ms
llama_print_timings:      sample time =     5.72 ms /    13 runs   (    0.44 ms per token,  2274.32 tokens per second)
llama_print_timings: prompt eval time =   572.83 ms /    37 tokens (   15.48 ms per token,    64.59 tokens per second)
llama_print_timings:        eval time =  1007.34 ms /    12 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1607.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Welcome back.



llama_print_timings:        load time =   575.04 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2244.17 tokens per second)
llama_print_timings: prompt eval time =   575.00 ms /    33 tokens (   17.42 ms per token,    57.39 tokens per second)
llama_print_timings:        eval time =   335.44 ms /     4 runs   (   83.86 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =   920.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   991.74 ms
llama_print_timings:      sample time =     0.93 ms /     2 runs   (    0.46 ms per token,  2157.50 tokens per second)
llama_print_timings: prompt eval time =   991.70 ms /    29 tokens (   34.20 ms per token,    29.24 tokens per second)
llama_print_timings:        eval time =    79.61 ms /     1 runs   (   79.61 ms per token,    12.56 tokens per second)
llama_print_timings:       total time =  1075.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After leaving "The Three-Body Problem", Wang Mu felt as tired as he always did after playing the game, but this time he only rested for half an hour before logging in again. On entering The Three-body Problem again, a surprising message appeared on the black background:



llama_print_timings:        load time =   660.99 ms
llama_print_timings:      sample time =    26.63 ms /    58 runs   (    0.46 ms per token,  2177.91 tokens per second)
llama_print_timings: prompt eval time =   660.94 ms /    84 tokens (    7.87 ms per token,   127.09 tokens per second)
llama_print_timings:        eval time =  4860.35 ms /    57 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  5643.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The situation is urgent. The servers for "Three-Body" will be shut down shortly, and free logging in will remain available. After the current chapter is complete, "Three-Body" will be immediately transferred to the final scene.



llama_print_timings:        load time =   586.85 ms
llama_print_timings:      sample time =    22.93 ms /    50 runs   (    0.46 ms per token,  2180.36 tokens per second)
llama_print_timings: prompt eval time =   586.81 ms /    54 tokens (   10.87 ms per token,    92.02 tokens per second)
llama_print_timings:        eval time =  4161.19 ms /    49 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  4852.14 ms


translated 70.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The vast expanse of land above him was empty, without the pyramids, without the United Nations headquarters, and the Great Monument was nowhere to be found. Only barren Gobi desert extended endlessly into the sky, just as it had when he first came here. But soon Wan Miao discovered that this illusion only existed in his mind, for small stones of all sizes littered the Gobi, which filled up almost every space of sight! After calculating roughly, he estimated that there were billions of people standing on the ground below him. He knew the entire population of Trisolaris had gathered here. The silence was eerie and suffocating, as if everything in this Dawn-filled sea of people was waiting for something. Wan Miao looked around and saw that everyone was looking up at the sky.



llama_print_timings:        load time =   876.31 ms
llama_print_timings:      sample time =    79.17 ms /   175 runs   (    0.45 ms per token,  2210.54 tokens per second)
llama_print_timings: prompt eval time =   876.28 ms /   223 tokens (    3.93 ms per token,   254.49 tokens per second)
llama_print_timings:        eval time = 15293.02 ms /   174 runs   (   87.89 ms per token,    11.38 tokens per second)
llama_print_timings:       total time = 16533.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Wang Miao looked up, he found that the stars had changed in an unmistakable way: they had been arranged into a square array! But Wang Miao soon discovered that this arrangement of stars was located on the synchronous orbit of a planetary system, and the galaxy's star field had become a faint background. The array of stars against the background was clearly in motion, moving slowly but steadily in a unified and organized manner. At the side facing the rising sun, there were more than thirty star bodies, and their total number was about 1000. This obviously showed that the array was made up of man-made objects and moved as a whole through the background of stars, giving off an imposing sense of power.



llama_print_timings:        load time =   848.52 ms
llama_print_timings:      sample time =    71.31 ms /   158 runs   (    0.45 ms per token,  2215.80 tokens per second)
llama_print_timings: prompt eval time =   848.48 ms /   204 tokens (    4.16 ms per token,   240.43 tokens per second)
llama_print_timings:        eval time = 13713.74 ms /   157 runs   (   87.35 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 14893.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, a man standing next to him gently pushed him and whispered softly, “Ah, great Copernicus, how come you're so late? How could you miss such a remarkable cause?”



llama_print_timings:        load time =   631.55 ms
llama_print_timings:      sample time =    20.56 ms /    46 runs   (    0.45 ms per token,  2237.35 tokens per second)
llama_print_timings: prompt eval time =   631.52 ms /    73 tokens (    8.65 ms per token,   115.59 tokens per second)
llama_print_timings:        eval time =  3847.46 ms /    45 runs   (   85.50 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4572.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What is that?" Wang Mu pointed to the array of stars in space.



llama_print_timings:        load time =   579.27 ms
llama_print_timings:      sample time =     7.55 ms /    17 runs   (    0.44 ms per token,  2252.55 tokens per second)
llama_print_timings: prompt eval time =   579.23 ms /    48 tokens (   12.07 ms per token,    82.87 tokens per second)
llama_print_timings:        eval time =  1418.72 ms /    16 runs   (   88.67 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  2031.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That is the great Three-Body Star Fleet, and it will soon set sail for an expedition.”



llama_print_timings:        load time =   580.40 ms
llama_print_timings:      sample time =    10.25 ms /    23 runs   (    0.45 ms per token,  2243.46 tokens per second)
llama_print_timings: prompt eval time =   580.36 ms /    44 tokens (   13.19 ms per token,    75.82 tokens per second)
llama_print_timings:        eval time =  1846.55 ms /    22 runs   (   83.93 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2473.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, the Three-Body Civilization has achieved the ability to travel across space?



llama_print_timings:        load time =   573.77 ms
llama_print_timings:      sample time =     8.56 ms /    18 runs   (    0.48 ms per token,  2102.07 tokens per second)
llama_print_timings: prompt eval time =   573.72 ms /    43 tokens (   13.34 ms per token,    74.95 tokens per second)
llama_print_timings:        eval time =  1494.81 ms /    17 runs   (   87.93 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  2108.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, those magnificent spacecraft can achieve a tenth of the speed of light.



llama_print_timings:        load time =   590.66 ms
llama_print_timings:      sample time =     8.96 ms /    19 runs   (    0.47 ms per token,  2120.30 tokens per second)
llama_print_timings: prompt eval time =   590.62 ms /    46 tokens (   12.84 ms per token,    77.88 tokens per second)
llama_print_timings:        eval time =  1576.83 ms /    18 runs   (   87.60 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  2210.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Reaching one-tenth of the speed of light, at least in my knowledge, is a great accomplishment. But for interstellar travel, it still seems slow.”



llama_print_timings:        load time =   599.48 ms
llama_print_timings:      sample time =    16.79 ms /    38 runs   (    0.44 ms per token,  2263.79 tokens per second)
llama_print_timings: prompt eval time =   599.44 ms /    57 tokens (   10.52 ms per token,    95.09 tokens per second)
llama_print_timings:        eval time =  3066.67 ms /    37 runs   (   82.88 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  3741.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“A journey of a thousand miles begins with a single step,” the man said. “But it is important to find the right destination.”



llama_print_timings:        load time =   584.57 ms
llama_print_timings:      sample time =    12.78 ms /    29 runs   (    0.44 ms per token,  2270.06 tokens per second)
llama_print_timings: prompt eval time =   584.53 ms /    50 tokens (   11.69 ms per token,    85.54 tokens per second)
llama_print_timings:        eval time =  2425.58 ms /    28 runs   (   86.63 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  3067.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Where is the destination of the fleet?



llama_print_timings:        load time =   596.82 ms
llama_print_timings:      sample time =     4.32 ms /     9 runs   (    0.48 ms per token,  2085.26 tokens per second)
llama_print_timings: prompt eval time =   596.78 ms /    38 tokens (   15.70 ms per token,    63.67 tokens per second)
llama_print_timings:        eval time =   715.86 ms /     8 runs   (   89.48 ms per token,    11.18 tokens per second)
llama_print_timings:       total time =  1332.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The star, located 4 light-years away, has a planet orbiting it. This is the closest known star to the trilogy world.



llama_print_timings:        load time =   594.98 ms
llama_print_timings:      sample time =    15.14 ms /    33 runs   (    0.46 ms per token,  2178.94 tokens per second)
llama_print_timings: prompt eval time =   594.95 ms /    50 tokens (   11.90 ms per token,    84.04 tokens per second)
llama_print_timings:        eval time =  2766.71 ms /    32 runs   (   86.46 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3431.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's amazing, " said Wang Miao. "The nearest star to us is also four light-years away."



llama_print_timings:        load time =   581.93 ms
llama_print_timings:      sample time =    13.14 ms /    29 runs   (    0.45 ms per token,  2206.83 tokens per second)
llama_print_timings: prompt eval time =   581.88 ms /    48 tokens (   12.12 ms per token,    82.49 tokens per second)
llama_print_timings:        eval time =  2436.98 ms /    28 runs   (   87.04 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  3079.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What about you?



llama_print_timings:        load time =   569.53 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2240.14 tokens per second)
llama_print_timings: prompt eval time =   569.50 ms /    33 tokens (   17.26 ms per token,    57.95 tokens per second)
llama_print_timings:        eval time =   312.99 ms /     4 runs   (   78.25 ms per token,    12.78 tokens per second)
llama_print_timings:       total time =   893.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Earth. The water of Changlang River.



llama_print_timings:        load time =   568.84 ms
llama_print_timings:      sample time =     5.84 ms /    12 runs   (    0.49 ms per token,  2054.79 tokens per second)
llama_print_timings: prompt eval time =   568.80 ms /    37 tokens (   15.37 ms per token,    65.05 tokens per second)
llama_print_timings:        eval time =   937.66 ms /    11 runs   (   85.24 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1533.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, that's not surprising. The density of stars in the galaxy is very evenly distributed over a large area, which is a result of long-term gravitional regulation among them. The vast majority of stars are within three to six light years apart from each other.



llama_print_timings:        load time =   621.99 ms
llama_print_timings:      sample time =    27.11 ms /    59 runs   (    0.46 ms per token,  2176.00 tokens per second)
llama_print_timings: prompt eval time =   621.95 ms /    79 tokens (    7.87 ms per token,   127.02 tokens per second)
llama_print_timings:        eval time =  4950.93 ms /    58 runs   (   85.36 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  5695.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The end.



llama_print_timings:        load time =   973.03 ms
llama_print_timings:      sample time =     1.78 ms /     4 runs   (    0.44 ms per token,  2248.45 tokens per second)
llama_print_timings: prompt eval time =   972.98 ms /   257 tokens (    3.79 ms per token,   264.14 tokens per second)
llama_print_timings:        eval time =   267.61 ms /     3 runs   (   89.20 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  1249.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Expedition of the Trisolaran Civilization has started, their fleet is on its way.



llama_print_timings:        load time =   580.96 ms
llama_print_timings:      sample time =    10.74 ms /    23 runs   (    0.47 ms per token,  2141.33 tokens per second)
llama_print_timings: prompt eval time =   580.92 ms /    45 tokens (   12.91 ms per token,    77.46 tokens per second)
llama_print_timings:        eval time =  1848.96 ms /    22 runs   (   84.04 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  2479.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The game has ended. When you return to reality, if you have honored the commitment you made, please attend the gathering of the Earth Three-Body organization according to the email address we have sent you afterward.



llama_print_timings:        load time =   625.34 ms
llama_print_timings:      sample time =    20.88 ms /    47 runs   (    0.44 ms per token,  2251.39 tokens per second)
llama_print_timings: prompt eval time =   625.30 ms /    68 tokens (    9.20 ms per token,   108.75 tokens per second)
llama_print_timings:        eval time =  3941.30 ms /    46 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  4660.26 ms


translated 18.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Unlike the last time when only a few friends gathered, this time there were more than three hundred people at the gathering. The meeting place was the dining room of an out-of-use plant which is due to be demolished. The building inside was rather dilapidated but quite spacious. Many familiar faces filled the venue, including prominent social figures and experts from various fields such as famous scientists, writers, and politicians.



llama_print_timings:        load time =   679.49 ms
llama_print_timings:      sample time =    43.60 ms /    96 runs   (    0.45 ms per token,  2201.99 tokens per second)
llama_print_timings: prompt eval time =   679.46 ms /   107 tokens (    6.35 ms per token,   157.48 tokens per second)
llama_print_timings:        eval time =  8172.82 ms /    95 runs   (   86.03 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  9048.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Firstly, what caught Wang Miao's attention was the magical thing in the center of the hall. It was three silver ball bodies that were slightly smaller than a regulation bowling ball on metal base floating and rotating randomly in mid-air. Based on Wang Miao's observation, he suspected this device to be based on magnetic levitation principle. Three spheres are moving on random trajectories of the three-body problem.



llama_print_timings:        load time =   678.85 ms
llama_print_timings:      sample time =    41.72 ms /    93 runs   (    0.45 ms per token,  2229.20 tokens per second)
llama_print_timings: prompt eval time =   678.81 ms /   103 tokens (    6.59 ms per token,   151.74 tokens per second)
llama_print_timings:        eval time =  7911.88 ms /    92 runs   (   86.00 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  8777.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Other people didn't pay much attention to the artwork that demonstrated three-dimensional motion, their attention being fixed on Pan Han who stood on a broken dinner table.



llama_print_timings:        load time =   608.84 ms
llama_print_timings:      sample time =    15.55 ms /    35 runs   (    0.44 ms per token,  2250.66 tokens per second)
llama_print_timings: prompt eval time =   608.80 ms /    60 tokens (   10.15 ms per token,    98.56 tokens per second)
llama_print_timings:        eval time =  2862.97 ms /    34 runs   (   84.21 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  3540.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Is it you who killed Xin Yufei? someone asked.



llama_print_timings:        load time =   577.05 ms
llama_print_timings:      sample time =     6.73 ms /    15 runs   (    0.45 ms per token,  2228.83 tokens per second)
llama_print_timings: prompt eval time =   577.01 ms /    44 tokens (   13.11 ms per token,    76.25 tokens per second)
llama_print_timings:        eval time =  1220.15 ms /    14 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  1826.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It’s me,” said Pan Han tranquilly. “The organisation has arrived to this dangerous state because of the traitorous betrayal of someone within our ranks like her.”



llama_print_timings:        load time =   624.08 ms
llama_print_timings:      sample time =    17.68 ms /    40 runs   (    0.44 ms per token,  2261.93 tokens per second)
llama_print_timings: prompt eval time =   624.04 ms /    65 tokens (    9.60 ms per token,   104.16 tokens per second)
llama_print_timings:        eval time =  3339.18 ms /    39 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  4044.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who gave you the power to kill?



llama_print_timings:        load time =   593.91 ms
llama_print_timings:      sample time =     4.58 ms /     9 runs   (    0.51 ms per token,  1965.49 tokens per second)
llama_print_timings: prompt eval time =   593.88 ms /    37 tokens (   16.05 ms per token,    62.30 tokens per second)
llama_print_timings:        eval time =   674.89 ms /     8 runs   (   84.36 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  1290.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I am doing this out of a sense of responsibility to the organization!



llama_print_timings:        load time =   642.38 ms
llama_print_timings:      sample time =     6.60 ms /    15 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   642.34 ms /    39 tokens (   16.47 ms per token,    60.72 tokens per second)
llama_print_timings:        eval time =  1228.03 ms /    14 runs   (   87.72 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  1899.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You still have a sense of responsibility? You are always dishonest and have no moral conscience!”



llama_print_timings:        load time =   579.48 ms
llama_print_timings:      sample time =    10.39 ms /    23 runs   (    0.45 ms per token,  2213.88 tokens per second)
llama_print_timings: prompt eval time =   579.44 ms /    45 tokens (   12.88 ms per token,    77.66 tokens per second)
llama_print_timings:        eval time =  1800.75 ms /    22 runs   (   81.85 ms per token,    12.22 tokens per second)
llama_print_timings:       total time =  2427.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Speak up loudly!”



llama_print_timings:        load time =   575.47 ms
llama_print_timings:      sample time =     3.60 ms /     8 runs   (    0.45 ms per token,  2223.46 tokens per second)
llama_print_timings: prompt eval time =   575.43 ms /    36 tokens (   15.98 ms per token,    62.56 tokens per second)
llama_print_timings:        eval time =   565.40 ms /     7 runs   (   80.77 ms per token,    12.38 tokens per second)
llama_print_timings:       total time =  1156.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What have you done with your own environment branch? Your responsibility is to use and manipulate the environment in order to provoke dislike towards science and modern industry. But what about you yourself? Using the most advanced technology and predictions, you gain fame for yourself.”



llama_print_timings:        load time =   627.53 ms
llama_print_timings:      sample time =    24.01 ms /    54 runs   (    0.44 ms per token,  2249.06 tokens per second)
llama_print_timings: prompt eval time =   627.49 ms /    76 tokens (    8.26 ms per token,   121.12 tokens per second)
llama_print_timings:        eval time =  4614.28 ms /    53 runs   (   87.06 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  5349.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I am famous for myself? The entire humanity in my eyes is already a garbage, I still care about fame? But don't I go out of name if I want to? How can I guide people's thoughts without being famous?”



llama_print_timings:        load time =   613.24 ms
llama_print_timings:      sample time =    23.68 ms /    52 runs   (    0.46 ms per token,  2196.41 tokens per second)
llama_print_timings: prompt eval time =   613.20 ms /    66 tokens (    9.29 ms per token,   107.63 tokens per second)
llama_print_timings:        eval time =  4295.38 ms /    51 runs   (   84.22 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  5014.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   999.10 ms
llama_print_timings:      sample time =     0.92 ms /     2 runs   (    0.46 ms per token,  2178.65 tokens per second)
llama_print_timings: prompt eval time =   999.06 ms /    29 tokens (   34.45 ms per token,    29.03 tokens per second)
llama_print_timings:        eval time =    85.55 ms /     1 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1088.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You choose the easy ones and avoid the hard ones! Your branch of the environment is meant to create environmental disasters, but you can use those for your own benefit by exploiting them. And no, you haven't done anything at all!”



llama_print_timings:        load time =   677.38 ms
llama_print_timings:      sample time =    23.36 ms /    53 runs   (    0.44 ms per token,  2269.32 tokens per second)
llama_print_timings: prompt eval time =   677.34 ms /   112 tokens (    6.05 ms per token,   165.35 tokens per second)
llama_print_timings:        eval time =  4443.04 ms /    52 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  5225.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We have a lot of plans and schemes, but they are all rejected by the commanding general. At least in the past, doing this was stupid, bio-medical branches have manufactured a huge abuse of antibiotics disaster, aren't we caught on it quite quickly enough? The European task force came so close to causing a catastrophe!



llama_print_timings:        load time =   649.38 ms
llama_print_timings:      sample time =    34.62 ms /    78 runs   (    0.44 ms per token,  2253.10 tokens per second)
llama_print_timings: prompt eval time =   649.34 ms /    82 tokens (    7.92 ms per token,   126.28 tokens per second)
llama_print_timings:        eval time =  6516.80 ms /    77 runs   (   84.63 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  7324.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You killed someone and now you're in trouble!



llama_print_timings:        load time =   572.37 ms
llama_print_timings:      sample time =     6.17 ms /    12 runs   (    0.51 ms per token,  1944.26 tokens per second)
llama_print_timings: prompt eval time =   572.34 ms /    41 tokens (   13.96 ms per token,    71.64 tokens per second)
llama_print_timings:        eval time =   926.04 ms /    11 runs   (   84.19 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  1525.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Listen, comrades,” said Liu Lei, “sooner or later this is going to happen! You have all heard that governments from around the world are now entering a state of war. In Europe and North America, investigations into 3011 have already begun. What we must do first here at the headquarters is remove any loyalists to 3011 who remain in our ranks.”



llama_print_timings:        load time =   662.60 ms
llama_print_timings:      sample time =    39.91 ms /    90 runs   (    0.44 ms per token,  2254.96 tokens per second)
llama_print_timings: prompt eval time =   662.54 ms /    99 tokens (    6.69 ms per token,   149.42 tokens per second)
llama_print_timings:        eval time =  7605.03 ms /    89 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  8446.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is not something you should worry about.



llama_print_timings:        load time =   585.59 ms
llama_print_timings:      sample time =     4.43 ms /    10 runs   (    0.44 ms per token,  2256.32 tokens per second)
llama_print_timings: prompt eval time =   585.54 ms /    37 tokens (   15.83 ms per token,    63.19 tokens per second)
llama_print_timings:        eval time =   797.22 ms /     9 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  1402.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, we should consider the general. But I can assure you that the general is descended from the heavens!



llama_print_timings:        load time =   590.81 ms
llama_print_timings:      sample time =    12.31 ms /    27 runs   (    0.46 ms per token,  2192.63 tokens per second)
llama_print_timings: prompt eval time =   590.77 ms /    54 tokens (   10.94 ms per token,    91.41 tokens per second)
llama_print_timings:        eval time =  2316.13 ms /    26 runs   (   89.08 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  2963.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are just talking nonsense. The generals have prestige and the saviors can be removed easily!



llama_print_timings:        load time =   608.70 ms
llama_print_timings:      sample time =    10.94 ms /    24 runs   (    0.46 ms per token,  2193.98 tokens per second)
llama_print_timings: prompt eval time =   608.66 ms /    61 tokens (    9.98 ms per token,   100.22 tokens per second)
llama_print_timings:        eval time =  1910.02 ms /    23 runs   (   83.04 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  2568.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Maybe the general has his own reasons for holding this meeting, and maybe it's all just a ploy to get me off guard.



llama_print_timings:        load time =   582.08 ms
llama_print_timings:      sample time =    13.03 ms /    29 runs   (    0.45 ms per token,  2225.63 tokens per second)
llama_print_timings: prompt eval time =   582.04 ms /    43 tokens (   13.54 ms per token,    73.88 tokens per second)
llama_print_timings:        eval time =  2410.76 ms /    28 runs   (   86.10 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3054.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As attention shifted from Pan Hong to the current crisis, a well-known expert who had won a Turing Award jumped onto a table and waved his arms proclaiming:



llama_print_timings:        load time =   618.36 ms
llama_print_timings:      sample time =    18.22 ms /    39 runs   (    0.47 ms per token,  2140.27 tokens per second)
llama_print_timings: prompt eval time =   618.32 ms /    66 tokens (    9.37 ms per token,   106.74 tokens per second)
llama_print_timings:        eval time =  3325.05 ms /    38 runs   (   87.50 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  4027.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What should we do now?”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Rebellion throughout the world!”



llama_print_timings:        load time =   571.65 ms
llama_print_timings:      sample time =     4.45 ms /    10 runs   (    0.44 ms per token,  2248.20 tokens per second)
llama_print_timings: prompt eval time =   571.62 ms /    34 tokens (   16.81 ms per token,    59.48 tokens per second)
llama_print_timings:        eval time =   794.98 ms /     9 runs   (   88.33 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  1386.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Isn't this self-destruction?



llama_print_timings:        load time =   574.17 ms
llama_print_timings:      sample time =     5.26 ms /    11 runs   (    0.48 ms per token,  2089.27 tokens per second)
llama_print_timings: prompt eval time =   574.15 ms /    37 tokens (   15.52 ms per token,    64.44 tokens per second)
llama_print_timings:        eval time =   890.73 ms /    10 runs   (   89.07 ms per token,    11.23 tokens per second)
llama_print_timings:       total time =  1490.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Long live the Three-Body Spirit! We are the resilient seeds, and wildfire can't burn us away!



llama_print_timings:        load time =   592.65 ms
llama_print_timings:      sample time =    12.37 ms /    28 runs   (    0.44 ms per token,  2263.72 tokens per second)
llama_print_timings: prompt eval time =   592.62 ms /    48 tokens (   12.35 ms per token,    81.00 tokens per second)
llama_print_timings:        eval time =  2302.26 ms /    27 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  2949.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Rebellion can bring our existence to the world political stage, which will mark the first time that Earth Three Organization is shown on the human history stage! Just if its doctrine is suitable, it will arouse wide response in the world!



llama_print_timings:        load time =   625.19 ms
llama_print_timings:      sample time =    23.85 ms /    51 runs   (    0.47 ms per token,  2138.28 tokens per second)
llama_print_timings: prompt eval time =   625.15 ms /    68 tokens (    9.19 ms per token,   108.77 tokens per second)
llama_print_timings:        eval time =  4245.87 ms /    50 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  4979.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The last sentence is what Pan Han said, which triggered some empathy.



llama_print_timings:        load time =   573.72 ms
llama_print_timings:      sample time =     7.52 ms /    17 runs   (    0.44 ms per token,  2259.74 tokens per second)
llama_print_timings: prompt eval time =   573.69 ms /    40 tokens (   14.34 ms per token,    69.72 tokens per second)
llama_print_timings:        eval time =  1356.54 ms /    16 runs   (   84.78 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  1964.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Someone shouts, "The commander is here!" The crowd makes a path for the man and Wang Sen looks up, feeling dazzled as the world in front of his eyes changes to black-and-white, with only one person remaining in colour.



llama_print_timings:        load time =   626.98 ms
llama_print_timings:      sample time =    24.63 ms /    54 runs   (    0.46 ms per token,  2192.45 tokens per second)
llama_print_timings: prompt eval time =   626.94 ms /    73 tokens (    8.59 ms per token,   116.44 tokens per second)
llama_print_timings:        eval time =  4575.39 ms /    53 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  5314.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the company of a group of young bodyguards, Earth's supreme commander Ye Wenjie walked steadily forward.



llama_print_timings:        load time =   595.36 ms
llama_print_timings:      sample time =    13.21 ms /    29 runs   (    0.46 ms per token,  2195.31 tokens per second)
llama_print_timings: prompt eval time =   595.32 ms /    52 tokens (   11.45 ms per token,    87.35 tokens per second)
llama_print_timings:        eval time =  2450.63 ms /    28 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  3106.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie stepped to the empty circle she had made and lifted her slender fist. With a firmness that was astonishing to Wang Sen, she said: “Extinction of humanity’s tyranny! ”



llama_print_timings:        load time =   616.33 ms
llama_print_timings:      sample time =    22.98 ms /    51 runs   (    0.45 ms per token,  2219.32 tokens per second)
llama_print_timings: prompt eval time =   616.29 ms /    72 tokens (    8.56 ms per token,   116.83 tokens per second)
llama_print_timings:        eval time =  4334.87 ms /    50 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  5055.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They chorused the obvious mantra, “The world belongs to Three-Body!”



llama_print_timings:        load time =   592.08 ms
llama_print_timings:      sample time =     9.16 ms /    19 runs   (    0.48 ms per token,  2073.78 tokens per second)
llama_print_timings: prompt eval time =   592.06 ms /    52 tokens (   11.39 ms per token,    87.83 tokens per second)
llama_print_timings:        eval time =  1522.01 ms /    18 runs   (   84.56 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  2156.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hello, comrades,” said Ye Wenjie. Her voice returned to the soft and slow pace she was familiar with, which finally convinced Yasheng that it really was her. “I've been poorly lately and haven't seen everyone for a while, so I came to see you all.”



llama_print_timings:        load time =   645.76 ms
llama_print_timings:      sample time =    30.18 ms /    68 runs   (    0.44 ms per token,  2253.52 tokens per second)
llama_print_timings: prompt eval time =   645.72 ms /    87 tokens (    7.42 ms per token,   134.73 tokens per second)
llama_print_timings:        eval time =  5790.74 ms /    67 runs   (   86.43 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  6573.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The general was heard to say, "Take care...". People said this in a sincere voice.



llama_print_timings:        load time =   594.07 ms
llama_print_timings:      sample time =    10.96 ms /    23 runs   (    0.48 ms per token,  2098.54 tokens per second)
llama_print_timings: prompt eval time =   594.02 ms /    54 tokens (   11.00 ms per token,    90.91 tokens per second)
llama_print_timings:        eval time =  1898.40 ms /    22 runs   (   86.29 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  2544.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving aside the discussion of major issues, let's deal with something small first. Pan Han says, looking at everyone's eyes.



llama_print_timings:        load time =   608.58 ms
llama_print_timings:      sample time =    13.96 ms /    30 runs   (    0.47 ms per token,  2149.77 tokens per second)
llama_print_timings: prompt eval time =   608.53 ms /    60 tokens (   10.14 ms per token,    98.60 tokens per second)
llama_print_timings:        eval time =  2469.28 ms /    29 runs   (   85.15 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3142.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“General, I’m here.” Pan Han from the crowd comes out, before he tried to hide in the crowd. He appears calm on the surface, but his fear can easily be seen. The General didn't call him comrade, a bad omen.



llama_print_timings:        load time =   632.53 ms
llama_print_timings:      sample time =    26.08 ms /    56 runs   (    0.47 ms per token,  2147.32 tokens per second)
llama_print_timings: prompt eval time =   632.49 ms /    79 tokens (    8.01 ms per token,   124.90 tokens per second)
llama_print_timings:        eval time =  4731.96 ms /    55 runs   (   86.04 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  5483.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You have seriously violated organization rules.” Ye Wenjie spoke, still not looking at Pan Han. Her voice remained soft and gentle as if she were talking to a child who had done something wrong.



llama_print_timings:        load time =   609.11 ms
llama_print_timings:      sample time =    19.73 ms /    44 runs   (    0.45 ms per token,  2230.11 tokens per second)
llama_print_timings: prompt eval time =   609.09 ms /    64 tokens (    9.52 ms per token,   105.07 tokens per second)
llama_print_timings:        eval time =  3643.73 ms /    43 runs   (   84.74 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  4342.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General, the army is facing a crisis. If we don't take decisive measures to remove the traitors and enemies within our ranks, we will lose everything!



llama_print_timings:        load time =   607.12 ms
llama_print_timings:      sample time =    15.77 ms /    35 runs   (    0.45 ms per token,  2219.83 tokens per second)
llama_print_timings: prompt eval time =   607.07 ms /    60 tokens (   10.12 ms per token,    98.84 tokens per second)
llama_print_timings:        eval time =  2929.14 ms /    34 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3609.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You look up at Pan Han and your eyes are soft, but they still stop his breath for a few seconds. “The ultimate ideal of the Earth Three Body organization is to lose everything, including all that humans have now.”



llama_print_timings:        load time =   627.73 ms
llama_print_timings:      sample time =    20.95 ms /    47 runs   (    0.45 ms per token,  2243.65 tokens per second)
llama_print_timings: prompt eval time =   627.68 ms /    72 tokens (    8.72 ms per token,   114.71 tokens per second)
llama_print_timings:        eval time =  3893.06 ms /    46 runs   (   84.63 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  4616.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are the messenger of God! Supreme Commander, please state this explicitly,” he shouted, raising his arm and looking around. No one responded as everyone was silenced; it was clear that this matter was very important to them.



llama_print_timings:        load time =   642.25 ms
llama_print_timings:      sample time =    22.66 ms /    51 runs   (    0.44 ms per token,  2250.36 tokens per second)
llama_print_timings: prompt eval time =   642.20 ms /    81 tokens (    7.93 ms per token,   126.13 tokens per second)
llama_print_timings:        eval time =  4233.65 ms /    50 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  4977.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You shouldn't have asked for this. You have seriously violated organization rules, and if you appeal now, you can; otherwise, you will be held responsible for it.



llama_print_timings:        load time =   641.32 ms
llama_print_timings:      sample time =    16.98 ms /    37 runs   (    0.46 ms per token,  2178.65 tokens per second)
llama_print_timings: prompt eval time =   641.29 ms /    81 tokens (    7.92 ms per token,   126.31 tokens per second)
llama_print_timings:        eval time =  3116.54 ms /    36 runs   (   86.57 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  3835.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I was the one who removed that math prodigy. This was a decision made by Ewing, and it passed unanimously at the meeting. If that genius really produced the complete mathematical model of three-dimensional motion, Earth would have been destroyed. I simply acted in self-defense, and Shen Yufei fired the first shot.”



llama_print_timings:        load time =   652.74 ms
llama_print_timings:      sample time =    33.11 ms /    74 runs   (    0.45 ms per token,  2234.91 tokens per second)
llama_print_timings: prompt eval time =   652.69 ms /    91 tokens (    7.17 ms per token,   139.42 tokens per second)
llama_print_timings:        eval time =  6301.86 ms /    73 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  7106.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving her to believe him, she nodded and said, “All right, let’s just believe you for now. Hopefully we will trust you more with our next move.”



llama_print_timings:        load time =   623.63 ms
llama_print_timings:      sample time =    17.30 ms /    39 runs   (    0.44 ms per token,  2253.81 tokens per second)
llama_print_timings: prompt eval time =   623.59 ms /    67 tokens (    9.31 ms per token,   107.44 tokens per second)
llama_print_timings:        eval time =  3230.49 ms /    38 runs   (   85.01 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  3931.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Huan was taken aback, but he did not seem to breathe a sigh of relief as he replied, “I…… I do belong to the descending sect. After all, it is the philosophy that you have strived for.”



llama_print_timings:        load time =   618.31 ms
llama_print_timings:      sample time =    25.47 ms /    55 runs   (    0.46 ms per token,  2159.32 tokens per second)
llama_print_timings: prompt eval time =   618.28 ms /    67 tokens (    9.23 ms per token,   108.36 tokens per second)
llama_print_timings:        eval time =  4583.42 ms /    54 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5319.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Then repeat the綱領.”



llama_print_timings:        load time =   574.94 ms
llama_print_timings:      sample time =     3.62 ms /     8 runs   (    0.45 ms per token,  2209.33 tokens per second)
llama_print_timings: prompt eval time =   574.90 ms /    38 tokens (   15.13 ms per token,    66.10 tokens per second)
llama_print_timings:        eval time =   596.81 ms /     7 runs   (   85.26 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  1187.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The human society is no longer able to solve its own problems and restrain its own madness with its own strength. Therefore, it should invite the Supreme Being to come to the world, using its strength for forced supervision and transformation of human society, creating a new, perfect human civilization.



llama_print_timings:        load time =   649.26 ms
llama_print_timings:      sample time =    28.03 ms /    62 runs   (    0.45 ms per token,  2211.99 tokens per second)
llama_print_timings: prompt eval time =   649.22 ms /    84 tokens (    7.73 ms per token,   129.39 tokens per second)
llama_print_timings:        eval time =  5265.37 ms /    61 runs   (   86.32 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  6043.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Are you loyal to this doctrine?



llama_print_timings:        load time =   582.89 ms
llama_print_timings:      sample time =     3.49 ms /     8 runs   (    0.44 ms per token,  2290.29 tokens per second)
llama_print_timings: prompt eval time =   582.85 ms /    40 tokens (   14.57 ms per token,    68.63 tokens per second)
llama_print_timings:        eval time =   593.92 ms /     7 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  1191.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course! Please don't believe rumors.



llama_print_timings:        load time =   581.98 ms
llama_print_timings:      sample time =     5.27 ms /    12 runs   (    0.44 ms per token,  2277.47 tokens per second)
llama_print_timings: prompt eval time =   581.94 ms /    42 tokens (   13.86 ms per token,    72.17 tokens per second)
llama_print_timings:        eval time =   939.92 ms /    11 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1545.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is a rumor!” exclaimed an European. “I am Rafael, Israeli. Three years ago, my fourteen-year-old son was involved in a car accident and I donated the kidney of his child to a Palestinian girl suffering from uremia. This way, I expressed my hope for peace between Israelis and Palestinians. To realize this wish, I would even be willing to give up my life, as countless Israeli and Palestinian people are also making their sincere efforts to do the same. But all of these efforts have been in vain; our homeland is plunging deeper into a quagmire of hatred. This has made me lose faith in humanity, which has led me to join the Three-Body Problem organization. Despair has transformed me from a pacifist to an extremist, and thanks to my large donation to the organization, I was able to enter its core.”



llama_print_timings:        load time =   942.60 ms
llama_print_timings:      sample time =    90.56 ms /   201 runs   (    0.45 ms per token,  2219.60 tokens per second)
llama_print_timings: prompt eval time =   942.55 ms /   249 tokens (    3.79 ms per token,   264.18 tokens per second)
llama_print_timings:        eval time = 17608.54 ms /   200 runs   (   88.04 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 18971.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The real agenda of the Illuminati has been made public.



llama_print_timings:        load time =   594.61 ms
llama_print_timings:      sample time =     6.63 ms /    15 runs   (    0.44 ms per token,  2261.42 tokens per second)
llama_print_timings: prompt eval time =   594.58 ms /    46 tokens (   12.93 ms per token,    77.37 tokens per second)
llama_print_timings:        eval time =  1203.14 ms /    14 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1828.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“But you don't know that this is not the result of evolution, but was what Ewing had set as his life goal from the beginning, an objective of terror! It was him who turned Initiates into a horrible terrorist state composed of extreme environmentalists and haters of humanity. Ewing was going toward it all along, it was him who transformed Initiates into such a terrorist nation.”



llama_print_timings:        load time =   694.82 ms
llama_print_timings:      sample time =    39.57 ms /    88 runs   (    0.45 ms per token,  2223.74 tokens per second)
llama_print_timings: prompt eval time =   694.77 ms /   112 tokens (    6.20 ms per token,   161.20 tokens per second)
llama_print_timings:        eval time =  7436.45 ms /    87 runs   (   85.48 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  8312.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I only learned later on Evans' true motives,” said Ye Wenjie. “Still, I tried to mend the rift and make the Earth Three Kingdoms Organization a whole again, but the other actions taken by the Inquisition made that impossible.”



llama_print_timings:        load time =   630.87 ms
llama_print_timings:      sample time =    26.19 ms /    58 runs   (    0.45 ms per token,  2214.59 tokens per second)
llama_print_timings: prompt eval time =   630.83 ms /    78 tokens (    8.09 ms per token,   123.65 tokens per second)
llama_print_timings:        eval time =  4984.42 ms /    57 runs   (   87.45 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  5734.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Han said, "Commander, the Disembarkation Army is the core of the Earth Three-Body Organization. Without us, there will be no Earth Three-Body movement!"



llama_print_timings:        load time =   596.32 ms
llama_print_timings:      sample time =    17.79 ms /    40 runs   (    0.44 ms per token,  2248.33 tokens per second)
llama_print_timings: prompt eval time =   596.27 ms /    55 tokens (   10.84 ms per token,    92.24 tokens per second)
llama_print_timings:        eval time =  3371.57 ms /    39 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4047.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But this is not the reason why you have a monopoly over communication!



llama_print_timings:        load time =   583.19 ms
llama_print_timings:      sample time =     7.83 ms /    17 runs   (    0.46 ms per token,  2169.75 tokens per second)
llama_print_timings: prompt eval time =   583.15 ms /    41 tokens (   14.22 ms per token,    70.31 tokens per second)
llama_print_timings:        eval time =  1354.40 ms /    16 runs   (   84.65 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1974.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"The Second Red Bottom Base is our creation, of course it should be operated by us!"



llama_print_timings:        load time =   584.93 ms
llama_print_timings:      sample time =     9.80 ms /    21 runs   (    0.47 ms per token,  2143.51 tokens per second)
llama_print_timings: prompt eval time =   584.88 ms /    45 tokens (   13.00 ms per token,    76.94 tokens per second)
llama_print_timings:        eval time =  1687.91 ms /    20 runs   (   84.40 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  2316.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the end, it was the Condition that enabled the betrayal of Immolation. You intercepted and tampered with information sent to the organization; what you transmitted was a mere fraction of what had been received, and what had been altered; furthermore, you also passed on large amounts of unauthorized messages through Second Red Base Station.



llama_print_timings:        load time =   665.21 ms
llama_print_timings:      sample time =    34.38 ms /    77 runs   (    0.45 ms per token,  2239.80 tokens per second)
llama_print_timings: prompt eval time =   665.17 ms /    93 tokens (    7.15 ms per token,   139.81 tokens per second)
llama_print_timings:        eval time =  6485.99 ms /    76 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  7309.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The silence descended upon the hall, like a very heavy thing making Wang Meng's scalp tighten. Pan Han didn't reply, his expression became cold as if he were saying: Good, finally something happened.



llama_print_timings:        load time =   630.81 ms
llama_print_timings:      sample time =    21.43 ms /    48 runs   (    0.45 ms per token,  2240.37 tokens per second)
llama_print_timings: prompt eval time =   630.77 ms /    71 tokens (    8.88 ms per token,   112.56 tokens per second)
llama_print_timings:        eval time =  4001.74 ms /    47 runs   (   85.14 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  4730.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“There are ample evidence proving the betrayal of The Dawn Brigade, and Ms. Xin Yufei is one of their chief informants. But despite being part of this criminal organization in her heart, she was a firm believer in Salvationists who was only discovered by you later on.”



llama_print_timings:        load time =   672.58 ms
llama_print_timings:      sample time =    30.21 ms /    68 runs   (    0.44 ms per token,  2251.06 tokens per second)
llama_print_timings: prompt eval time =   672.55 ms /    95 tokens (    7.08 ms per token,   141.25 tokens per second)
llama_print_timings:        eval time =  5683.54 ms /    67 runs   (   84.83 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  6494.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pan Bansu looked around, apparently assessing the situation. His movements were noticed by Ye Wenjie.



llama_print_timings:        load time =   591.65 ms
llama_print_timings:      sample time =    12.22 ms /    25 runs   (    0.49 ms per token,  2045.99 tokens per second)
llama_print_timings: prompt eval time =   591.58 ms /    52 tokens (   11.38 ms per token,    87.90 tokens per second)
llama_print_timings:        eval time =  2059.65 ms /    24 runs   (   85.82 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2707.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You can see that most of the attendees are Savingists, with few Transcenders present. They will surely stand on our side. However, it is very difficult to change you and Mr Evans' minds.”



llama_print_timings:        load time =   678.72 ms
llama_print_timings:      sample time =    21.63 ms /    48 runs   (    0.45 ms per token,  2218.93 tokens per second)
llama_print_timings: prompt eval time =   678.68 ms /    90 tokens (    7.54 ms per token,   132.61 tokens per second)
llama_print_timings:        eval time =  4064.99 ms /    47 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  4844.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The silence once again descended. After a couple of minutes, one of the beauty of the guardians of Yee Wenjie, a slim and beautiful girl charmingly smile, that is very noticeable, which draws many people's attention to her. The girl delicately walked toward Pan Han. Pan Han's face suddenly changed, he put his hand into his coat in front of him but the girl was already moving so fast that nobody had time to understand what happened. She quickly ran over and used a seemingly soft as spring bamboo like arm to turn Pan Han's head by one hundred and eighty degrees, while she also put her hand on his head skillfully and with very little strength and an extremely precise angle of support in order to successfully turn his head. The girl quickly released her hands and it seemed that his head was hotter than the sun. She took out a gun from a dead person's belt, who had been killed by Shin Yuefei. Her hands were moving faster and she successfully turned his head one hundred and eighty degrees a


llama_print_timings:        load time =  1009.83 ms
llama_print_timings:      sample time =   136.86 ms /   305 runs   (    0.45 ms per token,  2228.59 tokens per second)
llama_print_timings: prompt eval time =  1009.78 ms /   274 tokens (    3.69 ms per token,   271.35 tokens per second)
llama_print_timings:        eval time = 27009.35 ms /   304 runs   (   88.85 ms per token,    11.26 tokens per second)
llama_print_timings:       total time = 28661.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Ah, Shao Ming is here too,” said Ye Wenjie as her gaze shifted to Wang Sen. She gave him a warm smile and then turned to the rest of them and said, “This is academician Wang Meng from the Chinese Academy of Sciences. He is my friend.”



llama_print_timings:        load time =   680.02 ms
llama_print_timings:      sample time =    29.40 ms /    65 runs   (    0.45 ms per token,  2210.73 tokens per second)
llama_print_timings: prompt eval time =   679.99 ms /    96 tokens (    7.08 ms per token,   141.18 tokens per second)
llama_print_timings:        eval time =  5488.73 ms /    64 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  6302.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No one looked at Wang Miao, and Wang Sen also did not have the strength to make any gesture. He didn't even hold onto the sleeve of a nearby person to stabilize himself but that man pushed his hand away gently.



llama_print_timings:        load time =   619.29 ms
llama_print_timings:      sample time =    22.98 ms /    52 runs   (    0.44 ms per token,  2263.13 tokens per second)
llama_print_timings: prompt eval time =   619.25 ms /    70 tokens (    8.85 ms per token,   113.04 tokens per second)
llama_print_timings:        eval time =  4421.20 ms /    51 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  5146.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie said, “Come on, continue the story from last time. Let our comrades also listen to it. It's not a waste of time at this moment. We need to review our party’s history during such an important period.”



llama_print_timings:        load time =   623.09 ms
llama_print_timings:      sample time =    25.27 ms /    57 runs   (    0.44 ms per token,  2255.82 tokens per second)
llama_print_timings: prompt eval time =   623.05 ms /    73 tokens (    8.53 ms per token,   117.17 tokens per second)
llama_print_timings:        eval time =  4753.46 ms /    56 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5491.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Red Bank……not finished?” Wang Sen asked blankly.



llama_print_timings:        load time =   577.43 ms
llama_print_timings:      sample time =     6.64 ms /    15 runs   (    0.44 ms per token,  2260.06 tokens per second)
llama_print_timings: prompt eval time =   577.39 ms /    46 tokens (   12.55 ms per token,    79.67 tokens per second)
llama_print_timings:        eval time =  1213.05 ms /    14 runs   (   86.65 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1820.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving the model of the Three Body Problem before her, Ye Wenjie took slow steps and watched the silver ball that was floating in the air. The setting sun shining through a broken window illuminated the model and irregularly projected the flames to the formidable warlord's body.



llama_print_timings:        load time =   649.99 ms
llama_print_timings:      sample time =    30.16 ms /    67 runs   (    0.45 ms per token,  2221.49 tokens per second)
llama_print_timings: prompt eval time =   649.94 ms /    82 tokens (    7.93 ms per token,   126.16 tokens per second)
llama_print_timings:        eval time =  5724.94 ms /    66 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  6515.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's just the beginning, ” said Ye Wenjie with a smile.



llama_print_timings:        load time =   585.92 ms
llama_print_timings:      sample time =     9.23 ms /    20 runs   (    0.46 ms per token,  2167.08 tokens per second)
llama_print_timings: prompt eval time =   585.89 ms /    44 tokens (   13.32 ms per token,    75.10 tokens per second)
llama_print_timings:        eval time =  1627.64 ms /    19 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2254.58 ms


translated 66.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After entering the Red Bank base, Ye Wenjie never thought she would be able to go outside. Once she learned about the true purpose of the Red Bank project (a piece of highly classified information that very few at the base's lower levels knew), she also severed all mental contact with the outside world and dedicated herself entirely to her work. From then on, Ye Wenjie began to immerse herself more deeply into the technical aspect of Red Bank, assuming a larger number of research assignments that were transferred to her due to her status as an engineer. Yang Weining's trust in Ye Wenjie remained vexed, but he was still willing to hand her important research assignments — on behalf of Ye Wenjie's own achievements and publications, which she had no right to claim as a result; at the base, only Chief Commander Lei Zhicheng had graduated from a university with a major in astrophysics, and was an outstanding example of political officers who were also highly skilled professionals. In this w


llama_print_timings:        load time =   849.22 ms
llama_print_timings:      sample time =   117.87 ms /   262 runs   (    0.45 ms per token,  2222.75 tokens per second)
llama_print_timings: prompt eval time =   849.19 ms /   207 tokens (    4.10 ms per token,   243.76 tokens per second)
llama_print_timings:        eval time = 23068.11 ms /   261 runs   (   88.38 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 24462.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The original reason for $Lei Wenjie's entry into Red Bank Base was the paper she published in the Journal of Astronomy, which tried to establish a mathematical model for describing the Sun. After all, compared to the Earth, the Sun is simply made up of two simple elements - hydrogen and helium - and its physical process may seem complex at first glance, but it is actually very simple because all that happens is that hydrogen undergoes fusion into helium. So there was hope that a mathematical model could be used to more accurately describe the Sun. The original paper was quite basic, but Yang Weining and Lei Zhihe realized from it that there might be a solution to solving the technical problem in Red Bank Monitoring System.



llama_print_timings:        load time =   783.75 ms
llama_print_timings:      sample time =    71.36 ms /   159 runs   (    0.45 ms per token,  2228.08 tokens per second)
llama_print_timings: prompt eval time =   783.72 ms /   156 tokens (    5.02 ms per token,   199.05 tokens per second)
llama_print_timings:        eval time = 13564.57 ms /   158 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time = 14674.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The problem of solar interference has been a long-standing one for the surveillance operations of Lingshan. This term was borrowed from communication satellite technology, which occurs when the ground station points its antenna at the satellite while the sun, Earth and the satellite are on the same line. The electromagnetic emission from the sun is so strong that it interferes with the microwave signals received by the ground station at that time. This problem couldn't be solved until the 21st century. Lingshan receives solar interference similar to this, but different from communication satellites in that the source of interference is located between the transmitter and receiver, while the satellite is located on Earth and the receiving antenna is shared by surveillance and transmitting systems. This makes solar interference a more serious issue for Lingshan than it was originally designed to be.



llama_print_timings:        load time =   843.19 ms
llama_print_timings:      sample time =    85.00 ms /   189 runs   (    0.45 ms per token,  2223.66 tokens per second)
llama_print_timings: prompt eval time =   843.14 ms /   205 tokens (    4.11 ms per token,   243.14 tokens per second)
llama_print_timings:        eval time = 16619.21 ms /   188 runs   (   88.40 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 17851.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining and Lei Zhicheng had a simple idea: to understand the spectrum regularities and characteristics of the electromagnetic radiation emitted by the sun, and then filter it digitally in order to exclude the interference. Both were experts in technology, which was a rare achievement during an era when managers were in command while engineers played a supporting role. However, Yang Weining is not a specialist in astrophysics, while Lei Zhicheng has gone down a career path that lies more on managerial duties than technical proficiency. In fact, the stability of solar electromagnetic radiation is confined to the visible light range and nearby ultraviolet or infrared bands. Yue Wenjie first made it clear in her first report: when there are intense activities such as sunspots, coronal mass ejections, or solar flares, interference elimination cannot be achieved. Therefore, she limited the research object to the electromagnetic radiation of the sun during its normal activity on the red


llama_print_timings:        load time =   876.66 ms
llama_print_timings:      sample time =   100.83 ms /   225 runs   (    0.45 ms per token,  2231.39 tokens per second)
llama_print_timings: prompt eval time =   876.61 ms /   212 tokens (    4.13 ms per token,   241.84 tokens per second)
llama_print_timings:        eval time = 19849.18 ms /   224 runs   (   88.61 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 21193.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The research conditions in the base were still good, with a range of foreign materials that could be requested according to the topic of research. There was also a very timely supply of foreign academic journals from Europe and North America. This was not an easy task in those days with Ye Wenjie's military connection with two research units for solar studies at the Chinese Academy of Sciences, which she would receive through faxes with their real-time observational data. Eighteen years old, a girl's novel.



llama_print_timings:        load time =   682.16 ms
llama_print_timings:      sample time =    48.74 ms /   109 runs   (    0.45 ms per token,  2236.26 tokens per second)
llama_print_timings: prompt eval time =   682.13 ms /   103 tokens (    6.62 ms per token,   151.00 tokens per second)
llama_print_timings:        eval time =  9253.86 ms /   108 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time = 10159.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After six months of research, Ye Wenjie found no sign of success. She soon discovered that in the frequency range observed by Red Bank, the radiation from the Sun fluctuates wildly and mysteriously. Through analyzing large amounts of observational data, she discovered a baffling fact: sometimes during the mutation of the specific frequency range mentioned above, the surface activity on the Sun remains calm, with thousands of observations confirming this. This made her puzzled. Radiation in short-wave and microwave frequencies is not able to penetrate through几十万公里 of the Sun's surface layer from the core of the Sun, only the sun's surface activity can produce such radiation when mutation occurs. If there are no disturbances on the Sun, what is causing the specific frequency range mutation? This made her more and more curious about it.



llama_print_timings:        load time =   826.04 ms
llama_print_timings:      sample time =    82.33 ms /   182 runs   (    0.45 ms per token,  2210.59 tokens per second)
llama_print_timings: prompt eval time =   825.99 ms /   184 tokens (    4.49 ms per token,   222.76 tokens per second)
llama_print_timings:        eval time = 15760.96 ms /   181 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 16970.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have finally reached the end of your research. You decided to give up and you acknowledged it in your final report. This matter should be explained well. The military has commissioned a few institutes at the Chinese Academy of Sciences and universities to conduct similar studies, but all these studies ended in failure. Yang Wenning simply wanted to test his talents again using Ye Wenjie's talents. As for Mr. Lei Zhicheng, his true intention is very simple: he just wants Ye Wenjie's paper. This research has a strong theoretical aspect and can better show his level and achievements. As the craze about the social movement gradually subsides, the requirements for cadres have also changed somewhat, and people like him with political maturity and academic accomplishment are rare, of course they would have an unlimited future. Whether or not the Daying issue can be solved is not his most concerned matter. The Night Walker novel



llama_print_timings:        load time =   847.99 ms
llama_print_timings:      sample time =    90.11 ms /   202 runs   (    0.45 ms per token,  2241.83 tokens per second)
llama_print_timings: prompt eval time =   847.96 ms /   197 tokens (    4.30 ms per token,   232.32 tokens per second)
llama_print_timings:        eval time = 17605.55 ms /   201 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 18870.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But Ye Wenjie eventually didn't submit the report, and she thought that if the research ended, then the base would stop collecting data and subscribing to foreign periodicals on this topic, so she wouldn't be able to access such rich materials in astronomy. So in name, she still went on with her work, but in fact, she concentrated on her own solar math model.



llama_print_timings:        load time =   676.10 ms
llama_print_timings:      sample time =    38.61 ms /    86 runs   (    0.45 ms per token,  2227.58 tokens per second)
llama_print_timings: prompt eval time =   676.06 ms /   104 tokens (    6.50 ms per token,   153.83 tokens per second)
llama_print_timings:        eval time =  7223.09 ms /    85 runs   (   84.98 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  8078.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   686.69 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2270.15 tokens per second)
llama_print_timings: prompt eval time =   686.66 ms /   112 tokens (    6.13 ms per token,   163.11 tokens per second)
llama_print_timings:        eval time =    82.60 ms /     1 runs   (   82.60 ms per token,    12.11 tokens per second)
llama_print_timings:       total time =   772.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the short news "New Emission Source in the Solar System" published last week, Wilson Mountain Observatory's Harry Biddix博士 announced a set of data about two radiometers he detected emissions from Jupiter. The emission was caused by planetary rotation and occurred for 81 seconds on June 12th and 76 seconds on July 2nd respectively. This set of data recorded the frequency range and other parameters during the radiation bursts. Biddix described changes in Big Red Spot, the Great Red Spot on Jupiter's surface, during the radiometers. The Jovian radiometers have aroused great interest among astronomy researchers. In this issue, G. McKenzie wrote an article about the radiometers and suggested it was a sign of hydrogen fission starting inside Jupiter; in the next issue we will publish a paper by Ishiyama Sanda which explains these radiometers as the result of the motion of metallic hydrogen plates inside Jupiter.



llama_print_timings:        load time =   904.39 ms
llama_print_timings:      sample time =    98.69 ms /   219 runs   (    0.45 ms per token,  2219.07 tokens per second)
llama_print_timings: prompt eval time =   904.34 ms /   233 tokens (    3.88 ms per token,   257.65 tokens per second)
llama_print_timings:        eval time = 19106.22 ms /   218 runs   (   87.64 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 20472.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Wenjie clearly remembers these dates and times. At that time, the Hongba station was subjected to strong solar interference. She checked the log files and confirmed her memory; however, the delay caused by solar interference arrived at Earth sixteen minutes and forty-two seconds later than it did for Jupiter's electromagnetic radiation. This critical sixteen minutes and forty-two seconds! Lei Wenjie suppressed her rapid heartbeat, requested that relevant personnel from the data room contact with the National Observatory, which obtained those two time coordinates of Jupiter and Earth. She drew a huge triangle on the blackboard, with three corners being the sun, the Earth, and Jupiter. She marked down the distance between each corner on the three sides and at the Earth's vertex she marked in the arrival times of both dates. The time needed to travel from Jupiter to the Earth was easy to calculate using the time spent for electromagnetic radiation to travel from Jupiter to the Earth 


llama_print_timings:        load time =   843.74 ms
llama_print_timings:      sample time =    99.63 ms /   222 runs   (    0.45 ms per token,  2228.22 tokens per second)
llama_print_timings: prompt eval time =   843.70 ms /   192 tokens (    4.39 ms per token,   227.57 tokens per second)
llama_print_timings:        eval time = 19274.75 ms /   221 runs   (   87.22 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 20588.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$She then calculated the time it takes for electromagnetic radiation to travel from Jupiter to the Sun, and back again. The two times matched exactly sixteen minutes forty-two seconds! Using a mathematical model she had created earlier of the sun's structure, Ye Wenjie searched for clues to find out some hidden truths about solar radiation. Her gaze shifted quickly to something called "energy mirror" in the sun's radiation layer. From the core region of solar fission, high energy gamma rays are released and then absorbed by the radiation zone before being transmitted again through numerous long processes, each taking thousands of years (a photon leaving the Sun may take a thousand years to reach Earth). These processes have been well-studied in solar research. Ye Wenjie's mathematical model produced a new result: A clear boundary exists between different frequencies of radiation during the conversion from one form to another, and these boundaries appear in the solar radiation zone. Cal


llama_print_timings:        load time =  1029.59 ms
llama_print_timings:      sample time =   107.56 ms /   240 runs   (    0.45 ms per token,  2231.35 tokens per second)
llama_print_timings: prompt eval time =  1029.55 ms /   278 tokens (    3.70 ms per token,   270.02 tokens per second)
llama_print_timings:        eval time = 21200.88 ms /   239 runs   (   88.71 ms per token,    11.27 tokens per second)
llama_print_timings:       total time = 22736.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Wenjie began to carefully study this floating and indeterminate layer of thin membrane that was suspended over the sun's plasma sea, and she discovered that it had many amazing properties, the most remarkable of which was its “amplification reflection” attribute. This seemingly connected to the mystery of the sun's electromagnetic radiation but so bizarre as to be incredible, even Lei Wenjie herself could hardly believe it, and more likely is that the illusion arising from some overly complicated calculations.



llama_print_timings:        load time =   739.38 ms
llama_print_timings:      sample time =    55.22 ms /   120 runs   (    0.46 ms per token,  2173.01 tokens per second)
llama_print_timings: prompt eval time =   739.34 ms /   129 tokens (    5.73 ms per token,   174.48 tokens per second)
llama_print_timings:        eval time = 10330.32 ms /   119 runs   (   86.81 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 11327.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As of now, Ye Wenjie has preliminarily confirmed her hypothesis about solar energy mirrors with magnification of low-frequency electromagnetic radiation. The previous mysterious shifts in narrow spectrum aren't really perturbations on the sun's surface, but rather results of enlarged emitted radiation from the cosmos. So no anomaly can be observed on the surface of the sun.



llama_print_timings:        load time =   666.89 ms
llama_print_timings:      sample time =    39.65 ms /    87 runs   (    0.46 ms per token,  2194.25 tokens per second)
llama_print_timings: prompt eval time =   666.85 ms /   103 tokens (    6.47 ms per token,   154.46 tokens per second)
llama_print_timings:        eval time =  7366.27 ms /    86 runs   (   85.65 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  8216.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's highly likely that the sun received electromagnetic radiation from Jupiter and emitted it back, but with an increase in intensity of nearly a billionfold! The Earth received these two types of radiation respectively within a 16-minute 42-second time difference.



llama_print_timings:        load time =   618.38 ms
llama_print_timings:      sample time =    27.15 ms /    60 runs   (    0.45 ms per token,  2209.94 tokens per second)
llama_print_timings: prompt eval time =   618.34 ms /    70 tokens (    8.83 ms per token,   113.21 tokens per second)
llama_print_timings:        eval time =  5004.69 ms /    59 runs   (   84.83 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5752.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun is a radio amplifier!



llama_print_timings:        load time =   570.34 ms
llama_print_timings:      sample time =     4.26 ms /     9 runs   (    0.47 ms per token,  2113.17 tokens per second)
llama_print_timings: prompt eval time =   570.30 ms /    36 tokens (   15.84 ms per token,    63.12 tokens per second)
llama_print_timings:        eval time =   692.15 ms /     8 runs   (   86.52 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  1282.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The question here is clear: why does the Sun only amplify a fraction of the electromagnetic radiation it receives, including radio waves emitted by Earth? The answer is self-evident: in addition to choosing a reflection frequency for its energy mirror, the Sun shields us from electromagnetic radiation originating from space with its convective atmosphere. Its surface is constantly bubbling and the convective layer, located just above the radiation layer, forms the most outer liquid layer of the Sun. The radio waves have to pass through the convective layer before reaching the energy mirror on their way to being amplified and then reflected back to us on Earth. This requires that the incoming radio waves have a power greater than a threshold value, which is far lower than the threshold value for most radio emissions from Earth! But when it comes to electromagnetic radiation, Jupiter's emission exceeds—and even the maximum power emitted by red jets goes beyond this value.



llama_print_timings:        load time =   819.79 ms
llama_print_timings:      sample time =    93.73 ms /   207 runs   (    0.45 ms per token,  2208.47 tokens per second)
llama_print_timings: prompt eval time =   819.74 ms /   179 tokens (    4.58 ms per token,   218.36 tokens per second)
llama_print_timings:        eval time = 18044.50 ms /   206 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 19306.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, the solar interference problem remained unresolved. But a fascinating possibility arose: Human beings could use the sun as a super-antenna to transmit electromagnetic waves towards space in the star level power. The electricity was much stronger than the one that is used on Earth and by a factor of billions times.



llama_print_timings:        load time =   676.24 ms
llama_print_timings:      sample time =    31.63 ms /    71 runs   (    0.45 ms per token,  2244.78 tokens per second)
llama_print_timings: prompt eval time =   676.21 ms /    87 tokens (    7.77 ms per token,   128.66 tokens per second)
llama_print_timings:        eval time =  5940.11 ms /    70 runs   (   84.86 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6764.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Earth civilization may be able to launch a 2nd-level civilization launch!



llama_print_timings:        load time =   577.15 ms
llama_print_timings:      sample time =     9.07 ms /    20 runs   (    0.45 ms per token,  2206.04 tokens per second)
llama_print_timings: prompt eval time =   577.11 ms /    42 tokens (   13.74 ms per token,    72.78 tokens per second)
llama_print_timings:        eval time =  1637.34 ms /    19 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2255.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the next step, we need to compare the waveforms of the two waves of Jupiter's electromagnetic radiation and the interference caused by the solar eclipse. If they match up, this conjecture would be further confirmed.



llama_print_timings:        load time =   623.70 ms
llama_print_timings:      sample time =    21.92 ms /    49 runs   (    0.45 ms per token,  2235.20 tokens per second)
llama_print_timings: prompt eval time =   623.65 ms /    68 tokens (    9.17 ms per token,   109.03 tokens per second)
llama_print_timings:        eval time =  4055.19 ms /    48 runs   (   84.48 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  4781.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie made a request to her supervisor, wanting to contact Harry Beidson to obtain the recordings of the electromagnetic radiation on Jupiter during those two days. It was not an easy matter - the channels were difficult to find, and there were numerous formalities to be handled without any suspicion that she might be getting some favorable treatment by her supervisor.



llama_print_timings:        load time =   653.49 ms
llama_print_timings:      sample time =    36.85 ms /    82 runs   (    0.45 ms per token,  2225.48 tokens per second)
llama_print_timings: prompt eval time =   653.45 ms /    94 tokens (    6.95 ms per token,   143.85 tokens per second)
llama_print_timings:        eval time =  6929.71 ms /    81 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  7755.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Another way to confirm this is that the Red Flag launches a signal direct into the sun with more power than the threshold.



llama_print_timings:        load time =   597.99 ms
llama_print_timings:      sample time =    12.89 ms /    27 runs   (    0.48 ms per token,  2094.97 tokens per second)
llama_print_timings: prompt eval time =   597.96 ms /    55 tokens (   10.87 ms per token,    91.98 tokens per second)
llama_print_timings:        eval time =  2182.99 ms /    26 runs   (   83.96 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2841.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving the idea of her own to one side, Ye Wenjie went to see the head of the department and proposed this request. But she didn't dare to express it directly as that would be too absurd and likely get rejected, she simply said that this was a trial of using Sun research as a radio detection radar for the sun, analyzing some information reflected by solar radiation via receiving its echoes. Rei Zhiqing and Yang Huining both had deep technological backgrounds, so it would be difficult to deceive them; however, Ye Wenjie's proposal of this trial was indeed based on a precedent in western solar research. In fact, this experiment was simpler than the radar detection of planets undergoing at that very moment.



llama_print_timings:        load time =   760.80 ms
llama_print_timings:      sample time =    72.36 ms /   163 runs   (    0.44 ms per token,  2252.78 tokens per second)
llama_print_timings: prompt eval time =   760.76 ms /   149 tokens (    5.11 ms per token,   195.86 tokens per second)
llama_print_timings:        eval time = 14066.30 ms /   162 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 15168.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Ye Wenjie, you are becoming more and more outrageous. Your research paper, do not need to make such a big fuss about it.” Lei Zhi-cheng shaking his head said.



llama_print_timings:        load time =   633.10 ms
llama_print_timings:      sample time =    22.94 ms /    49 runs   (    0.47 ms per token,  2135.63 tokens per second)
llama_print_timings: prompt eval time =   633.06 ms /    67 tokens (    9.45 ms per token,   105.83 tokens per second)
llama_print_timings:        eval time =  4083.15 ms /    48 runs   (   85.07 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  4824.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Commander, there may be a major discovery. Is it necessary to conduct experiments?” Ye Wenjie begged desperately.



llama_print_timings:        load time =   645.54 ms
llama_print_timings:      sample time =    13.37 ms /    30 runs   (    0.45 ms per token,  2243.33 tokens per second)
llama_print_timings: prompt eval time =   645.50 ms /    57 tokens (   11.32 ms per token,    88.30 tokens per second)
llama_print_timings:        eval time =  2502.65 ms /    29 runs   (   86.30 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3209.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Wenning said, “Lei Bingbing, why don’t we just try it once? The operation seems to have no major difficulties. Returned signals back in launch will be …”



llama_print_timings:        load time =   602.14 ms
llama_print_timings:      sample time =    20.80 ms /    44 runs   (    0.47 ms per token,  2115.49 tokens per second)
llama_print_timings: prompt eval time =   602.10 ms /    61 tokens (    9.87 ms per token,   101.31 tokens per second)
llama_print_timings:        eval time =  3683.23 ms /    43 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  4382.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It should take about fifteen minutes, " said Lai Zhicheng.



llama_print_timings:        load time =   574.31 ms
llama_print_timings:      sample time =     7.64 ms /    17 runs   (    0.45 ms per token,  2224.84 tokens per second)
llama_print_timings: prompt eval time =   574.26 ms /    40 tokens (   14.36 ms per token,    69.65 tokens per second)
llama_print_timings:        eval time =  1383.33 ms /    16 runs   (   86.46 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  1991.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is the exact time when Red Bank will be ready to switch into receiving state."



llama_print_timings:        load time =   573.96 ms
llama_print_timings:      sample time =     8.47 ms /    19 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   573.93 ms /    43 tokens (   13.35 ms per token,    74.92 tokens per second)
llama_print_timings:        eval time =  1531.47 ms /    18 runs   (   85.08 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2144.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ray Zhicheng shook his head again. “I know that there is no problem with the technology and workload, but you …… Yang General, don't you think the meaning of the political test would be strong electric waves towards the Red Sun?”



llama_print_timings:        load time =   653.45 ms
llama_print_timings:      sample time =    24.18 ms /    54 runs   (    0.45 ms per token,  2233.62 tokens per second)
llama_print_timings: prompt eval time =   653.40 ms /    86 tokens (    7.60 ms per token,   131.62 tokens per second)
llama_print_timings:        eval time =  4496.27 ms /    53 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5262.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Weining and Ye Wenjie were stunned for a moment. They werent surprised by this reason, instead, they felt sorry that they didnt think of it sooner. In those days, everything was interpreted in a political manner to an extremely absurd extent. Ye Wenjies research report had to be carefully reviewed and the words concerning solar technology had to be repeatedly modified, like ‘solar spots’ could not appear in the text. No matter how positive the experiments could be explained, just one negative explanation would result in catastrophe. So this reason was definitely unbeatable.



llama_print_timings:        load time =   773.59 ms
llama_print_timings:      sample time =    57.68 ms /   128 runs   (    0.45 ms per token,  2219.10 tokens per second)
llama_print_timings: prompt eval time =   773.55 ms /   164 tokens (    4.72 ms per token,   212.01 tokens per second)
llama_print_timings:        eval time = 10971.27 ms /   127 runs   (   86.39 ms per token,    11.58 tokens per second)
llama_print_timings:       total time = 12016.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie didn't give up. In fact, it was easy to do this thing as long as the risks were not too great. The launchers of Red Bank Launch System were ultrahigh-power devices, which used domestically produced components during the Cultural Revolution. Due to the poor quality, the failure rate was very high and had to be comprehensive inspected every 15 launches in order to reduce the damage. After the inspection, there must also be routine test running before participating in launching, and other parameters are relatively arbitrary.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

During a shift, Ye Wenjie was assigned to perform routine testing after the maintenance. Due to reduced operational steps, there were only five people present in the field, excluding Ye Qujiaying. Three of them were operation staff who knew little about equipment principles and another technician and engineer had been working for two consecutive days, exhausted and unfocused. Firstly, Ye Wenjie set the launch power to just exceed the theoretical threshold above which solar beams will reflect in order to test the mechanical properties of the antenna. The antenna was aimed at the sun, which was hanging up on the western sky. The content being transmitted was still as regular as before.



llama_print_timings:        load time =   810.20 ms
llama_print_timings:      sample time =    66.30 ms /   148 runs   (    0.45 ms per token,  2232.21 tokens per second)
llama_print_timings: prompt eval time =   810.16 ms /   179 tokens (    4.53 ms per token,   220.94 tokens per second)
llama_print_timings:        eval time = 12656.35 ms /   147 runs   (   86.10 ms per token,    11.61 tokens per second)
llama_print_timings:       total time = 13780.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$This was a sunny afternoon in the fall of 1971. Whenever she later recalled this moment, there was nothing particular about it, only anxiety and impatience for launching to finish before any of her colleagues could find out. It was irregular to carry out launch experiments with maximum power using defective components, and although she had thought through a few possible excuses in case, the heating from optical systems made Lei Wenjie uneasy. By the time the sun set in the west, Ye Wenghao had to manually track the satellite as it slowly revolved toward the setting sun. When the launch was completed and the red light lit up, she felt completely wet with sweat. Turning her head, she saw three operators sitting on their desks, following the instructions in the manuals to turn off each device one after another. The engineer sipped from his water bottle while the technician was fast asleep in a long chair. Whatever historians and writers may later depict this scene, it was really just pla


llama_print_timings:        load time =   933.69 ms
llama_print_timings:      sample time =   104.49 ms /   234 runs   (    0.45 ms per token,  2239.36 tokens per second)
llama_print_timings: prompt eval time =   933.65 ms /   245 tokens (    3.81 ms per token,   262.41 tokens per second)
llama_print_timings:        eval time = 20652.95 ms /   233 runs   (   88.64 ms per token,    11.28 tokens per second)
llama_print_timings:       total time = 22083.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Once the launch was completed, Ye Wenjie rushed out of the control room and ran into Yang Weining's office. Breathing heavily, she said, "Quickly, let the base radio station tune to 12000 Megahertz!"



llama_print_timings:        load time =   647.29 ms
llama_print_timings:      sample time =    27.11 ms /    61 runs   (    0.44 ms per token,  2250.18 tokens per second)
llama_print_timings: prompt eval time =   647.26 ms /    73 tokens (    8.87 ms per token,   112.78 tokens per second)
llama_print_timings:        eval time =  5091.73 ms /    60 runs   (   84.86 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  5865.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What are you doing?” Yang Zongjian asked, astonished by the messy hair of Ye Wenjie who had been glued to her face with sweat. With an extremely sensitive wireless communication system in place at Red Banks, the regular military radio stations that served as a means of contact with the outside world were little more than toys.



llama_print_timings:        load time =   629.63 ms
llama_print_timings:      sample time =    35.11 ms /    76 runs   (    0.46 ms per token,  2164.69 tokens per second)
llama_print_timings: prompt eval time =   629.58 ms /    76 tokens (    8.28 ms per token,   120.71 tokens per second)
llama_print_timings:        eval time =  6312.48 ms /    75 runs   (   84.17 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  7107.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$“Perhaps we can receive something, but Red Bank's receiving system doesn't have enough time to switch to receiving state!” Yue Wenjie said. Normally, the Red Bank reception system should preheat and switch within 10 minutes, but now the reception system is being repaired as many modules have been disassembled without reassembly. Yang Weining looked at Ye Wenjie for a few seconds before picking up the telephone and instructing the communication room to do as Ye Wenjie had said. “The precision of that radio, only can receive signals from the moon.”



llama_print_timings:        load time =   739.34 ms
llama_print_timings:      sample time =    59.02 ms /   133 runs   (    0.44 ms per token,  2253.66 tokens per second)
llama_print_timings: prompt eval time =   739.30 ms /   135 tokens (    5.48 ms per token,   182.61 tokens per second)
llama_print_timings:        eval time = 11411.35 ms /   132 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 12425.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The signal comes from the sun,” said Ye Wenjie. Outside, the edge of the sun was already nearing the top of the mountain in a blood-red color.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

"Did you send a signal to the sun with Redditch system?" Yang Weining asked nervously.



llama_print_timings:        load time =   582.25 ms
llama_print_timings:      sample time =    10.40 ms /    23 runs   (    0.45 ms per token,  2211.11 tokens per second)
llama_print_timings: prompt eval time =   582.21 ms /    49 tokens (   11.88 ms per token,    84.16 tokens per second)
llama_print_timings:        eval time =  1826.49 ms /    22 runs   (   83.02 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  2457.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie nodded.



llama_print_timings:        load time =   580.22 ms
llama_print_timings:      sample time =     4.44 ms /    10 runs   (    0.44 ms per token,  2253.27 tokens per second)
llama_print_timings: prompt eval time =   580.18 ms /    36 tokens (   16.12 ms per token,    62.05 tokens per second)
llama_print_timings:        eval time =   769.21 ms /     9 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1369.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Don't tell anyone about this, absolutely not. It's just a mere coincidence," Yang Weining looked back at the door warily and said.



llama_print_timings:        load time =   603.49 ms
llama_print_timings:      sample time =    16.24 ms /    35 runs   (    0.46 ms per token,  2155.44 tokens per second)
llama_print_timings: prompt eval time =   603.34 ms /    57 tokens (   10.58 ms per token,    94.47 tokens per second)
llama_print_timings:        eval time =  2908.03 ms /    34 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3588.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie nodded.



llama_print_timings:        load time =   580.70 ms
llama_print_timings:      sample time =     4.51 ms /    10 runs   (    0.45 ms per token,  2215.82 tokens per second)
llama_print_timings: prompt eval time =   580.67 ms /    37 tokens (   15.69 ms per token,    63.72 tokens per second)
llama_print_timings:        eval time =   802.66 ms /     9 runs   (   89.19 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  1404.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What's the point, the echo is extremely weak and far beyond the reach of regular radio receivers."



llama_print_timings:        load time =   587.57 ms
llama_print_timings:      sample time =    12.23 ms /    24 runs   (    0.51 ms per token,  1962.55 tokens per second)
llama_print_timings: prompt eval time =   587.51 ms /    51 tokens (   11.52 ms per token,    86.81 tokens per second)
llama_print_timings:        eval time =  1949.61 ms /    23 runs   (   84.77 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  2593.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"No, if my guess is correct. The strongest of the echoes we received are … unimaginable. Just by transmitting at a power greater than our threshold value, the Sun could amplify an electromagnetic wave to a billionfold!



llama_print_timings:        load time =   621.06 ms
llama_print_timings:      sample time =    24.94 ms /    54 runs   (    0.46 ms per token,  2164.85 tokens per second)
llama_print_timings: prompt eval time =   621.02 ms /    73 tokens (    8.51 ms per token,   117.55 tokens per second)
llama_print_timings:        eval time =  4578.75 ms /    53 runs   (   86.39 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  5317.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He picked up the phone and called the communications room briefly. He simply asked two questions.



llama_print_timings:        load time =   692.68 ms
llama_print_timings:      sample time =     8.80 ms /    20 runs   (    0.44 ms per token,  2272.21 tokens per second)
llama_print_timings: prompt eval time =   692.64 ms /   113 tokens (    6.13 ms per token,   163.14 tokens per second)
llama_print_timings:        eval time =  1633.82 ms /    19 runs   (   85.99 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  2366.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yang Wenning put down the phone and said, "Nothing arrived."



llama_print_timings:        load time =   593.98 ms
llama_print_timings:      sample time =     7.62 ms /    17 runs   (    0.45 ms per token,  2231.26 tokens per second)
llama_print_timings: prompt eval time =   593.93 ms /    42 tokens (   14.14 ms per token,    70.72 tokens per second)
llama_print_timings:        eval time =  1350.88 ms /    16 runs   (   84.43 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1979.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie breathed a sigh of relief, and nodded after a long while.



llama_print_timings:        load time =   593.29 ms
llama_print_timings:      sample time =    10.73 ms /    23 runs   (    0.47 ms per token,  2142.72 tokens per second)
llama_print_timings: prompt eval time =   593.25 ms /    43 tokens (   13.80 ms per token,    72.48 tokens per second)
llama_print_timings:        eval time =  1903.69 ms /    22 runs   (   86.53 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2546.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That American astronomer has written back.” Yang Weining took out a thick envelope and handed it to Ye Wenjie, which was sealed with numerous customs stamps. Ye Wenjie opened the envelope eagerly and saw first is the letter from Harold Beadson, saying that he had not expected to have research partners in China in the field of planetary electromagnetics, hoping for more contacts and cooperation. He sent two thick stacks of paper, where the recorded were two waves, which apparently are printed from long-strip signal recording papers. To match them up, Ye Wenjie put the papers on the ground into two rows halfway through, but she had no hope when reaching the middle, as she was too familiar with those two interruptions caused by Jupiter’s transits.



llama_print_timings:        load time =   867.48 ms
llama_print_timings:      sample time =    76.78 ms /   174 runs   (    0.44 ms per token,  2266.33 tokens per second)
llama_print_timings: prompt eval time =   867.43 ms /   216 tokens (    4.02 ms per token,   249.01 tokens per second)
llama_print_timings:        eval time = 15065.57 ms /   173 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 16292.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaf Vance slowly gathered the two rows of copies on the ground. Yang Weining crouched down to help her, and when he handed the pile of papers to his inner-deep loved girl, he saw that she shook her head with a smile that was very sad and caused his heart to tremble.



llama_print_timings:        load time =   656.29 ms
llama_print_timings:      sample time =    29.11 ms /    66 runs   (    0.44 ms per token,  2267.03 tokens per second)
llama_print_timings: prompt eval time =   656.26 ms /    93 tokens (    7.06 ms per token,   141.71 tokens per second)
llama_print_timings:        eval time =  5515.52 ms /    65 runs   (   84.85 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6307.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How?” he asked quietly, not realizing that he had never spoken so softly to her before.



llama_print_timings:        load time =   586.76 ms
llama_print_timings:      sample time =    10.27 ms /    23 runs   (    0.45 ms per token,  2239.97 tokens per second)
llama_print_timings: prompt eval time =   586.72 ms /    52 tokens (   11.28 ms per token,    88.63 tokens per second)
llama_print_timings:        eval time =  1890.77 ms /    22 runs   (   85.94 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2524.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Nothing, just a dream.



llama_print_timings:        load time =   697.53 ms
llama_print_timings:      sample time =     3.36 ms /     7 runs   (    0.48 ms per token,  2084.57 tokens per second)
llama_print_timings: prompt eval time =   697.49 ms /   118 tokens (    5.91 ms per token,   169.18 tokens per second)
llama_print_timings:        eval time =   531.83 ms /     6 runs   (   88.64 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1246.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, the sun has set and the Great Xing'an Mountain looks like a dark place. Like Ye Wenjie's life. In this grayness, dreams particularly appear magnificent and shimmering. But dreams always end quickly, just like the sun will never rise again with new hope. At that moment, Ye Wenjie suddenly saw her remaining life, which is only boundless grayness. She smiled while holding back tears, keeping on eating cold buns.



llama_print_timings:        load time =   700.60 ms
llama_print_timings:      sample time =    47.36 ms /   106 runs   (    0.45 ms per token,  2238.03 tokens per second)
llama_print_timings: prompt eval time =   700.56 ms /   120 tokens (    5.84 ms per token,   171.29 tokens per second)
llama_print_timings:        eval time =  8843.02 ms /   105 runs   (   84.22 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  9767.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie didn't know, at this time, the first voice that could be heard by human beings in space, which was sent from Earth and moving towards the universe at the speed of light, has crossed the orbit of Jupiter.



llama_print_timings:        load time =   656.51 ms
llama_print_timings:      sample time =    24.33 ms /    54 runs   (    0.45 ms per token,  2219.30 tokens per second)
llama_print_timings: prompt eval time =   656.47 ms /    87 tokens (    7.55 ms per token,   132.53 tokens per second)
llama_print_timings:        eval time =  4526.05 ms /    53 runs   (   85.40 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  5296.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At this time, the Sun is the brightest star in the Milky Way Galaxy at a frequency of 12000 MHz.



llama_print_timings:        load time =   615.23 ms
llama_print_timings:      sample time =    14.23 ms /    32 runs   (    0.44 ms per token,  2247.98 tokens per second)
llama_print_timings: prompt eval time =   615.21 ms /    52 tokens (   11.83 ms per token,    84.52 tokens per second)
llama_print_timings:        eval time =  2694.30 ms /    31 runs   (   86.91 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  3376.09 ms


translated 53.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The next eight years were the calmest in Ye Wenjie's life. The shock caused by "the Cultural Revolution" gradually subside, and she was finally able to relax a little bit her own mental condition. "Red Bottom" project has completed experimentation and trial operation, which are growing more regular and stable. Fewer and fewer technical problems need to be solved as time goes on, while work and life become more orderly.



llama_print_timings:        load time =   659.40 ms
llama_print_timings:      sample time =    42.13 ms /    93 runs   (    0.45 ms per token,  2207.45 tokens per second)
llama_print_timings: prompt eval time =   659.37 ms /    96 tokens (    6.87 ms per token,   145.59 tokens per second)
llama_print_timings:        eval time =  7805.32 ms /    92 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  8655.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the calm, the memories of being oppressed and terrified by fear began to emerge. Ye Wenjie discovered that the true pain had just begun. The nightmare-like memory was burning her mind like a torch rekindled from dead ashes, getting increasingly hot and consuming her soul. For an ordinary woman, maybe time could gradually heal these wounds. After all, she is lucky compared to many women who experienced this kind of trauma in the Cultural Revolution. But Ye Wenjie is a scientific female, refusing to forget what hurt her and looking at those crazy and obsessive with clear rational eyes.



llama_print_timings:        load time =   757.60 ms
llama_print_timings:      sample time =    63.88 ms /   142 runs   (    0.45 ms per token,  2223.06 tokens per second)
llama_print_timings: prompt eval time =   757.55 ms /   146 tokens (    5.19 ms per token,   192.73 tokens per second)
llama_print_timings:        eval time = 12268.38 ms /   141 runs   (   87.01 ms per token,    11.49 tokens per second)
llama_print_timings:       total time = 13313.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a result of her close relationship with Yang Weining, Ye Wenjie started to collect the classic literature from other countries. The bloodstains decorating human history disgusted her. At the same time, her own thinking and that of other outstanding thinkers led her to the essence of human nature that was both most basic and most concealed.



llama_print_timings:        load time =   708.21 ms
llama_print_timings:      sample time =    34.87 ms /    76 runs   (    0.46 ms per token,  2179.52 tokens per second)
llama_print_timings: prompt eval time =   708.09 ms /   127 tokens (    5.58 ms per token,   179.36 tokens per second)
llama_print_timings:        eval time =  6365.62 ms /    75 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  7230.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$In fact, the radar peak is like a haven in which humanity's irrationality and craziness are still on display every day. Ye Wenjie saw that the forest in the mountain basin was being widespreadly cleared by her former comrades, with the resulting effect of the expanding range increasing the abnormality of those remaining forests. When these areas were combined, those few remaining trees looked odd. The great fire igniting on that barren land resembled a refuge for birds fleeing from the flames. And their feathers were burnt black when the fire broke out; they were screaming miserably in the base.



llama_print_timings:        load time =   794.34 ms
llama_print_timings:      sample time =    65.03 ms /   145 runs   (    0.45 ms per token,  2229.88 tokens per second)
llama_print_timings: prompt eval time =   794.31 ms /   174 tokens (    4.56 ms per token,   219.06 tokens per second)
llama_print_timings:        eval time = 12640.43 ms /   144 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time = 13727.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the far outer world, humanity's madness has reached the peak of civilization. At that time, it was the most intense period of the Cold War between the United States and the Soviet Union, with thousands of launchers scattered throughout the continents on both sides and strategic nuclear submarines prowling in the depths of the ocean. Only one "Polaris" or "Triton" class submarine had enough nuclear warheads to destroy hundreds of cities and kill billions of people. But ordinary people still smiled at this, as if it did not affect them at all.



llama_print_timings:        load time =   751.53 ms
llama_print_timings:      sample time =    57.08 ms /   127 runs   (    0.45 ms per token,  2225.06 tokens per second)
llama_print_timings: prompt eval time =   751.49 ms /   143 tokens (    5.26 ms per token,   190.29 tokens per second)
llama_print_timings:        eval time = 10719.38 ms /   126 runs   (   85.07 ms per token,    11.75 tokens per second)
llama_print_timings:       total time = 11729.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a celestial physicist, Yi Wenjie is very sensitive to nuclear weapons. She knows it's the power of stars and is even more aware that there are even worse forces in the universe like black holes and anti-matter, among other things, which make heat bombs seem weak and gentle by comparison. If humanity were to obtain any one of those powers, the world could be instantly vaporized in a fit of madness and reason is powerless in the face of that insanity.



llama_print_timings:        load time =   683.99 ms
llama_print_timings:      sample time =    50.05 ms /   111 runs   (    0.45 ms per token,  2218.00 tokens per second)
llama_print_timings: prompt eval time =   683.95 ms /   112 tokens (    6.11 ms per token,   163.76 tokens per second)
llama_print_timings:        eval time =  9423.09 ms /   110 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time = 10333.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   588.53 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2280.50 tokens per second)
llama_print_timings: prompt eval time =   588.50 ms /    34 tokens (   17.31 ms per token,    57.77 tokens per second)
llama_print_timings:        eval time =    81.86 ms /     1 runs   (   81.86 ms per token,    12.22 tokens per second)
llama_print_timings:       total time =   674.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Four years after entering Hongba, Ye Wenjie and Yang Weining formed a family. Yang Weining was truly in love with Ye Wenjie, and gave up his own future for it. At this time, the period of the "Cultural Revolution"'s worst violence had already passed by, and the political environment was relatively mild. Although he didn't have to undergo any persecution due to his marriage, but because he married a woman wearing an anti-revolutionary hat, he was deemed as immature politically and lost his post as chief engineer. However, he and his wife were allowed to stay at the base as technical personnel only because of their technical talents were needed. For Ye Wenjie, accepting Yang Weining's love mainly came from her own sense of gratitude for saving her life during the most dangerous time. Although Yang Weining was talented, courteous and well-bred, she herself had hardly any feeling left to reignite that flame of love.



llama_print_timings:        load time =   875.45 ms
llama_print_timings:      sample time =    99.60 ms /   220 runs   (    0.45 ms per token,  2208.79 tokens per second)
llama_print_timings: prompt eval time =   875.40 ms /   219 tokens (    4.00 ms per token,   250.17 tokens per second)
llama_print_timings:        eval time = 19120.15 ms /   219 runs   (   87.31 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 20448.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The thought of the essence of humanity made Ye Wenjie suffer from a deep spiritual crisis. What she first faced was an absence of a goal of dedication. She had once been an idealist and wanted to contribute her talents to a great cause. Now, however, she realized that everything she did was meaningless and there could be no significant pursuit in the future. This mentality developed further, and she began to feel strangely alienated from the world. In forming a family, her mind felt more disoriented than ever before.



llama_print_timings:        load time =   740.92 ms
llama_print_timings:      sample time =    54.17 ms /   118 runs   (    0.46 ms per token,  2178.41 tokens per second)
llama_print_timings: prompt eval time =   740.88 ms /   136 tokens (    5.45 ms per token,   183.57 tokens per second)
llama_print_timings:        eval time = 10114.60 ms /   117 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11099.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This day, Ye Wenjie was on night shift. It is the loneliest time of the year when the silent midnight of the universe speaks to its listeners with vast desolation. The most Lei disliked looking at that curved line moving slowly on the monitor, which represents the electromagnetic waves received by Red Banks, meaningless noise. Ye Wenjie felt that this endless curve was the abstraction of the universe, one head linked to infinity in the past and another to infinity in the future, with only random fluctuations in height and size without any regularity or life in between, like a granule of sand held up by a hand formed into a desert. It is desolate and intolerable to look at it going forward or backward infinitely far.



llama_print_timings:        load time =   841.54 ms
llama_print_timings:      sample time =    76.55 ms /   171 runs   (    0.45 ms per token,  2233.72 tokens per second)
llama_print_timings: prompt eval time =   841.50 ms /   198 tokens (    4.25 ms per token,   235.29 tokens per second)
llama_print_timings:        eval time = 14786.73 ms /   170 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 15974.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =  1124.42 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2275.31 tokens per second)
llama_print_timings: prompt eval time =  1124.37 ms /   331 tokens (    3.40 ms per token,   294.39 tokens per second)
llama_print_timings:        eval time =   109.51 ms /     1 runs   (  109.51 ms per token,     9.13 tokens per second)
llama_print_timings:       total time =  1238.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Don't answer!



llama_print_timings:        load time =   567.84 ms
llama_print_timings:      sample time =     2.67 ms /     6 runs   (    0.44 ms per token,  2251.41 tokens per second)
llama_print_timings: prompt eval time =   567.80 ms /    32 tokens (   17.74 ms per token,    56.36 tokens per second)
llama_print_timings:        eval time =   413.79 ms /     5 runs   (   82.76 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =   992.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do not answer!



llama_print_timings:        load time =   568.00 ms
llama_print_timings:      sample time =     2.21 ms /     5 runs   (    0.44 ms per token,  2264.49 tokens per second)
llama_print_timings: prompt eval time =   567.96 ms /    33 tokens (   17.21 ms per token,    58.10 tokens per second)
llama_print_timings:        eval time =   329.17 ms /     4 runs   (   82.29 ms per token,    12.15 tokens per second)
llama_print_timings:       total time =   906.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do not answer!



llama_print_timings:        load time =   575.71 ms
llama_print_timings:      sample time =     2.19 ms /     5 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   575.67 ms /    34 tokens (   16.93 ms per token,    59.06 tokens per second)
llama_print_timings:        eval time =   310.18 ms /     4 runs   (   77.54 ms per token,    12.90 tokens per second)
llama_print_timings:       total time =   895.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After translating the second message, Ye Wenjie was still stunned and confused.



llama_print_timings:        load time =   592.37 ms
llama_print_timings:      sample time =     9.39 ms /    21 runs   (    0.45 ms per token,  2236.90 tokens per second)
llama_print_timings: prompt eval time =   592.33 ms /    52 tokens (   11.39 ms per token,    87.79 tokens per second)
llama_print_timings:        eval time =  1738.46 ms /    20 runs   (   86.92 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2372.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The world has received your message.



llama_print_timings:        load time =   572.02 ms
llama_print_timings:      sample time =     4.19 ms /     8 runs   (    0.52 ms per token,  1907.94 tokens per second)
llama_print_timings: prompt eval time =   572.00 ms /    34 tokens (   16.82 ms per token,    59.44 tokens per second)
llama_print_timings:        eval time =   594.67 ms /     7 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1183.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   610.74 ms
llama_print_timings:      sample time =    18.68 ms /    42 runs   (    0.44 ms per token,  2248.03 tokens per second)
llama_print_timings: prompt eval time =   610.70 ms /    60 tokens (   10.18 ms per token,    98.25 tokens per second)
llama_print_timings:        eval time =  3507.03 ms /    41 runs   (   85.54 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4200.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have millions of stars in your direction. If you do not respond, the source cannot be identified.



llama_print_timings:        load time =   585.32 ms
llama_print_timings:      sample time =     9.82 ms /    22 runs   (    0.45 ms per token,  2239.87 tokens per second)
llama_print_timings: prompt eval time =   585.29 ms /    47 tokens (   12.45 ms per token,    80.30 tokens per second)
llama_print_timings:        eval time =  1746.39 ms /    21 runs   (   83.16 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  2375.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If you respond, the source will be located and your planetary system will come under attack! Your world will be invaded!



llama_print_timings:        load time =   586.99 ms
llama_print_timings:      sample time =    11.98 ms /    27 runs   (    0.44 ms per token,  2252.82 tokens per second)
llama_print_timings: prompt eval time =   586.95 ms /    52 tokens (   11.29 ms per token,    88.59 tokens per second)
llama_print_timings:        eval time =  2261.75 ms /    26 runs   (   86.99 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2902.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do not answer! Do not answer! ! Do not answer! ! !



llama_print_timings:        load time =   581.98 ms
llama_print_timings:      sample time =     7.13 ms /    16 runs   (    0.45 ms per token,  2244.35 tokens per second)
llama_print_timings: prompt eval time =   581.94 ms /    41 tokens (   14.19 ms per token,    70.45 tokens per second)
llama_print_timings:        eval time =  1291.40 ms /    15 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1904.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As she watched the flickering green characters on the display, Yvonne could no longer think clearly. She could only understand these two facts: less than 9 years had passed since they were transmitted to the ground, and the source of these messages was less than 4 light-years away from us. It could only come from the nearest star system, the Alpha Centauri!



llama_print_timings:        load time =   684.10 ms
llama_print_timings:      sample time =    37.21 ms /    83 runs   (    0.45 ms per token,  2230.76 tokens per second)
llama_print_timings: prompt eval time =   684.06 ms /   103 tokens (    6.64 ms per token,   150.57 tokens per second)
llama_print_timings:        eval time =  7007.77 ms /    82 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  7861.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The universe is not barren, the universe is not empty, the universe is full of vitality! Humans look towards the end of the universe but where they did not expect, there were intelligent lives in the nearest stars.



llama_print_timings:        load time =   626.25 ms
llama_print_timings:      sample time =    21.16 ms /    48 runs   (    0.44 ms per token,  2268.54 tokens per second)
llama_print_timings: prompt eval time =   626.21 ms /    70 tokens (    8.95 ms per token,   111.78 tokens per second)
llama_print_timings:        eval time =  4060.83 ms /    47 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4782.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As she looked at the waveform display, information continued to stream into Red Bank antenna. She switched on another interface and activated real-time decoding, receiving the incoming message right away. Over the course of the next four hours, Ye Wenjie learned about the existence of Trisolaran civilization, their numerous resurgences over the centuries, and their interstellar migration attempts.



llama_print_timings:        load time =   677.49 ms
llama_print_timings:      sample time =    39.24 ms /    85 runs   (    0.46 ms per token,  2166.21 tokens per second)
llama_print_timings: prompt eval time =   677.42 ms /   105 tokens (    6.45 ms per token,   155.00 tokens per second)
llama_print_timings:        eval time =  7291.31 ms /    84 runs   (   86.80 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  8148.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At 4:00 in the morning, the messages from Andromeda came to an end. The translating system started running without results, producing only failure messages over and over again. In a vast expanse of space, all that was heard by Red Coast Station was an eerie, desolate noise.



llama_print_timings:        load time =   637.29 ms
llama_print_timings:      sample time =    29.14 ms /    65 runs   (    0.45 ms per token,  2230.61 tokens per second)
llama_print_timings: prompt eval time =   637.25 ms /    71 tokens (    8.98 ms per token,   111.42 tokens per second)
llama_print_timings:        eval time =  5554.27 ms /    64 runs   (   86.79 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  6323.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But Ye Wenjie could be sure that everything just now was not a dream.



llama_print_timings:        load time =   591.14 ms
llama_print_timings:      sample time =     8.68 ms /    19 runs   (    0.46 ms per token,  2187.93 tokens per second)
llama_print_timings: prompt eval time =   591.11 ms /    41 tokens (   14.42 ms per token,    69.36 tokens per second)
llama_print_timings:        eval time =  1599.93 ms /    18 runs   (   88.88 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  2229.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun is indeed a superantenna, but why didn't it receive any echoes from that experiment eight years ago? Why did Jupiter's radiation pattern not match the solar one later? Later on, Ye Wenjie came up with many reasons. The base radio may have never been able to receive the signal of that frequency, or they could just have received a few words, which they would interpret as nothing at all. As for the latter, it is likely because when the sun amplifies electromagnetic waves, it also overlays a regular pattern. This pattern, in the alien civilization's interpretation system, can be easily discarded. But in her physical perception, Jupiter and the sun have very different radiation patterns. This point was later confirmed, where an oscillatory wave is being added.



llama_print_timings:        load time =   791.81 ms
llama_print_timings:      sample time =    76.89 ms /   172 runs   (    0.45 ms per token,  2237.02 tokens per second)
llama_print_timings: prompt eval time =   791.76 ms /   171 tokens (    4.63 ms per token,   215.97 tokens per second)
llama_print_timings:        eval time = 14871.77 ms /   171 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 16013.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She looked around suspiciously. There were still three other people on duty in the control room, and two of them were chatting in a corner while one was asleep at the terminal. The receiver processing the monitored information in the system was only she and her colleague in the front desk. She carried out the operation without speaking aloud, transferring all received messages to a multiple encrypted hidden folder and substituting it with some noise that was received a year ago for five hours of monitored data.



llama_print_timings:        load time =   715.99 ms
llama_print_timings:      sample time =    48.22 ms /   106 runs   (    0.45 ms per token,  2198.08 tokens per second)
llama_print_timings: prompt eval time =   715.95 ms /   126 tokens (    5.68 ms per token,   175.99 tokens per second)
llama_print_timings:        eval time =  8923.37 ms /   105 runs   (   84.98 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  9862.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then, she typed a short message on the terminal and input it into the cache area of the Red Bank launching system.



llama_print_timings:        load time =   580.23 ms
llama_print_timings:      sample time =    11.69 ms /    26 runs   (    0.45 ms per token,  2225.07 tokens per second)
llama_print_timings: prompt eval time =   580.19 ms /    48 tokens (   12.09 ms per token,    82.73 tokens per second)
llama_print_timings:        eval time =  2174.65 ms /    25 runs   (   86.99 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2809.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She got up and walked out of the listening control room. A cold breeze rustled her face, which was hot from the rising sun in the east. Along with a stone-paved path that barely had been lit by dawn's faint rays, she walked to the launching control room. The giant hand of the Red Banks antennae silently opened up towards space. She passed the gate guard who stood still watching her just like always. Leaving the listening control room and walking through several rows of equipment, she reached the console and tuned all the switches, starting the preheating of the launching system. The two shift workers on duty looked up at her with eyes heavy from sleep, then one of them turned his head again to look at the clock on the wall while the other one continued reading the newspaper that he probably had seen many times before. In the base, she didn't have any political status in terms of politics but she had some freedom technically and often preheated the equipment ahead of time even though to


llama_print_timings:        load time =  1006.82 ms
llama_print_timings:      sample time =   102.67 ms /   231 runs   (    0.44 ms per token,  2249.99 tokens per second)
llama_print_timings: prompt eval time =  1006.77 ms /   273 tokens (    3.69 ms per token,   271.16 tokens per second)
llama_print_timings:        eval time = 20463.31 ms /   230 runs   (   88.97 ms per token,    11.24 tokens per second)
llama_print_timings:       total time = 21940.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After a long half an hour, Ye Wenjie reset the launch frequency to its optimal value and set the power output to its maximum. She then focused the optic locating sensor's telescope to observe that the sun was rising over the horizon. Then she activated the satellite antenna orientation system and gently turned the direction wheel until it pointed towards the sun. The sound of the giant antennas rotating carried into the main control room, where one of the deputy officers looked at Ye Wenjie and didn't say anything.



llama_print_timings:        load time =   739.39 ms
llama_print_timings:      sample time =    52.50 ms /   115 runs   (    0.46 ms per token,  2190.48 tokens per second)
llama_print_timings: prompt eval time =   739.36 ms /   136 tokens (    5.44 ms per token,   183.94 tokens per second)
llama_print_timings:        eval time =  9870.65 ms /   114 runs   (   86.58 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 10848.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun has completely risen over the ridge of mountains, and the center of the antenna's crosswire is at its upper edge. This takes into account the anticipated propagation delay in radio waves. The transmitting system is ready to go. The transmit button resembles a square like the space bar on a computer keyboard, but it's red. At this point, Ye Wenjie's finger is two centimeters above it.



llama_print_timings:        load time =   681.43 ms
llama_print_timings:      sample time =    43.80 ms /    97 runs   (    0.45 ms per token,  2214.46 tokens per second)
llama_print_timings: prompt eval time =   681.39 ms /   110 tokens (    6.19 ms per token,   161.43 tokens per second)
llama_print_timings:        eval time =  8326.88 ms /    96 runs   (   86.74 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  9208.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The fate of humanity lies in the delicate two fingers.



llama_print_timings:        load time =   578.23 ms
llama_print_timings:      sample time =     6.24 ms /    14 runs   (    0.45 ms per token,  2243.23 tokens per second)
llama_print_timings: prompt eval time =   578.20 ms /    44 tokens (   13.14 ms per token,    76.10 tokens per second)
llama_print_timings:        eval time =  1149.47 ms /    13 runs   (   88.42 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  1755.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Without hesitation, Ye Wenjie pressed the launch button.



llama_print_timings:        load time =   579.58 ms
llama_print_timings:      sample time =     7.12 ms /    16 runs   (    0.44 ms per token,  2247.82 tokens per second)
llama_print_timings: prompt eval time =   579.54 ms /    41 tokens (   14.14 ms per token,    70.75 tokens per second)
llama_print_timings:        eval time =  1244.17 ms /    15 runs   (   82.94 ms per token,    12.06 tokens per second)
llama_print_timings:       total time =  1855.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What are you doing?” the on-duty officer asked, yawning.



llama_print_timings:        load time =   575.00 ms
llama_print_timings:      sample time =     8.82 ms /    20 runs   (    0.44 ms per token,  2267.32 tokens per second)
llama_print_timings: prompt eval time =   574.98 ms /    41 tokens (   14.02 ms per token,    71.31 tokens per second)
llama_print_timings:        eval time =  1641.75 ms /    19 runs   (   86.41 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2255.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie smiled at him without speaking. Then she stopped the launch by pressing another yellow button and changed the direction of the antenna with a turn of the direction panel, leaving the control room to go out.



llama_print_timings:        load time =   615.94 ms
llama_print_timings:      sample time =    20.95 ms /    47 runs   (    0.45 ms per token,  2243.22 tokens per second)
llama_print_timings: prompt eval time =   615.89 ms /    70 tokens (    8.80 ms per token,   113.66 tokens per second)
llama_print_timings:        eval time =  3970.98 ms /    46 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  4681.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The signalman looks at his watch and sees that it is time for him to go home. He takes the logbook and records what Ye Wenjie had done a few seconds before launching the missile system, which seems abnormal, but after looking at a strip of tape, he notices she only activated the missile system for less than three seconds, so he throws the logbook back on the desk and yawns. Then, he put his army hat on and left.



llama_print_timings:        load time =   683.70 ms
llama_print_timings:      sample time =    47.27 ms /   106 runs   (    0.45 ms per token,  2242.67 tokens per second)
llama_print_timings: prompt eval time =   683.65 ms /   107 tokens (    6.39 ms per token,   156.51 tokens per second)
llama_print_timings:        eval time =  8905.72 ms /   105 runs   (   84.82 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  9808.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Welcome to this world. I will help you achieve it. My civilization is unable to solve its own problems and needs your strength to intervene.



llama_print_timings:        load time =   592.27 ms
llama_print_timings:      sample time =    14.76 ms /    32 runs   (    0.46 ms per token,  2168.17 tokens per second)
llama_print_timings: prompt eval time =   592.23 ms /    54 tokens (   10.97 ms per token,    91.18 tokens per second)
llama_print_timings:        eval time =  2656.88 ms /    31 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3317.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The rising sun made her faint. She didn't go far after getting up, but she fainted on the grass.



llama_print_timings:        load time =   602.83 ms
llama_print_timings:      sample time =    11.56 ms /    26 runs   (    0.44 ms per token,  2248.75 tokens per second)
llama_print_timings: prompt eval time =   602.79 ms /    56 tokens (   10.76 ms per token,    92.90 tokens per second)
llama_print_timings:        eval time =  2065.87 ms /    25 runs   (   82.63 ms per token,    12.10 tokens per second)
llama_print_timings:       total time =  2720.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After she woke up, she found herself lying in the medical room and Yanshengning was looking at her with a worried expression, just like when they were flying on an airplane long ago. The doctor told her to rest more from now on because she is pregnant.



llama_print_timings:        load time =   623.59 ms
llama_print_timings:      sample time =    27.60 ms /    61 runs   (    0.45 ms per token,  2210.47 tokens per second)
llama_print_timings: prompt eval time =   623.54 ms /    73 tokens (    8.54 ms per token,   117.07 tokens per second)
llama_print_timings:        eval time =  5073.93 ms /    60 runs   (   84.57 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  5823.39 ms


translated 39.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“Ms. Ye, I'm very impressed with your storytelling ability and the fact that you have so much knowledge about Chinese history and culture!”



llama_print_timings:        load time =   637.61 ms
llama_print_timings:      sample time =    15.31 ms /    34 runs   (    0.45 ms per token,  2221.06 tokens per second)
llama_print_timings: prompt eval time =   637.58 ms /    77 tokens (    8.28 ms per token,   120.77 tokens per second)
llama_print_timings:        eval time =  2774.59 ms /    33 runs   (   84.08 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  3486.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, how did the Three-Body Organization grow to this scale?



llama_print_timings:        load time =   582.04 ms
llama_print_timings:      sample time =     7.42 ms /    16 runs   (    0.46 ms per token,  2156.33 tokens per second)
llama_print_timings: prompt eval time =   582.00 ms /    43 tokens (   13.53 ms per token,    73.88 tokens per second)
llama_print_timings:        eval time =  1272.23 ms /    15 runs   (   84.82 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  1886.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This needs to start from the time I met Evans. But since this history is known by all of you here, let's not waste time on that. From now on, we can talk about your nanomaterials separately. Whether or not there is this opportunity will depend on you.”



llama_print_timings:        load time =   684.32 ms
llama_print_timings:      sample time =    28.26 ms /    61 runs   (    0.46 ms per token,  2158.83 tokens per second)
llama_print_timings: prompt eval time =   684.25 ms /    94 tokens (    7.28 ms per token,   137.38 tokens per second)
llama_print_timings:        eval time =  5009.48 ms /    60 runs   (   83.49 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  5823.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Why are you so afraid of nanomaterials?" Wang Mo asked.



llama_print_timings:        load time =   585.65 ms
llama_print_timings:      sample time =     7.56 ms /    17 runs   (    0.44 ms per token,  2248.38 tokens per second)
llama_print_timings: prompt eval time =   585.61 ms /    50 tokens (   11.71 ms per token,    85.38 tokens per second)
llama_print_timings:        eval time =  1354.79 ms /    16 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1974.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Because it can enable humans to escape gravity and enter space on a large scale.



llama_print_timings:        load time =   597.90 ms
llama_print_timings:      sample time =     7.98 ms /    18 runs   (    0.44 ms per token,  2255.36 tokens per second)
llama_print_timings: prompt eval time =   597.88 ms /    43 tokens (   13.90 ms per token,    71.92 tokens per second)
llama_print_timings:        eval time =  1458.25 ms /    17 runs   (   85.78 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  2091.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Space elevator?" he thought.



llama_print_timings:        load time =   573.49 ms
llama_print_timings:      sample time =     4.03 ms /     9 runs   (    0.45 ms per token,  2233.25 tokens per second)
llama_print_timings: prompt eval time =   573.45 ms /    42 tokens (   13.65 ms per token,    73.24 tokens per second)
llama_print_timings:        eval time =   737.25 ms /     8 runs   (   92.16 ms per token,    10.85 tokens per second)
llama_print_timings:       total time =  1327.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, once that material can be mass-produced, constructing a space elevator from the surface to Earth's orbit would have technological foundation. To us, it is a very small invention but to humans on Earth it has great significance. Humans on Earth can easily enter near-earth space with this technology. Building a massive defense system in space will become possible. Therefore, we must eliminate this technology.”



llama_print_timings:        load time =   694.73 ms
llama_print_timings:      sample time =    40.22 ms /    89 runs   (    0.45 ms per token,  2212.66 tokens per second)
llama_print_timings: prompt eval time =   694.69 ms /   110 tokens (    6.32 ms per token,   158.35 tokens per second)
llama_print_timings:        eval time =  7422.48 ms /    88 runs   (   84.35 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  8300.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is the end of a countdown?



llama_print_timings:        load time =   585.88 ms
llama_print_timings:      sample time =     4.56 ms /    10 runs   (    0.46 ms per token,  2191.06 tokens per second)
llama_print_timings: prompt eval time =   585.86 ms /    50 tokens (   11.72 ms per token,    85.34 tokens per second)
llama_print_timings:        eval time =   798.37 ms /     9 runs   (   88.71 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  1404.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaves Wenjie smiled slightly, “I don't know.”



llama_print_timings:        load time =   576.13 ms
llama_print_timings:      sample time =     7.84 ms /    17 runs   (    0.46 ms per token,  2168.09 tokens per second)
llama_print_timings: prompt eval time =   576.09 ms /    39 tokens (   14.77 ms per token,    67.70 tokens per second)
llama_print_timings:        eval time =  1396.81 ms /    16 runs   (   87.30 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2007.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is not meaningful! This is basic research and anyone else could have done it!" Wang Miao said nervously with a loud voice.



llama_print_timings:        load time =   596.33 ms
llama_print_timings:      sample time =    14.11 ms /    31 runs   (    0.46 ms per token,  2197.49 tokens per second)
llama_print_timings: prompt eval time =   596.30 ms /    58 tokens (   10.28 ms per token,    97.27 tokens per second)
llama_print_timings:        eval time =  2612.41 ms /    30 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  3270.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It’s not meaningful, but the most effective way to disturb a researcher's mind is to do it badly, as you said. This is an application of research rather than effective in fundamental research...”



llama_print_timings:        load time =   611.42 ms
llama_print_timings:      sample time =    20.78 ms /    46 runs   (    0.45 ms per token,  2213.77 tokens per second)
llama_print_timings: prompt eval time =   611.38 ms /    65 tokens (    9.41 ms per token,   106.32 tokens per second)
llama_print_timings:        eval time =  3808.09 ms /    45 runs   (   84.62 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  4515.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What happened to your daughter in terms of basic research?



llama_print_timings:        load time =   600.83 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2249.30 tokens per second)
llama_print_timings: prompt eval time =   600.79 ms /    41 tokens (   14.65 ms per token,    68.24 tokens per second)
llama_print_timings:        eval time =   965.73 ms /    11 runs   (   87.79 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  1589.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie remained silent for a few seconds before responding, noticing that her eyes darkened slightly without anyone noticing, but then she continued with the conversation. “After all, the ultra powerful ones do not care about what we do.”



llama_print_timings:        load time =   652.53 ms
llama_print_timings:      sample time =    24.54 ms /    55 runs   (    0.45 ms per token,  2241.06 tokens per second)
llama_print_timings: prompt eval time =   652.49 ms /    88 tokens (    7.41 ms per token,   134.87 tokens per second)
llama_print_timings:        eval time =  4608.24 ms /    54 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5373.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving the door open, Ye Wenjie's voice just fell when there was a loud bang. The doors of the dining hall were suddenly opened as well as surrounded by a group of soldiers carrying assault rifles. Wang Miao noticed they are not just the police but regular army and formed a surrounding circle quickly around the Tachanka rebels. At last, Shi Qiang came in. His open skin-tight jacket covered his gun handle, which looked like an iron hammer as it showed outside. He walked around to look carefully all around. Suddenly he ran forward and swung his hand heavily on a Tachanka rebel's head with the gun grip, the man fell down without pulling out his pistol. Several soldiers shoot up in the air, causing dust to fall on the ceiling. Someone pulls Wang Miao to run away quickly and stands behind some soldiers.



llama_print_timings:        load time =   882.59 ms
llama_print_timings:      sample time =    86.29 ms /   194 runs   (    0.44 ms per token,  2248.31 tokens per second)
llama_print_timings: prompt eval time =   882.54 ms /   223 tokens (    3.96 ms per token,   252.68 tokens per second)
llama_print_timings:        eval time = 16811.92 ms /   193 runs   (   87.11 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 18089.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Put your weapons on the table!” said Shi Qiang, pointing to a row of assault rifles behind him. “We all know you are not afraid to sacrifice yourself for duty, and we are not either! I’m not going to go further than this: The ordinary rules of police and law enforcement do not apply to you any more, nor does the warfare code among humans apply to you now that you have become our common enemies. Since you have declared war on all humanity, then there is nothing we can hold back here!”



llama_print_timings:        load time =   716.38 ms
llama_print_timings:      sample time =    51.80 ms /   115 runs   (    0.45 ms per token,  2219.86 tokens per second)
llama_print_timings: prompt eval time =   716.35 ms /   124 tokens (    5.78 ms per token,   173.10 tokens per second)
llama_print_timings:        eval time =  9800.59 ms /   114 runs   (   85.97 ms per token,    11.63 tokens per second)
llama_print_timings:       total time = 10754.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The crowd of Rebel Three-Body Army is stirred, but there's no panic. Ye Wenjie doesn't make a sound. There are three people who suddenly rush out from the crowd, which include that beautiful girl with her neck twisted by Pan Hancheng. They rush toward the moving artifact of Three-Body Art, and one person holds on tightly to a flying metal sphere.



llama_print_timings:        load time =   669.57 ms
llama_print_timings:      sample time =    40.18 ms /    89 runs   (    0.45 ms per token,  2215.25 tokens per second)
llama_print_timings: prompt eval time =   669.53 ms /    97 tokens (    6.90 ms per token,   144.88 tokens per second)
llama_print_timings:        eval time =  7626.59 ms /    88 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  8479.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The beautiful girl holds up the crystal-clear metal ball in her hands, evoking thoughts of an artistic gymnast with a slim figure. She also shows her lovely smile and speaks sweetly: "Officers, we are holding three nuclear bombs in our hands. Each has a destructive force of 1500 tons. It's not too big, we like to play around. Here is the detonator."



llama_print_timings:        load time =   679.44 ms
llama_print_timings:      sample time =    43.23 ms /    96 runs   (    0.45 ms per token,  2220.89 tokens per second)
llama_print_timings: prompt eval time =   679.37 ms /   103 tokens (    6.60 ms per token,   151.61 tokens per second)
llama_print_timings:        eval time =  8211.96 ms /    95 runs   (   86.44 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  9088.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The hall suddenly froze, the only thing moving was Shi Qiang. He put the gun back in its holster on his left armpit and composed himself calmly by patting his hands.



llama_print_timings:        load time =   612.62 ms
llama_print_timings:      sample time =    19.97 ms /    44 runs   (    0.45 ms per token,  2203.08 tokens per second)
llama_print_timings: prompt eval time =   612.58 ms /    66 tokens (    9.28 ms per token,   107.74 tokens per second)
llama_print_timings:        eval time =  3680.42 ms /    43 runs   (   85.59 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  4383.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Our requirements are simple: just make the general leave, and then we can have fun together.



llama_print_timings:        load time =   593.68 ms
llama_print_timings:      sample time =     9.40 ms /    21 runs   (    0.45 ms per token,  2233.80 tokens per second)
llama_print_timings: prompt eval time =   593.64 ms /    55 tokens (   10.79 ms per token,    92.65 tokens per second)
llama_print_timings:        eval time =  1671.66 ms /    20 runs   (   83.58 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  2307.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I am with my comrades.” Lei Wenjie said calmly.



llama_print_timings:        load time =   580.21 ms
llama_print_timings:      sample time =     9.08 ms /    20 runs   (    0.45 ms per token,  2202.40 tokens per second)
llama_print_timings: prompt eval time =   580.18 ms /    43 tokens (   13.49 ms per token,    74.11 tokens per second)
llama_print_timings:        eval time =  1616.12 ms /    19 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2237.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Can that be confirmed?" said Shi Qiang, quietly asking the officer who obviously was an expert in explosives.



llama_print_timings:        load time =   593.49 ms
llama_print_timings:      sample time =    12.16 ms /    27 runs   (    0.45 ms per token,  2219.66 tokens per second)
llama_print_timings: prompt eval time =   593.45 ms /    52 tokens (   11.41 ms per token,    87.62 tokens per second)
llama_print_timings:        eval time =  2226.65 ms /    26 runs   (   85.64 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  2875.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The officer threw a plastic bag to the three players with balls. The bag contained a spring scale. One of the players picked up the bag, took out the spring scale and put it in with the ball he was carrying. Then he lifted the ball up, shaked it for a while and finally dropped it onto the ground. The girl smiled and the explosives expert on the other side also gave a smirking look. The other player picked up his ball and weighed it as well, then dropped it on the ground like he was throwing it away. The girl laughed again. This time she picked up the plastic bag, put her ball in with it, hung the scale and the scale fell straight down.



llama_print_timings:        load time =   770.62 ms
llama_print_timings:      sample time =    66.31 ms /   147 runs   (    0.45 ms per token,  2216.93 tokens per second)
llama_print_timings: prompt eval time =   770.57 ms /   157 tokens (    4.91 ms per token,   203.75 tokens per second)
llama_print_timings:        eval time = 12696.27 ms /   146 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 13774.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The grin on the explosives expert froze, and in a soft voice he said to Shi Qian:"This is it."



llama_print_timings:        load time =   583.93 ms
llama_print_timings:      sample time =    12.81 ms /    29 runs   (    0.44 ms per token,  2264.21 tokens per second)
llama_print_timings: prompt eval time =   583.91 ms /    52 tokens (   11.23 ms per token,    89.06 tokens per second)
llama_print_timings:        eval time =  2386.22 ms /    28 runs   (   85.22 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  3027.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   568.11 ms
llama_print_timings:      sample time =     0.89 ms /     2 runs   (    0.45 ms per token,  2244.67 tokens per second)
llama_print_timings: prompt eval time =   568.07 ms /    36 tokens (   15.78 ms per token,    63.37 tokens per second)
llama_print_timings:        eval time =    79.97 ms /     1 runs   (   79.97 ms per token,    12.51 tokens per second)
llama_print_timings:       total time =   652.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“At least it contains heavy fissile material. As for the detonation system, we are not sure yet.” The explosives expert said.



llama_print_timings:        load time =   592.49 ms
llama_print_timings:      sample time =    14.27 ms /    32 runs   (    0.45 ms per token,  2242.00 tokens per second)
llama_print_timings: prompt eval time =   592.45 ms /    56 tokens (   10.58 ms per token,    94.52 tokens per second)
llama_print_timings:        eval time =  2605.47 ms /    31 runs   (   84.05 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  3262.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The soldiers focused their flashlight beams on the girl who held a nuclear bomb, with her hand gripping 1500 tons of TNT. She smiled brightly, as if receiving the claps and applause of a stage in a splendid setting.



llama_print_timings:        load time =   636.72 ms
llama_print_timings:      sample time =    25.72 ms /    57 runs   (    0.45 ms per token,  2216.52 tokens per second)
llama_print_timings: prompt eval time =   636.68 ms /    80 tokens (    7.96 ms per token,   125.65 tokens per second)
llama_print_timings:        eval time =  4697.00 ms /    56 runs   (   83.88 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  5451.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“There is only one way,” the explosives expert whispered in Sear’s ear.



llama_print_timings:        load time =   587.30 ms
llama_print_timings:      sample time =     9.56 ms /    21 runs   (    0.46 ms per token,  2197.34 tokens per second)
llama_print_timings: prompt eval time =   587.26 ms /    51 tokens (   11.51 ms per token,    86.84 tokens per second)
llama_print_timings:        eval time =  1675.88 ms /    20 runs   (   83.79 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  2306.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Won't explode?"



llama_print_timings:        load time =   573.50 ms
llama_print_timings:      sample time =     4.04 ms /     9 runs   (    0.45 ms per token,  2227.72 tokens per second)
llama_print_timings: prompt eval time =   573.47 ms /    34 tokens (   16.87 ms per token,    59.29 tokens per second)
llama_print_timings:        eval time =   685.03 ms /     8 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  1278.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Only normal explosives will detonate, but they will not have the precise axial compression on the nuclear bomb to cause a nuclear blast.



llama_print_timings:        load time =   617.94 ms
llama_print_timings:      sample time =    14.36 ms /    31 runs   (    0.46 ms per token,  2158.77 tokens per second)
llama_print_timings: prompt eval time =   617.90 ms /    65 tokens (    9.51 ms per token,   105.19 tokens per second)
llama_print_timings:        eval time =  2554.53 ms /    30 runs   (   85.15 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3236.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Dashi stared at the nuclear bomb girl and didn't speak.



llama_print_timings:        load time =   583.91 ms
llama_print_timings:      sample time =     7.14 ms /    16 runs   (    0.45 ms per token,  2241.84 tokens per second)
llama_print_timings: prompt eval time =   583.87 ms /    40 tokens (   14.60 ms per token,    68.51 tokens per second)
llama_print_timings:        eval time =  1254.47 ms /    15 runs   (   83.63 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  1869.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Do you want to assign a sniper?”


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Big Story almost unnoticeably shakes his head, “No suitable place. That small creature is so clever that she can detect ghosts with just one aim of a sniper’s long barrel.”



llama_print_timings:        load time =   620.79 ms
llama_print_timings:      sample time =    19.59 ms /    44 runs   (    0.45 ms per token,  2246.39 tokens per second)
llama_print_timings: prompt eval time =   620.75 ms /    67 tokens (    9.26 ms per token,   107.93 tokens per second)
llama_print_timings:        eval time =  3741.16 ms /    43 runs   (   87.00 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  4451.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said, and then walked straight forward, clearing a path through the crowd, to a spot in the middle of the ground.



llama_print_timings:        load time =   592.23 ms
llama_print_timings:      sample time =    13.28 ms /    28 runs   (    0.47 ms per token,  2108.75 tokens per second)
llama_print_timings: prompt eval time =   592.21 ms /    51 tokens (   11.61 ms per token,    86.12 tokens per second)
llama_print_timings:        eval time =  2325.38 ms /    27 runs   (   86.13 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  2978.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stop,” the nuclear bomb girl warned with a seductive glance, tightly pressing her thumb on the detonation switch. The nail polish gleamed under the spotlight.



llama_print_timings:        load time =   622.08 ms
llama_print_timings:      sample time =    18.35 ms /    41 runs   (    0.45 ms per token,  2234.21 tokens per second)
llama_print_timings: prompt eval time =   622.04 ms /    67 tokens (    9.28 ms per token,   107.71 tokens per second)
llama_print_timings:        eval time =  3268.50 ms /    40 runs   (   81.71 ms per token,    12.24 tokens per second)
llama_print_timings:       total time =  3977.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Take it easy, girl. There's something you are definitely going to want to know.” Standing about seven meters away from the girl, Grand Shi holds a letter out of his coat pocket. “She's been found.”



llama_print_timings:        load time =   621.64 ms
llama_print_timings:      sample time =    22.59 ms /    50 runs   (    0.45 ms per token,  2213.66 tokens per second)
llama_print_timings: prompt eval time =   621.62 ms /    67 tokens (    9.28 ms per token,   107.78 tokens per second)
llama_print_timings:        eval time =  4285.01 ms /    49 runs   (   87.45 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  5010.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The girl's eyes lit up with joy, but then they dimmed as if to convey something deeper.



llama_print_timings:        load time =   598.07 ms
llama_print_timings:      sample time =    10.16 ms /    23 runs   (    0.44 ms per token,  2262.89 tokens per second)
llama_print_timings: prompt eval time =   598.03 ms /    54 tokens (   11.07 ms per token,    90.30 tokens per second)
llama_print_timings:        eval time =  1888.69 ms /    22 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2534.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Big History quickly took two more steps forward, narrowing the distance between himself and the girl to five meters or so. The girl alertly raised her nuclear bomb in his face, using her eyes to stop him from getting any closer. But she had already been distracted by what the other man did while throwing away the fake nuclear bomb. One of them walked up to Big History quickly and reached out to take the envelope he was holding in his hand, just as Big History withdrawn his gun quickly. The girl didn't see because it happened too fast, but she saw a small bright light flashed at the man's ear before her nuclear bomb exploded.



llama_print_timings:        load time =   747.61 ms
llama_print_timings:      sample time =    63.30 ms /   140 runs   (    0.45 ms per token,  2211.83 tokens per second)
llama_print_timings: prompt eval time =   747.57 ms /   145 tokens (    5.16 ms per token,   193.96 tokens per second)
llama_print_timings:        eval time = 11872.24 ms /   139 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time = 12912.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$A loud boom echoed, and Wang Meng went blind. He was dragged out of the dining hall as yellow smoke billowed from the fire door. The noise and gunshots in the hall had become a chaotic din…Wang Meng tried to stand up, but was stopped by the explosives expert.



llama_print_timings:        load time =   685.56 ms
llama_print_timings:      sample time =    31.54 ms /    70 runs   (    0.45 ms per token,  2219.40 tokens per second)
llama_print_timings: prompt eval time =   685.53 ms /   111 tokens (    6.18 ms per token,   161.92 tokens per second)
llama_print_timings:        eval time =  5857.55 ms /    69 runs   (   84.89 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6688.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Be careful! Radioactive!



llama_print_timings:        load time =   578.17 ms
llama_print_timings:      sample time =     3.17 ms /     7 runs   (    0.45 ms per token,  2211.69 tokens per second)
llama_print_timings: prompt eval time =   578.13 ms /    38 tokens (   15.21 ms per token,    65.73 tokens per second)
llama_print_timings:        eval time =   476.11 ms /     6 runs   (   79.35 ms per token,    12.60 tokens per second)
llama_print_timings:       total time =  1069.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The chaos soon calmed down, with more than a dozen Trisolarian fighters being killed. More than two hundred others, including Ye Wenjie, were arrested. The nuclear bomb girl was decimated and blood all over her body, but the aborted nuclear bomb only killed her. The man carrying the message to Shi Jiaxiang was severely injured by the blast, due to which he received some light injuries while being protected by others in the hall. He and the rest of the people in the hall were also harmed seriously by radiation.



llama_print_timings:        load time =   692.60 ms
llama_print_timings:      sample time =    55.06 ms /   122 runs   (    0.45 ms per token,  2215.56 tokens per second)
llama_print_timings: prompt eval time =   692.56 ms /   119 tokens (    5.82 ms per token,   171.83 tokens per second)
llama_print_timings:        eval time = 10440.34 ms /   121 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time = 11384.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Du Wangmou looked through the small window of the ambulance at Da Shi who still had a wound on his head that was still bleeding, and they could only talk with each other by cellphone because Du wore protective clothing as a nurse.



llama_print_timings:        load time =   635.69 ms
llama_print_timings:      sample time =    26.00 ms /    58 runs   (    0.45 ms per token,  2230.43 tokens per second)
llama_print_timings: prompt eval time =   635.65 ms /    75 tokens (    8.48 ms per token,   117.99 tokens per second)
llama_print_timings:        eval time =  4893.75 ms /    57 runs   (   85.86 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  5650.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who is that girl?



llama_print_timings:        load time =   574.88 ms
llama_print_timings:      sample time =     2.91 ms /     6 runs   (    0.49 ms per token,  2061.86 tokens per second)
llama_print_timings: prompt eval time =   574.84 ms /    41 tokens (   14.02 ms per token,    71.32 tokens per second)
llama_print_timings:        eval time =   437.36 ms /     5 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1026.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Laughing loudly, "I don't know. Just guessing. Such a girl, she has probably never seen her mother before." After 20 years of working in this industry, I learned to judge people by looking at them.



llama_print_timings:        load time =   624.81 ms
llama_print_timings:      sample time =    23.32 ms /    51 runs   (    0.46 ms per token,  2186.87 tokens per second)
llama_print_timings: prompt eval time =   624.77 ms /    66 tokens (    9.47 ms per token,   105.64 tokens per second)
llama_print_timings:        eval time =  4124.87 ms /    50 runs   (   82.50 ms per token,    12.12 tokens per second)
llama_print_timings:       total time =  4857.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You won, there really is someone up to no good.” Wang Miao forced himself to smile, hoping that car driver Shuo can see it.



llama_print_timings:        load time =   597.49 ms
llama_print_timings:      sample time =    14.57 ms /    32 runs   (    0.46 ms per token,  2195.84 tokens per second)
llama_print_timings: prompt eval time =   597.47 ms /    57 tokens (   10.48 ms per token,    95.40 tokens per second)
llama_print_timings:        eval time =  2610.56 ms /    31 runs   (   84.21 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  3273.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Brother, you won again.” Dashai smiled and shook his head. “I never expected this to get so far out of our control! ”



llama_print_timings:        load time =   639.83 ms
llama_print_timings:      sample time =    15.18 ms /    33 runs   (    0.46 ms per token,  2173.48 tokens per second)
llama_print_timings: prompt eval time =   639.81 ms /    64 tokens (   10.00 ms per token,   100.03 tokens per second)
llama_print_timings:        eval time =  2754.28 ms /    32 runs   (   86.07 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3464.53 ms


translated 45.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

What is your name?



llama_print_timings:        load time =   572.51 ms
llama_print_timings:      sample time =     2.70 ms /     6 runs   (    0.45 ms per token,  2220.58 tokens per second)
llama_print_timings: prompt eval time =   572.49 ms /    35 tokens (   16.36 ms per token,    61.14 tokens per second)
llama_print_timings:        eval time =   424.93 ms /     5 runs   (   84.99 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1009.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

My name is Yvonne.



llama_print_timings:        load time =   573.40 ms
llama_print_timings:      sample time =     3.61 ms /     8 runs   (    0.45 ms per token,  2216.07 tokens per second)
llama_print_timings: prompt eval time =   573.36 ms /    37 tokens (   15.50 ms per token,    64.53 tokens per second)
llama_print_timings:        eval time =   579.96 ms /     7 runs   (   82.85 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  1169.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is your birthday?



llama_print_timings:        load time =   567.23 ms
llama_print_timings:      sample time =     3.17 ms /     7 runs   (    0.45 ms per token,  2209.60 tokens per second)
llama_print_timings: prompt eval time =   567.20 ms /    36 tokens (   15.76 ms per token,    63.47 tokens per second)
llama_print_timings:        eval time =   521.22 ms /     6 runs   (   86.87 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1102.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: In June 1947.



llama_print_timings:        load time =   589.03 ms
llama_print_timings:      sample time =     7.18 ms /    16 runs   (    0.45 ms per token,  2229.03 tokens per second)
llama_print_timings: prompt eval time =   588.99 ms /    41 tokens (   14.37 ms per token,    69.61 tokens per second)
llama_print_timings:        eval time =  1326.34 ms /    15 runs   (   88.42 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  1947.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What is your profession?



llama_print_timings:        load time =   576.07 ms
llama_print_timings:      sample time =     2.62 ms /     6 runs   (    0.44 ms per token,  2292.70 tokens per second)
llama_print_timings: prompt eval time =   576.05 ms /    35 tokens (   16.46 ms per token,    60.76 tokens per second)
llama_print_timings:        eval time =   430.47 ms /     5 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1018.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yu Wenjie is a professor of the Department of Physics at Tsinghua University, who retired in 2004.



llama_print_timings:        load time =   584.76 ms
llama_print_timings:      sample time =    14.09 ms /    31 runs   (    0.45 ms per token,  2200.14 tokens per second)
llama_print_timings: prompt eval time =   584.73 ms /    49 tokens (   11.93 ms per token,    83.80 tokens per second)
llama_print_timings:        eval time =  2672.72 ms /    30 runs   (   89.09 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  3321.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In view of your health condition, you can request a rest during the conversation.



llama_print_timings:        load time =   584.58 ms
llama_print_timings:      sample time =     8.13 ms /    17 runs   (    0.48 ms per token,  2090.25 tokens per second)
llama_print_timings: prompt eval time =   584.53 ms /    45 tokens (   12.99 ms per token,    76.98 tokens per second)
llama_print_timings:        eval time =  1443.88 ms /    16 runs   (   90.24 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =  2066.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie: Thanks, I don't need it.



llama_print_timings:        load time =   582.17 ms
llama_print_timings:      sample time =     7.64 ms /    17 runs   (    0.45 ms per token,  2224.26 tokens per second)
llama_print_timings: prompt eval time =   582.15 ms /    37 tokens (   15.73 ms per token,    63.56 tokens per second)
llama_print_timings:        eval time =  1352.26 ms /    16 runs   (   84.52 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  1969.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We are investigating a common criminal case today, which does not involve higher-level contents. This is not the main part of this investigation and we hope to end it as soon as possible with your cooperation.



llama_print_timings:        load time =   627.83 ms
llama_print_timings:      sample time =    19.69 ms /    44 runs   (    0.45 ms per token,  2234.41 tokens per second)
llama_print_timings: prompt eval time =   627.80 ms /    65 tokens (    9.66 ms per token,   103.54 tokens per second)
llama_print_timings:        eval time =  3625.56 ms /    43 runs   (   84.32 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  4346.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: I know what you mean. I will cooperate.



llama_print_timings:        load time =   588.09 ms
llama_print_timings:      sample time =     9.41 ms /    20 runs   (    0.47 ms per token,  2125.85 tokens per second)
llama_print_timings: prompt eval time =   588.05 ms /    42 tokens (   14.00 ms per token,    71.42 tokens per second)
llama_print_timings:        eval time =  1626.73 ms /    19 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  2258.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the investigation, it was found that you had suspicion of murder.



llama_print_timings:        load time =   592.22 ms
llama_print_timings:      sample time =     7.33 ms /    16 runs   (    0.46 ms per token,  2184.00 tokens per second)
llama_print_timings: prompt eval time =   592.18 ms /    49 tokens (   12.09 ms per token,    82.75 tokens per second)
llama_print_timings:        eval time =  1287.52 ms /    15 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1912.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie: I have killed two people.



llama_print_timings:        load time =   578.59 ms
llama_print_timings:      sample time =     6.15 ms /    14 runs   (    0.44 ms per token,  2275.68 tokens per second)
llama_print_timings: prompt eval time =   578.55 ms /    38 tokens (   15.23 ms per token,    65.68 tokens per second)
llama_print_timings:        eval time =  1120.73 ms /    13 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1726.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What about the time?



llama_print_timings:        load time =   571.68 ms
llama_print_timings:      sample time =     2.95 ms /     6 runs   (    0.49 ms per token,  2035.97 tokens per second)
llama_print_timings: prompt eval time =   571.63 ms /    35 tokens (   16.33 ms per token,    61.23 tokens per second)
llama_print_timings:        eval time =   419.30 ms /     5 runs   (   83.86 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  1004.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: October 21, 1979 at 2:30 p.m.



llama_print_timings:        load time =   579.68 ms
llama_print_timings:      sample time =    12.86 ms /    28 runs   (    0.46 ms per token,  2177.29 tokens per second)
llama_print_timings: prompt eval time =   579.64 ms /    45 tokens (   12.88 ms per token,    77.63 tokens per second)
llama_print_timings:        eval time =  2289.15 ms /    27 runs   (   84.78 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  2927.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Whose name is it?



llama_print_timings:        load time =   572.93 ms
llama_print_timings:      sample time =     3.18 ms /     7 runs   (    0.45 ms per token,  2201.26 tokens per second)
llama_print_timings: prompt eval time =   572.89 ms /    39 tokens (   14.69 ms per token,    68.08 tokens per second)
llama_print_timings:        eval time =   515.26 ms /     6 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1101.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie: The base political commissar, Lei Zhicheng and my husband, Yang Wennin.



llama_print_timings:        load time =   585.41 ms
llama_print_timings:      sample time =    14.29 ms /    29 runs   (    0.49 ms per token,  2029.82 tokens per second)
llama_print_timings: prompt eval time =   585.37 ms /    48 tokens (   12.20 ms per token,    82.00 tokens per second)
llama_print_timings:        eval time =  2402.57 ms /    28 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3053.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The motive for my crime was to make money quickly. I had no financial resources and needed to earnings quick cash.



llama_print_timings:        load time =   591.16 ms
llama_print_timings:      sample time =    12.46 ms /    27 runs   (    0.46 ms per token,  2167.63 tokens per second)
llama_print_timings: prompt eval time =   591.12 ms /    40 tokens (   14.78 ms per token,    67.67 tokens per second)
llama_print_timings:        eval time =  2211.91 ms /    26 runs   (   85.07 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2860.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: I... Can you assume that you are familiar with the background at that time?



llama_print_timings:        load time =   580.05 ms
llama_print_timings:      sample time =    11.30 ms /    24 runs   (    0.47 ms per token,  2123.71 tokens per second)
llama_print_timings: prompt eval time =   579.98 ms /    47 tokens (   12.34 ms per token,    81.04 tokens per second)
llama_print_timings:        eval time =  1999.11 ms /    23 runs   (   86.92 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  2631.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have a basic understanding, but I will ask if there are any questions.



llama_print_timings:        load time =   591.81 ms
llama_print_timings:      sample time =     7.52 ms /    17 runs   (    0.44 ms per token,  2260.94 tokens per second)
llama_print_timings: prompt eval time =   591.77 ms /    41 tokens (   14.43 ms per token,    69.28 tokens per second)
llama_print_timings:        eval time =  1387.18 ms /    16 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2012.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Zhicheng received the same information as well. He is a typical political officer in that era, and he is very sensitive to class struggle. In the past, we would call this kind of person "class struggle mentally hypersensitive". His task force was reading all the incoming and outgoing messages backstage, storing them into an encrypted file which only he can access. By reading from this file, he discovered the information about alien civilization that he received last night. In the afternoon after I'd just been told that I was pregnant, Lei Zhicheng called me to his office and showed me the screen of his terminal computer...



llama_print_timings:        load time =   912.40 ms
llama_print_timings:      sample time =    61.66 ms /   137 runs   (    0.45 ms per token,  2221.83 tokens per second)
llama_print_timings: prompt eval time =   912.35 ms /   234 tokens (    3.90 ms per token,   256.48 tokens per second)
llama_print_timings:        eval time = 11780.05 ms /   136 runs   (   86.62 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 12978.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Eight hours have passed since the first message was received. You didn't report anything, but instead you deleted or concealed the original information, is that right?”



llama_print_timings:        load time =   600.72 ms
llama_print_timings:      sample time =    16.96 ms /    36 runs   (    0.47 ms per token,  2122.52 tokens per second)
llama_print_timings: prompt eval time =   600.69 ms /    61 tokens (    9.85 ms per token,   101.55 tokens per second)
llama_print_timings:        eval time =  3010.62 ms /    35 runs   (   86.02 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3691.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I didn't answer with my head down.



llama_print_timings:        load time =   575.83 ms
llama_print_timings:      sample time =     5.12 ms /    11 runs   (    0.47 ms per token,  2146.76 tokens per second)
llama_print_timings: prompt eval time =   575.79 ms /    41 tokens (   14.04 ms per token,    71.21 tokens per second)
llama_print_timings:        eval time =   873.19 ms /    10 runs   (   87.32 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  1472.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I am clear about your next move, you intend to return the call. If it wasn't for me discovering in time, the entire human civilization would have been destroyed! Certainly not that we fear invaders from the cosmos; if something like that really did happen, the invaders of the universe will be lost in a sea of people's warfare!”



llama_print_timings:        load time =   668.20 ms
llama_print_timings:      sample time =    36.65 ms /    80 runs   (    0.46 ms per token,  2182.99 tokens per second)
llama_print_timings: prompt eval time =   668.16 ms /    99 tokens (    6.75 ms per token,   148.17 tokens per second)
llama_print_timings:        eval time =  6709.49 ms /    79 runs   (   84.93 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  7545.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I see. He still does not know that I have responded to his message. When I put the response in the transmit buffer, I am using a custom interface instead of using a standard file-based interface. This has unintentionally circumvented his surveillance program.



llama_print_timings:        load time =   602.51 ms
llama_print_timings:      sample time =    27.36 ms /    59 runs   (    0.46 ms per token,  2156.35 tokens per second)
llama_print_timings: prompt eval time =   602.49 ms /    64 tokens (    9.41 ms per token,   106.23 tokens per second)
llama_print_timings:        eval time =  4893.58 ms /    58 runs   (   84.37 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  5622.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You, Ye Wenjie, are capable of doing such a thing. You have been nursing your deep-seated hatred against the Party and people, seeking to seek revenge wherever possible. Have you considered the consequences?



llama_print_timings:        load time =   616.66 ms
llama_print_timings:      sample time =    22.24 ms /    50 runs   (    0.44 ms per token,  2248.30 tokens per second)
llama_print_timings: prompt eval time =   616.62 ms /    67 tokens (    9.20 ms per token,   108.66 tokens per second)
llama_print_timings:        eval time =  4179.60 ms /    49 runs   (   85.30 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4896.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course I know, so I nodded and Lei Zhi-cheng remained silent for a moment. But then he said something that took me by surprise.



llama_print_timings:        load time =   594.50 ms
llama_print_timings:      sample time =    15.37 ms /    35 runs   (    0.44 ms per token,  2277.61 tokens per second)
llama_print_timings: prompt eval time =   594.46 ms /    53 tokens (   11.22 ms per token,    89.16 tokens per second)
llama_print_timings:        eval time =  2821.89 ms /    34 runs   (   83.00 ms per token,    12.05 tokens per second)
llama_print_timings:       total time =  3486.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Ye Wenjie, I will never have any compassion for you. You have been a class enemy with the people. But I am an old comrade-in-arms of Yang Weining and I can't see him and his children fall completely and cannot also let his child follow it. You have your own children, aren't you?”



llama_print_timings:        load time =   654.54 ms
llama_print_timings:      sample time =    36.17 ms /    78 runs   (    0.46 ms per token,  2156.54 tokens per second)
llama_print_timings: prompt eval time =   654.50 ms /    94 tokens (    6.96 ms per token,   143.62 tokens per second)
llama_print_timings:        eval time =  6615.50 ms /    77 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  7440.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is not just a casual remark. If this had happened in that era, no matter what the relationship between my husband and the incident, he would have been greatly impacted and also his unborn children would be affected too.



llama_print_timings:        load time =   615.73 ms
llama_print_timings:      sample time =    21.76 ms /    48 runs   (    0.45 ms per token,  2205.98 tokens per second)
llama_print_timings: prompt eval time =   615.70 ms /    68 tokens (    9.05 ms per token,   110.44 tokens per second)
llama_print_timings:        eval time =  4070.01 ms /    47 runs   (   86.60 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  4785.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Zhicheng lowered his voice and said, “The matter is still only between us. Now what we need to do is minimize the impact of this situation by making sure not to mention anything about it to anyone else, including Yang Wenning. The rest of the issues are for me to handle.”



llama_print_timings:        load time =   688.64 ms
llama_print_timings:      sample time =    31.03 ms /    67 runs   (    0.46 ms per token,  2158.92 tokens per second)
llama_print_timings: prompt eval time =   688.60 ms /   109 tokens (    6.32 ms per token,   158.29 tokens per second)
llama_print_timings:        eval time =  5625.48 ms /    66 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6458.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I immediately understood Lei Zhicheng's thought: he wanted to be the first human being who discovered extraterrestrial civilization, which was a great opportunity to be remembered for all time.



llama_print_timings:        load time =   602.22 ms
llama_print_timings:      sample time =    19.27 ms /    42 runs   (    0.46 ms per token,  2179.78 tokens per second)
llama_print_timings: prompt eval time =   602.18 ms /    60 tokens (   10.04 ms per token,    99.64 tokens per second)
llama_print_timings:        eval time =  3607.02 ms /    41 runs   (   87.98 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  4297.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I promised him and then left the office, by then I had decided everything.



llama_print_timings:        load time =   580.96 ms
llama_print_timings:      sample time =     7.55 ms /    17 runs   (    0.44 ms per token,  2252.85 tokens per second)
llama_print_timings: prompt eval time =   580.91 ms /    43 tokens (   13.51 ms per token,    74.02 tokens per second)
llama_print_timings:        eval time =  1348.60 ms /    16 runs   (   84.29 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1963.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I took a small screwdriver and went into the equipment room where the front-end processing module was located, opened the cabinet, and gently loosened the bolt of the bottom ground wire. As I usually had to check the devices, no one noticed what I did. At that moment, the ground resistance rose from 0.6 ohms to 5 ohms, and the interference in the receiving system suddenly increased greatly.



llama_print_timings:        load time =   683.78 ms
llama_print_timings:      sample time =    40.70 ms /    92 runs   (    0.44 ms per token,  2260.50 tokens per second)
llama_print_timings: prompt eval time =   683.74 ms /   107 tokens (    6.39 ms per token,   156.49 tokens per second)
llama_print_timings:        eval time =  7692.92 ms /    91 runs   (   84.54 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  8563.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =  1135.83 ms
llama_print_timings:      sample time =     0.94 ms /     2 runs   (    0.47 ms per token,  2118.64 tokens per second)
llama_print_timings: prompt eval time =  1135.78 ms /   336 tokens (    3.38 ms per token,   295.83 tokens per second)
llama_print_timings:        eval time =    95.06 ms /     1 runs   (   95.06 ms per token,    10.52 tokens per second)
llama_print_timings:       total time =  1234.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The soldier with the most experience, probably to impress others, is always in front of the line when dangerous and hard work needs to be done. Even if they have a very good skill level and know all the rules and regulations, I would bet that they'll be the ones to step forward and take care of it.



llama_print_timings:        load time =   973.09 ms
llama_print_timings:      sample time =    29.94 ms /    67 runs   (    0.45 ms per token,  2237.58 tokens per second)
llama_print_timings: prompt eval time =   973.04 ms /   258 tokens (    3.77 ms per token,   265.15 tokens per second)
llama_print_timings:        eval time =  5843.03 ms /    66 runs   (   88.53 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  6954.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He arrived at this time.



llama_print_timings:        load time =   572.72 ms
llama_print_timings:      sample time =     3.08 ms /     7 runs   (    0.44 ms per token,  2271.99 tokens per second)
llama_print_timings: prompt eval time =   572.69 ms /    38 tokens (   15.07 ms per token,    66.35 tokens per second)
llama_print_timings:        eval time =   518.68 ms /     6 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  1104.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Once he had ascertained the cause of the situation, he looked down at the cliff and said that if it were to check grounding wire, they would need to dig. As he was alone there, it would be too difficult for him to do this on his own. He asked to go down to help. However, when I insisted on getting a second rope, he said it was sufficient, as it was quite thick and strong enough to support two people simultaneously. I persisted in my request, but he had already rushed down the cliff with the first rope. When I looked down, I saw that both he and Lei Zhiqing were checking the grounding wire together, with Lei Zhiqing in front of him.



llama_print_timings:        load time =   793.81 ms
llama_print_timings:      sample time =    71.18 ms /   159 runs   (    0.45 ms per token,  2233.77 tokens per second)
llama_print_timings: prompt eval time =   793.77 ms /   173 tokens (    4.59 ms per token,   217.95 tokens per second)
llama_print_timings:        eval time = 13827.74 ms /   158 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 14947.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's never too late to try again. I pulled out the steel saw and saw through the rope.



llama_print_timings:        load time =   588.59 ms
llama_print_timings:      sample time =    10.71 ms /    24 runs   (    0.45 ms per token,  2240.69 tokens per second)
llama_print_timings: prompt eval time =   588.56 ms /    47 tokens (   12.52 ms per token,    79.86 tokens per second)
llama_print_timings:        eval time =  1957.18 ms /    23 runs   (   85.09 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2593.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I ask a question, and you don't record the response. How did you feel at that moment?



llama_print_timings:        load time =   585.36 ms
llama_print_timings:      sample time =    10.52 ms /    23 runs   (    0.46 ms per token,  2186.10 tokens per second)
llama_print_timings: prompt eval time =   585.32 ms /    45 tokens (   13.01 ms per token,    76.88 tokens per second)
llama_print_timings:        eval time =  1907.51 ms /    22 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2543.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁: I did it calmly and without feeling, finding an ideal career for which to devote myself. The price, whether my own or that of others, doesn't matter to me either. I also know that the entire human race will pay a historical sacrifice unparalleled in this regard. This is just the beginning.



llama_print_timings:        load time =   660.28 ms
llama_print_timings:      sample time =    32.20 ms /    71 runs   (    0.45 ms per token,  2204.97 tokens per second)
llama_print_timings: prompt eval time =   660.24 ms /    91 tokens (    7.26 ms per token,   137.83 tokens per second)
llama_print_timings:        eval time =  6030.84 ms /    70 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  6840.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Alright, go on.



llama_print_timings:        load time =   576.57 ms
llama_print_timings:      sample time =     3.13 ms /     7 runs   (    0.45 ms per token,  2237.85 tokens per second)
llama_print_timings: prompt eval time =   576.54 ms /    38 tokens (   15.17 ms per token,    65.91 tokens per second)
llama_print_timings:        eval time =   502.85 ms /     6 runs   (   83.81 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1092.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: I heard two short cries and then the sound of someone's body flying off a cliff into rocks. After that, I saw a stream at the bottom of the cliff change to red... There is nothing else I can tell you about this.



llama_print_timings:        load time =   647.26 ms
llama_print_timings:      sample time =    27.47 ms /    60 runs   (    0.46 ms per token,  2183.96 tokens per second)
llama_print_timings: prompt eval time =   647.23 ms /    82 tokens (    7.89 ms per token,   126.69 tokens per second)
llama_print_timings:        eval time =  5034.37 ms /    59 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5809.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

OK, this is the record. Please read it carefully and sign if everything is correct.



llama_print_timings:        load time =   628.40 ms
llama_print_timings:      sample time =     8.47 ms /    19 runs   (    0.45 ms per token,  2242.95 tokens per second)
llama_print_timings: prompt eval time =   628.36 ms /    52 tokens (   12.08 ms per token,    82.76 tokens per second)
llama_print_timings:        eval time =  1502.81 ms /    18 runs   (   83.49 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  2172.56 ms


translated 42.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After Lei Zhi-cheng and Yang Huining died in the accident, their superior handled it as a common work accident. Everyone at the base gave her a good rating on their relationship with Jiang Wenjie. No one suspected her.



llama_print_timings:        load time =   628.92 ms
llama_print_timings:      sample time =    25.82 ms /    56 runs   (    0.46 ms per token,  2169.28 tokens per second)
llama_print_timings: prompt eval time =   628.89 ms /    71 tokens (    8.86 ms per token,   112.90 tokens per second)
llama_print_timings:        eval time =  4687.60 ms /    55 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  5432.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The new base commander was soon installed, and life resumed its previous calm. Lei Wenjie's unborn child grew day by day, but she also felt the changes outside world.



llama_print_timings:        load time =   642.78 ms
llama_print_timings:      sample time =    19.09 ms /    42 runs   (    0.45 ms per token,  2199.53 tokens per second)
llama_print_timings: prompt eval time =   642.74 ms /    65 tokens (    9.89 ms per token,   101.13 tokens per second)
llama_print_timings:        eval time =  3417.13 ms /    41 runs   (   83.34 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  4145.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Today, the squad leader named Ye Wenjie was called to go on a patrol trip. When she entered the gatehouse, she was surprised: Here were three children, two male and female, about 15-6 years old, who were wearing old cotton coats and dog fur caps. She was told that they were from Qijiatan village. They wanted to ask some study questions. Ye Wenjie thought in silence, how dare these kids come to the radar peak which is an absolute military restricted zone? The sentry told her that they had just received orders for a reduction of the base's security level and other local people were allowed to go up to the radar peak as long as they did not enter the base. Yesterday, several local farmers came here to send food.



llama_print_timings:        load time =   850.97 ms
llama_print_timings:      sample time =    78.38 ms /   175 runs   (    0.45 ms per token,  2232.66 tokens per second)
llama_print_timings: prompt eval time =   850.93 ms /   199 tokens (    4.28 ms per token,   233.86 tokens per second)
llama_print_timings:        eval time = 15348.71 ms /   174 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 16554.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A child takes out a book that has been used so much the pages have turned yellow and crinkled like leather. He asks an intermediate-school physics problem with a heavy accent: According to the textbook, falling objects accelerate all the time but eventually they always fall at a constant speed, which he thinks is strange. They pondered this for several nights without understanding it.



llama_print_timings:        load time =   673.68 ms
llama_print_timings:      sample time =    37.68 ms /    82 runs   (    0.46 ms per token,  2175.99 tokens per second)
llama_print_timings: prompt eval time =   673.65 ms /   101 tokens (    6.67 ms per token,   149.93 tokens per second)
llama_print_timings:        eval time =  6962.48 ms /    81 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  7806.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You guys run so far just to ask this?” asked Ye Wenjie.



llama_print_timings:        load time =   579.97 ms
llama_print_timings:      sample time =     8.59 ms /    19 runs   (    0.45 ms per token,  2211.62 tokens per second)
llama_print_timings: prompt eval time =   579.93 ms /    46 tokens (   12.61 ms per token,    79.32 tokens per second)
llama_print_timings:        eval time =  1573.08 ms /    18 runs   (   87.39 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  2192.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You don't know, Miss Ye? The national college entrance exam is held outside now!” the girl said excitedly.



llama_print_timings:        load time =   591.65 ms
llama_print_timings:      sample time =    12.15 ms /    27 runs   (    0.45 ms per token,  2222.22 tokens per second)
llama_print_timings: prompt eval time =   591.60 ms /    52 tokens (   11.38 ms per token,    87.90 tokens per second)
llama_print_timings:        eval time =  2185.90 ms /    26 runs   (   84.07 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  2831.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What about the college entrance exam?”



llama_print_timings:        load time =   571.68 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2250.56 tokens per second)
llama_print_timings: prompt eval time =   571.65 ms /    33 tokens (   17.32 ms per token,    57.73 tokens per second)
llama_print_timings:        eval time =   688.51 ms /     8 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1277.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's just about going to college! Who studies better, who gets a higher score will go! Two years ago, don't you know that?!"



llama_print_timings:        load time =   613.05 ms
llama_print_timings:      sample time =    15.59 ms /    35 runs   (    0.45 ms per token,  2244.45 tokens per second)
llama_print_timings: prompt eval time =   613.01 ms /    57 tokens (   10.75 ms per token,    92.98 tokens per second)
llama_print_timings:        eval time =  2838.33 ms /    34 runs   (   83.48 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  3520.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Not recommended?”



llama_print_timings:        load time =   575.78 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   575.74 ms /    35 tokens (   16.45 ms per token,    60.79 tokens per second)
llama_print_timings:        eval time =   345.48 ms /     4 runs   (   86.37 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =   931.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, anyone can take it, even the kids in the ‘black five classes’ in our village can do it!”



llama_print_timings:        load time =   581.64 ms
llama_print_timings:      sample time =    12.56 ms /    27 runs   (    0.47 ms per token,  2150.20 tokens per second)
llama_print_timings: prompt eval time =   581.61 ms /    50 tokens (   11.63 ms per token,    85.97 tokens per second)
llama_print_timings:        eval time =  2160.82 ms /    26 runs   (   83.11 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  2800.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie stared at the scene for a while before responding to their questions. She explained that it was due to air resistance and gravity balancing, and promised them to visit her if they encountered any difficulties in their studies.



llama_print_timings:        load time =   657.67 ms
llama_print_timings:      sample time =    22.74 ms /    51 runs   (    0.45 ms per token,  2242.65 tokens per second)
llama_print_timings: prompt eval time =   657.63 ms /    93 tokens (    7.07 ms per token,   141.42 tokens per second)
llama_print_timings:        eval time =  4167.62 ms /    50 runs   (   83.35 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  4931.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After three days, another seven children came to find Ye Wenjie. Besides the three who had visited before, there were four other children from farming villages. The thirteenth child is fifteen in number, accompanied by a middle school teacher from the town, because of shortage of manpower, he teaches physics, mathematics and chemistry. He came to seek help from Ye Wenjie about teaching-related issues. This person has reached the age of half century and full of wind and frost on his face before coming to see Ye Wenjie. After leaving the gate house, Ye Wenhao heard him say to students in front: "Children, this is a real scientist!". From then on, children came to ask questions about science periodically, sometimes they come in large numbers, and after getting permission from base security personnel responsible for safety, they were brought to the dining room where Ye Wenjie supported a blackboard and gave lectures.



llama_print_timings:        load time =   896.93 ms
llama_print_timings:      sample time =    92.50 ms /   205 runs   (    0.45 ms per token,  2216.22 tokens per second)
llama_print_timings: prompt eval time =   896.86 ms /   227 tokens (    3.95 ms per token,   253.11 tokens per second)
llama_print_timings:        eval time = 17944.49 ms /   204 runs   (   87.96 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 19265.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1976年除夕夜，叶文浩下班后天已经完全黑了。基地的人大部分已在三天假期中下了山，到处都是一片寂静。叶文洁回到自己的房间，这里曾是她和杨卫宁的家，现在空荡荡的，只有腹中的孩子陪伴着她。外面的寒夜中，大兴安岭的寒风呼啸着，风中隐隐传来远处齐家屯的鞭炮声。孤寂像一只巨掌压着叶文洁，她觉得自己被越压越小，最后缩到这个世界看不到的一个小角落去了……就在这时，响起了敲门声，开门后叶文洁首先看到哨兵，他身后有几支松明子的火光在寒风中摇曳着。举火把的是一群孩子，他们脸冻得通红，狗皮帽上有冰碴子，进屋后带着一股寒气。有两个男孩子冻得最厉害，他们穿得很单薄，却用两件厚棉衣裹着一个什么东西抱在怀里，打开来是一个大瓷盆，里面的酸菜猪肉馅饺子还冒着热汽。



llama_print_timings:        load time =  1034.76 ms
llama_print_timings:      sample time =   110.00 ms /   246 runs   (    0.45 ms per token,  2236.42 tokens per second)
llama_print_timings: prompt eval time =  1034.71 ms /   279 tokens (    3.71 ms per token,   269.64 tokens per second)
llama_print_timings:        eval time = 21758.03 ms /   245 runs   (   88.81 ms per token,    11.26 tokens per second)
llama_print_timings:       total time = 23298.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That year, after sending out signals to the sun for eight months, Ye Wenjie was about to give birth. Due to her weak body and fetal position not right, the Base Health Clinic had no ability to deliver a baby and they took Ye Wenjie to the nearest town hospital.



llama_print_timings:        load time =   631.12 ms
llama_print_timings:      sample time =    29.03 ms /    65 runs   (    0.45 ms per token,  2239.29 tokens per second)
llama_print_timings: prompt eval time =   631.09 ms /    77 tokens (    8.20 ms per token,   122.01 tokens per second)
llama_print_timings:        eval time =  5440.21 ms /    64 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  6204.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a ghostly passage of Yue Wenjie, who encountered difficulties in giving birth. After intense pain and heavy bleeding, she was knocked unconscious. She thought the sun seemed to be spinning slowly around her with its hot rays while in coma, and thought that this may be her eternal destination and punishment for the super betrayal. In great fear of herself, but also for her baby — the child still inside her belly? Or is she suffering from eternal pain together? After a long time, the suns seem to have backed away and shrunk into crystals, bringing relief and coolness around her. Finally, she awakened.



llama_print_timings:        load time =   856.76 ms
llama_print_timings:      sample time =    67.08 ms /   149 runs   (    0.45 ms per token,  2221.20 tokens per second)
llama_print_timings: prompt eval time =   856.71 ms /   206 tokens (    4.16 ms per token,   240.45 tokens per second)
llama_print_timings:        eval time = 12903.13 ms /   148 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 14068.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She heard a cry and turned her face to the baby, seeing its pink little cheeks and wet eyes.



llama_print_timings:        load time =   602.31 ms
llama_print_timings:      sample time =    11.06 ms /    24 runs   (    0.46 ms per token,  2170.77 tokens per second)
llama_print_timings: prompt eval time =   602.27 ms /    61 tokens (    9.87 ms per token,   101.28 tokens per second)
llama_print_timings:        eval time =  1982.15 ms /    23 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2634.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The doctor told Ye Wenjie that she had lost more than two thousand milliliters of blood. Several dozen villagers from Qijia Tun came to give her blood, many of whom he had helped as a teacher, but even more of them were just the parents of her pupils who had heard about her from their children and were willing to donate blood on hearing that she was dying.



llama_print_timings:        load time =   653.02 ms
llama_print_timings:      sample time =    38.19 ms /    84 runs   (    0.45 ms per token,  2199.41 tokens per second)
llama_print_timings: prompt eval time =   652.98 ms /    90 tokens (    7.26 ms per token,   137.83 tokens per second)
llama_print_timings:        eval time =  7129.06 ms /    83 runs   (   85.89 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  7958.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The days after the birth of her baby became a problem for Ye Wenjie, who couldn't take care of her child herself because she was weak. She also had no family and nobody to look after her. At this time, an elderly couple from Qijiatun came looking for their base leader, saying they could bring Ye Wenjie back home with her baby to be taken care of. The man used to hunt wild game and gather herbs before there were fewer and fewer trees in the vicinity, so he started farming instead. But people still called him Qiji Shouer. They had two sons and two daughters, their older son was a soldier in another city, while their younger son had got married and lived with them. Ye Wenjie wasn't yet vindicated at that time, so the base leader felt very hard-pressed, but only this method could be used, so they had to have them take Ye Wenjie back from the hospital in Qiji.



llama_print_timings:        load time =   825.62 ms
llama_print_timings:      sample time =    94.53 ms /   213 runs   (    0.44 ms per token,  2253.30 tokens per second)
llama_print_timings: prompt eval time =   825.58 ms /   195 tokens (    4.23 ms per token,   236.20 tokens per second)
llama_print_timings:        eval time = 18555.77 ms /   212 runs   (   87.53 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 19819.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁 spent more than half a year in this big Xing'anling farmhouse, where she had no milk after giving birth. During this time, Yang Dong fed on Yi Laiyu's milk growing up. The most of the milk for Yang Dong was provided by Qili Huoxueer's niece, Fengliang, who lived in the farm and ate mostly brown rice every day while milk-feeding two children. Other postpartum women from the village also came to feed her, they liked her very much and said that she had the spirit of her mother. Slowly, Qili Huoxueer's house became the gathering place for all the villagers who wanted to talk about woman issues. Yelu Fanjie herself started to feel curious about women's issues after spending more time with these women in the village. She often watched several villagers holding up copper-made smoke pipe and whispering to each other in a peaceful sunny day while surrounded by playing children and lazy dog, all of them bathed in warmth and shimmering silver light like their sweat hair. One day, one of them 


llama_print_timings:        load time =  1153.81 ms
llama_print_timings:      sample time =   130.69 ms /   290 runs   (    0.45 ms per token,  2219.04 tokens per second)
llama_print_timings: prompt eval time =  1153.76 ms /   338 tokens (    3.41 ms per token,   292.96 tokens per second)
llama_print_timings:        eval time = 25757.53 ms /   289 runs   (   89.13 ms per token,    11.22 tokens per second)
llama_print_timings:       total time = 27517.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She didn't take much notice of it at first, but when she saw how those husky men treated their wives and talked dirty in front of her, she started to treasure this kind of respect. Sometimes, they brought wild rabbits or chickens from the hunting ground for Zhi Xiang, as well as hand-made and unique figurines made by Yang Dong that he brought back from his trips.



llama_print_timings:        load time =   821.92 ms
llama_print_timings:      sample time =    41.57 ms /    90 runs   (    0.46 ms per token,  2164.87 tokens per second)
llama_print_timings: prompt eval time =   821.89 ms /   177 tokens (    4.64 ms per token,   215.36 tokens per second)
llama_print_timings:        eval time =  7778.12 ms /    89 runs   (   87.39 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  8789.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In Ye Wenjie's memory, these days don't seem to belong to her. They are like a fragment from someone else's life drifting in. Like a feather flying away from her own life. These memories are like European classic oil paintings. They aren't Chinese paintings or oil paintings. Chinese paintings are too white, but the lives at Qijiazhuang are not empty at all. They are full of warm colors: thick and soft靰秘草 fires, cigarette smoke from bronze smoke stoves, thick brown rice, 65-degree honey wine… These things flow away in calmness and peace, just like a stream by the side of the village.



llama_print_timings:        load time =   817.07 ms
llama_print_timings:      sample time =    69.46 ms /   155 runs   (    0.45 ms per token,  2231.53 tokens per second)
llama_print_timings: prompt eval time =   817.02 ms /   186 tokens (    4.39 ms per token,   227.66 tokens per second)
llama_print_timings:        eval time = 13413.98 ms /   154 runs   (   87.10 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 14552.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The most memorable for Ye Wenjie were those nights. Qi Xilian's son went to sell mushrooms in the city, and she was living with Dafeng in this time when Chiji屯 did not have electricity yet. At that time, Ye Wenjie was reading books and Dafeng was doing needlework. Ye Wenjie would always inadvertently bring her eyes and hair close to the oil lamp, and it often caused her hair to snap at a low height with an annoyed sound. They would then look up at each other and laugh at this situation. Dafeng had never gone out before as she was very good at seeing through the light of coal fire. The two under 6-month old children were sleeping in her bed, their breaths could be heard clearly and they looked so adorable that it made Ye Wenjie cry with tears. She did not know who she felt closer to in such a situation —her husband or someone else.



llama_print_timings:        load time =   977.87 ms
llama_print_timings:      sample time =    96.94 ms /   215 runs   (    0.45 ms per token,  2217.91 tokens per second)
llama_print_timings: prompt eval time =   977.83 ms /   261 tokens (    3.75 ms per token,   266.92 tokens per second)
llama_print_timings:        eval time = 18937.66 ms /   214 runs   (   88.49 ms per token,    11.30 tokens per second)
llama_print_timings:       total time = 20360.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She put down her book and saw Dafeng place his shoes on his knee, staring blankly at the lamp flames. She found that Yewanjie was looking at herself and Dafeng asked suddenly:



llama_print_timings:        load time =   659.66 ms
llama_print_timings:      sample time =    22.23 ms /    49 runs   (    0.45 ms per token,  2203.73 tokens per second)
llama_print_timings: prompt eval time =   659.61 ms /    68 tokens (    9.70 ms per token,   103.09 tokens per second)
llama_print_timings:        eval time =  4098.73 ms /    48 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  4858.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Sister, why don't the stars fall from the sky?”



llama_print_timings:        load time =   576.60 ms
llama_print_timings:      sample time =     7.32 ms /    16 runs   (    0.46 ms per token,  2185.20 tokens per second)
llama_print_timings: prompt eval time =   576.56 ms /    44 tokens (   13.10 ms per token,    76.31 tokens per second)
llama_print_timings:        eval time =  1290.62 ms /    15 runs   (   86.04 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1899.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaves looked carefully at Dafeng, the oil lamp is a brilliant painter, created this solemn-colored but also with bright classical painting. Dafeng covered a cotton jacket, and red belly button and rounded arms showed out. The oilstove highlighted her figure, applying the most striking color to her best parts and hiding the rest in darkness. The background is also blurred, everything submerged in a soft dark light, but still noticeable is that on the ground there is a red halo, and this halo is not from the oilstove, it's the warm air blowing out of the window that carves beautiful ice patterns on it.



llama_print_timings:        load time =   795.93 ms
llama_print_timings:      sample time =    64.72 ms /   144 runs   (    0.45 ms per token,  2224.90 tokens per second)
llama_print_timings: prompt eval time =   795.88 ms /   176 tokens (    4.52 ms per token,   221.14 tokens per second)
llama_print_timings:        eval time = 12429.01 ms /   143 runs   (   86.92 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 13520.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Do you fear that the stars will fall down?" Ye Wenjie lightly asked.



llama_print_timings:        load time =   577.03 ms
llama_print_timings:      sample time =     9.34 ms /    21 runs   (    0.44 ms per token,  2248.88 tokens per second)
llama_print_timings: prompt eval time =   577.00 ms /    45 tokens (   12.82 ms per token,    77.99 tokens per second)
llama_print_timings:        eval time =  1716.58 ms /    20 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2334.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What are you afraid of? They're so small.""



llama_print_timings:        load time =   578.25 ms
llama_print_timings:      sample time =     6.87 ms /    15 runs   (    0.46 ms per token,  2184.04 tokens per second)
llama_print_timings: prompt eval time =   578.21 ms /    46 tokens (   12.57 ms per token,    79.56 tokens per second)
llama_print_timings:        eval time =  1202.26 ms /    14 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1811.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But she finally didn't answer him as a astronomer, just saying “They are very far away and can never fall.”



llama_print_timings:        load time =   636.78 ms
llama_print_timings:      sample time =    12.02 ms /    27 runs   (    0.45 ms per token,  2246.26 tokens per second)
llama_print_timings: prompt eval time =   636.74 ms /    60 tokens (   10.61 ms per token,    94.23 tokens per second)
llama_print_timings:        eval time =  2204.56 ms /    26 runs   (   84.79 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  2894.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She closed her eyes and imagined the entire world around the little house. It was a dark ball of space that surrounded the little house, with stars in the darkness and all kinds of mountains like Hualin Mountain on its surface. The universe is flat, stretching out in every direction and always with an edge. In this toy box-like cosmos, she felt particularly comfortable…. Suddenly, these images began to become her dreams.



llama_print_timings:        load time =   921.81 ms
llama_print_timings:      sample time =    41.86 ms /    94 runs   (    0.45 ms per token,  2245.85 tokens per second)
llama_print_timings: prompt eval time =   921.77 ms /   237 tokens (    3.89 ms per token,   257.12 tokens per second)
llama_print_timings:        eval time =  8008.51 ms /    93 runs   (   86.11 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  9119.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In this remote village deep in the Great Xing'an Mountain, something inside Lei Wenjie was melting. A small lake of clear water had melted on her frozen mind like snow under the sunlight.



llama_print_timings:        load time =   622.58 ms
llama_print_timings:      sample time =    22.45 ms /    49 runs   (    0.46 ms per token,  2182.63 tokens per second)
llama_print_timings: prompt eval time =   622.54 ms /    67 tokens (    9.29 ms per token,   107.62 tokens per second)
llama_print_timings:        eval time =  4105.20 ms /    48 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4828.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Years passed after Yang Dong was born in Hongba base. Then, Ye Wenjie received the notice that her and her father's cases had been completely cleared up; shortly thereafter she got the letter from her alma mater saying she could go back to work immediately. With it was also a sizable sum of money which was the wages of her father after implementing the new policy. In the meeting, Ye Wenjie finally became a member of CCP again.



llama_print_timings:        load time =   685.54 ms
llama_print_timings:      sample time =    45.75 ms /   102 runs   (    0.45 ms per token,  2229.41 tokens per second)
llama_print_timings: prompt eval time =   685.50 ms /   117 tokens (    5.86 ms per token,   170.68 tokens per second)
llama_print_timings:        eval time =  8780.35 ms /   101 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  9672.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie remained calm and did not show any excitement as she faced all of this. She didn't care about the outside world, preferring to stay at the quiet Red Bank base instead but for the sake of her child's education, she still left the place where she had been expecting to spend her entire life and returned to her alma mater.



llama_print_timings:        load time =   643.37 ms
llama_print_timings:      sample time =    34.97 ms /    77 runs   (    0.45 ms per token,  2202.01 tokens per second)
llama_print_timings: prompt eval time =   643.34 ms /    83 tokens (    7.75 ms per token,   129.01 tokens per second)
llama_print_timings:        eval time =  6427.73 ms /    76 runs   (   84.58 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  7228.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As she walked out of the deep mountains, Yue Wenjie was filled with the feeling of spring. "The Cultural Revolution has finally ended," she thought, and everything was in the process of revival. Though the bloody period had just come to an end, people's wounds were still being licked, but the brighter future seemed to be revealed in everyone's eyes. Children with their own children appeared at universities, Chinese classic books were sold out in bookstores, technology innovations became a sensation in factories, and scientific research was covered by a halo of grace. Science and technology were now the only keys to unlock the future, and people approached science like little schoolchildren, their efforts were sincere but practical. At the first national scientific conference, Guo Moyi announced that the spring of science had arrived.



llama_print_timings:        load time =   807.36 ms
llama_print_timings:      sample time =    80.34 ms /   179 runs   (    0.45 ms per token,  2227.92 tokens per second)
llama_print_timings: prompt eval time =   807.32 ms /   180 tokens (    4.49 ms per token,   222.96 tokens per second)
llama_print_timings:        eval time = 15735.64 ms /   178 runs   (   88.40 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 16911.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Is this the end of crazy? Science and reason start to return? Lei Wenjie asked himself more than once.



llama_print_timings:        load time =   601.67 ms
llama_print_timings:      sample time =    12.43 ms /    28 runs   (    0.44 ms per token,  2251.89 tokens per second)
llama_print_timings: prompt eval time =   601.63 ms /    51 tokens (   11.80 ms per token,    84.77 tokens per second)
llama_print_timings:        eval time =  2339.92 ms /    27 runs   (   86.66 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  2999.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Until she left the Red Bank base, Ye Wenjie never received another message from the alien world. She knew that it would take at least eight years for her to receive a response to her message. But since she left the base, she no longer had the means to receive messages from the outside world.



llama_print_timings:        load time =   633.27 ms
llama_print_timings:      sample time =    29.21 ms /    66 runs   (    0.44 ms per token,  2259.11 tokens per second)
llama_print_timings: prompt eval time =   633.22 ms /    80 tokens (    7.92 ms per token,   126.34 tokens per second)
llama_print_timings:        eval time =  5505.72 ms /    65 runs   (   84.70 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  6270.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It was such a significant matter, but she accomplished it alone in silence. This produced an unreal feeling as time passed and the illusion grew stronger: The matter felt like a dream or a fantasy. Could the sun magnify radio waves? Did she actually transmit human civilization to the universe using the sun as an antenna? Was there a bloody dawn when she betrayed humanity? And did she really receive an alien signal during that time?



llama_print_timings:        load time =   736.57 ms
llama_print_timings:      sample time =    42.47 ms /    95 runs   (    0.45 ms per token,  2237.14 tokens per second)
llama_print_timings: prompt eval time =   736.54 ms /   134 tokens (    5.50 ms per token,   181.93 tokens per second)
llama_print_timings:        eval time =  8234.80 ms /    94 runs   (   87.60 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  9160.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She tried to numb herself in her work so that she could forget about the past. Strange, almost instinctive self-defense mechanisms allowed her not to remember about the contact with extra terrestrial civilizations, and days passed in a state of calm.



llama_print_timings:        load time =   654.11 ms
llama_print_timings:      sample time =    25.49 ms /    55 runs   (    0.46 ms per token,  2157.71 tokens per second)
llama_print_timings: prompt eval time =   654.06 ms /    82 tokens (    7.98 ms per token,   125.37 tokens per second)
llama_print_timings:        eval time =  4600.00 ms /    54 runs   (   85.19 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  5370.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After returning to her alma mater for a while, Ye Wenjie took Winter with her mother Shaolin back to her hometown. Soon after her husband's tragic death, Shaolin quickly recovered from mental instability and continued in political arena by praising slogans to survive. She managed to receive some compensation when she was finally allowed to resume teaching later during the "Revival Campaign." But at this time, however, Shaolin made a surprising decision, entering into a marriage with a high official who had been persecuted by the education authorities.



llama_print_timings:        load time =   732.52 ms
llama_print_timings:      sample time =    56.84 ms /   128 runs   (    0.44 ms per token,  2251.86 tokens per second)
llama_print_timings: prompt eval time =   732.48 ms /   136 tokens (    5.39 ms per token,   185.67 tokens per second)
llama_print_timings:        eval time = 11022.87 ms /   127 runs   (   86.79 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 12013.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The “bullpen” labor camp. In this, Xiaoling thought deeply and was clear that the social chaos could not last for a long time, and currently these young upstart maoists had no experience in governing a country. Sooner or later, those old party members who were persecuted and relegated to the sidelines would soon regain their positions of power and govern the country again. The later facts proved Xiaoling's gambling was correct, as “the Cultural Revolution” did not end yet, her husband was partially restored his posts, and he quickly became a vice minister after the 1978 Third Plenary Session of the Eleventh Party Congress. With this background, Xiaoling rapidly rose to power when intellectuals were once again given their due respects in society. After becoming a member of the Academic Committee of the Chinese Academy of Sciences, she showed her great wisdom by shifting out of the school where she was deputy dean and quickly rose to vice president of another famous university.



llama_print_timings:        load time =   805.73 ms
llama_print_timings:      sample time =   101.81 ms /   225 runs   (    0.45 ms per token,  2209.98 tokens per second)
llama_print_timings: prompt eval time =   805.69 ms /   176 tokens (    4.58 ms per token,   218.45 tokens per second)
llama_print_timings:        eval time = 19633.43 ms /   224 runs   (   87.65 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 20909.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The mother that Yue Wenjie met was a well-maintained knowledgeable image, with no traces of suffering in the past. She warmly welcomed Yue Wenjie and her mother, expressing concern over how she had managed during those years and marveling at Winter's intelligence and cute. Her husband made dinner for them and they ate it together without any awkwardness, but Yue Wenjie could sense their distance. They avoided sensitive topics, leaving out anything about her father.



llama_print_timings:        load time =   760.91 ms
llama_print_timings:      sample time =    52.95 ms /   113 runs   (    0.47 ms per token,  2134.17 tokens per second)
llama_print_timings: prompt eval time =   760.88 ms /   140 tokens (    5.43 ms per token,   184.00 tokens per second)
llama_print_timings:        eval time =  9917.11 ms /   112 runs   (   88.55 ms per token,    11.29 tokens per second)
llama_print_timings:       total time = 10918.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as they left, the deputy minister's face turned cold and his smile disappeared like a mask he had worn for too long. He said to Yao Wenjie.



llama_print_timings:        load time =   657.19 ms
llama_print_timings:      sample time =    18.02 ms /    40 runs   (    0.45 ms per token,  2220.00 tokens per second)
llama_print_timings: prompt eval time =   657.15 ms /    92 tokens (    7.14 ms per token,   140.00 tokens per second)
llama_print_timings:        eval time =  3480.65 ms /    39 runs   (   89.25 ms per token,    11.20 tokens per second)
llama_print_timings:       total time =  4219.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In the future, welcome you and your children to come here often. But I have a request. Please don't insist on resolving historical issues. As for your father's death, he had no blame in it, he was also a victim. Rather, your father is quite self-righteous, and blindly pursued his own beliefs to the exclusion of his family responsibilities. This caused you and your mother so much pain.”



llama_print_timings:        load time =   677.01 ms
llama_print_timings:      sample time =    45.55 ms /   100 runs   (    0.46 ms per token,  2195.34 tokens per second)
llama_print_timings: prompt eval time =   676.96 ms /   102 tokens (    6.64 ms per token,   150.67 tokens per second)
llama_print_timings:        eval time =  8458.14 ms /    99 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  9345.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You don't have the right to talk about my father,” said Ye Wenjie, angrily. “It is a matter between me and my mother.”



llama_print_timings:        load time =   597.34 ms
llama_print_timings:      sample time =    16.27 ms /    37 runs   (    0.44 ms per token,  2273.43 tokens per second)
llama_print_timings: prompt eval time =   597.31 ms /    56 tokens (   10.67 ms per token,    93.75 tokens per second)
llama_print_timings:        eval time =  3059.42 ms /    36 runs   (   84.98 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3730.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It has nothing to do with me,” said Mr. Shaw, nodding in a cold manner. “I am merely passing on your mother’s message.”



llama_print_timings:        load time =   592.76 ms
llama_print_timings:      sample time =    14.94 ms /    34 runs   (    0.44 ms per token,  2275.62 tokens per second)
llama_print_timings: prompt eval time =   592.73 ms /    56 tokens (   10.58 ms per token,    94.48 tokens per second)
llama_print_timings:        eval time =  2826.36 ms /    33 runs   (   85.65 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3487.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaf looked back and saw that in the small bungalow with a yard, Xiaolin was peeking at them through the corner of her window. Leaf hugged her little son Winter quietly and left without saying anything. She didn't return anymore.



llama_print_timings:        load time =   620.78 ms
llama_print_timings:      sample time =    26.47 ms /    58 runs   (    0.46 ms per token,  2191.49 tokens per second)
llama_print_timings: prompt eval time =   620.73 ms /    77 tokens (    8.06 ms per token,   124.05 tokens per second)
llama_print_timings:        eval time =  4908.20 ms /    57 runs   (   86.11 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  5648.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After searching for many times, Ye Wenjie finally found three of the four Red Guards who killed her father. They are all former countryside workers and currently they don't have jobs. After learning their addresses, she sent them a simple letter asking them to meet at the playground where her father was killed.



llama_print_timings:        load time =   656.16 ms
llama_print_timings:      sample time =    31.48 ms /    69 runs   (    0.46 ms per token,  2191.73 tokens per second)
llama_print_timings: prompt eval time =   656.12 ms /    94 tokens (    6.98 ms per token,   143.27 tokens per second)
llama_print_timings:        eval time =  5813.60 ms /    68 runs   (   85.49 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  6611.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As far as she was concerned, there was no revenge plan. On that sunrise of a new day at Red Beach base, she had repaid the whole human race's enmity. All she wanted to hear were their confessions and see even an ounce of humanity's return.



llama_print_timings:        load time =   634.15 ms
llama_print_timings:      sample time =    29.91 ms /    66 runs   (    0.45 ms per token,  2206.25 tokens per second)
llama_print_timings: prompt eval time =   634.10 ms /    78 tokens (    8.13 ms per token,   123.01 tokens per second)
llama_print_timings:        eval time =  5570.62 ms /    65 runs   (   85.70 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  6344.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After class on this day, Ye Wenhua waited for them in the playground. She didn't hold much hope that they would come, but at the appointed time, three Red Guards arrived.



llama_print_timings:        load time =   621.38 ms
llama_print_timings:      sample time =    19.62 ms /    44 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   621.35 ms /    67 tokens (    9.27 ms per token,   107.83 tokens per second)
llama_print_timings:        eval time =  3683.80 ms /    43 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  4394.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaving far behind, she recognized them all right away. They were wearing now very rare green army uniforms. Walking close to them, she discovered this was possibly those clothes they wore during the critique meeting. The uniforms have been laundered many times and there are obvious patches on it. But besides this, these women in their 30s looked totally different from the red youthful soldiers from back then. From the first glance, Yao Wenjie felt that the differences between these three women were greater than those among the nine soldiers. One of them is very thin and her clothes are too big for her now; another one became stout and her hair was yellow with a few white strands; the other wore a sleeveless dress, but her wristband is empty, and she walks in an uneasy way.



llama_print_timings:        load time =   969.03 ms
llama_print_timings:      sample time =    79.81 ms /   178 runs   (    0.45 ms per token,  2230.27 tokens per second)
llama_print_timings: prompt eval time =   968.98 ms /   260 tokens (    3.73 ms per token,   268.32 tokens per second)
llama_print_timings:        eval time = 15763.68 ms /   177 runs   (   89.06 ms per token,    11.23 tokens per second)
llama_print_timings:       total time = 17098.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three former Red Guards approached Ye Wenxi, standing in a row like they had years before. They were trying to restore the pride that had long been forgotten; but what was once a demonic force within them seemed to have evaporated over time. The woman's face bore a ratlike expression and the other two had only vacant stares as their eyes looked up at the sky.



llama_print_timings:        load time =   687.52 ms
llama_print_timings:      sample time =    36.51 ms /    83 runs   (    0.44 ms per token,  2273.54 tokens per second)
llama_print_timings: prompt eval time =   687.48 ms /   111 tokens (    6.19 ms per token,   161.46 tokens per second)
llama_print_timings:        eval time =  7111.38 ms /    82 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  7963.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You think we can't come?” the burly woman challenged.



llama_print_timings:        load time =   578.76 ms
llama_print_timings:      sample time =     8.19 ms /    17 runs   (    0.48 ms per token,  2076.21 tokens per second)
llama_print_timings: prompt eval time =   578.71 ms /    44 tokens (   13.15 ms per token,    76.03 tokens per second)
llama_print_timings:        eval time =  1395.20 ms /    16 runs   (   87.20 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2011.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I think we should meet up and settle the past once and for all.



llama_print_timings:        load time =   582.32 ms
llama_print_timings:      sample time =     7.66 ms /    17 runs   (    0.45 ms per token,  2219.90 tokens per second)
llama_print_timings: prompt eval time =   582.28 ms /    51 tokens (   11.42 ms per token,    87.59 tokens per second)
llama_print_timings:        eval time =  1375.83 ms /    16 runs   (   85.99 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  1992.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's closed, ” the small woman said with a sharp voice. She sounded constantly terrified for no reason she could identify.



llama_print_timings:        load time =   605.35 ms
llama_print_timings:      sample time =    14.25 ms /    31 runs   (    0.46 ms per token,  2176.05 tokens per second)
llama_print_timings: prompt eval time =   605.33 ms /    64 tokens (    9.46 ms per token,   105.73 tokens per second)
llama_print_timings:        eval time =  2623.52 ms /    30 runs   (   87.45 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  3293.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I mean mentally.



llama_print_timings:        load time =   576.58 ms
llama_print_timings:      sample time =     2.67 ms /     6 runs   (    0.45 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   576.55 ms /    37 tokens (   15.58 ms per token,    64.17 tokens per second)
llama_print_timings:        eval time =   437.98 ms /     5 runs   (   87.60 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  1026.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Are you ready to listen to us?” the muscular woman asked.



llama_print_timings:        load time =   578.92 ms
llama_print_timings:      sample time =     7.30 ms /    16 runs   (    0.46 ms per token,  2192.68 tokens per second)
llama_print_timings: prompt eval time =   578.88 ms /    45 tokens (   12.86 ms per token,    77.74 tokens per second)
llama_print_timings:        eval time =  1283.51 ms /    15 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1895.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Don't you feel guilty?"



llama_print_timings:        load time =   572.29 ms
llama_print_timings:      sample time =     4.07 ms /     9 runs   (    0.45 ms per token,  2209.67 tokens per second)
llama_print_timings: prompt eval time =   572.27 ms /    37 tokens (   15.47 ms per token,    64.66 tokens per second)
llama_print_timings:        eval time =   696.06 ms /     8 runs   (   87.01 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1285.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Who is to blame?” the one-armed woman said, still silent.



llama_print_timings:        load time =   579.39 ms
llama_print_timings:      sample time =     8.45 ms /    19 runs   (    0.44 ms per token,  2247.72 tokens per second)
llama_print_timings: prompt eval time =   579.35 ms /    46 tokens (   12.59 ms per token,    79.40 tokens per second)
llama_print_timings:        eval time =  1501.29 ms /    18 runs   (   83.40 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  2118.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The stout woman said, “We four have all signed the large banner at Changzheng Xiaozao. From large rallies to large reviews to large martial combat, from ‘One Division’ to ‘Two Divisions’ and ‘Three Divisions’ to ‘Networking’, ‘Western Clique’, ‘Eastern Clique’, to ‘New Beijing Society’, ‘Red Banner’ and ‘Oriental Red’, we have gone through the entire journey of Red Guards from birth to death.”



llama_print_timings:        load time =   730.88 ms
llama_print_timings:      sample time =    51.89 ms /   115 runs   (    0.45 ms per token,  2216.06 tokens per second)
llama_print_timings: prompt eval time =   730.83 ms /   130 tokens (    5.62 ms per token,   177.88 tokens per second)
llama_print_timings:        eval time =  9733.30 ms /   114 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time = 10704.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The one-armed woman went on to say, “In the 100-day campus battle at Tsinghua University, there were four of us. Two of us fought in ‘Yinggangshan’ and two of us fought in ‘Siwuqier4.14’. I once rushed forward with a grenade toward the home-made tank of ‘Yinggangshan’. My hand was crushed under the wheels of the tank, my flesh and bones lay mixed with blood on the ground – and this happened when I was only 15 years old.”



llama_print_timings:        load time =   688.27 ms
llama_print_timings:      sample time =    58.28 ms /   130 runs   (    0.45 ms per token,  2230.69 tokens per second)
llama_print_timings: prompt eval time =   688.22 ms /   113 tokens (    6.09 ms per token,   164.19 tokens per second)
llama_print_timings:        eval time = 11032.90 ms /   129 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time = 11989.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Afterwards, we went out into the vast world!” said the woman with her arms raised. “We had four of us: two went to Shaanxi, and the other two to Henan province which is one of the most remote and poor places in China. When we initially arrived there, we were very enthusiastic; but day by day, after completing a full day of farming work, we were so exhausted that we couldn’t even wash our clothes; lying in the leaking grass house, listening to the distant howling wolf sounds, slowly returning from dreams back to reality. We stayed in poor places for a long time and felt so quiet and silent.”



llama_print_timings:        load time =   745.62 ms
llama_print_timings:      sample time =    64.59 ms /   145 runs   (    0.45 ms per token,  2245.03 tokens per second)
llama_print_timings: prompt eval time =   745.58 ms /   138 tokens (    5.40 ms per token,   185.09 tokens per second)
llama_print_timings:        eval time = 12510.96 ms /   144 runs   (   86.88 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 13557.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The one-armed woman looked blankly at the ground and said, "Sometimes, when walking in a remote mountain path, I encounter former comrades of Red Guards or enemies from the battle between factions. When we see each other, both are dressed raggedly and covered with dirt and cow dung. We can't help but look at each other in silence."



llama_print_timings:        load time =   661.61 ms
llama_print_timings:      sample time =    37.93 ms /    82 runs   (    0.46 ms per token,  2161.93 tokens per second)
llama_print_timings: prompt eval time =   661.58 ms /    88 tokens (    7.52 ms per token,   133.02 tokens per second)
llama_print_timings:        eval time =  6856.32 ms /    81 runs   (   84.65 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  7696.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Tang Hongjing, ” the stout woman glared at Yue Wenjie. “The girl who caused most harm to your father by beating him in the head with a belt drowned in the Huaihe River.” Her eyes watered and she covered her face and began to cry.



llama_print_timings:        load time =   792.24 ms
llama_print_timings:      sample time =    30.40 ms /    68 runs   (    0.45 ms per token,  2236.99 tokens per second)
llama_print_timings: prompt eval time =   792.20 ms /   170 tokens (    4.66 ms per token,   214.59 tokens per second)
llama_print_timings:        eval time =  5707.29 ms /    67 runs   (   85.18 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6638.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"After returning to the city, I still couldn't make it any better. Even as a migrant worker, I had no job, no money and no prospects whatsoever, so anything I owned was gone too."



llama_print_timings:        load time =   651.94 ms
llama_print_timings:      sample time =    21.83 ms /    48 runs   (    0.45 ms per token,  2198.51 tokens per second)
llama_print_timings: prompt eval time =   651.90 ms /    86 tokens (    7.58 ms per token,   131.92 tokens per second)
llama_print_timings:        eval time =  4017.32 ms /    47 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4770.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaves Wenjie completely speechless.



llama_print_timings:        load time =   567.46 ms
llama_print_timings:      sample time =     4.93 ms /    11 runs   (    0.45 ms per token,  2231.69 tokens per second)
llama_print_timings: prompt eval time =   567.42 ms /    36 tokens (   15.76 ms per token,    63.45 tokens per second)
llama_print_timings:        eval time =   843.61 ms /    10 runs   (   84.36 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  1433.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The one-armed woman said, “I recently watched a movie called ‘Maple’. Have you seen it? At the end of the film, an adult and a child stand before the graves of killed Red Guards in a battle. The child asked the adult if they were soldiers, but the adult denied it. Then the child asked if they were enemies, but again the adult denied it. Finally, the child asked what they were if not soldiers or enemies. And the adult replied: ‘They are history’.”



llama_print_timings:        load time =   687.01 ms
llama_print_timings:      sample time =    49.02 ms /   109 runs   (    0.45 ms per token,  2223.76 tokens per second)
llama_print_timings: prompt eval time =   686.97 ms /   112 tokens (    6.13 ms per token,   163.04 tokens per second)
llama_print_timings:        eval time =  9179.26 ms /   108 runs   (   84.99 ms per token,    11.77 tokens per second)
llama_print_timings:       total time = 10093.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Hear that? History! It's history! " The woman excitedly waved a large hand towards Ye Wenjie. "We are in the new era now, who still remember us? We'll soon be forgotten by everyone!"



llama_print_timings:        load time =   659.19 ms
llama_print_timings:      sample time =    23.77 ms /    53 runs   (    0.45 ms per token,  2229.61 tokens per second)
llama_print_timings: prompt eval time =   659.16 ms /    82 tokens (    8.04 ms per token,   124.40 tokens per second)
llama_print_timings:        eval time =  4373.71 ms /    52 runs   (   84.11 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  5141.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three old Red Guards left and left Ye Wenjie alone on the playground. More than ten years ago that rainy afternoon, she stood here just like this lonely girl watching her dead father pass away. That last word spoken by the Red Guard in her head echoed again…



llama_print_timings:        load time =   643.45 ms
llama_print_timings:      sample time =    27.78 ms /    61 runs   (    0.46 ms per token,  2196.14 tokens per second)
llama_print_timings: prompt eval time =   643.40 ms /    85 tokens (    7.57 ms per token,   132.11 tokens per second)
llama_print_timings:        eval time =  5217.75 ms /    60 runs   (   86.96 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  5988.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The setting sun casts a long shadow on the frail figure of Ye Wenjie. In her mind, any hope for society has already evaporated like dew under the scorching sun and her doubt about having betrayed superiors is also gone, firmly determined to bring higher forms of civilization into the human world.



llama_print_timings:        load time =   678.83 ms
llama_print_timings:      sample time =    32.06 ms /    71 runs   (    0.45 ms per token,  2214.74 tokens per second)
llama_print_timings: prompt eval time =   678.79 ms /   106 tokens (    6.40 ms per token,   156.16 tokens per second)
llama_print_timings:        eval time =  5993.80 ms /    70 runs   (   85.63 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  6820.13 ms


translated 68.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Years after returning to college, Ye Wenjie took on a major project: the design of a large radio astronomy observatory. Soon, she went out with her team for the location of the base. At first, the consideration was purely technical; unlike traditional astronomical observations, radio astronomy has relatively low requirements for atmospheric quality and visible light interference, but it must be avoided non-visible frequency electromagnetic interference. They ran to many places and finally chose a site in an isolated mountainous area in the northwest where the electromagnetic environment was best.



llama_print_timings:        load time =   709.62 ms
llama_print_timings:      sample time =    55.39 ms /   123 runs   (    0.45 ms per token,  2220.62 tokens per second)
llama_print_timings: prompt eval time =   709.57 ms /   126 tokens (    5.63 ms per token,   177.57 tokens per second)
llama_print_timings:        eval time = 10411.14 ms /   122 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time = 11367.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They stopped in a village with mostly cave dwellings. The production head of the village identified Ye Wenjie as someone who knew foreign languages, so he asked her if she knew which language—if she did, he would send someone to bring up Whitelaw Young and have him consult about things with the team leader.



llama_print_timings:        load time =   742.76 ms
llama_print_timings:      sample time =    30.41 ms /    67 runs   (    0.45 ms per token,  2203.30 tokens per second)
llama_print_timings: prompt eval time =   742.69 ms /   142 tokens (    5.23 ms per token,   191.20 tokens per second)
llama_print_timings:        eval time =  5710.19 ms /    66 runs   (   86.52 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  6585.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Bai Quren? Ye Wenjie was very surprised.



llama_print_timings:        load time =   587.89 ms
llama_print_timings:      sample time =     6.92 ms /    15 runs   (    0.46 ms per token,  2166.38 tokens per second)
llama_print_timings: prompt eval time =   587.85 ms /    41 tokens (   14.34 ms per token,    69.75 tokens per second)
llama_print_timings:        eval time =  1236.42 ms /    14 runs   (   88.32 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  1854.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We don't know the name of that foreigner, we just call him by his nickname.



llama_print_timings:        load time =   590.72 ms
llama_print_timings:      sample time =     9.83 ms /    22 runs   (    0.45 ms per token,  2238.73 tokens per second)
llama_print_timings: prompt eval time =   590.68 ms /    43 tokens (   13.74 ms per token,    72.80 tokens per second)
llama_print_timings:        eval time =  1779.40 ms /    21 runs   (   84.73 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  2413.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Did he see you for a check-up?



llama_print_timings:        load time =   577.20 ms
llama_print_timings:      sample time =     4.93 ms /    11 runs   (    0.45 ms per token,  2232.14 tokens per second)
llama_print_timings: prompt eval time =   577.17 ms /    36 tokens (   16.03 ms per token,    62.37 tokens per second)
llama_print_timings:        eval time =   811.97 ms /    10 runs   (   81.20 ms per token,    12.32 tokens per second)
llama_print_timings:       total time =  1411.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, he is planting trees on the hill behind his house. He has been doing it for almost three years.



llama_print_timings:        load time =   576.04 ms
llama_print_timings:      sample time =    11.46 ms /    25 runs   (    0.46 ms per token,  2181.12 tokens per second)
llama_print_timings: prompt eval time =   576.00 ms /    47 tokens (   12.26 ms per token,    81.60 tokens per second)
llama_print_timings:        eval time =  2125.92 ms /    24 runs   (   88.58 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  2754.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What is the point of planting trees?”



llama_print_timings:        load time =   579.31 ms
llama_print_timings:      sample time =     4.88 ms /    11 runs   (    0.44 ms per token,  2252.71 tokens per second)
llama_print_timings: prompt eval time =   579.29 ms /    36 tokens (   16.09 ms per token,    62.15 tokens per second)
llama_print_timings:        eval time =   880.69 ms /    10 runs   (   88.07 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  1481.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said it was for raising birds, a species that, according to his version, was on the verge of extinction.



llama_print_timings:        load time =   578.34 ms
llama_print_timings:      sample time =    12.15 ms /    27 runs   (    0.45 ms per token,  2221.49 tokens per second)
llama_print_timings: prompt eval time =   578.31 ms /    46 tokens (   12.57 ms per token,    79.54 tokens per second)
llama_print_timings:        eval time =  2183.24 ms /    26 runs   (   83.97 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2817.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Lei Wenjie and her colleagues were very surprised. They asked the captain to show them. Along the mountain path, they reached a small hilltop and the captain pointed something to them. Ye Wenjie was surprised — she saw that between the arid hillsides there was a slope covered with trees and vegetation, as if an unexpected splash of bright green had landed on an old yellow canvas.



llama_print_timings:        load time =   674.49 ms
llama_print_timings:      sample time =    40.36 ms /    91 runs   (    0.44 ms per token,  2254.99 tokens per second)
llama_print_timings: prompt eval time =   674.45 ms /   106 tokens (    6.36 ms per token,   157.17 tokens per second)
llama_print_timings:        eval time =  7655.57 ms /    90 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  8509.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Leaves soon met the foreigner, and in addition to his golden hair and blue eyes, he looked little different from a local farmer. He seemed not very interested in visitors; his name is Michael Evans. He didn't say his nationality, but his accent was obvious that of an American. He lived in two simple mud houses. The room was sparsely furnished with hoes and shovels as well as saws used to trim tree branches. There are also a lot of books on biology, noticing one book written by Peter Singer called 《Animal Liberation》. All modern devices visible were only an old radio and a telescope; the batteries in it have run out, but there is still water for them to drink since he has just one cup.



llama_print_timings:        load time =   990.69 ms
llama_print_timings:      sample time =    74.45 ms /   166 runs   (    0.45 ms per token,  2229.68 tokens per second)
llama_print_timings: prompt eval time =   990.64 ms /   268 tokens (    3.70 ms per token,   270.53 tokens per second)
llama_print_timings:        eval time = 14523.43 ms /   165 runs   (   88.02 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 15853.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I’m here to help you with your English, ” I answered. “Is there anything specific you want me to teach you?”



llama_print_timings:        load time =   590.67 ms
llama_print_timings:      sample time =    13.73 ms /    29 runs   (    0.47 ms per token,  2112.32 tokens per second)
llama_print_timings: prompt eval time =   590.64 ms /    44 tokens (   13.42 ms per token,    74.50 tokens per second)
llama_print_timings:        eval time =  2455.88 ms /    28 runs   (   87.71 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  3109.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Savior.



llama_print_timings:        load time =   576.16 ms
llama_print_timings:      sample time =     2.19 ms /     5 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   576.12 ms /    36 tokens (   16.00 ms per token,    62.49 tokens per second)
llama_print_timings:        eval time =   318.56 ms /     4 runs   (   79.64 ms per token,    12.56 tokens per second)
llama_print_timings:       total time =   904.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is it possible to save the locals here? The ecological environment here is definitely not good.”



llama_print_timings:        load time =   591.79 ms
llama_print_timings:      sample time =    10.30 ms /    23 runs   (    0.45 ms per token,  2232.14 tokens per second)
llama_print_timings: prompt eval time =   591.77 ms /    44 tokens (   13.45 ms per token,    74.35 tokens per second)
llama_print_timings:        eval time =  1846.63 ms /    22 runs   (   83.94 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2483.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How can you all be like this?” Evans suddenly broke out in a fury, “Is it only humans who deserve to be saviors and saving other species is such an insignificant matter? Who gave humanity this arrogant status? No, humans do not need salvation. In fact, they are doing better than they deserve.”



llama_print_timings:        load time =   665.87 ms
llama_print_timings:      sample time =    33.72 ms /    75 runs   (    0.45 ms per token,  2223.94 tokens per second)
llama_print_timings: prompt eval time =   665.83 ms /    96 tokens (    6.94 ms per token,   144.18 tokens per second)
llama_print_timings:        eval time =  6237.43 ms /    74 runs   (   84.29 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  7055.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I heard you're saving a bird?



llama_print_timings:        load time =   588.19 ms
llama_print_timings:      sample time =     4.52 ms /    10 runs   (    0.45 ms per token,  2214.35 tokens per second)
llama_print_timings: prompt eval time =   588.15 ms /    37 tokens (   15.90 ms per token,    62.91 tokens per second)
llama_print_timings:        eval time =   768.48 ms /     9 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1377.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, it is a kind of swallow, the one from the northeast, the name is very long. When they are returning to their habitats along an ancient path in spring every year, they can only come here as their destination. But now this area's vegetation has been disappearing, and they have found no more places for nesting or living. When I first saw them, the population of this species was reduced to less than 10 thousand individuals, so it will probably die out within five years if we don't take appropriate measures. Now, my forest provides a habitat for some swallows and their population has started to increase again. Of course, I have to plant more trees to expand the area of this garden.”



llama_print_timings:        load time =   774.80 ms
llama_print_timings:      sample time =    69.12 ms /   155 runs   (    0.45 ms per token,  2242.57 tokens per second)
llama_print_timings: prompt eval time =   774.75 ms /   161 tokens (    4.81 ms per token,   207.81 tokens per second)
llama_print_timings:        eval time = 13220.57 ms /   154 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time = 14311.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ivens made them hold the telescope and, following his instructions, they looked for some time before they saw a few black-grey birds hiding among the trees.



llama_print_timings:        load time =   627.34 ms
llama_print_timings:      sample time =    15.86 ms /    35 runs   (    0.45 ms per token,  2206.67 tokens per second)
llama_print_timings: prompt eval time =   627.30 ms /    68 tokens (    9.22 ms per token,   108.40 tokens per second)
llama_print_timings:        eval time =  3019.13 ms /    34 runs   (   88.80 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  3721.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's not very remarkable, is it? They are certainly no match for the adorable giant panda. But every day in this world, numerous species are dying out of sight and we never get to know about them.



llama_print_timings:        load time =   612.05 ms
llama_print_timings:      sample time =    21.56 ms /    48 runs   (    0.45 ms per token,  2226.35 tokens per second)
llama_print_timings: prompt eval time =   612.02 ms /    62 tokens (    9.87 ms per token,   101.30 tokens per second)
llama_print_timings:        eval time =  4053.06 ms /    47 runs   (   86.24 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  4761.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“These trees were all planted by you?”



llama_print_timings:        load time =   578.68 ms
llama_print_timings:      sample time =     5.75 ms /    12 runs   (    0.48 ms per token,  2087.68 tokens per second)
llama_print_timings: prompt eval time =   578.64 ms /    40 tokens (   14.47 ms per token,    69.13 tokens per second)
llama_print_timings:        eval time =   959.50 ms /    11 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1562.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Mostly, I started out hiring locals but soon I ran out of money. It's expensive to plant trees and draw water… But you know what? My father is a millionaire; he owns an oil company, and he no longer pays me. And I don't want his money.”



llama_print_timings:        load time =   651.83 ms
llama_print_timings:      sample time =    29.68 ms /    67 runs   (    0.44 ms per token,  2257.26 tokens per second)
llama_print_timings: prompt eval time =   651.78 ms /    91 tokens (    7.16 ms per token,   139.62 tokens per second)
llama_print_timings:        eval time =  5557.39 ms /    66 runs   (   84.20 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  6344.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =  1050.33 ms
llama_print_timings:      sample time =     1.00 ms /     2 runs   (    0.50 ms per token,  2000.00 tokens per second)
llama_print_timings: prompt eval time =  1050.28 ms /   298 tokens (    3.52 ms per token,   283.73 tokens per second)
llama_print_timings:        eval time =    88.47 ms /     1 runs   (   88.47 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  1143.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

My father came up behind me without my knowing. He asked if I remembered the small dinosaur skeleton he had bought for my grandfather's estate. Of course, I remembered it; it was discovered in an oil prospecting project and was very complete. My father then told me about how dinosaurs became extinct. That night I woke up from a bad dream saying that myself had been back to the terrible era. Now he was telling me something he didn't tell me before, but which I would have liked to hear: "If you were alive in the Cretaceous period, it would be your luck, because at that time our age is even more horrible than the Cretaceous period; nowadays, the rate of species extinction on Earth is much faster than during the Cretaceous period, and so this is indeed a great extinction era. So, what you saw were nothing but a small event in this process; we can have no petrels without oil, and without oil, I can't promise that you would ever be allowed to drive my Ferrari at fifteen years old; now you wa


llama_print_timings:        load time =  1107.85 ms
llama_print_timings:      sample time =   135.80 ms /   302 runs   (    0.45 ms per token,  2223.81 tokens per second)
llama_print_timings: prompt eval time =  1107.80 ms /   324 tokens (    3.42 ms per token,   292.47 tokens per second)
llama_print_timings:        eval time = 27104.10 ms /   301 runs   (   90.05 ms per token,    11.11 tokens per second)
llama_print_timings:       total time = 28838.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“My father had great expectations of me, but in the end he didn't make me become who he wanted. In the days to come, those dying birds in the back of my eyes watched over my life, determined by my fate. On my thirteenth birthday, my father asked me about my plans for the future and I replied that there was nothing special I wanted to do besides being a savior. My dreams weren't particularly grand; all I wanted to do was save an endangered species, whether it was a ugly bird or an ugly butterfly or perhaps a little bug with no color at all. Later on I went to study biology and became a bird and entomologist. In my opinion, my dreams were great, saving a species of bird or insect is the same as saving human beings; life is equal among them, which is the basic tenet of species communism.”



llama_print_timings:        load time =   816.00 ms
llama_print_timings:      sample time =    86.56 ms /   193 runs   (    0.45 ms per token,  2229.59 tokens per second)
llama_print_timings: prompt eval time =   815.97 ms /   174 tokens (    4.69 ms per token,   213.24 tokens per second)
llama_print_timings:        eval time = 16854.56 ms /   192 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time = 18068.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What?” Ye Wenjie didn't hear clearly that word.



llama_print_timings:        load time =   619.54 ms
llama_print_timings:      sample time =     7.52 ms /    17 runs   (    0.44 ms per token,  2260.34 tokens per second)
llama_print_timings: prompt eval time =   619.50 ms /    43 tokens (   14.41 ms per token,    69.41 tokens per second)
llama_print_timings:        eval time =  1378.91 ms /    16 runs   (   86.18 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2032.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I created a theory called ‘species communism’, which is also an ideology. The core concept of this belief is that all species are equal, born alike.”



llama_print_timings:        load time =   599.66 ms
llama_print_timings:      sample time =    16.99 ms /    37 runs   (    0.46 ms per token,  2178.26 tokens per second)
llama_print_timings: prompt eval time =   599.62 ms /    61 tokens (    9.83 ms per token,   101.73 tokens per second)
llama_print_timings:        eval time =  3126.78 ms /    36 runs   (   86.85 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  3803.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is just an ideal, not realistic. Crops are also species and human beings must survive to realize equality.



llama_print_timings:        load time =   584.74 ms
llama_print_timings:      sample time =    12.24 ms /    27 runs   (    0.45 ms per token,  2205.34 tokens per second)
llama_print_timings: prompt eval time =   584.70 ms /    53 tokens (   11.03 ms per token,    90.64 tokens per second)
llama_print_timings:        eval time =  2154.30 ms /    26 runs   (   82.86 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =  2795.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the distant past, feudal lords also had this kind of idea. Don't forget about technology; one day humans will be able to synthesize foods, but before that we should be well-prepared in terms of thoughts and theory. After all, communism is the natural continuation of the Declaration of the Rights of Man and of the Citizen, which took two hundred years for France's Revolution to happen. It turns out that humanity's selfishness and hypocrisy are still unresolved even after two centuries have passed.



llama_print_timings:        load time =   684.02 ms
llama_print_timings:      sample time =    54.25 ms /   121 runs   (    0.45 ms per token,  2230.50 tokens per second)
llama_print_timings: prompt eval time =   683.99 ms /   109 tokens (    6.28 ms per token,   159.36 tokens per second)
llama_print_timings:        eval time = 10301.90 ms /   120 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time = 11231.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How long do you plan to stay here?”



llama_print_timings:        load time =   568.85 ms
llama_print_timings:      sample time =     4.85 ms /    11 runs   (    0.44 ms per token,  2268.04 tokens per second)
llama_print_timings: prompt eval time =   568.81 ms /    38 tokens (   14.97 ms per token,    66.81 tokens per second)
llama_print_timings:        eval time =   861.89 ms /    10 runs   (   86.19 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1452.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't know, being a savior is worth it. That feels beautiful and magical. Of course I don't expect you guys.”



llama_print_timings:        load time =   600.46 ms
llama_print_timings:      sample time =    15.33 ms /    33 runs   (    0.46 ms per token,  2152.92 tokens per second)
llama_print_timings: prompt eval time =   600.42 ms /    57 tokens (   10.53 ms per token,    94.93 tokens per second)
llama_print_timings:        eval time =  2717.41 ms /    32 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3386.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ivens said this and suddenly became bored, saying he was going to work and picked up a shovel and saw. Goodbye when he looked at Ye Wenjie one more time, there is something special on her body.



llama_print_timings:        load time =   631.87 ms
llama_print_timings:      sample time =    22.68 ms /    50 runs   (    0.45 ms per token,  2204.20 tokens per second)
llama_print_timings: prompt eval time =   631.83 ms /    78 tokens (    8.10 ms per token,   123.45 tokens per second)
llama_print_timings:        eval time =  4186.72 ms /    49 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4923.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On the way back, one of Ye Wenjie's colleagues recited a sentence from Memories of Dr. Yiukung-Lan: "It used to be possible to live like this." He remarked.



llama_print_timings:        load time =   663.21 ms
llama_print_timings:      sample time =    21.70 ms /    49 runs   (    0.44 ms per token,  2257.65 tokens per second)
llama_print_timings: prompt eval time =   663.16 ms /    84 tokens (    7.89 ms per token,   126.67 tokens per second)
llama_print_timings:        eval time =  4122.30 ms /    48 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  4882.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Others also expressed their agreement and feelings, and it seemed that Ye Wenjie was self-talking. “If there were more people like him, even just a few, the world would be totally different.” Nobody understood what she really meant.



llama_print_timings:        load time =   631.85 ms
llama_print_timings:      sample time =    24.48 ms /    55 runs   (    0.45 ms per token,  2246.64 tokens per second)
llama_print_timings: prompt eval time =   631.83 ms /    78 tokens (    8.10 ms per token,   123.45 tokens per second)
llama_print_timings:        eval time =  4625.05 ms /    54 runs   (   85.65 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  5368.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The project leader turned the conversation to work, “I think this location isn't good enough. The boss wouldn't approve it either.”



llama_print_timings:        load time =   586.43 ms
llama_print_timings:      sample time =    14.15 ms /    32 runs   (    0.44 ms per token,  2260.84 tokens per second)
llama_print_timings: prompt eval time =   586.38 ms /    51 tokens (   11.50 ms per token,    86.97 tokens per second)
llama_print_timings:        eval time =  2638.58 ms /    31 runs   (   85.12 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  3288.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why? Our four proposed locations have the best electromagnetic environment.



llama_print_timings:        load time =   578.22 ms
llama_print_timings:      sample time =     6.83 ms /    15 runs   (    0.46 ms per token,  2194.59 tokens per second)
llama_print_timings: prompt eval time =   578.19 ms /    48 tokens (   12.05 ms per token,    83.02 tokens per second)
llama_print_timings:        eval time =  1239.01 ms /    14 runs   (   88.50 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  1847.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The human environment?同志, don't just think about the technical aspects, look at this poor place. Know how? The poor mountain and dirty water will make people wild. Maybe there will be big trouble with the relationship between here and the base in future. It has not been approved because of the reasons like the person in charge mentioned.



llama_print_timings:        load time =   660.24 ms
llama_print_timings:      sample time =    32.16 ms /    70 runs   (    0.46 ms per token,  2176.82 tokens per second)
llama_print_timings: prompt eval time =   660.20 ms /    92 tokens (    7.18 ms per token,   139.35 tokens per second)
llama_print_timings:        eval time =  5921.14 ms /    69 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  6727.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. 2. 3.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Three years have passed since Ye Wenjie has heard nothing from Ivanis.



llama_print_timings:        load time =   575.38 ms
llama_print_timings:      sample time =     8.63 ms /    18 runs   (    0.48 ms per token,  2086.71 tokens per second)
llama_print_timings: prompt eval time =   575.34 ms /    41 tokens (   14.03 ms per token,    71.26 tokens per second)
llama_print_timings:        eval time =  1450.01 ms /    17 runs   (   85.29 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2064.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This spring, Ye Wenjie suddenly received a postcard from Ivan, which simply read: 



llama_print_timings:        load time =   604.23 ms
llama_print_timings:      sample time =     9.76 ms /    22 runs   (    0.44 ms per token,  2253.41 tokens per second)
llama_print_timings: prompt eval time =   604.19 ms /    59 tokens (   10.24 ms per token,    97.65 tokens per second)
llama_print_timings:        eval time =  1768.78 ms /    21 runs   (   84.23 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  2417.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Come here and tell me how to live.



llama_print_timings:        load time =   573.95 ms
llama_print_timings:      sample time =     4.54 ms /    10 runs   (    0.45 ms per token,  2204.10 tokens per second)
llama_print_timings: prompt eval time =   573.91 ms /    36 tokens (   15.94 ms per token,    62.73 tokens per second)
llama_print_timings:        eval time =   762.39 ms /     9 runs   (   84.71 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1356.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She sat on the train for a whole day and then took a few-hour bus ride to reach that remote mountain village in the northwest.



llama_print_timings:        load time =   593.65 ms
llama_print_timings:      sample time =    14.22 ms /    30 runs   (    0.47 ms per token,  2109.85 tokens per second)
llama_print_timings: prompt eval time =   593.60 ms /    53 tokens (   11.20 ms per token,    89.29 tokens per second)
llama_print_timings:        eval time =  2456.08 ms /    29 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  3114.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As she climbed the hill, she saw that the woods were much like they had been three years before. However, because of the growth of trees, it now looked much thicker than before. Despite this, Lei Wenjie soon noticed that a part of the forest had been expanded much larger. Now, however, this expanded area has been deforested - cutting down the trees in all directions is in full swing and at this rate the forest will soon be gone entirely. The villagers cutting down the trees come from two nearby villages and they use axes and pruning shears to knock down young trees one by one, then load them with tractors and cattle wagons to take them down the hill. There are many disputes over who should get a share of the timber profits.



llama_print_timings:        load time =   813.61 ms
llama_print_timings:      sample time =    74.89 ms /   166 runs   (    0.45 ms per token,  2216.53 tokens per second)
llama_print_timings: prompt eval time =   813.56 ms /   185 tokens (    4.40 ms per token,   227.40 tokens per second)
llama_print_timings:        eval time = 14372.09 ms /   165 runs   (   87.10 ms per token,    11.48 tokens per second)
llama_print_timings:       total time = 15529.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leaves the trees fall down without a loud sound, and also can't hear the rumble of oil saws. But this familiar scene still makes Yew Wenjie nervous in her heart.



llama_print_timings:        load time =   613.50 ms
llama_print_timings:      sample time =    19.96 ms /    44 runs   (    0.45 ms per token,  2203.97 tokens per second)
llama_print_timings: prompt eval time =   613.47 ms /    64 tokens (    9.59 ms per token,   104.32 tokens per second)
llama_print_timings:        eval time =  3712.45 ms /    43 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  4415.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Someone greets her, it is the production team leader who is now the village chief. He recognizes Ye Wenjie and when he is asked why to chop down trees, he answers that “the woods in this area are not protected by law.”



llama_print_timings:        load time =   623.79 ms
llama_print_timings:      sample time =    24.92 ms /    55 runs   (    0.45 ms per token,  2207.06 tokens per second)
llama_print_timings: prompt eval time =   623.75 ms /    74 tokens (    8.43 ms per token,   118.64 tokens per second)
llama_print_timings:        eval time =  4678.40 ms /    54 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  5414.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

How can you say that? The Forestry Law was only enacted recently.



llama_print_timings:        load time =   581.94 ms
llama_print_timings:      sample time =     8.66 ms /    18 runs   (    0.48 ms per token,  2077.80 tokens per second)
llama_print_timings: prompt eval time =   581.90 ms /    43 tokens (   13.53 ms per token,    73.90 tokens per second)
llama_print_timings:        eval time =  1451.58 ms /    17 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2072.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Who approved these kinds of trees to be planted? And who will protect the foreigners when they illegally go up the mountain and plant these trees?



llama_print_timings:        load time =   605.63 ms
llama_print_timings:      sample time =    14.54 ms /    33 runs   (    0.44 ms per token,  2269.91 tokens per second)
llama_print_timings: prompt eval time =   605.59 ms /    59 tokens (   10.26 ms per token,    97.43 tokens per second)
llama_print_timings:        eval time =  2577.59 ms /    32 runs   (   80.55 ms per token,    12.41 tokens per second)
llama_print_timings:       total time =  3250.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's not correct. He planted in the desert, but he didn't take away any arable land and anyway, you didn't say anything at that time.



llama_print_timings:        load time =   594.97 ms
llama_print_timings:      sample time =    17.30 ms /    38 runs   (    0.46 ms per token,  2196.53 tokens per second)
llama_print_timings: prompt eval time =   594.93 ms /    57 tokens (   10.44 ms per token,    95.81 tokens per second)
llama_print_timings:        eval time =  3187.89 ms /    37 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  3863.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yeah, later the county also gave him an afforestation model. Originally, we were thinking to wait a few years before collecting the woods, but people in Nan La village weren’t waiting for that, they cut down the trees instead.”



llama_print_timings:        load time =   641.77 ms
llama_print_timings:      sample time =    26.06 ms /    57 runs   (    0.46 ms per token,  2187.26 tokens per second)
llama_print_timings: prompt eval time =   641.73 ms /    84 tokens (    7.64 ms per token,   130.90 tokens per second)
llama_print_timings:        eval time =  4696.46 ms /    56 runs   (   83.87 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  5455.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Stop right now! I am going to report this to the government department!”



llama_print_timings:        load time =   603.03 ms
llama_print_timings:      sample time =     7.64 ms /    17 runs   (    0.45 ms per token,  2223.97 tokens per second)
llama_print_timings: prompt eval time =   602.99 ms /    42 tokens (   14.36 ms per token,    69.65 tokens per second)
llama_print_timings:        eval time =  1341.55 ms /    16 runs   (   83.85 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1978.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No need, ” the village chief lit a cigarette and pointed to a big truck full of logs that was being loaded. “Look at that truck, it is a forest officer’s. And there are also police stations from our town. The wood on that truck could be the most! I said before, this woods has no name or authority and can’t be protected; moreover, Prof. Ye is a university professor. What does this have to do with you?”



llama_print_timings:        load time =   695.83 ms
llama_print_timings:      sample time =    45.03 ms /   102 runs   (    0.44 ms per token,  2265.36 tokens per second)
llama_print_timings: prompt eval time =   695.78 ms /   117 tokens (    5.95 ms per token,   168.16 tokens per second)
llama_print_timings:        eval time =  8657.78 ms /   101 runs   (   85.72 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  9558.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The two hut was still the same, but Evans is not in them. Iwants found him in a wood. He had an axe in his hands and worked diligently, looking very tired.



llama_print_timings:        load time =   656.06 ms
llama_print_timings:      sample time =    19.88 ms /    44 runs   (    0.45 ms per token,  2213.39 tokens per second)
llama_print_timings: prompt eval time =   656.03 ms /    82 tokens (    8.00 ms per token,   125.00 tokens per second)
llama_print_timings:        eval time =  3693.35 ms /    43 runs   (   85.89 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  4440.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Even if it's meaningless, I can't stop. If I do, I'll collapse.” Evans said, skillfully chopping off a twig that had grown crooked.



llama_print_timings:        load time =   617.19 ms
llama_print_timings:      sample time =    19.62 ms /    43 runs   (    0.46 ms per token,  2191.86 tokens per second)
llama_print_timings: prompt eval time =   617.15 ms /    57 tokens (   10.83 ms per token,    92.36 tokens per second)
llama_print_timings:        eval time =  3591.09 ms /    42 runs   (   85.50 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4297.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We'll go to the county government together, and if that doesn't work we'll go to the provincial capital,” said Ye Wenjie worryingly.



llama_print_timings:        load time =   593.97 ms
llama_print_timings:      sample time =    17.20 ms /    37 runs   (    0.46 ms per token,  2150.79 tokens per second)
llama_print_timings: prompt eval time =   593.95 ms /    57 tokens (   10.42 ms per token,    95.97 tokens per second)
llama_print_timings:        eval time =  3041.33 ms /    36 runs   (   84.48 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  3714.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$伊文斯停下来，用很惊奇的目光看着叶文洁，夕阳透过重重林木照进来，在他的眸子中闪亮。“叶，你真的以为我是为了这片树林？”他笑着摇摇头,扔下了手中的斧子,靠着一棵树坐了下来,"现在我要想制止他们，轻而易举."他把一只空的工具袋放到地上,示意叶文洁坐下,接着说,"我刚从美国回来,父亲在两个月前去世,我继承了他的大部分遗产.哥哥和姐姐只各得了五百万.这让我很意外,真的没想到他最后能对我这样，也许，他在内心深处还是看重我的，或者,看重我的理想.不把不动产算在内，知道我现在能支配的钱有多少吗？大约四十五亿美元.我可以轻而易举地让他们停止砍树，然后让他们种树，让我们目力所及的黄土山都被这样的速生林覆盖,很容易，但有什么意义呢？你看到的一切可以归结为贫穷, but wealthy countries also have the same problem… The country that recently refused to sign the Kyoto Protocol."整个人类本质上都一样,"他继续说,"只要文明发展成这样, you may know, other kinds of birds will eventually die out, just a matter of time. You see all this poverty around you, but you've never seen rich countries' pollution…"



llama_print_timings:        load time =  1095.39 ms
llama_print_timings:      sample time =   134.83 ms /   299 runs   (    0.45 ms per token,  2217.57 tokens per second)
llama_print_timings: prompt eval time =  1095.33 ms /   318 tokens (    3.44 ms per token,   290.32 tokens per second)
llama_print_timings:        eval time = 26780.58 ms /   298 runs   (   89.87 ms per token,    11.13 tokens per second)
llama_print_timings:       total time = 28505.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Leaf Wenjie sat quietly, watching the sunset shining through the tree forest and listening to the noise of chopping in the distance. Her thoughts were drawn back twenty years before, back to DaXingAnling forests where she had also once had a similar conversation with another man.



llama_print_timings:        load time =   672.67 ms
llama_print_timings:      sample time =    29.09 ms /    64 runs   (    0.45 ms per token,  2200.14 tokens per second)
llama_print_timings: prompt eval time =   672.64 ms /    86 tokens (    7.82 ms per token,   127.85 tokens per second)
llama_print_timings:        eval time =  5426.20 ms /    63 runs   (   86.13 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  6233.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I know why I came here, ” Evans continued. “The seeds of communalism in the ancient East were planted.”



llama_print_timings:        load time =   600.30 ms
llama_print_timings:      sample time =    13.04 ms /    29 runs   (    0.45 ms per token,  2224.10 tokens per second)
llama_print_timings: prompt eval time =   600.25 ms /    58 tokens (   10.35 ms per token,    96.63 tokens per second)
llama_print_timings:        eval time =  2407.96 ms /    28 runs   (   86.00 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3066.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are referring to Buddhism?



llama_print_timings:        load time =   572.28 ms
llama_print_timings:      sample time =     3.53 ms /     8 runs   (    0.44 ms per token,  2265.65 tokens per second)
llama_print_timings: prompt eval time =   572.24 ms /    35 tokens (   16.35 ms per token,    61.16 tokens per second)
llama_print_timings:        eval time =   604.03 ms /     7 runs   (   86.29 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  1192.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, Christianity only cares about people. Although all species were placed on Noah's ark, they never have the same status as humans. However, Buddhism is a universal salvation, so I came here to East Asia. But... now it seems everywhere is the same.



llama_print_timings:        load time =   642.95 ms
llama_print_timings:      sample time =    27.80 ms /    61 runs   (    0.46 ms per token,  2194.32 tokens per second)
llama_print_timings: prompt eval time =   642.91 ms /    83 tokens (    7.75 ms per token,   129.10 tokens per second)
llama_print_timings:        eval time =  5230.86 ms /    60 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  6003.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, it's the same everywhere. Humans are all the same.



llama_print_timings:        load time =   573.62 ms
llama_print_timings:      sample time =     7.77 ms /    17 runs   (    0.46 ms per token,  2188.75 tokens per second)
llama_print_timings: prompt eval time =   573.58 ms /    42 tokens (   13.66 ms per token,    73.22 tokens per second)
llama_print_timings:        eval time =  1373.62 ms /    16 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1982.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What can I do now? Where is the pillar of my life? I have forty-five billion dollars and a multinational oil company, but what does it matter? Humans spend more than 4.5 billion US dollars on endangered species! There are also more than 450 billion spent for saving deteriorating ecological environment, but they are all useless as the destruction of other life in the ecosystem continues at the same speed as ours. Forty-five million dollars can build a aircraft carrier, but even building a thousand carrier ships also cannot stop humanity's madness.”



llama_print_timings:        load time =   724.52 ms
llama_print_timings:      sample time =    59.86 ms /   134 runs   (    0.45 ms per token,  2238.56 tokens per second)
llama_print_timings: prompt eval time =   724.48 ms /   131 tokens (    5.53 ms per token,   180.82 tokens per second)
llama_print_timings:        eval time = 11592.09 ms /   133 runs   (   87.16 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 12597.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

McKinley, this is what I want to say. Human civilization can no longer improve on its own.



llama_print_timings:        load time =   580.35 ms
llama_print_timings:      sample time =    11.33 ms /    25 runs   (    0.45 ms per token,  2207.12 tokens per second)
llama_print_timings: prompt eval time =   580.32 ms /    49 tokens (   11.84 ms per token,    84.44 tokens per second)
llama_print_timings:        eval time =  2015.04 ms /    24 runs   (   83.96 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  2646.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is there any other power besides humanity?” If God existed, he would have died long ago.



llama_print_timings:        load time =   575.12 ms
llama_print_timings:      sample time =     9.67 ms /    22 runs   (    0.44 ms per token,  2274.14 tokens per second)
llama_print_timings: prompt eval time =   575.08 ms /    46 tokens (   12.50 ms per token,    79.99 tokens per second)
llama_print_timings:        eval time =  1858.22 ms /    21 runs   (   88.49 ms per token,    11.30 tokens per second)
llama_print_timings:       total time =  2477.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, there is another power.



llama_print_timings:        load time =   575.08 ms
llama_print_timings:      sample time =     3.55 ms /     8 runs   (    0.44 ms per token,  2253.52 tokens per second)
llama_print_timings: prompt eval time =   575.04 ms /    38 tokens (   15.13 ms per token,    66.08 tokens per second)
llama_print_timings:        eval time =   565.19 ms /     7 runs   (   80.74 ms per token,    12.39 tokens per second)
llama_print_timings:       total time =  1155.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

By now the sun had set and the loggers had gone home. The forest and surrounding yellow plateau were enveloped in silence, with only the shadowy woods and plains of the nearby mountains that were listening to what Yelvask said in full. When he finished, a bright moon rose from the east and cast its shadows over the trees.



llama_print_timings:        load time =   761.18 ms
llama_print_timings:      sample time =    35.10 ms /    75 runs   (    0.47 ms per token,  2136.57 tokens per second)
llama_print_timings: prompt eval time =   761.14 ms /   132 tokens (    5.77 ms per token,   173.42 tokens per second)
llama_print_timings:        eval time =  6450.07 ms /    74 runs   (   87.16 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  7373.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I can't believe it, ” Evans said. “Luckily, I have the power to prove this is true if it is indeed so.” He extended a hand towards Ye Wenjie. “We are comrades,” he said, following the standard formula of new acceptance members into Earth-三体 organization.



llama_print_timings:        load time =   662.43 ms
llama_print_timings:      sample time =    30.46 ms /    69 runs   (    0.44 ms per token,  2265.04 tokens per second)
llama_print_timings: prompt eval time =   662.39 ms /    89 tokens (    7.44 ms per token,   134.36 tokens per second)
llama_print_timings:        eval time =  5782.73 ms /    68 runs   (   85.04 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  6584.87 ms


translated 64.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

I can't believe there are so many people who completely despair humanity, hate their own species, and even see the destruction of humanity as a ultimate ideal. This is what shocked me the most about the Earth Trilogy movement.



llama_print_timings:        load time =   616.65 ms
llama_print_timings:      sample time =    23.74 ms /    53 runs   (    0.45 ms per token,  2232.33 tokens per second)
llama_print_timings: prompt eval time =   616.63 ms /    69 tokens (    8.94 ms per token,   111.90 tokens per second)
llama_print_timings:        eval time =  4436.14 ms /    52 runs   (   85.31 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5159.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Rebel Army of the Earth Trilogy is called the Spiritual Aristocracy. Members come mainly from the high-level intellectual class, but there are also some distinguished political and economic figures among them. However, attempts to recruit members in the general population have failed due to lack of widespread understanding among common people and their weak sense of identity with their own species, which is stronger than science and philosophy impact on them. Nevertheless, many knowledgeable people already stand outside the human race when thinking about problems. The powerful force that has emerged within human civilization has finally nursed its own internal antagonists.



llama_print_timings:        load time =   781.73 ms
llama_print_timings:      sample time =    60.14 ms /   131 runs   (    0.46 ms per token,  2178.36 tokens per second)
llama_print_timings: prompt eval time =   781.69 ms /   165 tokens (    4.74 ms per token,   211.08 tokens per second)
llama_print_timings:        eval time = 11371.09 ms /   130 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 12430.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Although the speed of development of the Rebel Army is impressive, it cannot be assessed solely by its numbers because most members are in positions of high power and influence.



llama_print_timings:        load time =   608.34 ms
llama_print_timings:      sample time =    17.16 ms /    38 runs   (    0.45 ms per token,  2214.32 tokens per second)
llama_print_timings: prompt eval time =   608.23 ms /    63 tokens (    9.65 ms per token,   103.58 tokens per second)
llama_print_timings:        eval time =  3104.67 ms /    37 runs   (   83.91 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  3792.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the highest commander of the Rebel Alliance, Earth's leading spiritual leader Yue Wenjie is not involved in the organization's specific operations and does not know how it grew to become so large or its membership numbers.



llama_print_timings:        load time =   626.62 ms
llama_print_timings:      sample time =    21.82 ms /    49 runs   (    0.45 ms per token,  2245.44 tokens per second)
llama_print_timings: prompt eval time =   626.59 ms /    72 tokens (    8.70 ms per token,   114.91 tokens per second)
llama_print_timings:        eval time =  4096.78 ms /    48 runs   (   85.35 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4821.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

For the Earth Trisolarian Rebellion, national governments have never given it enough attention. In order to rapidly expand, this organization has been acting semi-publicly, knowing that one thing would be their natural protector: the government's conservatism and poor imagination in controlling state power. In the departments of the country responsible for managing state power, no one believes what they say, only treating them as a radical organization, which is because of its membership levels, while governments treats it cautiously until it began to develop its own armed forces and some countries' security organizations noticed it and discovered that this organization was remarkable; as for effectively striking against it, it has been in recent years.



llama_print_timings:        load time =   771.67 ms
llama_print_timings:      sample time =    67.81 ms /   152 runs   (    0.45 ms per token,  2241.66 tokens per second)
llama_print_timings: prompt eval time =   771.63 ms /   160 tokens (    4.82 ms per token,   207.35 tokens per second)
llama_print_timings:        eval time = 13189.12 ms /   151 runs   (   87.35 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 14271.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Rebel Army of Earth is not a homogeneous group. It is divided into various factions and branches, mainly divided into two groups:



llama_print_timings:        load time =   590.12 ms
llama_print_timings:      sample time =    13.23 ms /    30 runs   (    0.44 ms per token,  2266.72 tokens per second)
llama_print_timings: prompt eval time =   590.08 ms /    52 tokens (   11.35 ms per token,    88.12 tokens per second)
llama_print_timings:        eval time =  2489.08 ms /    29 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3138.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Arrivalists are the purest lineage of the Arrivalists. They mostly belong to the followers of the communist speciesism of Yvons, which has despairing views on human nature. The despair originally stems from the great extinction of terrestrial species caused by modern civilization. Yvons is the epitome of this. Later, Arrivalists' hatred towards humans has gained a different starting point, not just in environmental conservation and warfare, but also at a very abstract philosophical level. Unlike later imaginations, these people are mostly realists serving on behalf of their extraterrestrial masters. Their betrayal is due to the despair and hatred towards humans, which has been expressed by a famous quote from Yvons: "We don't know what extraterrestrials look like, but we know humanity."



llama_print_timings:        load time =   817.71 ms
llama_print_timings:      sample time =    85.48 ms /   189 runs   (    0.45 ms per token,  2211.15 tokens per second)
llama_print_timings: prompt eval time =   817.67 ms /   187 tokens (    4.37 ms per token,   228.70 tokens per second)
llama_print_timings:        eval time = 16296.76 ms /   188 runs   (   86.68 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 17513.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Saving Army was created quite a long time after the Civil War in Three-Body. It is essentially a religious group composed of members of the Three-Body Religion.



llama_print_timings:        load time =   618.20 ms
llama_print_timings:      sample time =    16.17 ms /    36 runs   (    0.45 ms per token,  2226.48 tokens per second)
llama_print_timings: prompt eval time =   618.17 ms /    61 tokens (   10.13 ms per token,    98.68 tokens per second)
llama_print_timings:        eval time =  2967.93 ms /    35 runs   (   84.80 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  3658.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The existence of another civilization outside humanity undoubtedly has great appeal to the intellectual class, and it is easy for them to have various fantasies about it. The tremendous fascination towards another civilization for human civilization can be hardly resisted. There's a rather inapt analogy: Human civilization has always been like an innocent youth wandering alone in the vast empty desert of the universe; now that he knows the existence of another species, although he sees no face and figure, but he knows that it is there, his imaginations about the distant civilization spread like wild fire. Gradually, as the imagination about the distant civilization becomes more and more rich, a religion feeling towards the Three Body Universe arises in terms of spirituality; the Andromeda Galaxy becomes the space Olympos, where God lives, which thus gives birth to the Three Body Civilization. With other religious beliefs different, the main responsibility for saving lies with believers


llama_print_timings:        load time =   891.89 ms
llama_print_timings:      sample time =    96.52 ms /   216 runs   (    0.45 ms per token,  2237.99 tokens per second)
llama_print_timings: prompt eval time =   891.84 ms /   230 tokens (    3.88 ms per token,   257.89 tokens per second)
llama_print_timings:        eval time = 19065.81 ms /   215 runs   (   88.68 ms per token,    11.28 tokens per second)
llama_print_timings:       total time = 20401.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The main way to spread the Three-Body culture is through the game. The Rebels of Three-Body invested a lot of efforts in developing this software that is very big. Initially, the purpose was not only to spread the culture but also to attract members to their organization from society's lowest levels by it. Through an apparent human society and history shell, the Three-Body culture and world are reflected in game, which can avoid the feeling of strangeness. When players enter into the game very deeply and feel the charm of the Three-body culture enough, the Rebels will contact them directly to examine their ideological tendencies, and then recruit qualified members as the Earth Three-body Rebels. However, there is not much attention for it in society, and it requires a high knowledge background and deep thinking to play the game and explore its profound content. Therefore, only the high-knowledge people can be truly attracted by it.



llama_print_timings:        load time =   905.60 ms
llama_print_timings:      sample time =    88.93 ms /   198 runs   (    0.45 ms per token,  2226.50 tokens per second)
llama_print_timings: prompt eval time =   905.57 ms /   230 tokens (    3.94 ms per token,   253.98 tokens per second)
llama_print_timings:        eval time = 17209.97 ms /   197 runs   (   87.36 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 18530.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The members who joined the Saving Party later mostly knew the Three-Body Civilization through the game of Three-Body, and eventually devoted themselves to the Earth-based Rebellion against the alien occupation. We can say that the game of Three-Body is the cradle for the Saving Party.



llama_print_timings:        load time =   648.58 ms
llama_print_timings:      sample time =    30.36 ms /    65 runs   (    0.47 ms per token,  2140.83 tokens per second)
llama_print_timings: prompt eval time =   648.56 ms /    69 tokens (    9.40 ms per token,   106.39 tokens per second)
llama_print_timings:        eval time =  5525.24 ms /    64 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  6316.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The Salvationists hold a religious feeling towards the Three-Body Civilization at the same time as their attitude towards human civilization is not as extreme as that of the降临派. Their ultimate ideal is to save The Sovereign. To ensure that The Sovereign survives in the three suns semi-Aries star system, they can sacrifice human civilization on a certain level. However, most of them believe that it is possible to achieve both the survival of The Three-Body Civilization and the survival of Earth to two worlds. They are naively thinking that solving the Three-Body Problem would make this ideal outcome possible, while saving both Two-Body Civilization and Earth. In fact, this idea may not be naive, as the Three-Body Civilization itself has held a similar thought for a very long time, and their efforts to solve the Three-Body Problem have spanned hundreds of iterations in their thousands of generations. The Salvationists include many scientists with deep backgrounds in physics and mathemat


llama_print_timings:        load time =   987.54 ms
llama_print_timings:      sample time =   119.37 ms /   265 runs   (    0.45 ms per token,  2219.93 tokens per second)
llama_print_timings: prompt eval time =   987.49 ms /   264 tokens (    3.74 ms per token,   267.35 tokens per second)
llama_print_timings:        eval time = 23282.69 ms /   264 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 24831.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The降临派 and the salvationist have been in sharp opposition for a long time. The arriving party believes that the salvationists are a significant threat to the Earth's Trisec movement. This view is not completely without reason, it was through some responsible people within the salvationist organization that governments gradually came to know of the shocking background of the Trisec traitors. The two factions have developed to a point where they can no longer be reconciled, and both sides' armed forces have reached a state of open hostility. Ye Wenjie is trying hard to mend any cracks in the organization, but with little success.



llama_print_timings:        load time =   741.40 ms
llama_print_timings:      sample time =    64.66 ms /   141 runs   (    0.46 ms per token,  2180.77 tokens per second)
llama_print_timings: prompt eval time =   741.37 ms /   129 tokens (    5.75 ms per token,   174.00 tokens per second)
llama_print_timings:        eval time = 11918.01 ms /   140 runs   (   85.13 ms per token,    11.75 tokens per second)
llama_print_timings:       total time = 12959.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the development of trilogy movement, a third faction named survivor appeared in the trilogy rebel army. When the existence of an alien fleet to attack solar system is exactly verified, the most natural desire for people who are still alive in that ultimate war is to survive after the war. Of course, the war happened 450 years before this life, but many people hope their descendants can survive four and a half centuries later if the humans lose the battle. Now they serve the alien invaders, which would help them achieve this goal. Compared with two other main factions, members of the survivor faction come from lower social classes, and most of them are Chinese (especially Oriental people). Although their number is still small at present, it will greatly increase in the future when trilogy culture becomes more popular. Thus they would become a powerful force to be overlooked.



llama_print_timings:        load time =   823.61 ms
llama_print_timings:      sample time =    86.83 ms /   195 runs   (    0.45 ms per token,  2245.69 tokens per second)
llama_print_timings: prompt eval time =   823.57 ms /   187 tokens (    4.40 ms per token,   227.06 tokens per second)
llama_print_timings:        eval time = 16851.70 ms /   194 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time = 18080.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The three strong forces of the human civilization's defective power, the yearning for higher civilizations and the worship, as well as the desire to survive from the final war and exist, propel the Three Body Movement rapidly. When it is observed, it has already spread like wildfire.



llama_print_timings:        load time =   654.13 ms
llama_print_timings:      sample time =    29.15 ms /    64 runs   (    0.46 ms per token,  2195.24 tokens per second)
llama_print_timings: prompt eval time =   654.10 ms /    85 tokens (    7.70 ms per token,   129.95 tokens per second)
llama_print_timings:        eval time =  5294.24 ms /    63 runs   (   84.04 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  6083.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But at that time, alien civilization was still far away in the depths of space with four and a half centuries of distance between them and humanity. All they sent us were this beam of light.



llama_print_timings:        load time =   621.10 ms
llama_print_timings:      sample time =    20.01 ms /    44 runs   (    0.45 ms per token,  2199.12 tokens per second)
llama_print_timings: prompt eval time =   621.06 ms /    73 tokens (    8.51 ms per token,   117.54 tokens per second)
llama_print_timings:        eval time =  3648.19 ms /    43 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  4361.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Bill Matthew's "Contact Symbol" theory has been proven to be flawlessly perfect.



llama_print_timings:        load time =   609.42 ms
llama_print_timings:      sample time =     9.96 ms /    21 runs   (    0.47 ms per token,  2109.28 tokens per second)
llama_print_timings: prompt eval time =   609.38 ms /    49 tokens (   12.44 ms per token,    80.41 tokens per second)
llama_print_timings:        eval time =  1696.14 ms /    20 runs   (   84.81 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  2350.57 ms


translated 17.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Interrogator: Starting today's interrogation. I hope you will cooperate like last time.



llama_print_timings:        load time =   584.09 ms
llama_print_timings:      sample time =    11.06 ms /    24 runs   (    0.46 ms per token,  2169.20 tokens per second)
llama_print_timings: prompt eval time =   584.06 ms /    45 tokens (   12.98 ms per token,    77.05 tokens per second)
llama_print_timings:        eval time =  1926.29 ms /    23 runs   (   83.75 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  2561.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I know that you all know this, but there are many things I need you to tell me.



llama_print_timings:        load time =   586.03 ms
llama_print_timings:      sample time =    10.60 ms /    21 runs   (    0.50 ms per token,  1980.95 tokens per second)
llama_print_timings: prompt eval time =   585.99 ms /    47 tokens (   12.47 ms per token,    80.21 tokens per second)
llama_print_timings:        eval time =  1752.71 ms /    20 runs   (   87.64 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  2387.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We want to know first what the information sent from a three-dimensional world to Earth that the Descending Party intercepted contained.



llama_print_timings:        load time =   599.96 ms
llama_print_timings:      sample time =    12.36 ms /    28 runs   (    0.44 ms per token,  2265.37 tokens per second)
llama_print_timings: prompt eval time =   599.92 ms /    60 tokens (   10.00 ms per token,   100.01 tokens per second)
llama_print_timings:        eval time =  2384.97 ms /    27 runs   (   88.33 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  3041.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know, they are very secretive. I just know that they intercepted the information.



llama_print_timings:        load time =   590.49 ms
llama_print_timings:      sample time =    10.94 ms /    23 runs   (    0.48 ms per token,  2101.61 tokens per second)
llama_print_timings: prompt eval time =   590.44 ms /    48 tokens (   12.30 ms per token,    81.30 tokens per second)
llama_print_timings:        eval time =  1930.57 ms /    22 runs   (   87.75 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  2571.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Asker: Let's change the topic: After the contact with the Three-Body universe was monopolized by the Arrival, did you establish the Third Red Bank base?



llama_print_timings:        load time =   599.46 ms
llama_print_timings:      sample time =    18.40 ms /    39 runs   (    0.47 ms per token,  2119.57 tokens per second)
llama_print_timings: prompt eval time =   599.41 ms /    56 tokens (   10.70 ms per token,    93.42 tokens per second)
llama_print_timings:        eval time =  3192.09 ms /    38 runs   (   84.00 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  3876.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The plan was to build the base, but it was only completed up to receiving station and then construction was stopped with equipment and base also being demolished.



llama_print_timings:        load time =   595.24 ms
llama_print_timings:      sample time =    14.58 ms /    32 runs   (    0.46 ms per token,  2194.34 tokens per second)
llama_print_timings: prompt eval time =   595.21 ms /    54 tokens (   11.02 ms per token,    90.72 tokens per second)
llama_print_timings:        eval time =  2653.11 ms /    31 runs   (   85.58 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3314.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What's the reason?



llama_print_timings:        load time =   576.26 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2247.91 tokens per second)
llama_print_timings: prompt eval time =   576.22 ms /    35 tokens (   16.46 ms per token,    60.74 tokens per second)
llama_print_timings:        eval time =   483.68 ms /     6 runs   (   80.61 ms per token,    12.40 tokens per second)
llama_print_timings:       total time =  1073.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: Because there is no information coming from this direction, and I believe you have confirmed it.



llama_print_timings:        load time =   601.83 ms
llama_print_timings:      sample time =    11.59 ms /    26 runs   (    0.45 ms per token,  2243.12 tokens per second)
llama_print_timings: prompt eval time =   601.79 ms /    59 tokens (   10.20 ms per token,    98.04 tokens per second)
llama_print_timings:        eval time =  2064.44 ms /    25 runs   (   82.58 ms per token,    12.11 tokens per second)
llama_print_timings:       total time =  2719.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, that is to say that at least four years ago, the Three-Body World stopped communicating with Earth, which made the information intercepted by the descended mission even more significant.



llama_print_timings:        load time =   616.33 ms
llama_print_timings:      sample time =    18.44 ms /    40 runs   (    0.46 ms per token,  2168.96 tokens per second)
llama_print_timings: prompt eval time =   616.28 ms /    68 tokens (    9.06 ms per token,   110.34 tokens per second)
llama_print_timings:        eval time =  3378.19 ms /    39 runs   (   86.62 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  4078.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, I really don't have much to say about that.



llama_print_timings:        load time =   579.82 ms
llama_print_timings:      sample time =     6.77 ms /    15 runs   (    0.45 ms per token,  2215.00 tokens per second)
llama_print_timings: prompt eval time =   579.78 ms /    45 tokens (   12.88 ms per token,    77.62 tokens per second)
llama_print_timings:        eval time =  1172.00 ms /    14 runs   (   83.71 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  1781.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Speaker: (pauses for a few seconds) Alright, let's find a topic to talk about. Did Mike Evans deceive you?



llama_print_timings:        load time =   615.68 ms
llama_print_timings:      sample time =    14.80 ms /    33 runs   (    0.45 ms per token,  2230.33 tokens per second)
llama_print_timings: prompt eval time =   615.65 ms /    59 tokens (   10.43 ms per token,    95.83 tokens per second)
llama_print_timings:        eval time =  2730.32 ms /    32 runs   (   85.32 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  3414.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wen Zhi: That's right. He never shared his deepest thoughts with me, but only expressed his sense of mission to other species on Earth. I hadn't thought much about the hateful feelings towards humans caused by this sense of mission and how extreme they had developed in him to the extent that he considered destroying human civilization as his ultimate ideal.



llama_print_timings:        load time =   658.90 ms
llama_print_timings:      sample time =    36.37 ms /    80 runs   (    0.45 ms per token,  2199.86 tokens per second)
llama_print_timings: prompt eval time =   658.86 ms /    93 tokens (    7.08 ms per token,   141.15 tokens per second)
llama_print_timings:        eval time =  6558.41 ms /    79 runs   (   83.02 ms per token,    12.05 tokens per second)
llama_print_timings:       total time =  7387.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Asker: Look at the current situation of Earth Three-Body Organization: the Descentists want to destroy humanity with alien forces, the Salvationists worship alien civilizations as gods, and the Survivors' ideal is to survive by selling their compatriots. None of these are compatible with your idea to transform humans using alien technology.



llama_print_timings:        load time =   656.50 ms
llama_print_timings:      sample time =    35.18 ms /    79 runs   (    0.45 ms per token,  2245.85 tokens per second)
llama_print_timings: prompt eval time =   656.46 ms /    92 tokens (    7.14 ms per token,   140.15 tokens per second)
llama_print_timings:        eval time =  6709.02 ms /    78 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  7527.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I lit the fire, but I can't control it.



llama_print_timings:        load time =   576.00 ms
llama_print_timings:      sample time =     6.73 ms /    14 runs   (    0.48 ms per token,  2079.62 tokens per second)
llama_print_timings: prompt eval time =   575.98 ms /    43 tokens (   13.39 ms per token,    74.66 tokens per second)
llama_print_timings:        eval time =  1154.76 ms /    13 runs   (   88.83 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  1763.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =  1032.46 ms
llama_print_timings:      sample time =     0.93 ms /     2 runs   (    0.46 ms per token,  2155.17 tokens per second)
llama_print_timings: prompt eval time =  1032.42 ms /    29 tokens (   35.60 ms per token,    28.09 tokens per second)
llama_print_timings:        eval time =    90.96 ms /     1 runs   (   90.96 ms per token,    10.99 tokens per second)
llama_print_timings:       total time =  1127.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The judge asks: "Why didn't you attack the 'Judgment Day' ship first, which was the headquarters and command center of the降临派?"



llama_print_timings:        load time =   677.62 ms
llama_print_timings:      sample time =    15.31 ms /    34 runs   (    0.45 ms per token,  2220.19 tokens per second)
llama_print_timings: prompt eval time =   677.57 ms /   109 tokens (    6.22 ms per token,   160.87 tokens per second)
llama_print_timings:        eval time =  2896.34 ms /    33 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time =  3643.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The leaf rules are for the information that is needed to be captured. The information is stored in the second Red Beach base, which is on Judgment Day ship itself. If we attack that ship, the Downpour would delete all those information at a critical moment they consider. We face the same problem as well. The Downpour holds the information like hostages. This is why Judgments Day still exists.



llama_print_timings:        load time =   758.20 ms
llama_print_timings:      sample time =    38.94 ms /    86 runs   (    0.45 ms per token,  2208.58 tokens per second)
llama_print_timings: prompt eval time =   758.15 ms /   149 tokens (    5.09 ms per token,   196.53 tokens per second)
llama_print_timings:        eval time =  7207.62 ms /    85 runs   (   84.80 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  8143.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

On this aspect, do you have any suggestions for us?



llama_print_timings:        load time =   572.83 ms
llama_print_timings:      sample time =     6.44 ms /    13 runs   (    0.50 ms per token,  2018.95 tokens per second)
llama_print_timings: prompt eval time =   572.79 ms /    41 tokens (   13.97 ms per token,    71.58 tokens per second)
llama_print_timings:        eval time =  1064.22 ms /    12 runs   (   88.69 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1664.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

None.



llama_print_timings:        load time =   634.97 ms
llama_print_timings:      sample time =     1.31 ms /     3 runs   (    0.44 ms per token,  2293.58 tokens per second)
llama_print_timings: prompt eval time =   634.94 ms /    35 tokens (   18.14 ms per token,    55.12 tokens per second)
llama_print_timings:        eval time =   161.88 ms /     2 runs   (   80.94 ms per token,    12.35 tokens per second)
llama_print_timings:       total time =   801.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Interrogator: Are you also calling the Three-Body World "God", as if it has a religion-like feeling, or have you converted to the Three-Body faith?



llama_print_timings:        load time =   620.15 ms
llama_print_timings:      sample time =    17.67 ms /    39 runs   (    0.45 ms per token,  2207.51 tokens per second)
llama_print_timings: prompt eval time =   620.11 ms /    67 tokens (    9.26 ms per token,   108.05 tokens per second)
llama_print_timings:        eval time =  3258.08 ms /    38 runs   (   85.74 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3960.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: No, it's just a habit. . . I don't want to talk about this problem anymore.



llama_print_timings:        load time =   576.49 ms
llama_print_timings:      sample time =    13.78 ms /    31 runs   (    0.44 ms per token,  2248.98 tokens per second)
llama_print_timings: prompt eval time =   576.45 ms /    46 tokens (   12.53 ms per token,    79.80 tokens per second)
llama_print_timings:        eval time =  2535.34 ms /    30 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  3174.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Surely you must have heard something about it, even if you do not know the details.



llama_print_timings:        load time =   606.28 ms
llama_print_timings:      sample time =     9.36 ms /    21 runs   (    0.45 ms per token,  2243.83 tokens per second)
llama_print_timings: prompt eval time =   606.26 ms /    64 tokens (    9.47 ms per token,   105.56 tokens per second)
llama_print_timings:        eval time =  1693.08 ms /    20 runs   (   84.65 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2340.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: It might just be rumors.



llama_print_timings:        load time =   575.36 ms
llama_print_timings:      sample time =     6.67 ms /    15 runs   (    0.44 ms per token,  2249.21 tokens per second)
llama_print_timings: prompt eval time =   575.32 ms /    39 tokens (   14.75 ms per token,    67.79 tokens per second)
llama_print_timings:        eval time =  1237.51 ms /    14 runs   (   88.39 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  1843.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Exactly.



llama_print_timings:        load time =   570.76 ms
llama_print_timings:      sample time =     1.77 ms /     4 runs   (    0.44 ms per token,  2258.61 tokens per second)
llama_print_timings: prompt eval time =   570.74 ms /    35 tokens (   16.31 ms per token,    61.32 tokens per second)
llama_print_timings:        eval time =   261.79 ms /     3 runs   (   87.26 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =   840.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

…



llama_print_timings:        load time =   583.41 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2277.90 tokens per second)
llama_print_timings: prompt eval time =   583.39 ms /    35 tokens (   16.67 ms per token,    59.99 tokens per second)
llama_print_timings:        eval time =    82.84 ms /     1 runs   (   82.84 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =   670.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Questioner: Has the Three-Body World taught the arrival party certain technologies that surpass humanity's current level of technology?



llama_print_timings:        load time =   579.29 ms
llama_print_timings:      sample time =    13.48 ms /    29 runs   (    0.46 ms per token,  2150.86 tokens per second)
llama_print_timings: prompt eval time =   579.24 ms /    49 tokens (   11.82 ms per token,    84.59 tokens per second)
llama_print_timings:        eval time =  2396.37 ms /    28 runs   (   85.58 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3037.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't think so, because those technologies are likely to fall into your hands.



llama_print_timings:        load time =   579.68 ms
llama_print_timings:      sample time =     9.21 ms /    20 runs   (    0.46 ms per token,  2172.73 tokens per second)
llama_print_timings: prompt eval time =   579.66 ms /    46 tokens (   12.60 ms per token,    79.36 tokens per second)
llama_print_timings:        eval time =  1637.23 ms /    19 runs   (   86.17 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2258.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The last question, and the most important one: Have there been any other means of communication between the Three-Body World and Earth so far?



llama_print_timings:        load time =   589.30 ms
llama_print_timings:      sample time =    13.51 ms /    30 runs   (    0.45 ms per token,  2220.58 tokens per second)
llama_print_timings: prompt eval time =   589.26 ms /    52 tokens (   11.33 ms per token,    88.25 tokens per second)
llama_print_timings:        eval time =  2426.94 ms /    29 runs   (   83.69 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  3076.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Almost.



llama_print_timings:        load time =   570.17 ms
llama_print_timings:      sample time =     1.83 ms /     4 runs   (    0.46 ms per token,  2186.99 tokens per second)
llama_print_timings: prompt eval time =   570.13 ms /    36 tokens (   15.84 ms per token,    63.14 tokens per second)
llama_print_timings:        eval time =   265.03 ms /     3 runs   (   88.34 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =   843.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The interrogator: Almost.



llama_print_timings:        load time =   564.67 ms
llama_print_timings:      sample time =     4.04 ms /     9 runs   (    0.45 ms per token,  2229.93 tokens per second)
llama_print_timings: prompt eval time =   564.63 ms /    35 tokens (   16.13 ms per token,    61.99 tokens per second)
llama_print_timings:        eval time =   709.18 ms /     8 runs   (   88.65 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1291.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leo Ye Wen-Zhi: The technological leap that led to a space velocity of one tenth of the speed of light, which happened several thousand years before the launch of a small probe still on its way to pass by the human constellation and solar system roughly halfway.



llama_print_timings:        load time =   670.15 ms
llama_print_timings:      sample time =    27.93 ms /    63 runs   (    0.44 ms per token,  2255.64 tokens per second)
llama_print_timings: prompt eval time =   670.11 ms /   102 tokens (    6.57 ms per token,   152.21 tokens per second)
llama_print_timings:        eval time =  5356.49 ms /    62 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  6153.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The interrogator asked, "Here's a question: if the Three Body Fleet is travelling at one-tenth of the speed of light, it should take forty years to reach the Sun and yet you say that it would need four hundred years. Why?"



llama_print_timings:        load time =   618.53 ms
llama_print_timings:      sample time =    26.19 ms /    57 runs   (    0.46 ms per token,  2176.32 tokens per second)
llama_print_timings: prompt eval time =   618.48 ms /    66 tokens (    9.37 ms per token,   106.71 tokens per second)
llama_print_timings:        eval time =  4730.72 ms /    56 runs   (   84.48 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  5470.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$叶文洁：The Three-Body fleet is huge and their acceleration speed is slow, reaching only 1/10th of the light speed. They need a lot of time to gather antimatter for the accelerating process. Therefore, they cannot make fast progress.



llama_print_timings:        load time =   799.73 ms
llama_print_timings:      sample time =    25.98 ms /    58 runs   (    0.45 ms per token,  2232.40 tokens per second)
llama_print_timings: prompt eval time =   799.68 ms /   172 tokens (    4.65 ms per token,   215.09 tokens per second)
llama_print_timings:        eval time =  4871.67 ms /    57 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  5789.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, what exactly did you mean by "almost" in that sentence?



llama_print_timings:        load time =   576.93 ms
llama_print_timings:      sample time =     7.53 ms /    17 runs   (    0.44 ms per token,  2257.94 tokens per second)
llama_print_timings: prompt eval time =   576.89 ms /    41 tokens (   14.07 ms per token,    71.07 tokens per second)
llama_print_timings:        eval time =  1368.30 ms /    16 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1978.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie: We are discussing the speed of space travel within a limited range. Beyond this range, even the backward humans have already been able to accelerate some matter into almost the speed of light.



llama_print_timings:        load time =   621.83 ms
llama_print_timings:      sample time =    22.26 ms /    48 runs   (    0.46 ms per token,  2156.72 tokens per second)
llama_print_timings: prompt eval time =   621.79 ms /    69 tokens (    9.01 ms per token,   110.97 tokens per second)
llama_print_timings:        eval time =  4015.70 ms /    47 runs   (   85.44 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  4738.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Asked questioner pauses for a moment: The limits you are referring to, do they refer to the macroscopic level? In the microscopic realm, humans can already accelerate subatomic particles almost to the speed of light, which would be considered as those substance entities you were talking about.



llama_print_timings:        load time =   654.48 ms
llama_print_timings:      sample time =    29.67 ms /    67 runs   (    0.44 ms per token,  2258.17 tokens per second)
llama_print_timings: prompt eval time =   654.44 ms /    82 tokens (    7.98 ms per token,   125.30 tokens per second)
llama_print_timings:        eval time =  5730.10 ms /    66 runs   (   86.82 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  6520.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$葉文潔：You are very smart.



llama_print_timings:        load time =   576.37 ms
llama_print_timings:      sample time =     4.81 ms /    11 runs   (    0.44 ms per token,  2286.43 tokens per second)
llama_print_timings: prompt eval time =   576.34 ms /    37 tokens (   15.58 ms per token,    64.20 tokens per second)
llama_print_timings:        eval time =   825.98 ms /    10 runs   (   82.60 ms per token,    12.11 tokens per second)
llama_print_timings:       total time =  1423.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The expert is standing right behind me.



llama_print_timings:        load time =   578.75 ms
llama_print_timings:      sample time =     3.96 ms /     9 runs   (    0.44 ms per token,  2272.15 tokens per second)
llama_print_timings: prompt eval time =   578.72 ms /    45 tokens (   12.86 ms per token,    77.76 tokens per second)
llama_print_timings:        eval time =   705.15 ms /     8 runs   (   88.14 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  1301.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenhuan: Yes, they are microscopic particles. Six years ago, in the distant Semigallians galaxy, a三体世界 accelerated two hydrogen nuclei to almost light speed and fired them toward our solar system. The two protons, which arrived at our solar system just two years ago, then reached Earth.



llama_print_timings:        load time =   651.30 ms
llama_print_timings:      sample time =    33.75 ms /    74 runs   (    0.46 ms per token,  2192.53 tokens per second)
llama_print_timings: prompt eval time =   651.25 ms /    93 tokens (    7.00 ms per token,   142.80 tokens per second)
llama_print_timings:        eval time =  6255.68 ms /    73 runs   (   85.69 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  7065.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Examiner: Two protons? They only sent us two protons? That's practically nothing.



llama_print_timings:        load time =   605.76 ms
llama_print_timings:      sample time =    10.79 ms /    24 runs   (    0.45 ms per token,  2223.25 tokens per second)
llama_print_timings: prompt eval time =   605.72 ms /    54 tokens (   11.22 ms per token,    89.15 tokens per second)
llama_print_timings:        eval time =  1969.12 ms /    23 runs   (   85.61 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  2625.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Le Wenjie (smiling): You also said “almost”. Only the Three Body World has this ability, and they can only send two protons that are this big close to light speed to a distance of four light years.



llama_print_timings:        load time =   628.20 ms
llama_print_timings:      sample time =    22.49 ms /    50 runs   (    0.45 ms per token,  2222.91 tokens per second)
llama_print_timings: prompt eval time =   628.16 ms /    77 tokens (    8.16 ms per token,   122.58 tokens per second)
llama_print_timings:        eval time =  4055.22 ms /    49 runs   (   82.76 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =  4787.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the macro world, two protons are equivalent to nothing at all — even a hair of a bacterium contains tens of billions of protons. What does this mean?



llama_print_timings:        load time =   621.51 ms
llama_print_timings:      sample time =    17.18 ms /    38 runs   (    0.45 ms per token,  2211.62 tokens per second)
llama_print_timings: prompt eval time =   621.47 ms /    66 tokens (    9.42 ms per token,   106.20 tokens per second)
llama_print_timings:        eval time =  3155.71 ms /    37 runs   (   85.29 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  3856.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It's a lock.



llama_print_timings:        load time =   577.29 ms
llama_print_timings:      sample time =     3.06 ms /     7 runs   (    0.44 ms per token,  2290.58 tokens per second)
llama_print_timings: prompt eval time =   577.26 ms /    37 tokens (   15.60 ms per token,    64.10 tokens per second)
llama_print_timings:        eval time =   505.74 ms /     6 runs   (   84.29 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1097.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A: A lock.



llama_print_timings:        load time =   571.46 ms
llama_print_timings:      sample time =     2.79 ms /     6 runs   (    0.46 ms per token,  2152.85 tokens per second)
llama_print_timings: prompt eval time =   571.42 ms /    38 tokens (   15.04 ms per token,    66.50 tokens per second)
llama_print_timings:        eval time =   409.86 ms /     5 runs   (   81.97 ms per token,    12.20 tokens per second)
llama_print_timings:       total time =   994.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The science that locks down humanity will not make any major progress until the arrival of two protons, which is said to be within half a century from now. The date has been proclaimed by Prof. Imwins as "the day of the death of mankind's science".



llama_print_timings:        load time =   650.52 ms
llama_print_timings:      sample time =    27.81 ms /    61 runs   (    0.46 ms per token,  2193.69 tokens per second)
llama_print_timings: prompt eval time =   650.48 ms /    87 tokens (    7.48 ms per token,   133.75 tokens per second)
llama_print_timings:        eval time =  5129.56 ms /    60 runs   (   85.49 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  5908.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

**Asker:** This is so unbelievable, how did you manage to do it?



llama_print_timings:        load time =   573.72 ms
llama_print_timings:      sample time =    10.26 ms /    23 runs   (    0.45 ms per token,  2241.50 tokens per second)
llama_print_timings: prompt eval time =   573.68 ms /    44 tokens (   13.04 ms per token,    76.70 tokens per second)
llama_print_timings:        eval time =  1936.19 ms /    22 runs   (   88.01 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  2556.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie: I don't know. I really don't know. In the eyes of the Trisolarian civilization, we might not even be considered as humans at all; in their eyes we might just be a bunch of bugs.



llama_print_timings:        load time =   597.88 ms
llama_print_timings:      sample time =    25.12 ms /    56 runs   (    0.45 ms per token,  2229.12 tokens per second)
llama_print_timings: prompt eval time =   597.69 ms /    56 tokens (   10.67 ms per token,    93.69 tokens per second)
llama_print_timings:        eval time =  4761.74 ms /    55 runs   (   86.58 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  5476.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   583.46 ms
llama_print_timings:      sample time =     3.39 ms /     7 runs   (    0.48 ms per token,  2066.12 tokens per second)
llama_print_timings: prompt eval time =   583.44 ms /    45 tokens (   12.97 ms per token,    77.13 tokens per second)
llama_print_timings:        eval time =   507.87 ms /     6 runs   (   84.64 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  1106.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They had just left the battle center when it was nearly midnight, having just listened to a conversation they heard above.



llama_print_timings:        load time =   586.78 ms
llama_print_timings:      sample time =    11.31 ms /    25 runs   (    0.45 ms per token,  2210.63 tokens per second)
llama_print_timings: prompt eval time =   586.74 ms /    49 tokens (   11.97 ms per token,    83.51 tokens per second)
llama_print_timings:        eval time =  2039.72 ms /    24 runs   (   84.99 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2678.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do you believe what Yvonne said?



llama_print_timings:        load time =   583.09 ms
llama_print_timings:      sample time =     4.47 ms /    10 runs   (    0.45 ms per token,  2237.64 tokens per second)
llama_print_timings: prompt eval time =   583.05 ms /    43 tokens (   13.56 ms per token,    73.75 tokens per second)
llama_print_timings:        eval time =   815.69 ms /     9 runs   (   90.63 ms per token,    11.03 tokens per second)
llama_print_timings:       total time =  1419.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Are you okay?



llama_print_timings:        load time =   567.19 ms
llama_print_timings:      sample time =     2.21 ms /     5 runs   (    0.44 ms per token,  2263.47 tokens per second)
llama_print_timings: prompt eval time =   567.15 ms /    37 tokens (   15.33 ms per token,    65.24 tokens per second)
llama_print_timings:        eval time =   335.67 ms /     4 runs   (   83.92 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =   912.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"There are some things that are just too bizarre to believe, but using two protons to immobilize the entire human race? That's...



llama_print_timings:        load time =   609.20 ms
llama_print_timings:      sample time =    15.91 ms /    34 runs   (    0.47 ms per token,  2137.69 tokens per second)
llama_print_timings: prompt eval time =   609.15 ms /    57 tokens (   10.69 ms per token,    93.57 tokens per second)
llama_print_timings:        eval time =  2875.83 ms /    33 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  3559.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“First of all, the Three-Body Civilization sent two protons from the star Orion to reach Earth! From four light years away? This is an incredible shot that literally reached here and missed it by a hair. It’s like shooting at a fly on Medusa’s Head with a bullet from Neptune while both are moving in space.”



llama_print_timings:        load time =   691.86 ms
llama_print_timings:      sample time =    33.99 ms /    76 runs   (    0.45 ms per token,  2235.82 tokens per second)
llama_print_timings: prompt eval time =   691.82 ms /   115 tokens (    6.02 ms per token,   166.23 tokens per second)
llama_print_timings:        eval time =  6463.06 ms /    75 runs   (   86.17 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  7310.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When he heard the word "archer", Wang Miao's heart skipped a beat. "What does this mean?"



llama_print_timings:        load time =   587.36 ms
llama_print_timings:      sample time =    12.23 ms /    27 runs   (    0.45 ms per token,  2208.23 tokens per second)
llama_print_timings: prompt eval time =   587.31 ms /    51 tokens (   11.52 ms per token,    86.84 tokens per second)
llama_print_timings:        eval time =  2201.81 ms /    26 runs   (   84.68 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2845.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know. In your mind, what does a proton, neutron and electron look like as microscopic particles?



llama_print_timings:        load time =   612.92 ms
llama_print_timings:      sample time =    13.16 ms /    29 runs   (    0.45 ms per token,  2204.32 tokens per second)
llama_print_timings: prompt eval time =   612.88 ms /    54 tokens (   11.35 ms per token,    88.11 tokens per second)
llama_print_timings:        eval time =  2377.31 ms /    28 runs   (   84.90 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3050.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Almost a point, but it is structured.



llama_print_timings:        load time =   575.91 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2253.52 tokens per second)
llama_print_timings: prompt eval time =   575.88 ms /    42 tokens (   13.71 ms per token,    72.93 tokens per second)
llama_print_timings:        eval time =   977.34 ms /    11 runs   (   88.85 ms per token,    11.26 tokens per second)
llama_print_timings:       total time =  1577.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's lucky that the image in my mind is more realistic than the actual thing," said Ding Yi, throwing away his remaining butts, "look over there? What is it?" He pointed to the butt lying on the ground.



llama_print_timings:        load time =   618.84 ms
llama_print_timings:      sample time =    24.63 ms /    55 runs   (    0.45 ms per token,  2232.87 tokens per second)
llama_print_timings: prompt eval time =   618.80 ms /    76 tokens (    8.14 ms per token,   122.82 tokens per second)
llama_print_timings:        eval time =  4669.81 ms /    54 runs   (   86.48 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  5404.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Filter for cigarettes.



llama_print_timings:        load time =   584.33 ms
llama_print_timings:      sample time =     3.54 ms /     8 runs   (    0.44 ms per token,  2257.97 tokens per second)
llama_print_timings: prompt eval time =   584.30 ms /    36 tokens (   16.23 ms per token,    61.61 tokens per second)
llama_print_timings:        eval time =   572.43 ms /     7 runs   (   81.78 ms per token,    12.23 tokens per second)
llama_print_timings:       total time =  1172.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It's a small thing from this distance. What is it?"



llama_print_timings:        load time =   577.64 ms
llama_print_timings:      sample time =     7.10 ms /    16 runs   (    0.44 ms per token,  2252.89 tokens per second)
llama_print_timings: prompt eval time =   577.60 ms /    43 tokens (   13.43 ms per token,    74.45 tokens per second)
llama_print_timings:        eval time =  1331.12 ms /    15 runs   (   88.74 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  1941.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Almost.



llama_print_timings:        load time =   577.11 ms
llama_print_timings:      sample time =     1.78 ms /     4 runs   (    0.44 ms per token,  2248.45 tokens per second)
llama_print_timings: prompt eval time =   577.09 ms /    36 tokens (   16.03 ms per token,    62.38 tokens per second)
llama_print_timings:        eval time =   252.11 ms /     3 runs   (   84.04 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =   837.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yeah.” Wangsen nodded. “So, what is the story with this cigarette? Is it yours?”



llama_print_timings:        load time =   685.96 ms
llama_print_timings:      sample time =    12.85 ms /    28 runs   (    0.46 ms per token,  2179.67 tokens per second)
llama_print_timings: prompt eval time =   685.91 ms /   115 tokens (    5.96 ms per token,   167.66 tokens per second)
llama_print_timings:        eval time =  2341.00 ms /    27 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  3086.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't smoke any kind of cigarettes anymore.



llama_print_timings:        load time =   570.18 ms
llama_print_timings:      sample time =     6.77 ms /    15 runs   (    0.45 ms per token,  2215.66 tokens per second)
llama_print_timings: prompt eval time =   570.14 ms /    38 tokens (   15.00 ms per token,    66.65 tokens per second)
llama_print_timings:        eval time =  1197.11 ms /    14 runs   (   85.51 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1797.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The reason why activated carbon has such a strong adsorption capacity is because of the microporous structure inside. When you put it in water, for example, the outside will absorb water, and the inside will absorb toxins or pollutants,” he explained. “So if there are some toxic substances, activated carbon can help clean them up.”



llama_print_timings:        load time =   676.97 ms
llama_print_timings:      sample time =    36.55 ms /    81 runs   (    0.45 ms per token,  2216.14 tokens per second)
llama_print_timings: prompt eval time =   676.92 ms /   111 tokens (    6.10 ms per token,   163.98 tokens per second)
llama_print_timings:        eval time =  6887.46 ms /    80 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  7734.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You want to say something?



llama_print_timings:        load time =   581.95 ms
llama_print_timings:      sample time =     3.05 ms /     7 runs   (    0.44 ms per token,  2292.83 tokens per second)
llama_print_timings: prompt eval time =   581.90 ms /    41 tokens (   14.19 ms per token,    70.46 tokens per second)
llama_print_timings:        eval time =   503.73 ms /     6 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1099.62 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The sponge or activated carbon in the filter is three-dimensional, but their adsorption surfaces are two-dimensional. Thus, a small three-dimensional structure can store an enormous two-dimensional structure. But there's a limit to how much higher dimension can be contained within the lower dimensional space since God is stingy and only gave us three dimensions of space in the Big Bang. But this does not mean that higher dimensions do not exist, as there are eight more dimensions trapped within the microcosm, plus the three-dimensional space in the macrocosm, thus, a total of eleven dimensions exist in the universe.”



llama_print_timings:        load time =   755.16 ms
llama_print_timings:      sample time =    62.51 ms /   136 runs   (    0.46 ms per token,  2175.76 tokens per second)
llama_print_timings: prompt eval time =   755.12 ms /   145 tokens (    5.21 ms per token,   192.02 tokens per second)
llama_print_timings:        eval time = 11692.49 ms /   135 runs   (   86.61 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 12736.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So what?



llama_print_timings:        load time =   577.78 ms
llama_print_timings:      sample time =     1.76 ms /     4 runs   (    0.44 ms per token,  2279.20 tokens per second)
llama_print_timings: prompt eval time =   577.74 ms /    35 tokens (   16.51 ms per token,    60.58 tokens per second)
llama_print_timings:        eval time =   252.46 ms /     3 runs   (   84.15 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =   837.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I only want to point out the following fact: an important marker of a technological civilization's level is its ability to manipulate and control microscopic dimensions. This control over chemical reactions began from our ancestors who, having found fire while huddling in caves, began to manipulate particles on one dimension at a time; they continued with steam engines, then generators, until now when humans can manipulate particles on a one-dimensional level with computers and nano materials. Unfortunately for them, the whole of this is limited to one-dimensional control over microscopic dimensions in the cosmos, which they consider inferior to their higher-level civilizations; alas, they are right.



llama_print_timings:        load time =   861.22 ms
llama_print_timings:      sample time =    65.04 ms /   145 runs   (    0.45 ms per token,  2229.36 tokens per second)
llama_print_timings: prompt eval time =   861.18 ms /   213 tokens (    4.04 ms per token,   247.34 tokens per second)
llama_print_timings:        eval time = 12640.47 ms /   144 runs   (   87.78 ms per token,    11.39 tokens per second)
llama_print_timings:       total time = 13805.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Can't you be more specific? What does this have to do with those two protons? After all, these two protons reached the Earth. They can do what?”



llama_print_timings:        load time =   676.95 ms
llama_print_timings:      sample time =    17.37 ms /    38 runs   (    0.46 ms per token,  2188.31 tokens per second)
llama_print_timings: prompt eval time =   676.92 ms /   109 tokens (    6.21 ms per token,   161.02 tokens per second)
llama_print_timings:        eval time =  3258.90 ms /    37 runs   (   88.08 ms per token,    11.35 tokens per second)
llama_print_timings:       total time =  4014.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I don't feel it, they are converted into energy in the fingers of the bacteria and that bacterium may not even feel anything.”



llama_print_timings:        load time =   590.99 ms
llama_print_timings:      sample time =    15.53 ms /    33 runs   (    0.47 ms per token,  2125.47 tokens per second)
llama_print_timings: prompt eval time =   590.95 ms /    56 tokens (   10.55 ms per token,    94.76 tokens per second)
llama_print_timings:        eval time =  2672.64 ms /    32 runs   (   83.52 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  3334.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What were you going to say just now?



llama_print_timings:        load time =   578.81 ms
llama_print_timings:      sample time =     4.40 ms /    10 runs   (    0.44 ms per token,  2274.28 tokens per second)
llama_print_timings: prompt eval time =   578.76 ms /    36 tokens (   16.08 ms per token,    62.20 tokens per second)
llama_print_timings:        eval time =   759.09 ms /     9 runs   (   84.34 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1359.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't have anything to say, I know nothing. What can a bug know?



llama_print_timings:        load time =   576.50 ms
llama_print_timings:      sample time =     9.12 ms /    20 runs   (    0.46 ms per token,  2191.78 tokens per second)
llama_print_timings: prompt eval time =   576.46 ms /    46 tokens (   12.53 ms per token,    79.80 tokens per second)
llama_print_timings:        eval time =  1633.18 ms /    19 runs   (   85.96 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  2252.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are a physicist among bugs, and you know more than me. About this matter, at least you haven't been as confused as I am. Even if I asked for help, maybe tonight I won't sleep well.”



llama_print_timings:        load time =   629.61 ms
llama_print_timings:      sample time =    22.92 ms /    52 runs   (    0.44 ms per token,  2268.56 tokens per second)
llama_print_timings: prompt eval time =   629.57 ms /    73 tokens (    8.62 ms per token,   115.95 tokens per second)
llama_print_timings:        eval time =  4394.36 ms /    51 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  5129.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“If I keep talking, you probably won't sleep well. Forget it and learn to be like Wei Cheng and Dashu, who are so cool and accomplish their goals in their own way.”



llama_print_timings:        load time =   659.01 ms
llama_print_timings:      sample time =    19.64 ms /    44 runs   (    0.45 ms per token,  2239.87 tokens per second)
llama_print_timings: prompt eval time =   658.97 ms /    85 tokens (    7.75 ms per token,   128.99 tokens per second)
llama_print_timings:        eval time =  3758.97 ms /    43 runs   (   87.42 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  4507.95 ms


translated 73.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“No problem, I’m not radioactive anymore.” Shi Qiang spoke to Wang Sen who sat next to him. “These past few days, they searched everywhere for me but still couldn't find anything, hehe.”



llama_print_timings:        load time =   660.31 ms
llama_print_timings:      sample time =    23.31 ms /    51 runs   (    0.46 ms per token,  2188.09 tokens per second)
llama_print_timings: prompt eval time =   660.28 ms /    97 tokens (    6.81 ms per token,   146.91 tokens per second)
llama_print_timings:        eval time =  4341.70 ms /    50 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  5110.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Strong said, reaching into the ashtray on the conference table and taking out a cigar butt, lighting it up, nodding his head in contentment while letting the smoke wash over to the faces of all the participants, including Captain Stanley Don, an American marine corps colonel who gave him a disdainful look.



llama_print_timings:        load time =   678.10 ms
llama_print_timings:      sample time =    33.09 ms /    73 runs   (    0.45 ms per token,  2206.10 tokens per second)
llama_print_timings: prompt eval time =   678.05 ms /   107 tokens (    6.34 ms per token,   157.80 tokens per second)
llama_print_timings:        eval time =  6234.14 ms /    72 runs   (   86.59 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  7063.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

There were more foreign soldiers present this time, and all of them were wearing military uniforms. This was the first time in human history that all the world's armed forces would confront a common enemy.



llama_print_timings:        load time =   598.31 ms
llama_print_timings:      sample time =    19.51 ms /    43 runs   (    0.45 ms per token,  2203.88 tokens per second)
llama_print_timings: prompt eval time =   598.26 ms /    59 tokens (   10.14 ms per token,    98.62 tokens per second)
llama_print_timings:        eval time =  3620.87 ms /    42 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  4308.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

General Chang Weisi said, "Comrades, everyone present has a basic understanding of the current situation. According to Dr. Shi's words, information is equalized. The war against the alien invaders from outer space has begun. Although our descendants will truly face the Tertis invaders four and a half centuries later, we are still fighting against these humans who have betrayed their own species; however, in essence, these humans can also be considered to be enemies coming from outside the earth's civilization. Our next target is clear: to capture the ‘Judgment Day’ transport ship that has been intercepted and seize the crucial information contained within it. These pieces of information may have great significance for the survival of humanity."



llama_print_timings:        load time =   801.44 ms
llama_print_timings:      sample time =    72.94 ms /   162 runs   (    0.45 ms per token,  2221.16 tokens per second)
llama_print_timings: prompt eval time =   801.39 ms /   170 tokens (    4.71 ms per token,   212.13 tokens per second)
llama_print_timings:        eval time = 14126.20 ms /   161 runs   (   87.74 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 15265.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We haven’t bothered to activate the Judgement Day, and the ship is currently sailing in a legal manner across the Atlantic Ocean. It has applied to the Panama Canal Authority for entry through the canal four days from now. This is an excellent opportunity for us to act. As with any situation, there is no guarantee that such an opportunity will ever come again. Now, various operational centers around the world are working on their plans and these ones will be selected and decided by headquarters within ten hours. Our task at this meeting is to discuss and narrow down the options for submission to headquarters.”



llama_print_timings:        load time =   765.27 ms
llama_print_timings:      sample time =    56.69 ms /   128 runs   (    0.44 ms per token,  2257.89 tokens per second)
llama_print_timings: prompt eval time =   765.24 ms /   154 tokens (    4.97 ms per token,   201.24 tokens per second)
llama_print_timings:        eval time = 11042.12 ms /   127 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 12068.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Please note that all the plans must ensure the following: ensure the security of ‘Judgment Day’ information and seize it. ‘Judgment Day’ is a cargo ship modified as an oil tanker, with additional structures added to both the hull and the interior which are said to require the crew to use maps while navigating unfamiliar regions. We have little understanding of ‘Judgment Day’ computer center’s exact location or whether ‘Judgments Day’ information is stored in the server at the computer center, nor do we know the number of backup copies. The only way to achieve our goals is by fully capturing and controlling ‘Judgements Day’, which is the most difficult part, as it involves avoiding enemy deletion of the ‘Judgments Day’ information during the attack operation. Deletion of this information is extremely easy, as the enemy may not delete it in an emergency because recovery is relatively easy; however, they must lose their ability to operate within a short period of time after the target is


llama_print_timings:        load time =  1089.83 ms
llama_print_timings:      sample time =   126.96 ms /   283 runs   (    0.45 ms per token,  2228.98 tokens per second)
llama_print_timings: prompt eval time =  1089.78 ms /   319 tokens (    3.42 ms per token,   292.72 tokens per second)
llama_print_timings:        eval time = 25199.83 ms /   282 runs   (   89.36 ms per token,    11.19 tokens per second)
llama_print_timings:       total time = 26888.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

One Japanese Self-Defense Force officer said, "We believe the only possible successful operation is to take advantage of our reconnaissance personnel who are familiar with the storage location of information aboard the Judgement Day's internal structures before action.



llama_print_timings:        load time =   662.66 ms
llama_print_timings:      sample time =    23.16 ms /    52 runs   (    0.45 ms per token,  2244.77 tokens per second)
llama_print_timings: prompt eval time =   662.63 ms /    82 tokens (    8.08 ms per token,   123.75 tokens per second)
llama_print_timings:        eval time =  4402.48 ms /    51 runs   (   86.32 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  5171.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Someone asked, “Is there a sleeper inside the NATO intelligence agency and CIA for ‘Judgment Day’?”



llama_print_timings:        load time =   609.45 ms
llama_print_timings:      sample time =    12.79 ms /    29 runs   (    0.44 ms per token,  2266.69 tokens per second)
llama_print_timings: prompt eval time =   609.41 ms /    60 tokens (   10.16 ms per token,    98.46 tokens per second)
llama_print_timings:        eval time =  2387.90 ms /    28 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  3057.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

There is no.



llama_print_timings:        load time =   584.53 ms
llama_print_timings:      sample time =     2.23 ms /     5 runs   (    0.45 ms per token,  2241.15 tokens per second)
llama_print_timings: prompt eval time =   584.49 ms /    38 tokens (   15.38 ms per token,    65.01 tokens per second)
llama_print_timings:        eval time =   307.98 ms /     4 runs   (   77.00 ms per token,    12.99 tokens per second)
llama_print_timings:       total time =   903.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That’s just nonsense.” said Dash with a wave of his hand. “We have so much more to talk about.”



llama_print_timings:        load time =   600.56 ms
llama_print_timings:      sample time =    13.68 ms /    30 runs   (    0.46 ms per token,  2192.50 tokens per second)
llama_print_timings: prompt eval time =   600.52 ms /    53 tokens (   11.33 ms per token,    88.26 tokens per second)
llama_print_timings:        eval time =  2450.85 ms /    29 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  3114.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Colonel Stone said, "To destroy a closed structure and its occupants without damaging other facilities, we first came to mind ball lightning weapons."



llama_print_timings:        load time =   606.23 ms
llama_print_timings:      sample time =    14.21 ms /    32 runs   (    0.44 ms per token,  2251.30 tokens per second)
llama_print_timings: prompt eval time =   606.19 ms /    63 tokens (    9.62 ms per token,   103.93 tokens per second)
llama_print_timings:        eval time =  2660.51 ms /    31 runs   (   85.82 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3332.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, ” said Ding Yi shaking his head. “This type of weapon is known to everyone; we don’t know if the ship has a shield magnetic bolt of bolide; even if it does not, bolides cannot guarantee exterminate within a short time period; and, even when they release their energy, bolides can remain in the air for some seconds before being released; therefore, they will have enough time to take destructive action against us.”



llama_print_timings:        load time =   736.49 ms
llama_print_timings:      sample time =    45.15 ms /   100 runs   (    0.45 ms per token,  2214.74 tokens per second)
llama_print_timings: prompt eval time =   736.45 ms /   132 tokens (    5.58 ms per token,   179.24 tokens per second)
llama_print_timings:        eval time =  8520.08 ms /    99 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  9468.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Colonel Stone said, “So what about the Chinese bullet?”



llama_print_timings:        load time =   578.91 ms
llama_print_timings:      sample time =     6.41 ms /    14 runs   (    0.46 ms per token,  2182.73 tokens per second)
llama_print_timings: prompt eval time =   578.88 ms /    42 tokens (   13.78 ms per token,    72.55 tokens per second)
llama_print_timings:        eval time =  1143.99 ms /    13 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  1753.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Colonel, you should know that's not possible!” a Russian officer said. “Nuclear radiation can't instantly kill, and the remaining enemies on board have enough time to hold this meeting. ”



llama_print_timings:        load time =   619.98 ms
llama_print_timings:      sample time =    20.49 ms /    45 runs   (    0.46 ms per token,  2196.51 tokens per second)
llama_print_timings: prompt eval time =   619.94 ms /    74 tokens (    8.38 ms per token,   119.37 tokens per second)
llama_print_timings:        eval time =  3771.45 ms /    44 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  4485.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Another option would be neurotoxins, but they have a release and diffusion process within the ship that wouldn't achieve the desired result.



llama_print_timings:        load time =   601.79 ms
llama_print_timings:      sample time =    14.55 ms /    32 runs   (    0.45 ms per token,  2198.86 tokens per second)
llama_print_timings: prompt eval time =   601.74 ms /    61 tokens (    9.86 ms per token,   101.37 tokens per second)
llama_print_timings:        eval time =  2667.01 ms /    31 runs   (   86.03 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3334.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The rest of the options are bombs and sub-sonic weapons.” said Colonel Stanton, who waited in anticipation for his next sentence, but it was never uttered.



llama_print_timings:        load time =   610.56 ms
llama_print_timings:      sample time =    17.31 ms /    39 runs   (    0.44 ms per token,  2252.90 tokens per second)
llama_print_timings: prompt eval time =   610.51 ms /    63 tokens (    9.69 ms per token,   103.19 tokens per second)
llama_print_timings:        eval time =  3266.28 ms /    38 runs   (   85.95 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3957.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Big Story said, "The shake bombs are the equipment used by our police, they can actually knock out people inside a building at once, but they may only work for a single room at present."



llama_print_timings:        load time =   631.68 ms
llama_print_timings:      sample time =    19.49 ms /    44 runs   (    0.44 ms per token,  2257.22 tokens per second)
llama_print_timings: prompt eval time =   631.63 ms /    77 tokens (    8.20 ms per token,   121.91 tokens per second)
llama_print_timings:        eval time =  3638.26 ms /    43 runs   (   84.61 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  4357.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

No, even if there was, that much explosive wouldn't be able to destroy the facilities on board.



llama_print_timings:        load time =   597.08 ms
llama_print_timings:      sample time =    10.74 ms /    24 runs   (    0.45 ms per token,  2234.22 tokens per second)
llama_print_timings: prompt eval time =   597.02 ms /    55 tokens (   10.85 ms per token,    92.12 tokens per second)
llama_print_timings:        eval time =  1988.40 ms /    23 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2635.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What about sub-sonic weapons?” someone asks.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The weapon is still in the experimental stage and cannot be used in combat. Specifically, that ship is too large to be effectively attacked simultaneously with the current test levels of secondary sonar weapons, at least it will not cause any serious damage.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Haha, said the General with a cigar no bigger than a pea, "I guess we've been going on about this too long now. Just remember the Commander-in-Chief's words: time is of the essence! Now come on soldier, you know what to do." With a broad smile he turned to the interpreter, a handsome female lieutenant, and said, "Get back, soldier. We got our message across."



llama_print_timings:        load time =   677.99 ms
llama_print_timings:      sample time =    42.85 ms /    95 runs   (    0.45 ms per token,  2216.83 tokens per second)
llama_print_timings: prompt eval time =   677.85 ms /   102 tokens (    6.65 ms per token,   150.48 tokens per second)
llama_print_timings:        eval time =  8040.04 ms /    94 runs   (   85.53 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  8918.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Butstanly seemed to understand. He pointed at Shi Qiang and said, “This policeman has no right to speak like this.”



llama_print_timings:        load time =   611.51 ms
llama_print_timings:      sample time =    14.04 ms /    31 runs   (    0.45 ms per token,  2208.13 tokens per second)
llama_print_timings: prompt eval time =   611.49 ms /    61 tokens (   10.02 ms per token,    99.76 tokens per second)
llama_print_timings:        eval time =  2567.95 ms /    30 runs   (   85.60 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3243.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What about your qualifications?" the teacher asked.



llama_print_timings:        load time =   579.60 ms
llama_print_timings:      sample time =     5.33 ms /    12 runs   (    0.44 ms per token,  2250.56 tokens per second)
llama_print_timings: prompt eval time =   579.56 ms /    40 tokens (   14.49 ms per token,    69.02 tokens per second)
llama_print_timings:        eval time =   953.87 ms /    11 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  1558.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Colonel Stanton is a senior special operations expert who has taken part in almost all major military actions since the Vietnam War.



llama_print_timings:        load time =   600.24 ms
llama_print_timings:      sample time =    11.84 ms /    26 runs   (    0.46 ms per token,  2195.58 tokens per second)
llama_print_timings: prompt eval time =   600.21 ms /    58 tokens (   10.35 ms per token,    96.63 tokens per second)
llama_print_timings:        eval time =  2135.36 ms /    25 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  2791.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That tells you my qualifications. Twenty years ago, my reconnaissance platoon penetrated deep into the enemy's rear, occupied a well-fortified hydroelectric plant and prevented them from sabotaging the dam to hamper our offensive. That is why I have qualifications: I conquered and defeated your enemies.



llama_print_timings:        load time =   656.07 ms
llama_print_timings:      sample time =    32.84 ms /    73 runs   (    0.45 ms per token,  2222.56 tokens per second)
llama_print_timings: prompt eval time =   656.03 ms /    94 tokens (    6.98 ms per token,   143.29 tokens per second)
llama_print_timings:        eval time =  6147.88 ms /    72 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  6955.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Enough of this, ” said Chang Weisi, pounding his table with his fist. “Tell me your plan.”



llama_print_timings:        load time =   602.91 ms
llama_print_timings:      sample time =    13.42 ms /    30 runs   (    0.45 ms per token,  2235.47 tokens per second)
llama_print_timings: prompt eval time =   602.87 ms /    55 tokens (   10.96 ms per token,    91.23 tokens per second)
llama_print_timings:        eval time =  2518.81 ms /    29 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  3183.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't see the need to waste my time on this policeman.



llama_print_timings:        load time =   590.17 ms
llama_print_timings:      sample time =     7.69 ms /    17 runs   (    0.45 ms per token,  2209.80 tokens per second)
llama_print_timings: prompt eval time =   590.13 ms /    52 tokens (   11.35 ms per token,    88.12 tokens per second)
llama_print_timings:        eval time =  1387.76 ms /    16 runs   (   86.73 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2012.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Without waiting for the translator, Big Story jumped up and said: "Police! I have heard this word twice now. What's wrong with looking down on the police? To shake a pile of bombs to blow up that big ship into scraps, but if you are able to take something out from inside completely without causing any damage, don't be so arrogant as the soldiers who hold stars on their shoulders. These things can be handled by criminal masters. Know how much they can handle? I have dealt with a case in which criminals can steal a section of running train and make the rest of it complete without any damage to the vehicle, using only a steel wire rope and several iron hooks." "This is special operation expert! But even like me who has been working in criminal investigation for many years at the grassroots, I have received the best training and education from them.



llama_print_timings:        load time =   878.63 ms
llama_print_timings:      sample time =    88.47 ms /   196 runs   (    0.45 ms per token,  2215.34 tokens per second)
llama_print_timings: prompt eval time =   878.58 ms /   223 tokens (    3.94 ms per token,   253.82 tokens per second)
llama_print_timings:        eval time = 17223.08 ms /   195 runs   (   88.32 ms per token,    11.32 tokens per second)
llama_print_timings:       total time = 18518.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Don't speak until you have a plan!



llama_print_timings:        load time =   601.57 ms
llama_print_timings:      sample time =     5.27 ms /    11 runs   (    0.48 ms per token,  2086.89 tokens per second)
llama_print_timings: prompt eval time =   601.54 ms /    50 tokens (   12.03 ms per token,    83.12 tokens per second)
llama_print_timings:        eval time =   882.14 ms /    10 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =  1508.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“There are so many important people here, I was afraid that I might not get a chance to speak my mind. If I had spoken up and you would have disagreed with me, it might look unprofessional of me.”



llama_print_timings:        load time =   595.85 ms
llama_print_timings:      sample time =    22.42 ms /    49 runs   (    0.46 ms per token,  2185.65 tokens per second)
llama_print_timings: prompt eval time =   595.80 ms /    55 tokens (   10.83 ms per token,    92.31 tokens per second)
llama_print_timings:        eval time =  4074.20 ms /    48 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  4773.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have already arrived home! Hurry up. Say your incantation quickly!



llama_print_timings:        load time =   585.21 ms
llama_print_timings:      sample time =     8.47 ms /    18 runs   (    0.47 ms per token,  2125.40 tokens per second)
llama_print_timings: prompt eval time =   585.17 ms /    48 tokens (   12.19 ms per token,    82.03 tokens per second)
llama_print_timings:        eval time =  1474.71 ms /    17 runs   (   86.75 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2099.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$He picked up a pen and drew two parallel lines on the desk. "This is the canal, " he said, placing a cigar ashtray between the lines. "And this is The Day of Judgement." Then he brushed past the desk and tore the cigar out of Stanfield's hand.



llama_print_timings:        load time =   659.36 ms
llama_print_timings:      sample time =    32.73 ms /    71 runs   (    0.46 ms per token,  2169.20 tokens per second)
llama_print_timings: prompt eval time =   659.30 ms /    92 tokens (    7.17 ms per token,   139.54 tokens per second)
llama_print_timings:        eval time =  5950.61 ms /    70 runs   (   85.01 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  6761.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I can't tolerate this idiot!” the captain shouted.



llama_print_timings:        load time =   584.95 ms
llama_print_timings:      sample time =     7.73 ms /    17 runs   (    0.45 ms per token,  2198.94 tokens per second)
llama_print_timings: prompt eval time =   584.91 ms /    45 tokens (   13.00 ms per token,    76.93 tokens per second)
llama_print_timings:        eval time =  1374.65 ms /    16 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  1994.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Shiqiang, go outside.” said CWX firmly.



llama_print_timings:        load time =   580.18 ms
llama_print_timings:      sample time =     7.51 ms /    17 runs   (    0.44 ms per token,  2264.85 tokens per second)
llama_print_timings: prompt eval time =   580.15 ms /    43 tokens (   13.49 ms per token,    74.12 tokens per second)
llama_print_timings:        eval time =  1407.08 ms /    16 runs   (   87.94 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  2020.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Wait until I finish speaking, and then we'll have a handshake for one minute," said Dashi. "But first, let's start our conversation in English."



llama_print_timings:        load time =   594.36 ms
llama_print_timings:      sample time =    17.27 ms /    39 runs   (    0.44 ms per token,  2258.91 tokens per second)
llama_print_timings: prompt eval time =   594.32 ms /    51 tokens (   11.65 ms per token,    85.81 tokens per second)
llama_print_timings:        eval time =  3176.80 ms /    38 runs   (   83.60 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  3850.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What? The Colonel did not understand.



llama_print_timings:        load time =   577.28 ms
llama_print_timings:      sample time =     3.98 ms /     9 runs   (    0.44 ms per token,  2262.44 tokens per second)
llama_print_timings: prompt eval time =   577.26 ms /    39 tokens (   14.80 ms per token,    67.56 tokens per second)
llama_print_timings:        eval time =   698.41 ms /     8 runs   (   87.30 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  1293.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Give me another one."



llama_print_timings:        load time =   576.40 ms
llama_print_timings:      sample time =     3.73 ms /     8 runs   (    0.47 ms per token,  2146.50 tokens per second)
llama_print_timings: prompt eval time =   576.36 ms /    35 tokens (   16.47 ms per token,    60.73 tokens per second)
llama_print_timings:        eval time =   567.52 ms /     7 runs   (   81.07 ms per token,    12.33 tokens per second)
llama_print_timings:       total time =  1162.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Stanton hesitated before pulling a cigar from a delicate wooden box and passing it to Shi Qian, who positioned the first cigar on his desk horizontally at one end so that it stood in a painting of the Panama Canal on one side, while he smoothed out the other end so that it stood vertically in the image on the opposite side.



llama_print_timings:        load time =   667.10 ms
llama_print_timings:      sample time =    35.95 ms /    80 runs   (    0.45 ms per token,  2225.25 tokens per second)
llama_print_timings: prompt eval time =   667.06 ms /    99 tokens (    6.74 ms per token,   148.41 tokens per second)
llama_print_timings:        eval time =  6848.54 ms /    79 runs   (   86.69 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  7681.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the banks of the canal, two columns were erected. Between these columns, many fine wires were stretched in a parallel manner, with an interval of about half a meter, and those nanomaterials made by Wang Zhenhe and his team were used for them.



llama_print_timings:        load time =   622.02 ms
llama_print_timings:      sample time =    27.04 ms /    59 runs   (    0.46 ms per token,  2181.87 tokens per second)
llama_print_timings: prompt eval time =   621.98 ms /    76 tokens (    8.18 ms per token,   122.19 tokens per second)
llama_print_timings:        eval time =  4883.83 ms /    58 runs   (   84.20 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  5631.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After saying this, Shi Qiang stood still for a few seconds and raised his hands towards the people who were still not reacting. "That's it," he said as he turned and walked away from the podium.



llama_print_timings:        load time =   625.07 ms
llama_print_timings:      sample time =    21.83 ms /    49 runs   (    0.45 ms per token,  2245.03 tokens per second)
llama_print_timings: prompt eval time =   625.03 ms /    66 tokens (    9.47 ms per token,   105.59 tokens per second)
llama_print_timings:        eval time =  4097.86 ms /    48 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  4824.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The air froze, and everyone remained motionless like petrifaction. The hum of the computers around them seemed to be shivering with caution. No one knew how long it was before someone finally broke the silence:



llama_print_timings:        load time =   615.57 ms
llama_print_timings:      sample time =    21.21 ms /    47 runs   (    0.45 ms per token,  2215.52 tokens per second)
llama_print_timings: prompt eval time =   615.52 ms /    70 tokens (    8.79 ms per token,   113.72 tokens per second)
llama_print_timings:        eval time =  3885.20 ms /    46 runs   (   84.46 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  4599.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Professor Jiang, ‘Flying Blade’ is filiform?



llama_print_timings:        load time =   573.59 ms
llama_print_timings:      sample time =     7.54 ms /    17 runs   (    0.44 ms per token,  2253.15 tokens per second)
llama_print_timings: prompt eval time =   573.55 ms /    44 tokens (   13.04 ms per token,    76.71 tokens per second)
llama_print_timings:        eval time =  1373.34 ms /    16 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1982.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Mop nodded, “Using our existing molecular construction technology, we can only produce material in the form of filaments, with a thickness roughly equivalent to one tenth of……Chief Scientist Sun has told me about this before.”



llama_print_timings:        load time =   620.08 ms
llama_print_timings:      sample time =    24.20 ms /    54 runs   (    0.45 ms per token,  2231.04 tokens per second)
llama_print_timings: prompt eval time =   620.06 ms /    76 tokens (    8.16 ms per token,   122.57 tokens per second)
llama_print_timings:        eval time =  4607.57 ms /    53 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  5341.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do you have enough of them?



llama_print_timings:        load time =   566.84 ms
llama_print_timings:      sample time =     3.58 ms /     8 runs   (    0.45 ms per token,  2232.77 tokens per second)
llama_print_timings: prompt eval time =   566.82 ms /    37 tokens (   15.32 ms per token,    65.28 tokens per second)
llama_print_timings:        eval time =   608.82 ms /     7 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  1191.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

How wide is the canal and how tall are the boats?



llama_print_timings:        load time =   576.16 ms
llama_print_timings:      sample time =     6.00 ms /    13 runs   (    0.46 ms per token,  2166.31 tokens per second)
llama_print_timings: prompt eval time =   576.12 ms /    38 tokens (   15.16 ms per token,    65.96 tokens per second)
llama_print_timings:        eval time =  1044.46 ms /    12 runs   (   87.04 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1649.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The canal is narrowest at one hundred and fifty meters, with the ‘Judgment Day’ ship thirty-one meters long, with a water depth of around eight meters.



llama_print_timings:        load time =   606.93 ms
llama_print_timings:      sample time =    16.29 ms /    37 runs   (    0.44 ms per token,  2270.77 tokens per second)
llama_print_timings: prompt eval time =   606.89 ms /    54 tokens (   11.24 ms per token,    88.98 tokens per second)
llama_print_timings:        eval time =  3129.76 ms /    36 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  3812.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wang Sen looked at the cigar on the table and calculated roughly, “Basically it should be enough.”



llama_print_timings:        load time =   590.17 ms
llama_print_timings:      sample time =    10.77 ms /    24 runs   (    0.45 ms per token,  2228.21 tokens per second)
llama_print_timings: prompt eval time =   590.13 ms /    48 tokens (   12.29 ms per token,    81.34 tokens per second)
llama_print_timings:        eval time =  1942.64 ms /    23 runs   (   84.46 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  2584.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The long silence made the participants tried to recover from their shock.



llama_print_timings:        load time =   583.86 ms
llama_print_timings:      sample time =     6.26 ms /    14 runs   (    0.45 ms per token,  2236.06 tokens per second)
llama_print_timings: prompt eval time =   583.84 ms /    48 tokens (   12.16 ms per token,    82.22 tokens per second)
llama_print_timings:        eval time =  1105.02 ms /    13 runs   (   85.00 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  1717.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Q: If the storage device, such as hard drives and CD-ROMs, is also cut?



llama_print_timings:        load time =   579.82 ms
llama_print_timings:      sample time =     9.77 ms /    22 runs   (    0.44 ms per token,  2252.48 tokens per second)
llama_print_timings: prompt eval time =   579.77 ms /    49 tokens (   11.83 ms per token,    84.52 tokens per second)
llama_print_timings:        eval time =  1804.57 ms /    21 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2428.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Not very likely.”



llama_print_timings:        load time =   573.07 ms
llama_print_timings:      sample time =     2.68 ms /     6 runs   (    0.45 ms per token,  2240.48 tokens per second)
llama_print_timings: prompt eval time =   573.03 ms /    35 tokens (   16.37 ms per token,    61.08 tokens per second)
llama_print_timings:        eval time =   420.48 ms /     5 runs   (   84.10 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  1005.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It’s not a big deal to be sliced,” said an expert in computers. “That thin wire is extremely sharp, and it must cut cleanly; at this point the vast majority of the information in a hard disk or optical disc and even in an integrated circuit storage system can be recovered.”



llama_print_timings:        load time =   620.68 ms
llama_print_timings:      sample time =    28.96 ms /    64 runs   (    0.45 ms per token,  2209.56 tokens per second)
llama_print_timings: prompt eval time =   620.63 ms /    78 tokens (    7.96 ms per token,   125.68 tokens per second)
llama_print_timings:        eval time =  5369.19 ms /    63 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6122.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Is there any other possible solution?" asked Steven. No one responded and he went on, "Alright, let's focus on this proposal now and get to the details."



llama_print_timings:        load time =   613.15 ms
llama_print_timings:      sample time =    17.69 ms /    39 runs   (    0.45 ms per token,  2205.01 tokens per second)
llama_print_timings: prompt eval time =   613.10 ms /    65 tokens (    9.43 ms per token,   106.02 tokens per second)
llama_print_timings:        eval time =  3247.35 ms /    38 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3941.85 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He stood up and said, "I'm going to call the police officer back."



llama_print_timings:        load time =   589.08 ms
llama_print_timings:      sample time =     8.81 ms /    19 runs   (    0.46 ms per token,  2155.42 tokens per second)
llama_print_timings: prompt eval time =   589.03 ms /    47 tokens (   12.53 ms per token,    79.79 tokens per second)
llama_print_timings:        eval time =  1569.17 ms /    18 runs   (   87.18 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2199.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$Constant Thoughts waved him to take a seat, and then called out to Steven Strong, "Steven!" Steven came in with that familiar mocking smile and took two cigars on the table beside the canal, placing one into his mouth after lighting it up, and holding the other behind his back.



llama_print_timings:        load time =   649.64 ms
llama_print_timings:      sample time =    30.09 ms /    68 runs   (    0.44 ms per token,  2259.59 tokens per second)
llama_print_timings: prompt eval time =   649.60 ms /    89 tokens (    7.30 ms per token,   137.01 tokens per second)
llama_print_timings:        eval time =  5913.72 ms /    67 runs   (   88.26 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =  6705.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Q: “When the trial day passed, those two pillars can withstand ‘flaming sword’? Won't they be cut first?”



llama_print_timings:        load time =   611.02 ms
llama_print_timings:      sample time =    14.48 ms /    32 runs   (    0.45 ms per token,  2209.18 tokens per second)
llama_print_timings: prompt eval time =   610.98 ms /    63 tokens (    9.70 ms per token,   103.11 tokens per second)
llama_print_timings:        eval time =  2657.11 ms /    31 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3337.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said, "This can be solved. There are some small pieces of 'flying-blade' material that can be used as pads to fasten the filaments onto a shaft."



llama_print_timings:        load time =   614.82 ms
llama_print_timings:      sample time =    20.21 ms /    42 runs   (    0.48 ms per token,  2078.49 tokens per second)
llama_print_timings: prompt eval time =   614.79 ms /    64 tokens (    9.61 ms per token,   104.10 tokens per second)
llama_print_timings:        eval time =  3414.11 ms /    41 runs   (   83.27 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  4125.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The following discussion was conducted among naval officers and marine experts.



llama_print_timings:        load time =   582.18 ms
llama_print_timings:      sample time =     6.18 ms /    14 runs   (    0.44 ms per token,  2264.64 tokens per second)
llama_print_timings: prompt eval time =   582.14 ms /    42 tokens (   13.86 ms per token,    72.15 tokens per second)
llama_print_timings:        eval time =  1126.56 ms /    13 runs   (   86.66 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1736.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The ‘Admiral William P. Mofat’ is the biggest tanker that can pass through the Panama Canal, with a deep hull so extra care has to be taken in installing nanostructured coatings underwater.



llama_print_timings:        load time =   623.46 ms
llama_print_timings:      sample time =    22.83 ms /    50 runs   (    0.46 ms per token,  2190.39 tokens per second)
llama_print_timings: prompt eval time =   623.41 ms /    66 tokens (    9.45 ms per token,   105.87 tokens per second)
llama_print_timings:        eval time =  4136.04 ms /    49 runs   (   84.41 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  4865.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The submerged part is more difficult, if time is not enough we can give up. There the engine, fuel and some ballast are located. Noise, vibration and disturbance are all very big. The environment is also poor, so it is not likely to have computer centers and similar institutions in that location.”



llama_print_timings:        load time =   662.35 ms
llama_print_timings:      sample time =    31.84 ms /    69 runs   (    0.46 ms per token,  2167.22 tokens per second)
llama_print_timings: prompt eval time =   662.31 ms /    99 tokens (    6.69 ms per token,   149.48 tokens per second)
llama_print_timings:        eval time =  5877.80 ms /    68 runs   (   86.44 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  6690.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The best one to move is the one among the three ship locks. ‘Judgment Day’ is a panamax ship (note: the name refers to 32 meters-wide ships designed to pass through the Panama Canal). The exact distance it moves when passing through the lock is just thirty-two meters, which is ideal for the operation of the ‘Swordblade’ silk. And if there are additional requirements for precision in terms of the distance and positioning, the panamax type ship is more suitable than others.”



llama_print_timings:        load time =   736.20 ms
llama_print_timings:      sample time =    52.85 ms /   115 runs   (    0.46 ms per token,  2176.01 tokens per second)
llama_print_timings: prompt eval time =   736.13 ms /   137 tokens (    5.37 ms per token,   186.11 tokens per second)
llama_print_timings:        eval time =  9915.85 ms /   114 runs   (   86.98 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 10901.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, the situation at the lock is complex. Four locomotives will be needed to pull the ship through, and it will definitely be the most cautious day aboard the Judgmenthip. The ship would surely be spotted while cutting through the water during that time.”



llama_print_timings:        load time =   638.56 ms
llama_print_timings:      sample time =    26.46 ms /    59 runs   (    0.45 ms per token,  2229.36 tokens per second)
llama_print_timings: prompt eval time =   638.51 ms /    78 tokens (    8.19 ms per token,   122.16 tokens per second)
llama_print_timings:        eval time =  4985.15 ms /    58 runs   (   85.95 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  5748.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is it possible to consider the Miraflores lock outside of American Bridge? The pillars of the bridge can be used as tensioning columns.”



llama_print_timings:        load time =   593.90 ms
llama_print_timings:      sample time =    15.02 ms /    32 runs   (    0.47 ms per token,  2131.20 tokens per second)
llama_print_timings: prompt eval time =   593.88 ms /    56 tokens (   10.60 ms per token,    94.30 tokens per second)
llama_print_timings:        eval time =  2655.05 ms /    31 runs   (   85.65 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  3319.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, the distance between the bridge piers is too wide. The material for ‘flying blades’ definitely isn't enough.”



llama_print_timings:        load time =   589.58 ms
llama_print_timings:      sample time =    13.39 ms /    30 runs   (    0.45 ms per token,  2240.81 tokens per second)
llama_print_timings: prompt eval time =   589.54 ms /    50 tokens (   11.79 ms per token,    84.81 tokens per second)
llama_print_timings:        eval time =  2481.76 ms /    29 runs   (   85.58 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3132.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then we can confirm the position. It is the narrowest part of the Panama Canal, only about 150 meters wide. Let's say it's about 170 meters including column supports.



llama_print_timings:        load time =   645.47 ms
llama_print_timings:      sample time =    20.78 ms /    46 runs   (    0.45 ms per token,  2213.77 tokens per second)
llama_print_timings: prompt eval time =   645.42 ms /    84 tokens (    7.68 ms per token,   130.15 tokens per second)
llama_print_timings:        eval time =  3910.33 ms /    45 runs   (   86.90 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  4651.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“We should make the pitching distance at least 50 centimeters, no less, ” said Wang Sen. “The material is not enough.”



llama_print_timings:        load time =   595.54 ms
llama_print_timings:      sample time =    14.62 ms /    33 runs   (    0.44 ms per token,  2257.18 tokens per second)
llama_print_timings: prompt eval time =   595.50 ms /    55 tokens (   10.83 ms per token,    92.36 tokens per second)
llama_print_timings:        eval time =  2734.58 ms /    32 runs   (   85.46 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  3397.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That is to say, " said the Great Story, exhaling a cigar, "that we need an idea on how to get the ship through the canal in daylight.



llama_print_timings:        load time =   602.66 ms
llama_print_timings:      sample time =    17.03 ms /    38 runs   (    0.45 ms per token,  2231.62 tokens per second)
llama_print_timings: prompt eval time =   602.62 ms /    52 tokens (   11.59 ms per token,    86.29 tokens per second)
llama_print_timings:        eval time =  3189.35 ms /    37 runs   (   86.20 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  3873.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Why?



llama_print_timings:        load time =   582.82 ms
llama_print_timings:      sample time =     1.40 ms /     3 runs   (    0.47 ms per token,  2142.86 tokens per second)
llama_print_timings: prompt eval time =   582.77 ms /    33 tokens (   17.66 ms per token,    56.63 tokens per second)
llama_print_timings:        eval time =   174.42 ms /     2 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =   765.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“People sleep on the ship at night, all lying down. The space of fifty centimeters is too big for them to stretch out their legs during the day either; they sit or kneel just as much as that’s enough.”



llama_print_timings:        load time =   616.68 ms
llama_print_timings:      sample time =    22.61 ms /    51 runs   (    0.44 ms per token,  2255.94 tokens per second)
llama_print_timings: prompt eval time =   616.64 ms /    61 tokens (   10.11 ms per token,    98.92 tokens per second)
llama_print_timings:        eval time =  4346.17 ms /    50 runs   (   86.92 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  5066.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A few sporadic laughs broke through the heavy pressure, and people felt a little bit of ease that was tinged with a hint of blood.



llama_print_timings:        load time =   582.57 ms
llama_print_timings:      sample time =    15.06 ms /    33 runs   (    0.46 ms per token,  2191.53 tokens per second)
llama_print_timings: prompt eval time =   582.53 ms /    51 tokens (   11.42 ms per token,    87.55 tokens per second)
llama_print_timings:        eval time =  2754.70 ms /    32 runs   (   86.08 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3410.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are a devil.” A UN female official told Dashi.



llama_print_timings:        load time =   585.20 ms
llama_print_timings:      sample time =     6.72 ms /    15 runs   (    0.45 ms per token,  2232.48 tokens per second)
llama_print_timings: prompt eval time =   585.15 ms /    45 tokens (   13.00 ms per token,    76.90 tokens per second)
llama_print_timings:        eval time =  1182.07 ms /    14 runs   (   84.43 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  1799.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Won't it harm innocent people?" Wang Sen asked, his voice trembled with a noticeable shaking.



llama_print_timings:        load time =   592.08 ms
llama_print_timings:      sample time =    13.19 ms /    27 runs   (    0.49 ms per token,  2046.69 tokens per second)
llama_print_timings: prompt eval time =   592.06 ms /    52 tokens (   11.39 ms per token,    87.83 tokens per second)
llama_print_timings:        eval time =  2240.86 ms /    26 runs   (   86.19 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2895.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A naval officer replied, "There are about 15 crew members who must be boarded with the ship to pass through the canal. However, after passing, they will immediately leave." A Panama pilot has to accompany the ship all the way along the eighty-two kilometers of canal and surely will have to sacrifice his life.



llama_print_timings:        load time =   636.38 ms
llama_print_timings:      sample time =    32.03 ms /    71 runs   (    0.45 ms per token,  2216.67 tokens per second)
llama_print_timings: prompt eval time =   636.33 ms /    80 tokens (    7.95 ms per token,   125.72 tokens per second)
llama_print_timings:        eval time =  6060.84 ms /    70 runs   (   86.58 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  6848.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The CIA officer said, "There are still a portion of the crew members on trial ship who may not know what this vessel is for."



llama_print_timings:        load time =   599.30 ms
llama_print_timings:      sample time =    13.54 ms /    30 runs   (    0.45 ms per token,  2215.66 tokens per second)
llama_print_timings: prompt eval time =   599.27 ms /    59 tokens (   10.16 ms per token,    98.45 tokens per second)
llama_print_timings:        eval time =  2449.59 ms /    29 runs   (   84.47 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  3111.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Professor, we don’t need to think about these now. This information is relevant to the survival of humanity and who will be making the final decision.” Constant said.



llama_print_timings:        load time =   630.44 ms
llama_print_timings:      sample time =    17.75 ms /    40 runs   (    0.44 ms per token,  2252.89 tokens per second)
llama_print_timings: prompt eval time =   630.39 ms /    67 tokens (    9.41 ms per token,   106.28 tokens per second)
llama_print_timings:        eval time =  3257.69 ms /    39 runs   (   83.53 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  3969.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the meeting, Colonel Stanton pushed the exquisite wooden box of Cuban cigars to Captain Shiqiang in front of him.



llama_print_timings:        load time =   611.51 ms
llama_print_timings:      sample time =    14.29 ms /    31 runs   (    0.46 ms per token,  2169.96 tokens per second)
llama_print_timings: prompt eval time =   611.47 ms /    65 tokens (    9.41 ms per token,   106.30 tokens per second)
llama_print_timings:        eval time =  2622.99 ms /    30 runs   (   87.43 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  3299.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Four days later, the Panama Canal.



llama_print_timings:        load time =   585.21 ms
llama_print_timings:      sample time =     5.01 ms /    11 runs   (    0.46 ms per token,  2197.36 tokens per second)
llama_print_timings: prompt eval time =   585.17 ms /    42 tokens (   13.93 ms per token,    71.77 tokens per second)
llama_print_timings:        eval time =   905.36 ms /    10 runs   (   90.54 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =  1513.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As soon as he got off the plane, Wang Sen felt no sense of being in a foreign country. He knew that to the west was the beautiful Lake Gatun and to the east were the magnificent Panama Canal and the Panamanian city but he had no chance to see them because two days before flying directly from China to Tocumen Military Airport near Panama City, then taking a helicopter here. The scenery in front of him was quite ordinary, as the widening of the canal brought about the exposure of huge swathes of tropical rainforest on both banks with yellow soil appearing on the slopes. The waters of the canal also looked quite ordinary because it seemed to be rather narrow here. This canal was dug by over a million men in the early 20th century.



llama_print_timings:        load time =   803.01 ms
llama_print_timings:      sample time =    76.56 ms /   171 runs   (    0.45 ms per token,  2233.45 tokens per second)
llama_print_timings: prompt eval time =   802.96 ms /   180 tokens (    4.46 ms per token,   224.17 tokens per second)
llama_print_timings:        eval time = 14712.55 ms /   170 runs   (   86.54 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 15877.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sir John and Major Wang were sitting on a bench in the semi-hill. Both of them were wearing loose white shirts with large straw hats, and they appeared to be just two ordinary tourists. From this position, the canal below could be seen entirely.



llama_print_timings:        load time =   625.58 ms
llama_print_timings:      sample time =    26.49 ms /    60 runs   (    0.44 ms per token,  2265.26 tokens per second)
llama_print_timings: prompt eval time =   625.54 ms /    80 tokens (    7.82 ms per token,   127.89 tokens per second)
llama_print_timings:        eval time =  4900.36 ms /    59 runs   (   83.06 ms per token,    12.04 tokens per second)
llama_print_timings:       total time =  5649.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They are both made of nanometer-scale steel fibers. The nanofibers on the right bank connect to ordinary steel cables, while those on the left bank remain loose to allow other vessels to pass. When the last ship from "The judgment day" moves past, the two steel beams will be pulled into an upright position by pulling back the nanofiber strands that connect them to each other. The action is called "Guitar", which is a natural derivation of the shape of the strand-pulling apparatus and the net of nanofibers.



llama_print_timings:        load time =   901.96 ms
llama_print_timings:      sample time =    55.45 ms /   124 runs   (    0.45 ms per token,  2236.13 tokens per second)
llama_print_timings: prompt eval time =   901.93 ms /   230 tokens (    3.92 ms per token,   255.01 tokens per second)
llama_print_timings:        eval time = 10721.90 ms /   123 runs   (   87.17 ms per token,    11.47 tokens per second)
llama_print_timings:       total time = 11888.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

An hour ago, the "Judgment Day" train from Gatineau Lake crossed the Gatineau Canal.



llama_print_timings:        load time =   593.11 ms
llama_print_timings:      sample time =    12.12 ms /    27 runs   (    0.45 ms per token,  2227.54 tokens per second)
llama_print_timings: prompt eval time =   593.06 ms /    52 tokens (   11.41 ms per token,    87.68 tokens per second)
llama_print_timings:        eval time =  2161.61 ms /    26 runs   (   83.14 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =  2809.82 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Steve asks Wang Sun whether he has ever been to Panama before.



llama_print_timings:        load time =   586.96 ms
llama_print_timings:      sample time =     7.44 ms /    16 runs   (    0.46 ms per token,  2150.83 tokens per second)
llama_print_timings: prompt eval time =   586.91 ms /    47 tokens (   12.49 ms per token,    80.08 tokens per second)
llama_print_timings:        eval time =  1308.16 ms /    15 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  1930.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I have been here since 1999.” the captain said.



llama_print_timings:        load time =   580.82 ms
llama_print_timings:      sample time =     7.61 ms /    17 runs   (    0.45 ms per token,  2232.73 tokens per second)
llama_print_timings: prompt eval time =   580.77 ms /    43 tokens (   13.51 ms per token,    74.04 tokens per second)
llama_print_timings:        eval time =  1358.24 ms /    16 runs   (   84.89 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1974.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That war?



llama_print_timings:        load time =   577.38 ms
llama_print_timings:      sample time =     1.75 ms /     4 runs   (    0.44 ms per token,  2281.80 tokens per second)
llama_print_timings: prompt eval time =   577.34 ms /    37 tokens (   15.60 ms per token,    64.09 tokens per second)
llama_print_timings:        eval time =   246.13 ms /     3 runs   (   82.04 ms per token,    12.19 tokens per second)
llama_print_timings:       total time =   831.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, but it was the least impressive war I have ever fought. All I remember is playing Michael Jackson's rock ballad 'Escapology' for the trapped Nole Garcia President at the Vatican embassy, which was my suggestion.”



llama_print_timings:        load time =   626.19 ms
llama_print_timings:      sample time =    25.71 ms /    58 runs   (    0.44 ms per token,  2256.19 tokens per second)
llama_print_timings: prompt eval time =   626.15 ms /    75 tokens (    8.35 ms per token,   119.78 tokens per second)
llama_print_timings:        eval time =  4949.34 ms /    57 runs   (   86.83 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  5694.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A white French cruise ship slowly moves through the canal below, and several dressed in brightly colored clothes tourists are meandering on its green-carpeted decks.



llama_print_timings:        load time =   630.16 ms
llama_print_timings:      sample time =    16.82 ms /    38 runs   (    0.44 ms per token,  2259.48 tokens per second)
llama_print_timings: prompt eval time =   630.11 ms /    70 tokens (    9.00 ms per token,   111.09 tokens per second)
llama_print_timings:        eval time =  3231.65 ms /    37 runs   (   87.34 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  3939.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The second lookout reports, there are no ships ahead.” Stanford’s walkie-talkie began to ring.



llama_print_timings:        load time =   600.64 ms
llama_print_timings:      sample time =    13.03 ms /    28 runs   (    0.47 ms per token,  2148.39 tokens per second)
llama_print_timings: prompt eval time =   600.60 ms /    54 tokens (   11.12 ms per token,    89.91 tokens per second)
llama_print_timings:        eval time =  2303.97 ms /    27 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2966.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Put the ‘guitar’ up.” Stanford ordered.



llama_print_timings:        load time =   578.41 ms
llama_print_timings:      sample time =     6.31 ms /    14 runs   (    0.45 ms per token,  2220.11 tokens per second)
llama_print_timings: prompt eval time =   578.38 ms /    43 tokens (   13.45 ms per token,    74.35 tokens per second)
llama_print_timings:        eval time =  1140.76 ms /    13 runs   (   87.75 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  1748.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Three men in hard hats stood on the left bank. Wang Sen stood up but Colonel Liang held him back, "Professor, you don't have to worry about it. They'll do just fine." Wang Sen watched as the man on the right bank smoothly withdrew a thread of steel from the nanotube and tied it to the steel column. Then both men simultaneously pulled several lengths of steel cable and set up two steel columns. To camouflage, both steel columns were hung with some buoys and tidemarks. They worked very calmly, even looking a little leisurely as if they were doing something boring. Wang Sen stared at the space between the two steel columns, which looked empty, but Death's Piano was already in place.



llama_print_timings:        load time =   817.73 ms
llama_print_timings:      sample time =    75.20 ms /   168 runs   (    0.45 ms per token,  2234.01 tokens per second)
llama_print_timings: prompt eval time =   817.68 ms /   186 tokens (    4.40 ms per token,   227.47 tokens per second)
llama_print_timings:        eval time = 14479.00 ms /   167 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 15654.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The voice of the walkie-talkie said, “Target within four kilometers.”



llama_print_timings:        load time =   583.41 ms
llama_print_timings:      sample time =     9.46 ms /    20 runs   (    0.47 ms per token,  2115.28 tokens per second)
llama_print_timings: prompt eval time =   583.37 ms /    44 tokens (   13.26 ms per token,    75.42 tokens per second)
llama_print_timings:        eval time =  1564.36 ms /    19 runs   (   82.33 ms per token,    12.15 tokens per second)
llama_print_timings:       total time =  2190.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Stanton put down the walkie-talkie and continued their conversation. “I came to Panama for the second time in 1999, attended the ceremony of cessioning canal authority to the Panamanian government, which was quite interesting. When we arrived at the Canal Administration building, I saw the American flag had already been taken down, according to a request from the US government to avoid an embarrassing scene in public. At that time, I thought I was witnessing a historic moment… But now I see how trivial these things are.”



llama_print_timings:        load time =   706.75 ms
llama_print_timings:      sample time =    53.23 ms /   117 runs   (    0.45 ms per token,  2197.93 tokens per second)
llama_print_timings: prompt eval time =   706.71 ms /   124 tokens (    5.70 ms per token,   175.46 tokens per second)
llama_print_timings:        eval time = 10031.52 ms /   116 runs   (   86.48 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 10988.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The target is three kilometers away!”



llama_print_timings:        load time =   573.51 ms
llama_print_timings:      sample time =     4.50 ms /    10 runs   (    0.45 ms per token,  2221.73 tokens per second)
llama_print_timings: prompt eval time =   573.48 ms /    37 tokens (   15.50 ms per token,    64.52 tokens per second)
llama_print_timings:        eval time =   764.66 ms /     9 runs   (   84.96 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1358.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, insignificant.” Wang Miao echoed. He had no idea what Stanford was saying, and the rest of the world did not exist for him; his entire attention had already been devoted to “the Day of Judgment”. At that moment, the sun rising from the eastern part of the Pacific Ocean was sinking into the western part, its golden rays reflecting brightly in the canal below, and on either side of it were two steel columns like a dark age of time standing still, reflecting no light at all.



llama_print_timings:        load time =   766.50 ms
llama_print_timings:      sample time =    51.95 ms /   114 runs   (    0.46 ms per token,  2194.63 tokens per second)
llama_print_timings: prompt eval time =   766.46 ms /   148 tokens (    5.18 ms per token,   193.10 tokens per second)
llama_print_timings:        eval time =  9904.98 ms /   113 runs   (   87.65 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 10916.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The target is two kilometers away!”



llama_print_timings:        load time =   568.84 ms
llama_print_timings:      sample time =     4.46 ms /    10 runs   (    0.45 ms per token,  2241.65 tokens per second)
llama_print_timings: prompt eval time =   568.80 ms /    37 tokens (   15.37 ms per token,    65.05 tokens per second)
llama_print_timings:        eval time =   821.95 ms /     9 runs   (   91.33 ms per token,    10.95 tokens per second)
llama_print_timings:       total time =  1411.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Stanton didn't seem to hear the beeper, and continued on with his ranting. "Since I found out that an alien fleet is heading towards Earth, I've been suffering from amnesia," he said. It was strange that I couldn't remember the past events I went through in those wars. I mean, all of those wars were so trivial. After knowing about it, everyone will become a new person in spirit and the world would become a new world. I have been wondering if humanity would be in an entirely different state if people knew that an alien fleet was en route to arrive thousands of years later. Professor, can you imagine what that might look like?"



llama_print_timings:        load time =   777.97 ms
llama_print_timings:      sample time =    65.19 ms /   145 runs   (    0.45 ms per token,  2224.30 tokens per second)
llama_print_timings: prompt eval time =   777.93 ms /   161 tokens (    4.83 ms per token,   206.96 tokens per second)
llama_print_timings:        eval time = 12649.59 ms /   144 runs   (   87.84 ms per token,    11.38 tokens per second)
llama_print_timings:       total time = 13735.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Oh, can't...” Wang Sen was flipping through the lines mechanically.



llama_print_timings:        load time =   579.51 ms
llama_print_timings:      sample time =     9.28 ms /    21 runs   (    0.44 ms per token,  2263.66 tokens per second)
llama_print_timings: prompt eval time =   579.47 ms /    46 tokens (   12.60 ms per token,    79.38 tokens per second)
llama_print_timings:        eval time =  1759.90 ms /    20 runs   (   88.00 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  2383.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The target is five kilometers away!



llama_print_timings:        load time =   578.21 ms
llama_print_timings:      sample time =     4.06 ms /     9 runs   (    0.45 ms per token,  2218.39 tokens per second)
llama_print_timings: prompt eval time =   578.17 ms /    38 tokens (   15.22 ms per token,    65.72 tokens per second)
llama_print_timings:        eval time =   712.70 ms /     8 runs   (   89.09 ms per token,    11.22 tokens per second)
llama_print_timings:       total time =  1309.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I think you will become the new Garett, who is a famous engineer in history. We are looking forward to your "Panama Canal" building as well...isn't it? The space elevator is actually a canal connecting earth and space like the Panama Canal did, just that we connect the earth and the space with this space elevator instead of connecting two oceans...



llama_print_timings:        load time =   677.31 ms
llama_print_timings:      sample time =    37.30 ms /    82 runs   (    0.45 ms per token,  2198.39 tokens per second)
llama_print_timings: prompt eval time =   677.27 ms /   109 tokens (    6.21 ms per token,   160.94 tokens per second)
llama_print_timings:        eval time =  6931.37 ms /    81 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  7782.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Now that he knows what the captain is doing, Wang Sen was very grateful, but it didn't help much.



llama_print_timings:        load time =   608.44 ms
llama_print_timings:      sample time =    11.47 ms /    25 runs   (    0.46 ms per token,  2179.79 tokens per second)
llama_print_timings: prompt eval time =   608.41 ms /    61 tokens (    9.97 ms per token,   100.26 tokens per second)
llama_print_timings:        eval time =  2081.05 ms /    24 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  2743.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The target is one kilometer away!



llama_print_timings:        load time =   578.90 ms
llama_print_timings:      sample time =     3.53 ms /     8 runs   (    0.44 ms per token,  2268.86 tokens per second)
llama_print_timings: prompt eval time =   578.86 ms /    37 tokens (   15.64 ms per token,    63.92 tokens per second)
llama_print_timings:        eval time =   600.64 ms /     7 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  1196.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The “Judgment Day” ship appeared, casting a black silhouette against the setting sun. Despite knowing that the canal could accommodate seven-thousand ton ships, it was still weird to see a six-million metric tons vessel in such a narrow waterway. Its enormity compared to the river below made it feel more like a mountain moving across land than a ship sailing on the water. When he got used to the sun’s light, Wangsen saw that the “Judgment Day” ship was black and its upper levels were white, and the giant antenna had disappeared.”



llama_print_timings:        load time =   872.29 ms
llama_print_timings:      sample time =    57.48 ms /   126 runs   (    0.46 ms per token,  2192.26 tokens per second)
llama_print_timings: prompt eval time =   872.25 ms /   220 tokens (    3.96 ms per token,   252.22 tokens per second)
llama_print_timings:        eval time = 10940.16 ms /   125 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 12085.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the distance between the "Judgment Day" and Death's Instrument shortened, Wang Sen's heartbeat rapidly accelerated, his breathing also became shallow. He had a strong urge to run away instantly, but his body felt weak and couldn't be controlled. His heart suddenly surged with a hatred of Shi Qiang, who was an evil bastard! But this feeling disappeared as quickly as it came. If Shiqiang were here with him, he would not have been in such a mess. Colonel Stanford had requested that Shiqiang come and was refused by General Caiwei. The position needed him more now. Wang Sen felt the colonel's hand on his shoulder.



llama_print_timings:        load time =   792.71 ms
llama_print_timings:      sample time =    69.93 ms /   156 runs   (    0.45 ms per token,  2230.90 tokens per second)
llama_print_timings: prompt eval time =   792.66 ms /   170 tokens (    4.66 ms per token,   214.47 tokens per second)
llama_print_timings:        eval time = 13332.92 ms /   155 runs   (   86.02 ms per token,    11.63 tokens per second)
llama_print_timings:       total time = 14457.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Professor, everything will pass.



llama_print_timings:        load time =   578.79 ms
llama_print_timings:      sample time =     4.06 ms /     9 runs   (    0.45 ms per token,  2214.57 tokens per second)
llama_print_timings: prompt eval time =   578.77 ms /    37 tokens (   15.64 ms per token,    63.93 tokens per second)
llama_print_timings:        eval time =   668.73 ms /     8 runs   (   83.59 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  1266.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The trial date was drawing to a close, and the death march. As its bow contacted the two steel girders between which seemed to be empty flat plane when his scalp tensed up, but nothing happened. The gigantic vessel slid from the girders as if they did not exist at all when it was halfway through the passage. However, a small sign dispelled his doubts. He noticed that one of the finest antennas on top of the building fell off and rolled down.



llama_print_timings:        load time =   760.19 ms
llama_print_timings:      sample time =    47.40 ms /   106 runs   (    0.45 ms per token,  2236.29 tokens per second)
llama_print_timings: prompt eval time =   760.15 ms /   150 tokens (    5.07 ms per token,   197.33 tokens per second)
llama_print_timings:        eval time =  9110.22 ms /   105 runs   (   86.76 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10094.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   842.55 ms
llama_print_timings:      sample time =     0.92 ms /     2 runs   (    0.46 ms per token,  2166.85 tokens per second)
llama_print_timings: prompt eval time =   842.50 ms /   202 tokens (    4.17 ms per token,   239.76 tokens per second)
llama_print_timings:        eval time =    87.48 ms /     1 runs   (   87.48 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =   933.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As the ship passed through two steel columns, “The Judgement Day” continued to move forward at its constant speed. At first glance, it did not appear any oddities but then Wang Mori heard that the sound of the engine changed and was replaced with a chaotic loud noise like a torque converter inside an engine being thrown in with many spanners. He knew that this was due to a rotating component of the engine being cut away by a large metal object. A sudden burst of noise followed, and Wang Mori saw that one side of "The Judgement Day" had a hole created by a large metal object flying out of it. The flying object immediately fell into the water and caused high sprays of water as it passed. During this time, Wang Mori noticed that it was a section of the engine's crankshaft.



llama_print_timings:        load time =   829.33 ms
llama_print_timings:      sample time =    80.12 ms /   179 runs   (    0.45 ms per token,  2234.23 tokens per second)
llama_print_timings: prompt eval time =   829.29 ms /   196 tokens (    4.23 ms per token,   236.35 tokens per second)
llama_print_timings:        eval time = 15543.09 ms /   178 runs   (   87.32 ms per token,    11.45 tokens per second)
llama_print_timings:       total time = 16749.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A thick column of smoke was surging out from the hole in the ship, and then quickly crossing the river. Soon after, "Judgment Day" crossed to the left shore, the head of the huge ship crumpled and the soil hill were washed off like water, causing a wave of soil that spread across the area. At the same time, the pieces of the ship began to slide from each other making an irritating noise which lasted for a while before disappearing. Finally, the pieces all collapsed into a soft mass on the shore as if they were soft cloths.



llama_print_timings:        load time =  1036.11 ms
llama_print_timings:      sample time =    56.36 ms /   126 runs   (    0.45 ms per token,  2235.71 tokens per second)
llama_print_timings: prompt eval time =  1036.06 ms /   288 tokens (    3.60 ms per token,   277.98 tokens per second)
llama_print_timings:        eval time = 10922.94 ms /   125 runs   (   87.38 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 12226.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a huge number of soldiers began to charge down the hill, Wang Moxi was surprised by the fact that there were so many people hidden near him. The helicopter swarm roared overhead along the canal and landed on top of "The Judgment Day" wreckage. They released large amounts of white extinguishing agents and foam, controlling the raging fire on board quickly. Three other helicopters then dropped search personnel down onto the debris using these ropes.



llama_print_timings:        load time =   732.63 ms
llama_print_timings:      sample time =    48.09 ms /   107 runs   (    0.45 ms per token,  2225.18 tokens per second)
llama_print_timings: prompt eval time =   732.60 ms /   134 tokens (    5.47 ms per token,   182.91 tokens per second)
llama_print_timings:        eval time =  9192.02 ms /   106 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10149.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Major Stanford has left, and Wang Moxue picked up the telescope on his straw hat. Overcoming her shaking hands, she observed “Judgment Day” by cut with ‘Feather Blades’. At this time, it had been covered by foam and extinguishing powder for about half of it, but still some parts were exposed. Wang Moxue saw the surface that was as smooth as a mirror, and it perfectly reflected the bright red sky at sunrise. She also saw a deep red spot on the mirror face, which might be blood.



llama_print_timings:        load time =   734.30 ms
llama_print_timings:      sample time =    56.96 ms /   126 runs   (    0.45 ms per token,  2211.96 tokens per second)
llama_print_timings: prompt eval time =   734.25 ms /   136 tokens (    5.40 ms per token,   185.22 tokens per second)
llama_print_timings:        eval time = 10542.45 ms /   125 runs   (   84.34 ms per token,    11.86 tokens per second)
llama_print_timings:       total time = 11544.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   575.38 ms
llama_print_timings:      sample time =     3.08 ms /     7 runs   (    0.44 ms per token,  2275.68 tokens per second)
llama_print_timings: prompt eval time =   575.34 ms /    45 tokens (   12.79 ms per token,    78.21 tokens per second)
llama_print_timings:        eval time =   509.92 ms /     6 runs   (   84.99 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1099.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In three days.



llama_print_timings:        load time =   573.46 ms
llama_print_timings:      sample time =     2.29 ms /     5 runs   (    0.46 ms per token,  2180.55 tokens per second)
llama_print_timings: prompt eval time =   573.42 ms /    32 tokens (   17.92 ms per token,    55.81 tokens per second)
llama_print_timings:        eval time =   335.59 ms /     4 runs   (   83.90 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =   919.00 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Questioner: Do you know the Three-Body Civilization?



llama_print_timings:        load time =   579.87 ms
llama_print_timings:      sample time =     6.64 ms /    14 runs   (    0.47 ms per token,  2107.80 tokens per second)
llama_print_timings: prompt eval time =   579.83 ms /    39 tokens (   14.87 ms per token,    67.26 tokens per second)
llama_print_timings:        eval time =  1122.42 ms /    13 runs   (   86.34 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  1733.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$YWJ: I don't know. We have very limited information, and the true and detailed appearance of the Three Body Civilization, other than those who intercept the three civilizations such as Dr. Evans in the Downfall Party, is unknown to anyone.



llama_print_timings:        load time =   614.57 ms
llama_print_timings:      sample time =    25.33 ms /    56 runs   (    0.45 ms per token,  2210.73 tokens per second)
llama_print_timings: prompt eval time =   614.53 ms /    68 tokens (    9.04 ms per token,   110.65 tokens per second)
llama_print_timings:        eval time =  4763.61 ms /    55 runs   (   86.61 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  5500.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I have that expectation because I believe that technology can bring about positive change in the world. 



llama_print_timings:        load time =   626.79 ms
llama_print_timings:      sample time =     8.92 ms /    20 runs   (    0.45 ms per token,  2243.16 tokens per second)
llama_print_timings: prompt eval time =   626.76 ms /    52 tokens (   12.05 ms per token,    82.97 tokens per second)
llama_print_timings:        eval time =  1605.92 ms /    19 runs   (   84.52 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  2274.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If they can travel across the galaxy to our world, it shows that their science has developed at a considerable level. A society with such an advanced science must also possess higher levels of civilization and ethics.



llama_print_timings:        load time =   618.74 ms
llama_print_timings:      sample time =    20.89 ms /    44 runs   (    0.47 ms per token,  2106.27 tokens per second)
llama_print_timings: prompt eval time =   618.70 ms /    66 tokens (    9.37 ms per token,   106.68 tokens per second)
llama_print_timings:        eval time =  3715.39 ms /    43 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4435.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do you think this conclusion is scientific?



llama_print_timings:        load time =   586.01 ms
llama_print_timings:      sample time =     4.33 ms /     9 runs   (    0.48 ms per token,  2079.96 tokens per second)
llama_print_timings: prompt eval time =   585.97 ms /    41 tokens (   14.29 ms per token,    69.97 tokens per second)
llama_print_timings:        eval time =   670.58 ms /     8 runs   (   83.82 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  1276.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

…



llama_print_timings:        load time =   574.96 ms
llama_print_timings:      sample time =     0.91 ms /     2 runs   (    0.45 ms per token,  2205.07 tokens per second)
llama_print_timings: prompt eval time =   574.92 ms /    35 tokens (   16.43 ms per token,    60.88 tokens per second)
llama_print_timings:        eval time =    82.73 ms /     1 runs   (   82.73 ms per token,    12.09 tokens per second)
llama_print_timings:       total time =   663.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

**Questioner:** Let me guess that your father was greatly influenced by the scientific nationalism of his own father, and you were also deeply influenced by him.



llama_print_timings:        load time =   604.93 ms
llama_print_timings:      sample time =    15.70 ms /    34 runs   (    0.46 ms per token,  2166.29 tokens per second)
llama_print_timings: prompt eval time =   604.88 ms /    57 tokens (   10.61 ms per token,    94.23 tokens per second)
llama_print_timings:        eval time =  2838.45 ms /    33 runs   (   86.01 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  3520.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Leo Ye (sighing softly): I don't know.



llama_print_timings:        load time =   595.92 ms
llama_print_timings:      sample time =     7.58 ms /    17 runs   (    0.45 ms per token,  2242.45 tokens per second)
llama_print_timings: prompt eval time =   595.88 ms /    44 tokens (   13.54 ms per token,    73.84 tokens per second)
llama_print_timings:        eval time =  1372.22 ms /    16 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  2004.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Revealing to you that we have obtained all the information about Three Body from the intercepted messages.



llama_print_timings:        load time =   610.44 ms
llama_print_timings:      sample time =    10.52 ms /    22 runs   (    0.48 ms per token,  2092.05 tokens per second)
llama_print_timings: prompt eval time =   610.39 ms /    49 tokens (   12.46 ms per token,    80.28 tokens per second)
llama_print_timings:        eval time =  1832.97 ms /    21 runs   (   87.28 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  2493.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lee Wenjie: Oh... Evans how is he?



llama_print_timings:        load time =   582.38 ms
llama_print_timings:      sample time =     6.74 ms /    15 runs   (    0.45 ms per token,  2224.20 tokens per second)
llama_print_timings: prompt eval time =   582.34 ms /    42 tokens (   13.87 ms per token,    72.12 tokens per second)
llama_print_timings:        eval time =  1227.43 ms /    14 runs   (   87.67 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =  1841.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the process of taking action on "Judgment Day" number, he died.



llama_print_timings:        load time =   578.30 ms
llama_print_timings:      sample time =     8.50 ms /    19 runs   (    0.45 ms per token,  2235.03 tokens per second)
llama_print_timings: prompt eval time =   578.26 ms /    47 tokens (   12.30 ms per token,    81.28 tokens per second)
llama_print_timings:        eval time =  1524.63 ms /    18 runs   (   84.70 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2142.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the center of command and control room, Yvins was cut into three segments. His topmost segment crawled forward a meter or so, eyes fixed upon the direction that contained the intercepted message from alien civilization. It was exactly in one of those computers which had been hijacked by "Sword Wings."



llama_print_timings:        load time =   661.65 ms
llama_print_timings:      sample time =    30.84 ms /    69 runs   (    0.45 ms per token,  2237.64 tokens per second)
llama_print_timings: prompt eval time =   661.61 ms /    95 tokens (    6.96 ms per token,   143.59 tokens per second)
llama_print_timings:        eval time =  5784.16 ms /    68 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  6591.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$Yan Jie: Is there a lot of information?$$



llama_print_timings:        load time =   578.35 ms
llama_print_timings:      sample time =     6.60 ms /    15 runs   (    0.44 ms per token,  2273.76 tokens per second)
llama_print_timings: prompt eval time =   578.31 ms /    37 tokens (   15.63 ms per token,    63.98 tokens per second)
llama_print_timings:        eval time =  1216.00 ms /    14 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1824.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A lot, about 28GB.



llama_print_timings:        load time =   580.97 ms
llama_print_timings:      sample time =     4.42 ms /    10 runs   (    0.44 ms per token,  2262.44 tokens per second)
llama_print_timings: prompt eval time =   580.93 ms /    40 tokens (   14.52 ms per token,    68.86 tokens per second)
llama_print_timings:        eval time =   785.53 ms /     9 runs   (   87.28 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1386.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Lei Wenjie: This is impossible. The efficiency of interstellar super-long distance communication is very low, how can it send such a large amount of information?!



llama_print_timings:        load time =   594.69 ms
llama_print_timings:      sample time =    17.91 ms /    39 runs   (    0.46 ms per token,  2178.04 tokens per second)
llama_print_timings: prompt eval time =   594.67 ms /    52 tokens (   11.44 ms per token,    87.44 tokens per second)
llama_print_timings:        eval time =  3249.63 ms /    38 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  3927.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sure, I'll read some of it. But first, what exactly is the "information" you want me to read?



llama_print_timings:        load time =   620.60 ms
llama_print_timings:      sample time =    12.33 ms /    28 runs   (    0.44 ms per token,  2271.62 tokens per second)
llama_print_timings: prompt eval time =   620.57 ms /    76 tokens (    8.17 ms per token,   122.47 tokens per second)
llama_print_timings:        eval time =  2334.82 ms /    27 runs   (   86.47 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3013.21 ms


translated 126.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The Three-Body series does not contain any descriptions of the alien species' physical appearance, and humans will not see them for over 400 years. During this reading period, Ye Wenjie is unable to picture their physical appearance other than that of human beings.



llama_print_timings:        load time =   621.06 ms
llama_print_timings:      sample time =    26.68 ms /    60 runs   (    0.44 ms per token,  2248.96 tokens per second)
llama_print_timings: prompt eval time =   621.03 ms /    73 tokens (    8.51 ms per token,   117.55 tokens per second)
llama_print_timings:        eval time =  4994.71 ms /    59 runs   (   84.66 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  5731.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   575.94 ms
llama_print_timings:      sample time =     3.11 ms /     7 runs   (    0.44 ms per token,  2249.36 tokens per second)
llama_print_timings: prompt eval time =   575.89 ms /    45 tokens (   12.80 ms per token,    78.14 tokens per second)
llama_print_timings:        eval time =   519.97 ms /     6 runs   (   86.66 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1109.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The $1379 Listening Station has existed for thousands of years, like thousands of such listening stations in the Three Body universe. They listen intently to information about potential intelligent civilizations in the universe.



llama_print_timings:        load time =   626.07 ms
llama_print_timings:      sample time =    20.59 ms /    45 runs   (    0.46 ms per token,  2185.63 tokens per second)
llama_print_timings: prompt eval time =   626.02 ms /    72 tokens (    8.69 ms per token,   115.01 tokens per second)
llama_print_timings:        eval time =  3803.56 ms /    44 runs   (   86.44 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  4518.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The original listening station had hundreds of listeners, but with the advancement of technology, now only one person is on duty. Listeners are a humble profession, and although they reside in a constant-temperature environment that guarantees adequate supplies of food and water, their lives also fade away within this small space, their joy being less than that of others who have a broader range of choices.



llama_print_timings:        load time =   684.82 ms
llama_print_timings:      sample time =    39.11 ms /    87 runs   (    0.45 ms per token,  2224.67 tokens per second)
llama_print_timings: prompt eval time =   684.77 ms /   110 tokens (    6.23 ms per token,   160.64 tokens per second)
llama_print_timings:        eval time =  7316.00 ms /    86 runs   (   85.07 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  8173.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The $1379th Listener watched the outside of the three-dimensional world, which was in darkness. Most people were in their winter sleep and even plants were feeling desiccation due to a lack of water, forming an attached fiber without life on the ground like a piece of cold metal. Under the stars, the earth looked like a huge mass of cold metal.



llama_print_timings:        load time =   694.42 ms
llama_print_timings:      sample time =    35.62 ms /    79 runs   (    0.45 ms per token,  2218.10 tokens per second)
llama_print_timings: prompt eval time =   694.34 ms /   111 tokens (    6.26 ms per token,   159.86 tokens per second)
llama_print_timings:        eval time =  6695.14 ms /    78 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  7546.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the loneliest moment, in a quiet midnight, when the universe reveals its desolation to its listeners. The 1379th Listener would rather not see the curvilinear motion of that bar on his monitor screen, which represents the waveform received by the listening system from the cosmos. He feels this bar is the abstraction of the universe, connecting infinity with infinity: one end connects to infinite past and another end to unlimited future. Each wave peak is like a grain of sand in the desert, and the whole line is like all these grains arranged into lines in a one-dimensional desert. It's desolate and intolerable. You can walk forward or backward infinitely far along it, but never find the end.



llama_print_timings:        load time =   828.14 ms
llama_print_timings:      sample time =    73.90 ms /   165 runs   (    0.45 ms per token,  2232.66 tokens per second)
llama_print_timings: prompt eval time =   828.09 ms /   192 tokens (    4.31 ms per token,   231.86 tokens per second)
llama_print_timings:        eval time = 14502.94 ms /   164 runs   (   88.43 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 15661.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The alien civilization is coming.



llama_print_timings:        load time =   939.42 ms
llama_print_timings:      sample time =     4.04 ms /     9 runs   (    0.45 ms per token,  2226.07 tokens per second)
llama_print_timings: prompt eval time =   939.38 ms /   246 tokens (    3.82 ms per token,   261.87 tokens per second)
llama_print_timings:        eval time =   717.51 ms /     8 runs   (   89.69 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =  1674.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Wish the world a beautiful wish.



llama_print_timings:        load time =   571.17 ms
llama_print_timings:      sample time =     4.16 ms /     9 runs   (    0.46 ms per token,  2163.98 tokens per second)
llama_print_timings: prompt eval time =   571.13 ms /    39 tokens (   14.64 ms per token,    68.29 tokens per second)
llama_print_timings:        eval time =   696.19 ms /     8 runs   (   87.02 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  1285.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Through the information above, you will have a basic understanding of Earth civilization. Humans have created magnificent civilizations through long hard work and creativity, resulting in a rich variety of cultures. We treasure this very much.



llama_print_timings:        load time =   639.93 ms
llama_print_timings:      sample time =    22.52 ms /    51 runs   (    0.44 ms per token,  2264.65 tokens per second)
llama_print_timings: prompt eval time =   639.88 ms /    83 tokens (    7.71 ms per token,   129.71 tokens per second)
llama_print_timings:        eval time =  4335.26 ms /    50 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  5074.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, our world still has great flaws and is plagued by hatred, prejudice and warfare due to the contradiction between productivity and production relations, as well as a serious unequal distribution of wealth which leads many members of mankind to live in poverty and suffering.



llama_print_timings:        load time =   627.26 ms
llama_print_timings:      sample time =    28.08 ms /    61 runs   (    0.46 ms per token,  2172.21 tokens per second)
llama_print_timings: prompt eval time =   627.22 ms /    69 tokens (    9.09 ms per token,   110.01 tokens per second)
llama_print_timings:        eval time =  5049.84 ms /    60 runs   (   84.16 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  5803.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The human society is striving to solve the various difficulties and problems it is facing, and striving to create a better future for Earth civilization. The nation that sends this message is involved in this effort. We devote ourselves to building an ideal social order in which the labor and value of every individual member are fully respected, the material and spiritual needs of all members are fully satisfied, and Earth civilization becomes a more perfect civilization.



llama_print_timings:        load time =   674.46 ms
llama_print_timings:      sample time =    42.19 ms /    93 runs   (    0.45 ms per token,  2204.21 tokens per second)
llama_print_timings: prompt eval time =   674.42 ms /   107 tokens (    6.30 ms per token,   158.66 tokens per second)
llama_print_timings:        eval time =  7803.33 ms /    92 runs   (   84.82 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  8663.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

With the hope of a better future, we aspire to connect with other civilizations in space and create a better life together.



llama_print_timings:        load time =   611.43 ms
llama_print_timings:      sample time =    12.44 ms /    27 runs   (    0.46 ms per token,  2170.07 tokens per second)
llama_print_timings: prompt eval time =   611.40 ms /    62 tokens (    9.86 ms per token,   101.41 tokens per second)
llama_print_timings:        eval time =  2202.12 ms /    26 runs   (   84.70 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2867.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the midst of his head spinning excitement, the monitor looked at the waveform display and saw that information continued to flow into the antennae. Thanks to the real-time translation system, the received information was displayed instantly. In the two subsequent三体时, the monitor knew about Earth's existence, knowing that it is a solar system with only one sun in its constant epoch and having been born in the paradise of never-ending bliss.



llama_print_timings:        load time =   706.49 ms
llama_print_timings:      sample time =    42.90 ms /    95 runs   (    0.45 ms per token,  2214.50 tokens per second)
llama_print_timings: prompt eval time =   706.45 ms /   125 tokens (    5.65 ms per token,   176.94 tokens per second)
llama_print_timings:        eval time =  8158.65 ms /    94 runs   (   86.79 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  9058.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The information from the solar system has ended, and the interpretation computer starts running without any result. The system hears a desolate noise in the universe, but the interpreter can be sure that everything is not a dream. He also knows that there are thousands of interception stations distributed all over the world, and they all received the information from three advanced civilizations expecting for millions of years. Two hundred civilizations crawled through the pitch-black tunnel and now saw a glimmer of light ahead.



llama_print_timings:        load time =   700.23 ms
llama_print_timings:      sample time =    45.54 ms /   102 runs   (    0.45 ms per token,  2239.59 tokens per second)
llama_print_timings: prompt eval time =   700.18 ms /   117 tokens (    5.98 ms per token,   167.10 tokens per second)
llama_print_timings:        eval time =  8669.74 ms /   101 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  9572.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The monitor read the information from Earth for the second time, and his thoughts flew in the eternal blue ocean and verdant forest fields of that world. He felt the warm sunlight and refreshing breeze there, a beautiful place like the utopia imagined in more than 200 rounds of civilization.



llama_print_timings:        load time =   683.97 ms
llama_print_timings:      sample time =    31.70 ms /    71 runs   (    0.45 ms per token,  2239.54 tokens per second)
llama_print_timings: prompt eval time =   683.92 ms /    96 tokens (    7.12 ms per token,   140.37 tokens per second)
llama_print_timings:        eval time =  6006.58 ms /    70 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  6834.08 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The excitement and exhilaration soon cool down, leaving behind only the sadness and desolation. Over a long period of time spent in isolation, listening officers have asked themselves many times whether even if alien civilizations had arrived with messages, it would have any effect on their own lives that are lonely and humble.



llama_print_timings:        load time =   677.09 ms
llama_print_timings:      sample time =    31.43 ms /    69 runs   (    0.46 ms per token,  2195.35 tokens per second)
llama_print_timings: prompt eval time =   677.04 ms /    98 tokens (    6.91 ms per token,   144.75 tokens per second)
llama_print_timings:        eval time =  5818.60 ms /    68 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  6638.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But at least I can have it in my dreams... the listener thought, and fell asleep on her own. In harsh conditions, humans evolved a switch to enable sleep within a few seconds of stimulation.



llama_print_timings:        load time =   620.81 ms
llama_print_timings:      sample time =    21.60 ms /    46 runs   (    0.47 ms per token,  2130.02 tokens per second)
llama_print_timings: prompt eval time =   620.78 ms /    75 tokens (    8.28 ms per token,   120.82 tokens per second)
llama_print_timings:        eval time =  3876.66 ms /    45 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  4595.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But he didn't get the dream he wanted. The beautiful Earth did appear in his dream, but when the huge interstellar fleet fired its guns, the beautiful lands of Earth began to burn, and the azure oceans started boiling up…



llama_print_timings:        load time =   615.02 ms
llama_print_timings:      sample time =    24.07 ms /    53 runs   (    0.45 ms per token,  2202.28 tokens per second)
llama_print_timings: prompt eval time =   614.98 ms /    70 tokens (    8.79 ms per token,   113.83 tokens per second)
llama_print_timings:        eval time =  4507.01 ms /    52 runs   (   86.67 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  5231.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The listener woke up from the nightmare, and saw a huge moon casting cold light into the small window. He looked out at the frozen earth outside and started reviewing his lonely life. Currently, he has already lived 600 million times in this universe, and the lifespan of an alien in this universe is usually around 70-80 million times, with most people losing their work ability long before this point, and then being forced to undergo autoscarification, which will burn away their dry fibroid bodies. The society doesn't need idle people.



llama_print_timings:        load time =   721.42 ms
llama_print_timings:      sample time =    56.85 ms /   126 runs   (    0.45 ms per token,  2216.32 tokens per second)
llama_print_timings: prompt eval time =   721.38 ms /   129 tokens (    5.59 ms per token,   178.82 tokens per second)
llama_print_timings:        eval time = 10824.69 ms /   125 runs   (   86.60 ms per token,    11.55 tokens per second)
llama_print_timings:       total time = 11805.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$At this moment, the monitor suddenly thought of another possibility: it is not exactly correct to say that receiving alien messages does not affect oneself. After determining the target, the Three Kingdoms World will definitely prune a part of monitoring stations, and the station where he himself belongs is bound to be among them first. By then, he will face unemployment when his skills are very single and limited to routine operations and maintenance. If no other job finds him within five thousand years, he will also face the fate of being forcibly dehydrated and incinerated.



llama_print_timings:        load time =   741.59 ms
llama_print_timings:      sample time =    56.19 ms /   124 runs   (    0.45 ms per token,  2206.92 tokens per second)
llama_print_timings: prompt eval time =   741.54 ms /   136 tokens (    5.45 ms per token,   183.40 tokens per second)
llama_print_timings:        eval time = 10632.85 ms /   123 runs   (   86.45 ms per token,    11.57 tokens per second)
llama_print_timings:       total time = 11630.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The only way to escape this fate is to combine with a mate. At that moment, the organic matter that makes up their bodies will unite and become two-thirds of the fuel for the biochemical reactions while one-third would remain to complete a thorough regeneration and generate a new body; after which this body will split into three to five new small life forms. These are their children who inherit part of their parents' memories and continue their lives as an extension, beginning a whole new life. But in the humble social status of a monitor and isolated working environment at this age, how many females would even look at myself?



llama_print_timings:        load time =   757.73 ms
llama_print_timings:      sample time =    60.58 ms /   135 runs   (    0.45 ms per token,  2228.31 tokens per second)
llama_print_timings: prompt eval time =   757.70 ms /   152 tokens (    4.98 ms per token,   200.61 tokens per second)
llama_print_timings:        eval time = 11628.59 ms /   134 runs   (   86.78 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 12661.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the last few years of his life, the Listener asked himself countless times: This is my entire life? And he replied countless times: Yes, this is your entire life, with only the constant solitary of a small listening room.



llama_print_timings:        load time =   638.97 ms
llama_print_timings:      sample time =    23.32 ms /    52 runs   (    0.45 ms per token,  2229.46 tokens per second)
llama_print_timings: prompt eval time =   638.92 ms /    82 tokens (    7.79 ms per token,   128.34 tokens per second)
llama_print_timings:        eval time =  4458.23 ms /    51 runs   (   87.42 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  5203.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He can't lose that distant paradise, even in his dreams.



llama_print_timings:        load time =   582.45 ms
llama_print_timings:      sample time =     7.54 ms /    17 runs   (    0.44 ms per token,  2254.94 tokens per second)
llama_print_timings: prompt eval time =   582.41 ms /    42 tokens (   13.87 ms per token,    72.11 tokens per second)
llama_print_timings:        eval time =  1335.35 ms /    16 runs   (   83.46 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  1952.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

```



llama_print_timings:        load time =   998.33 ms
llama_print_timings:      sample time =     0.92 ms /     2 runs   (    0.46 ms per token,  2178.65 tokens per second)
llama_print_timings: prompt eval time =   998.29 ms /    29 tokens (   34.42 ms per token,    29.05 tokens per second)
llama_print_timings:        eval time =    83.72 ms /     1 runs   (   83.72 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  1085.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The listener knows that, on the cosmic scale, for low frequency electromagnetic waves coming from space, due to lack of sufficient measurement range, only its direction can be determined, but its distance is unknown. On that direction, it may be a distant high power emission source or a nearby low-power emission source; and that direction has billions of stars in the starry sea, where each star appears with varying distances and backgrounds, making it impossible to determine their coordinates correctly.



llama_print_timings:        load time =   699.60 ms
llama_print_timings:      sample time =    45.12 ms /   100 runs   (    0.45 ms per token,  2216.26 tokens per second)
llama_print_timings: prompt eval time =   699.55 ms /   124 tokens (    5.64 ms per token,   177.26 tokens per second)
llama_print_timings:        eval time =  8442.90 ms /    99 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  9347.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Distance is the key!



llama_print_timings:        load time =   574.90 ms
llama_print_timings:      sample time =     2.69 ms /     6 runs   (    0.45 ms per token,  2227.17 tokens per second)
llama_print_timings: prompt eval time =   574.86 ms /    34 tokens (   16.91 ms per token,    59.15 tokens per second)
llama_print_timings:        eval time =   444.58 ms /     5 runs   (   88.92 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =  1030.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The method of determining the distance between two points is very simple. Simply reply an message to the other person, and if the other person answers in a relatively short period of time from receiving this message, you can determine the distance by the delay and the speed of light. But the question is whether or not the other person will respond at all, or whether they will take a long time to answer, making it difficult for us to determine how long the signal took on its way. However, since the source of the signal has taken the initiative to send out an SOS, it can be concluded that they would most likely respond if they receive our messages. Monitoring station can be sure that a command has been given from the Trisolarian government to send information toward that distant world and attract them to respond. If the message has not been sent yet, then this monitoring station has a chance of igniting their own lives once again.



llama_print_timings:        load time =   788.89 ms
llama_print_timings:      sample time =    86.66 ms /   193 runs   (    0.45 ms per token,  2227.04 tokens per second)
llama_print_timings: prompt eval time =   788.85 ms /   168 tokens (    4.70 ms per token,   212.97 tokens per second)
llama_print_timings:        eval time = 16808.04 ms /   192 runs   (   87.54 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 17995.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The vast majority of the monitoring stations in the Three-Body universe are also simultaneously transmitting information to space, calling out any possible extraterrestrial civilizations. Unfortunately, because the Sun of Half Man region differs significantly from that of humanity in structure, a large and relatively transparent plasma layer (that is, this plasma layer causes the Sun of the Three-Body universe to suddenly change into a fly star or reemerge as one) exists at some distance away from the solar surface, thereby preventing the solar energy from being used as an antenna for transmitting information, and only ground antennas can be used directly to transmit to targets. Otherwise, humanity would have known about the Three-Body Civilization long ago.



llama_print_timings:        load time =   828.70 ms
llama_print_timings:      sample time =    68.37 ms /   152 runs   (    0.45 ms per token,  2223.33 tokens per second)
llama_print_timings: prompt eval time =   828.67 ms /   183 tokens (    4.53 ms per token,   220.84 tokens per second)
llama_print_timings:        eval time = 13219.88 ms /   151 runs   (   87.55 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 14356.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Listener crouched in front of the screen and typed a short message. Then, he turned on the computer's radio transmitter towards the direction where the incoming source of the data from Earth was located, pressing down the red rectangular button. At that time, his finger was hanging over the button.



llama_print_timings:        load time =   683.25 ms
llama_print_timings:      sample time =    29.52 ms /    66 runs   (    0.45 ms per token,  2235.62 tokens per second)
llama_print_timings: prompt eval time =   683.21 ms /   100 tokens (    6.83 ms per token,   146.37 tokens per second)
llama_print_timings:        eval time =  5538.62 ms /    65 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  6357.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The fate of the Three Civilizations lies in these two fingers.



llama_print_timings:        load time =   582.72 ms
llama_print_timings:      sample time =     6.80 ms /    14 runs   (    0.49 ms per token,  2057.31 tokens per second)
llama_print_timings: prompt eval time =   582.69 ms /    44 tokens (   13.24 ms per token,    75.51 tokens per second)
llama_print_timings:        eval time =  1070.72 ms /    13 runs   (   82.36 ms per token,    12.14 tokens per second)
llama_print_timings:       total time =  1685.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Without hesitation, the listener pressed the transmit button and sent out a high-powered radio wave carrying that short but potentially life-saving message into the abyss of space.



llama_print_timings:        load time =   601.14 ms
llama_print_timings:      sample time =    17.57 ms /    39 runs   (    0.45 ms per token,  2219.31 tokens per second)
llama_print_timings: prompt eval time =   601.10 ms /    62 tokens (    9.70 ms per token,   103.14 tokens per second)
llama_print_timings:        eval time =  3255.74 ms /    38 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3935.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The world has received your message.



llama_print_timings:        load time =   604.58 ms
llama_print_timings:      sample time =     3.73 ms /     8 runs   (    0.47 ms per token,  2146.50 tokens per second)
llama_print_timings: prompt eval time =   604.54 ms /    34 tokens (   17.78 ms per token,    56.24 tokens per second)
llama_print_timings:        eval time =   596.36 ms /     7 runs   (   85.19 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  1216.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   606.54 ms
llama_print_timings:      sample time =    18.68 ms /    41 runs   (    0.46 ms per token,  2194.63 tokens per second)
llama_print_timings: prompt eval time =   606.50 ms /    59 tokens (   10.28 ms per token,    97.28 tokens per second)
llama_print_timings:        eval time =  3433.65 ms /    40 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  4122.73 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You have millions of stars in your direction. If you do not respond, the world will not be able to locate the source.



llama_print_timings:        load time =   582.21 ms
llama_print_timings:      sample time =    12.54 ms /    27 runs   (    0.46 ms per token,  2152.25 tokens per second)
llama_print_timings: prompt eval time =   582.17 ms /    47 tokens (   12.39 ms per token,    80.73 tokens per second)
llama_print_timings:        eval time =  2211.45 ms /    26 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  2850.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If you respond, the launcher will be located and your civilization will be invaded. Your world will be occupied!



llama_print_timings:        load time =   601.81 ms
llama_print_timings:      sample time =    11.48 ms /    26 runs   (    0.44 ms per token,  2264.41 tokens per second)
llama_print_timings: prompt eval time =   601.77 ms /    51 tokens (   11.80 ms per token,    84.75 tokens per second)
llama_print_timings:        eval time =  2107.94 ms /    25 runs   (   84.32 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  2762.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Do not answer! Do not answer! ! Do not answer! ! !



llama_print_timings:        load time =   583.74 ms
llama_print_timings:      sample time =     7.69 ms /    16 runs   (    0.48 ms per token,  2080.08 tokens per second)
llama_print_timings: prompt eval time =   583.70 ms /    41 tokens (   14.24 ms per token,    70.24 tokens per second)
llama_print_timings:        eval time =  1329.07 ms /    15 runs   (   88.60 ms per token,    11.29 tokens per second)
llama_print_timings:       total time =  1947.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We don't know what the palace of the Three-Body world leader looks like, but we can be sure that there are thick walls between him and the outside world to adapt to the harsh climate. The gold-domed pyramid is only a guess, but it may also be underground.



llama_print_timings:        load time =   640.55 ms
llama_print_timings:      sample time =    28.59 ms /    64 runs   (    0.45 ms per token,  2238.31 tokens per second)
llama_print_timings: prompt eval time =   640.50 ms /    81 tokens (    7.91 ms per token,   126.46 tokens per second)
llama_print_timings:        eval time =  5393.99 ms /    63 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  6164.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   629.40 ms
llama_print_timings:      sample time =    19.54 ms /    44 runs   (    0.44 ms per token,  2251.45 tokens per second)
llama_print_timings: prompt eval time =   629.35 ms /    72 tokens (    8.74 ms per token,   114.40 tokens per second)
llama_print_timings:        eval time =  3672.35 ms /    43 runs   (   85.40 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  4391.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   749.69 ms
llama_print_timings:      sample time =    57.32 ms /   128 runs   (    0.45 ms per token,  2233.19 tokens per second)
llama_print_timings: prompt eval time =   749.65 ms /   140 tokens (    5.35 ms per token,   186.75 tokens per second)
llama_print_timings:        eval time = 10946.34 ms /   127 runs   (   86.19 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 11961.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dictator asked the 1379th surveillance agent standing in front of him, “Why do you do this?”



llama_print_timings:        load time =   589.20 ms
llama_print_timings:      sample time =    13.77 ms /    29 runs   (    0.47 ms per token,  2106.03 tokens per second)
llama_print_timings: prompt eval time =   589.16 ms /    51 tokens (   11.55 ms per token,    86.56 tokens per second)
llama_print_timings:        eval time =  2373.25 ms /    28 runs   (   84.76 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  3026.04 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“To make the most of life.”



llama_print_timings:        load time =   584.78 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2248.31 tokens per second)
llama_print_timings: prompt eval time =   584.74 ms /    44 tokens (   13.29 ms per token,    75.25 tokens per second)
llama_print_timings:        eval time =   684.12 ms /     8 runs   (   85.51 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  1286.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   579.03 ms
llama_print_timings:      sample time =    11.97 ms /    27 runs   (    0.44 ms per token,  2254.89 tokens per second)
llama_print_timings: prompt eval time =   579.00 ms /    46 tokens (   12.59 ms per token,    79.45 tokens per second)
llama_print_timings:        eval time =  2272.89 ms /    26 runs   (   87.42 ms per token,    11.44 tokens per second)
llama_print_timings:       total time =  2904.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But gave the Earth civilization such an opportunity. Supreme Leader, please allow me to say this: About 10000 years before the Chaos Period, when I was located at 1379 station, the patrol supply truck had missed my station and so it meant that I stopped receiving supplies for a hundred 10000 time units. I had eaten up all the food available on the station, including my own clothes, but even then, when the supply truck returned, I was about to die of starvation. My superior gave me the longest vacation that I had ever received and as I traveled back to the city with the supply truck, I was controlled by a strong desire that I could only describe as wanting to eat everything on the supply car. Every time I saw someone else eating food on the car, my hatred would rise in me, and I wanted to kill them all! I kept stealing food from the car and hid it under my clothes and beneath the seats. The staff on board found what I was doing amusing and gave me gifts of food. When I arrived back at t


llama_print_timings:        load time =   891.94 ms
llama_print_timings:      sample time =   115.33 ms /   258 runs   (    0.45 ms per token,  2237.04 tokens per second)
llama_print_timings: prompt eval time =   891.89 ms /   225 tokens (    3.96 ms per token,   252.27 tokens per second)
llama_print_timings:        eval time = 22595.83 ms /   257 runs   (   87.92 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 24017.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, I recovered from this mental illness eventually, but that strong desire to possess left a deep impression on me. The Three-Body Civilization was also a group in survival crisis, it possessed the same ruthless desire to have its own living space as my desire for food did at the time. It could not share the world with the Earth civilization and had to destroy it without hesitation so that it could have full control of the entire solar system. Do I have it right?



llama_print_timings:        load time =   724.14 ms
llama_print_timings:      sample time =    46.91 ms /   103 runs   (    0.46 ms per token,  2195.74 tokens per second)
llama_print_timings: prompt eval time =   724.11 ms /   120 tokens (    6.03 ms per token,   165.72 tokens per second)
llama_print_timings:        eval time =  8578.47 ms /   102 runs   (   84.10 ms per token,    11.89 tokens per second)
llama_print_timings:       total time =  9517.20 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, there is another reason to destroy the Earth civilization. They are a warlike race and very dangerous when they coexist with us in this world. When we share our universe with them after we take over the sun and earth, they will learn rapidly technologically. That way, both civilizations would not live peacefully together. We have decided to follow the following policy: once we occupy the sun and Earth, we won't interfere significantly in their life except for one thing: reproduction. Now I want to ask you this: how can a civilization that has no sense of responsibility toward its own civilization be the savior of the Earth?”



llama_print_timings:        load time =   753.55 ms
llama_print_timings:      sample time =    62.55 ms /   140 runs   (    0.45 ms per token,  2238.25 tokens per second)
llama_print_timings: prompt eval time =   753.50 ms /   142 tokens (    5.31 ms per token,   188.45 tokens per second)
llama_print_timings:        eval time = 12230.56 ms /   139 runs   (   87.99 ms per token,    11.36 tokens per second)
llama_print_timings:       total time = 13272.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Three-Body universe has grown stale. We have nothing left in our lives and spirits but to fight for survival.



llama_print_timings:        load time =   591.45 ms
llama_print_timings:      sample time =    12.82 ms /    27 runs   (    0.47 ms per token,  2106.41 tokens per second)
llama_print_timings: prompt eval time =   591.41 ms /    54 tokens (   10.95 ms per token,    91.31 tokens per second)
llama_print_timings:        eval time =  2267.16 ms /    26 runs   (   87.20 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2916.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What's wrong with that?



llama_print_timings:        load time =   574.58 ms
llama_print_timings:      sample time =     3.59 ms /     8 runs   (    0.45 ms per token,  2230.90 tokens per second)
llama_print_timings: prompt eval time =   574.55 ms /    36 tokens (   15.96 ms per token,    62.66 tokens per second)
llama_print_timings:        eval time =   582.53 ms /     7 runs   (   83.22 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  1173.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course not, survival is the prerequisite for everything else. But, please see our lives: everything is for the survival of civilization. For the entire civilization to survive, individual respect does not exist; an individual cannot work so they will die. The Three Body society is in extreme autarchy with only two laws: guilty and innocent, guilty sentenced to death, innocent released. What I can't stand is the lack of spiritual life and barrenness - all possibilities causing fragility are considered evil. We have no literature, art or love to speak about - emperor, such a life has any meaning?



llama_print_timings:        load time =   767.23 ms
llama_print_timings:      sample time =    60.90 ms /   135 runs   (    0.45 ms per token,  2216.82 tokens per second)
llama_print_timings: prompt eval time =   767.18 ms /   154 tokens (    4.98 ms per token,   200.73 tokens per second)
llama_print_timings:        eval time = 11730.12 ms /   134 runs   (   87.54 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 12782.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The civilization you yearn for has existed in the past, and they had democratic freedom as well as a rich cultural heritage. But you can only see very small parts of them due to censorship. However, in all the rounds of civilizations, that kind of civilization is the most fragile and short-lived one, and a minor catastrophe could destroy it completely. Then look at the Earth civilization which has been nurtured like a pet forever in an eternal spring beauty. If you put it into the Three Body Universe, definitely survival will be impossible - a million parallels of Three Body Time.”



llama_print_timings:        load time =   737.75 ms
llama_print_timings:      sample time =    62.23 ms /   139 runs   (    0.45 ms per token,  2233.54 tokens per second)
llama_print_timings: prompt eval time =   737.71 ms /   135 tokens (    5.46 ms per token,   183.00 tokens per second)
llama_print_timings:        eval time = 11936.68 ms /   138 runs   (   86.50 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 12964.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The flowers may be fragile, but they are beautiful. She enjoys the freedom and beauty in heaven.



llama_print_timings:        load time =   589.79 ms
llama_print_timings:      sample time =    10.47 ms /    23 runs   (    0.46 ms per token,  2197.17 tokens per second)
llama_print_timings: prompt eval time =   589.75 ms /    50 tokens (   11.79 ms per token,    84.78 tokens per second)
llama_print_timings:        eval time =  1801.55 ms /    22 runs   (   81.89 ms per token,    12.21 tokens per second)
llama_print_timings:       total time =  2440.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If the Three-Body Civilization gains control of that world, we can also create a life there.



llama_print_timings:        load time =   575.32 ms
llama_print_timings:      sample time =    10.52 ms /    23 runs   (    0.46 ms per token,  2187.14 tokens per second)
llama_print_timings: prompt eval time =   575.27 ms /    45 tokens (   12.78 ms per token,    78.22 tokens per second)
llama_print_timings:        eval time =  1860.52 ms /    22 runs   (   84.57 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  2483.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I suspect that the metallic tripod spirit has already solidified into our every cell. Do you really think it can melt?” “I am a humble person living at the very bottom of society with no wealth, status or love. Without hope, I have lived an entire life alone and unwanted. If I could save a beautiful distant world that my heart loves, then this lifetime would not be in vain.” Of course, I feel grateful for having met you, Your Majesty. It is only by this gesture that I can express my delight to the masses on TV.



llama_print_timings:        load time =   749.11 ms
llama_print_timings:      sample time =    54.72 ms /   121 runs   (    0.45 ms per token,  2211.30 tokens per second)
llama_print_timings: prompt eval time =   749.06 ms /   146 tokens (    5.13 ms per token,   194.91 tokens per second)
llama_print_timings:        eval time = 10180.55 ms /   120 runs   (   84.84 ms per token,    11.79 tokens per second)
llama_print_timings:       total time = 11178.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"There is no doubt that you are guilty, but there is an exception in the Three Body law: you are free."



llama_print_timings:        load time =   623.31 ms
llama_print_timings:      sample time =    11.63 ms /    26 runs   (    0.45 ms per token,  2235.60 tokens per second)
llama_print_timings: prompt eval time =   623.28 ms /    62 tokens (   10.05 ms per token,    99.47 tokens per second)
llama_print_timings:        eval time =  2143.14 ms /    25 runs   (   85.73 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2818.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This can't be allowed!”



llama_print_timings:        load time =   579.80 ms
llama_print_timings:      sample time =     4.06 ms /     9 runs   (    0.45 ms per token,  2216.75 tokens per second)
llama_print_timings: prompt eval time =   579.76 ms /    38 tokens (   15.26 ms per token,    65.54 tokens per second)
llama_print_timings:        eval time =   696.65 ms /     8 runs   (   87.08 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  1295.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“For you, dehydration is a mere trifle. You are old and won’t see the end of Earth civilization anyway, but at least I want you to know that it will not be in your power to save her from ruin. So go away.”



llama_print_timings:        load time =   647.21 ms
llama_print_timings:      sample time =    26.36 ms /    59 runs   (    0.45 ms per token,  2238.32 tokens per second)
llama_print_timings: prompt eval time =   647.17 ms /    86 tokens (    7.53 ms per token,   132.89 tokens per second)
llama_print_timings:        eval time =  5002.38 ms /    58 runs   (   86.25 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  5768.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After Listener 1379 left, the dictator summoned the minister responsible for the surveillance system. To him, the dictator also avoided getting angry, it was just a matter of routine.



llama_print_timings:        load time =   625.81 ms
llama_print_timings:      sample time =    20.54 ms /    46 runs   (    0.45 ms per token,  2239.21 tokens per second)
llama_print_timings: prompt eval time =   625.77 ms /    68 tokens (    9.20 ms per token,   108.67 tokens per second)
llama_print_timings:        eval time =  3862.10 ms /    45 runs   (   85.82 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  4580.55 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"How can you allow such fragile and evil people to enter the surveillance system?"



llama_print_timings:        load time =   585.56 ms
llama_print_timings:      sample time =     8.95 ms /    20 runs   (    0.45 ms per token,  2233.89 tokens per second)
llama_print_timings: prompt eval time =   585.51 ms /    43 tokens (   13.62 ms per token,    73.44 tokens per second)
llama_print_timings:        eval time =  1582.53 ms /    19 runs   (   83.29 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  2208.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The head of the surveillance system has thousands of employees and it's difficult to strictly identify them all. 1379 is, after all, in the surveillance station for over half his life with no major mistake occurring. Of course, I am fully responsible for this most serious blunder.



llama_print_timings:        load time =   650.16 ms
llama_print_timings:      sample time =    29.81 ms /    66 runs   (    0.45 ms per token,  2213.80 tokens per second)
llama_print_timings: prompt eval time =   650.12 ms /    77 tokens (    8.44 ms per token,   118.44 tokens per second)
llama_print_timings:        eval time =  5435.85 ms /    65 runs   (   83.63 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =  6222.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the space surveillance system of the Three-Body universe, how many other people are involved?



llama_print_timings:        load time =   592.31 ms
llama_print_timings:      sample time =     9.90 ms /    22 runs   (    0.45 ms per token,  2222.45 tokens per second)
llama_print_timings: prompt eval time =   592.26 ms /    49 tokens (   12.09 ms per token,    82.73 tokens per second)
llama_print_timings:        eval time =  1791.52 ms /    21 runs   (   85.31 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2431.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I checked and there are about 6,000 people involved in this.



llama_print_timings:        load time =   589.30 ms
llama_print_timings:      sample time =     7.95 ms /    18 runs   (    0.44 ms per token,  2265.58 tokens per second)
llama_print_timings: prompt eval time =   589.27 ms /    50 tokens (   11.79 ms per token,    84.85 tokens per second)
llama_print_timings:        eval time =  1446.63 ms /    17 runs   (   85.10 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  2073.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They are all guilty.



llama_print_timings:        load time =   580.20 ms
llama_print_timings:      sample time =     2.72 ms /     6 runs   (    0.45 ms per token,  2202.64 tokens per second)
llama_print_timings: prompt eval time =   580.15 ms /    35 tokens (   16.58 ms per token,    60.33 tokens per second)
llama_print_timings:        eval time =   413.07 ms /     5 runs   (   82.61 ms per token,    12.10 tokens per second)
llama_print_timings:       total time =  1004.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes.



llama_print_timings:        load time =   567.58 ms
llama_print_timings:      sample time =     1.38 ms /     3 runs   (    0.46 ms per token,  2169.20 tokens per second)
llama_print_timings: prompt eval time =   567.54 ms /    33 tokens (   17.20 ms per token,    58.15 tokens per second)
llama_print_timings:        eval time =   179.19 ms /     2 runs   (   89.59 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =   753.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Six thousand people are dehydrated, burned in the capital square - you, let it be an incendiary object.



llama_print_timings:        load time =   595.28 ms
llama_print_timings:      sample time =    13.40 ms /    30 runs   (    0.45 ms per token,  2239.31 tokens per second)
llama_print_timings: prompt eval time =   595.24 ms /    55 tokens (   10.82 ms per token,    92.40 tokens per second)
llama_print_timings:        eval time =  2514.16 ms /    29 runs   (   86.70 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  3170.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thank you, President. This has helped to ease our conscience a little bit.



llama_print_timings:        load time =   580.40 ms
llama_print_timings:      sample time =     7.49 ms /    17 runs   (    0.44 ms per token,  2270.91 tokens per second)
llama_print_timings: prompt eval time =   580.36 ms /    43 tokens (   13.50 ms per token,    74.09 tokens per second)
llama_print_timings:        eval time =  1362.04 ms /    16 runs   (   85.13 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  1976.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   589.23 ms
llama_print_timings:      sample time =     7.91 ms /    18 runs   (    0.44 ms per token,  2275.03 tokens per second)
llama_print_timings: prompt eval time =   589.19 ms /    48 tokens (   12.27 ms per token,    81.47 tokens per second)
llama_print_timings:        eval time =  1454.38 ms /    17 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  2079.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a small monitoring station with low power, it can transmit about 120 million lx (around 120 ly).



llama_print_timings:        load time =   609.92 ms
llama_print_timings:      sample time =    13.59 ms /    30 runs   (    0.45 ms per token,  2206.86 tokens per second)
llama_print_timings: prompt eval time =   609.88 ms /    63 tokens (    9.68 ms per token,   103.30 tokens per second)
llama_print_timings:        eval time =  2461.60 ms /    29 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3134.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I think we should all work together to achieve a common goal. We could exchange ideas and come up with solutions that are beneficial for everyone.”



llama_print_timings:        load time =   587.10 ms
llama_print_timings:      sample time =    13.76 ms /    31 runs   (    0.44 ms per token,  2252.91 tokens per second)
llama_print_timings: prompt eval time =   587.06 ms /    47 tokens (   12.49 ms per token,    80.06 tokens per second)
llama_print_timings:        eval time =  2574.40 ms /    30 runs   (   85.81 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  3224.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Did you send a carefully crafted message to that alien world in an attempt to get them to respond?



llama_print_timings:        load time =   581.35 ms
llama_print_timings:      sample time =    10.72 ms /    23 runs   (    0.47 ms per token,  2145.32 tokens per second)
llama_print_timings: prompt eval time =   581.33 ms /    50 tokens (   11.63 ms per token,    86.01 tokens per second)
llama_print_timings:        eval time =  1874.60 ms /    22 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  2504.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   615.54 ms
llama_print_timings:      sample time =    18.90 ms /    42 runs   (    0.45 ms per token,  2222.46 tokens per second)
llama_print_timings: prompt eval time =   615.50 ms /    68 tokens (    9.05 ms per token,   110.48 tokens per second)
llama_print_timings:        eval time =  3587.72 ms /    41 runs   (   87.51 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  4287.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the departure of the overseer, the supreme ruler convened a meeting with the commander of the Trisolaran fleet.



llama_print_timings:        load time =   583.41 ms
llama_print_timings:      sample time =    13.42 ms /    30 runs   (    0.45 ms per token,  2236.14 tokens per second)
llama_print_timings: prompt eval time =   583.37 ms /    46 tokens (   12.68 ms per token,    78.85 tokens per second)
llama_print_timings:        eval time =  2461.61 ms /    29 runs   (   84.88 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3105.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How long will it take for the first fleet to complete preparations?”



llama_print_timings:        load time =   592.38 ms
llama_print_timings:      sample time =     7.21 ms /    16 runs   (    0.45 ms per token,  2220.68 tokens per second)
llama_print_timings: prompt eval time =   592.34 ms /    42 tokens (   14.10 ms per token,    70.91 tokens per second)
llama_print_timings:        eval time =  1264.01 ms /    15 runs   (   84.27 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  1888.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The suggestion of the commanding officer is still in the final stage and has a minimum sailing capacity of six thousand hours.”



llama_print_timings:        load time =   593.01 ms
llama_print_timings:      sample time =    11.99 ms /    27 runs   (    0.44 ms per token,  2251.69 tokens per second)
llama_print_timings: prompt eval time =   592.98 ms /    51 tokens (   11.63 ms per token,    86.01 tokens per second)
llama_print_timings:        eval time =  2232.92 ms /    26 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2880.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I will request the senate to discuss my plan: after the formation of the fleet, we will sail immediately in that direction.



llama_print_timings:        load time =   599.75 ms
llama_print_timings:      sample time =    11.83 ms /    27 runs   (    0.44 ms per token,  2281.95 tokens per second)
llama_print_timings: prompt eval time =   599.72 ms /    56 tokens (   10.71 ms per token,    93.38 tokens per second)
llama_print_timings:        eval time =  2201.70 ms /    26 runs   (   84.68 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2854.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Supreme Leader, at that frequency of reception, even the direction-finding is not very accurate. You know, a fleet can only travel at a speed close to the speed of light, and its power reserve is limited to only one deceleration, so it's impossible for us to search for a large area along that direction if we don't know its exact distance. If no clear result is achieved, the final fate of our entire fleet would be falling into the abyss of space.”



llama_print_timings:        load time =   662.01 ms
llama_print_timings:      sample time =    47.42 ms /   106 runs   (    0.45 ms per token,  2235.49 tokens per second)
llama_print_timings: prompt eval time =   661.98 ms /    97 tokens (    6.82 ms per token,   146.53 tokens per second)
llama_print_timings:        eval time =  8983.08 ms /   105 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  9866.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But look at our galaxy's three suns, any of them could expand simultaneously and engulf this last planet. So there is no choice but to face the danger.



llama_print_timings:        load time =   628.63 ms
llama_print_timings:      sample time =    16.47 ms /    37 runs   (    0.45 ms per token,  2247.05 tokens per second)
llama_print_timings: prompt eval time =   628.59 ms /    71 tokens (    8.85 ms per token,   112.95 tokens per second)
llama_print_timings:        eval time =  3137.04 ms /    36 runs   (   87.14 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  3841.00 ms


translated 75.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

After 85,000 years.



llama_print_timings:        load time =   585.76 ms
llama_print_timings:      sample time =     4.96 ms /    11 runs   (    0.45 ms per token,  2217.74 tokens per second)
llama_print_timings: prompt eval time =   585.73 ms /    45 tokens (   13.02 ms per token,    76.83 tokens per second)
llama_print_timings:        eval time =   863.73 ms /    10 runs   (   86.37 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  1472.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The emperor ordered an emergency meeting of all the ruling officials in the Trisolarian world, which is very unusual. Something major must have happened.



llama_print_timings:        load time =   590.29 ms
llama_print_timings:      sample time =    15.41 ms /    34 runs   (    0.45 ms per token,  2207.08 tokens per second)
llama_print_timings: prompt eval time =   590.25 ms /    54 tokens (   10.93 ms per token,    91.49 tokens per second)
llama_print_timings:        eval time =  2852.34 ms /    33 runs   (   86.43 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  3508.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Before the time of two million years, the三体舰队 set sail. They knew only about the general direction of their target, but not its exact distance. Perhaps their goal was billions of times away in the vast star-filled universe or on the other side of the Milky Way galaxy; it was a mission with no hope of success.



llama_print_timings:        load time =   650.26 ms
llama_print_timings:      sample time =    32.03 ms /    71 runs   (    0.45 ms per token,  2216.81 tokens per second)
llama_print_timings: prompt eval time =   650.23 ms /    87 tokens (    7.47 ms per token,   133.80 tokens per second)
llama_print_timings:        eval time =  6080.37 ms /    70 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  6868.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The senate meeting was held under the huge monument.



llama_print_timings:        load time =   680.14 ms
llama_print_timings:      sample time =     5.95 ms /    12 runs   (    0.50 ms per token,  2016.47 tokens per second)
llama_print_timings: prompt eval time =   680.10 ms /    81 tokens (    8.40 ms per token,   119.10 tokens per second)
llama_print_timings:        eval time =   955.48 ms /    11 runs   (   86.86 ms per token,    11.51 tokens per second)
llama_print_timings:       total time =  1660.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The time has come to set the world in order, and this is not the place for it. We must take a different approach. Here on our platform of titans we can control time itself! I hereby declare you, each and every one of you, to be the head of a new civilization. Let us begin by sending time back three years.



llama_print_timings:        load time =   770.74 ms
llama_print_timings:      sample time =    33.79 ms /    75 runs   (    0.45 ms per token,  2219.85 tokens per second)
llama_print_timings: prompt eval time =   770.69 ms /   157 tokens (    4.91 ms per token,   203.71 tokens per second)
llama_print_timings:        eval time =  6380.62 ms /    74 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  7297.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I just turned off the power supply of the giant pendulum and it will stop slowly under air resistance.



llama_print_timings:        load time =   588.26 ms
llama_print_timings:      sample time =    10.70 ms /    24 runs   (    0.45 ms per token,  2242.57 tokens per second)
llama_print_timings: prompt eval time =   588.21 ms /    49 tokens (   12.00 ms per token,    83.30 tokens per second)
llama_print_timings:        eval time =  1982.78 ms /    23 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2616.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Because the people need peace and stability, to ensure their safety and well-being,” the dictator replies. “If we don’t fight for our country’s interests against foreign aggression, it will be a disaster.”



llama_print_timings:        load time =   578.37 ms
llama_print_timings:      sample time =    22.94 ms /    52 runs   (    0.44 ms per token,  2266.59 tokens per second)
llama_print_timings: prompt eval time =   578.33 ms /    42 tokens (   13.77 ms per token,    72.62 tokens per second)
llama_print_timings:        eval time =  4348.14 ms /    51 runs   (   85.26 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  5026.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

We all know the historical significance of the Great Vibrator. It was used to hypnotize God. Now we know that it is better for us for God to be awake. So it has started to bless us.



llama_print_timings:        load time =   619.38 ms
llama_print_timings:      sample time =    20.78 ms /    47 runs   (    0.44 ms per token,  2261.90 tokens per second)
llama_print_timings: prompt eval time =   619.34 ms /    67 tokens (    9.24 ms per token,   108.18 tokens per second)
llama_print_timings:        eval time =  3944.78 ms /    46 runs   (   85.76 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  4654.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The crowd fell silent, pondering the implications of the leader's words. After three more times of the giant pendulum swinging, someone asked, "Has Earth answered our messages?"



llama_print_timings:        load time =   610.70 ms
llama_print_timings:      sample time =    20.09 ms /    43 runs   (    0.47 ms per token,  2140.37 tokens per second)
llama_print_timings: prompt eval time =   610.65 ms /    62 tokens (    9.85 ms per token,   101.53 tokens per second)
llama_print_timings:        eval time =  3526.55 ms /    42 runs   (   83.97 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  4224.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   598.19 ms
llama_print_timings:      sample time =    11.86 ms /    26 runs   (    0.46 ms per token,  2191.69 tokens per second)
llama_print_timings: prompt eval time =   598.16 ms /    57 tokens (   10.49 ms per token,    95.29 tokens per second)
llama_print_timings:        eval time =  2163.42 ms /    25 runs   (   86.54 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2813.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   586.93 ms
llama_print_timings:      sample time =    11.51 ms /    26 runs   (    0.44 ms per token,  2258.32 tokens per second)
llama_print_timings: prompt eval time =   586.89 ms /    52 tokens (   11.29 ms per token,    88.60 tokens per second)
llama_print_timings:        eval time =  2206.63 ms /    25 runs   (   88.27 ms per token,    11.33 tokens per second)
llama_print_timings:       total time =  2844.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This means that the civilization of Earth is only 40,000 light years away from us.



llama_print_timings:        load time =   584.45 ms
llama_print_timings:      sample time =    10.92 ms /    24 runs   (    0.46 ms per token,  2197.00 tokens per second)
llama_print_timings: prompt eval time =   584.41 ms /    44 tokens (   13.28 ms per token,    75.29 tokens per second)
llama_print_timings:        eval time =  2013.14 ms /    23 runs   (   87.53 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  2646.42 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Isn't that the nearest star to us?"



llama_print_timings:        load time =   584.85 ms
llama_print_timings:      sample time =     5.89 ms /    13 runs   (    0.45 ms per token,  2206.38 tokens per second)
llama_print_timings: prompt eval time =   584.81 ms /    43 tokens (   13.60 ms per token,    73.53 tokens per second)
llama_print_timings:        eval time =  1046.71 ms /    12 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  1656.99 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, so I said: God is blessing the Three Body Civilization.” The exhilaration spreading in the hall was not fully expressed, like a suppressed volcano. Prime Minister Zhousui knew that letting this fragile emotion explode would be detrimental to him and he immediately poured a cold cup of water on it: “I have commanded the Three Body fleet to sail towards the star, but things are not as optimistic as you think at present – According to current conditions, they are heading towards their own grave.”



llama_print_timings:        load time =   703.80 ms
llama_print_timings:      sample time =    52.43 ms /   116 runs   (    0.45 ms per token,  2212.47 tokens per second)
llama_print_timings: prompt eval time =   703.75 ms /   126 tokens (    5.59 ms per token,   179.04 tokens per second)
llama_print_timings:        eval time = 10073.11 ms /   115 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 11010.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dictator's words calmed the senators immediately.



llama_print_timings:        load time =   578.62 ms
llama_print_timings:      sample time =     6.15 ms /    14 runs   (    0.44 ms per token,  2276.42 tokens per second)
llama_print_timings: prompt eval time =   578.58 ms /    41 tokens (   14.11 ms per token,    70.86 tokens per second)
llama_print_timings:        eval time =  1104.35 ms /    13 runs   (   84.95 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  1709.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Does anyone understand what I mean?” the leader asked.



llama_print_timings:        load time =   585.36 ms
llama_print_timings:      sample time =     5.78 ms /    13 runs   (    0.44 ms per token,  2250.69 tokens per second)
llama_print_timings: prompt eval time =   585.34 ms /    41 tokens (   14.28 ms per token,    70.05 tokens per second)
llama_print_timings:        eval time =  1098.10 ms /    12 runs   (   91.51 ms per token,    10.93 tokens per second)
llama_print_timings:       total time =  1708.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I understand,” said the science ruler. “We have carefully studied the first set of Earth information, and what is most worth noting is their history of civilization. Please see below facts: Humanity took millions of years to progress from the hunter-gatherer era to agricultural age; several thousand years for them to transition from agricultural age to industrial age; only two hundred Earth years for them to enter the information age; and they have come to the nuclear era in just a few Earth years. This civilization has an incredibly fast evolutionary capacity! In our own world, neither of the two hundred civilizations that exist including us, have experienced such rapid development. All scientific and technological progress of each civilization is steady or slowing down.”



llama_print_timings:        load time =   817.08 ms
llama_print_timings:      sample time =    72.85 ms /   159 runs   (    0.46 ms per token,  2182.66 tokens per second)
llama_print_timings: prompt eval time =   817.04 ms /   183 tokens (    4.46 ms per token,   223.98 tokens per second)
llama_print_timings:        eval time = 13832.40 ms /   158 runs   (   87.55 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 14981.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The reality is that, when the Three-Body fleet arrives at the planetary system in which Earth resides forty-five million years later, their technology will be in even faster growth and greatly surpass ours! After such a long journey through two interstellar dust clouds, only half of the fleet will likely reach the sun's system. The rest would lose their ships during the voyage.”



llama_print_timings:        load time =   731.17 ms
llama_print_timings:      sample time =    38.27 ms /    84 runs   (    0.46 ms per token,  2195.10 tokens per second)
llama_print_timings: prompt eval time =   731.14 ms /   132 tokens (    5.54 ms per token,   180.54 tokens per second)
llama_print_timings:        eval time =  7238.73 ms /    83 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  8140.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If that is the case, then there are even more terrible things...



llama_print_timings:        load time =   589.32 ms
llama_print_timings:      sample time =     6.65 ms /    15 runs   (    0.44 ms per token,  2255.30 tokens per second)
llama_print_timings: prompt eval time =   589.28 ms /    48 tokens (   12.28 ms per token,    81.46 tokens per second)
llama_print_timings:        eval time =  1192.98 ms /    14 runs   (   85.21 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  1812.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, this is a simple solution. The positions of the Three-Body Civilization have been exposed, and to eliminate future threats, Earth's interstellar fleet will launch an attack against our galaxy. Most likely, the Three-Body Civilization has already been eliminated by humans before being swallowed up by expanding sun.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The bright prospect suddenly became so dim, and the assembly was silent for a long time.



llama_print_timings:        load time =   582.81 ms
llama_print_timings:      sample time =     8.44 ms /    19 runs   (    0.44 ms per token,  2252.52 tokens per second)
llama_print_timings: prompt eval time =   582.76 ms /    44 tokens (   13.24 ms per token,    75.50 tokens per second)
llama_print_timings:        eval time =  1559.25 ms /    18 runs   (   86.62 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  2178.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The head said, “Our next step is to curb the scientific progress of Earth's civilization. Even before receiving the first message, we began planning for this. Now that our response is from a traitor in the Earth civilization, we have reason to believe that there are many different factions within it and we will make good use of these forces.”



llama_print_timings:        load time =   688.86 ms
llama_print_timings:      sample time =    34.31 ms /    76 runs   (    0.45 ms per token,  2215.16 tokens per second)
llama_print_timings: prompt eval time =   688.81 ms /   110 tokens (    6.26 ms per token,   159.70 tokens per second)
llama_print_timings:        eval time =  6378.40 ms /    75 runs   (   85.05 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  7220.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It is difficult to communicate with the Earth, for our connection is so thin that it takes eighty thousand years to complete a response.



llama_print_timings:        load time =   610.16 ms
llama_print_timings:      sample time =    12.94 ms /    28 runs   (    0.46 ms per token,  2164.50 tokens per second)
llama_print_timings: prompt eval time =   610.12 ms /    57 tokens (   10.70 ms per token,    93.42 tokens per second)
llama_print_timings:        eval time =  2315.99 ms /    27 runs   (   85.78 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  2983.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Not necessarily. Just as the discovery of extraterrestrial civilizations has been a huge shock to our world, it will have a profound impact on society as a whole. We can reasonably predict that inwardly opposed forces will gather and grow within Earth's civilization.



llama_print_timings:        load time =   632.84 ms
llama_print_timings:      sample time =    27.12 ms /    59 runs   (    0.46 ms per token,  2175.84 tokens per second)
llama_print_timings: prompt eval time =   632.81 ms /    79 tokens (    8.01 ms per token,   124.84 tokens per second)
llama_print_timings:        eval time =  4939.06 ms /    58 runs   (   85.16 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  5693.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So what can they do? Can they cause damage?



llama_print_timings:        load time =   580.31 ms
llama_print_timings:      sample time =     5.90 ms /    12 runs   (    0.49 ms per token,  2035.62 tokens per second)
llama_print_timings: prompt eval time =   580.27 ms /    40 tokens (   14.51 ms per token,    68.93 tokens per second)
llama_print_timings:        eval time =   957.78 ms /    11 runs   (   87.07 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  1564.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In the vast span of four thousand years, any strategic significance in traditional war and terrorism is not very big, all can be recovered. On such a long time frame, if to effectively curb a civilization's development and disarm it, there is only one way to do it - killing their science.”



llama_print_timings:        load time =   664.01 ms
llama_print_timings:      sample time =    30.51 ms /    67 runs   (    0.46 ms per token,  2196.22 tokens per second)
llama_print_timings: prompt eval time =   663.97 ms /    97 tokens (    6.85 ms per token,   146.09 tokens per second)
llama_print_timings:        eval time =  5559.30 ms /    66 runs   (   84.23 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  6361.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The first plan is codenamed ‘Dyeing’, said the science dictator. “Using the side effects of scientific and technological development to frighten and dislike people about scientific knowledge such as environmental problems caused by technological progress, we can assume that there are similar consequences in this world. The Dyeing Plan aims to make full use of these factors by displaying supernatural forces to earthlings through a series of ‘miracles’.”



llama_print_timings:        load time =   805.91 ms
llama_print_timings:      sample time =    43.04 ms /    95 runs   (    0.45 ms per token,  2207.25 tokens per second)
llama_print_timings: prompt eval time =   805.87 ms /   178 tokens (    4.53 ms per token,   220.88 tokens per second)
llama_print_timings:        eval time =  8184.79 ms /    94 runs   (   87.07 ms per token,    11.48 tokens per second)
llama_print_timings:       total time =  9181.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How can miracles be produced?”



llama_print_timings:        load time =   583.71 ms
llama_print_timings:      sample time =     5.14 ms /    10 runs   (    0.51 ms per token,  1945.90 tokens per second)
llama_print_timings: prompt eval time =   583.67 ms /    37 tokens (   15.77 ms per token,    63.39 tokens per second)
llama_print_timings:        eval time =   800.47 ms /     9 runs   (   88.94 ms per token,    11.24 tokens per second)
llama_print_timings:       total time =  1406.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The reason why a miracle is called a miracle is that it is beyond the ability of Earthlings to understand. It may require us to input technology above their current level in order to send some higher than their level.



llama_print_timings:        load time =   617.08 ms
llama_print_timings:      sample time =    20.14 ms /    46 runs   (    0.44 ms per token,  2284.58 tokens per second)
llama_print_timings: prompt eval time =   617.03 ms /    65 tokens (    9.49 ms per token,   105.34 tokens per second)
llama_print_timings:        eval time =  3754.51 ms /    45 runs   (   83.43 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  4460.53 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is too risky. Who will end up with these technologies in the end? It's like playing with fire!



llama_print_timings:        load time =   584.53 ms
llama_print_timings:      sample time =    12.19 ms /    27 runs   (    0.45 ms per token,  2215.66 tokens per second)
llama_print_timings: prompt eval time =   584.48 ms /    47 tokens (   12.44 ms per token,    80.41 tokens per second)
llama_print_timings:        eval time =  2263.14 ms /    26 runs   (   87.04 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  2901.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, what kind of technology to input to produce miracles still needs further study. . .



llama_print_timings:        load time =   590.57 ms
llama_print_timings:      sample time =    10.28 ms /    22 runs   (    0.47 ms per token,  2141.12 tokens per second)
llama_print_timings: prompt eval time =   590.53 ms /    50 tokens (   11.81 ms per token,    84.67 tokens per second)
llama_print_timings:        eval time =  1831.85 ms /    21 runs   (   87.23 ms per token,    11.46 tokens per second)
llama_print_timings:       total time =  2469.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The military ruler stood up and said, "I would like to state my opinion. The two plans for killing humanity's science, are almost useless."



llama_print_timings:        load time =   612.35 ms
llama_print_timings:      sample time =    15.22 ms /    34 runs   (    0.45 ms per token,  2233.61 tokens per second)
llama_print_timings: prompt eval time =   612.33 ms /    67 tokens (    9.14 ms per token,   109.42 tokens per second)
llama_print_timings:        eval time =  2870.93 ms /    33 runs   (   87.00 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  3552.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's better to do something than nothing at all.”, the science-minded minister argued.



llama_print_timings:        load time =   598.29 ms
llama_print_timings:      sample time =    11.05 ms /    23 runs   (    0.48 ms per token,  2080.51 tokens per second)
llama_print_timings: prompt eval time =   598.25 ms /    51 tokens (   11.73 ms per token,    85.25 tokens per second)
llama_print_timings:        eval time =  1895.77 ms /    22 runs   (   86.17 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  2543.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's all there is to it.



llama_print_timings:        load time =   578.57 ms
llama_print_timings:      sample time =     4.42 ms /    10 runs   (    0.44 ms per token,  2263.98 tokens per second)
llama_print_timings: prompt eval time =   578.52 ms /    42 tokens (   13.77 ms per token,    72.60 tokens per second)
llama_print_timings:        eval time =   766.06 ms /     9 runs   (   85.12 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  1364.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I agree with your view that ‘Dyeing’ and ‘Miracle’ are only detrimental to the progress of earth science.” The head of state spoke to the military commander, then looked towards all attendees, “We need a decisive action to thoroughly choke off the development of earth science and keep it locked on its current level. Here we need to focus: The development of modern sciences depends entirely on basic sciences, and the foundation of basic sciences is exploring the substance structure at deeper levels. If this field fails to make progress, modern sciences as a whole cannot potentially make major breakthroughs. In fact, this is not only targeted at earth civilization, but also at all targets that the Three Body Civilization wants to conquer before receiving the first extraterrestrial message, we have been making an effort in this field for some time now and recently intensified it greatly.”



llama_print_timings:        load time =   814.64 ms
llama_print_timings:      sample time =    84.79 ms /   190 runs   (    0.45 ms per token,  2240.94 tokens per second)
llama_print_timings: prompt eval time =   814.58 ms /   186 tokens (    4.38 ms per token,   228.34 tokens per second)
llama_print_timings:        eval time = 16404.03 ms /   189 runs   (   86.79 ms per token,    11.52 tokens per second)
llama_print_timings:       total time = 17601.13 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The heads of state looked up at the sky and saw a round ring in space, radiating metallic gloss under the sun.



llama_print_timings:        load time =   614.18 ms
llama_print_timings:      sample time =    13.51 ms /    29 runs   (    0.47 ms per token,  2147.03 tokens per second)
llama_print_timings: prompt eval time =   614.14 ms /    60 tokens (   10.24 ms per token,    97.70 tokens per second)
llama_print_timings:        eval time =  2483.67 ms /    28 runs   (   88.70 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =  3158.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's not a space docking facility, is it?"



llama_print_timings:        load time =   573.70 ms
llama_print_timings:      sample time =     6.60 ms /    15 runs   (    0.44 ms per token,  2274.11 tokens per second)
llama_print_timings: prompt eval time =   573.65 ms /    44 tokens (   13.04 ms per token,    76.70 tokens per second)
llama_print_timings:        eval time =  1179.23 ms /    14 runs   (   84.23 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  1782.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Not, it is a giant particle accelerator that is under construction. The project to build the second space fleet was cancelled and all resources were used for Project Xi.



llama_print_timings:        load time =   600.98 ms
llama_print_timings:      sample time =    15.88 ms /    36 runs   (    0.44 ms per token,  2266.29 tokens per second)
llama_print_timings: prompt eval time =   600.94 ms /    61 tokens (    9.85 ms per token,   101.51 tokens per second)
llama_print_timings:        eval time =  2989.70 ms /    35 runs   (   85.42 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  3660.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What's this 'Chizhi Jing Gong' business?"



llama_print_timings:        load time =   568.74 ms
llama_print_timings:      sample time =     7.51 ms /    17 runs   (    0.44 ms per token,  2263.05 tokens per second)
llama_print_timings: prompt eval time =   568.71 ms /    36 tokens (   15.80 ms per token,    63.30 tokens per second)
llama_print_timings:        eval time =  1349.17 ms /    16 runs   (   84.32 ms per token,    11.86 tokens per second)
llama_print_timings:       total time =  1951.26 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, at least half of the people in the room don't know about this plan. Now I ask for the science ruling council to introduce it to everyone.



llama_print_timings:        load time =   584.46 ms
llama_print_timings:      sample time =    15.38 ms /    35 runs   (    0.44 ms per token,  2275.53 tokens per second)
llama_print_timings: prompt eval time =   584.42 ms /    53 tokens (   11.03 ms per token,    90.69 tokens per second)
llama_print_timings:        eval time =  2912.39 ms /    34 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  3565.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I know about this plan, but I didn't expect it to go this far.



llama_print_timings:        load time =   583.15 ms
llama_print_timings:      sample time =     8.51 ms /    19 runs   (    0.45 ms per token,  2232.41 tokens per second)
llama_print_timings: prompt eval time =   583.11 ms /    48 tokens (   12.15 ms per token,    82.32 tokens per second)
llama_print_timings:        eval time =  1568.70 ms /    18 runs   (   87.15 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  2190.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Education Minister said, "I know, but it feels like a fairy tale."



llama_print_timings:        load time =   583.19 ms
llama_print_timings:      sample time =     8.98 ms /    19 runs   (    0.47 ms per token,  2115.81 tokens per second)
llama_print_timings: prompt eval time =   583.15 ms /    45 tokens (   12.96 ms per token,    77.17 tokens per second)
llama_print_timings:        eval time =  1571.62 ms /    18 runs   (   87.31 ms per token,    11.45 tokens per second)
llama_print_timings:       total time =  2196.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The scientific dictator said, “In short, the Zhizhi Project is a plan to transform a proton into a super-intelligent computer.”



llama_print_timings:        load time =   592.61 ms
llama_print_timings:      sample time =    14.69 ms /    33 runs   (    0.45 ms per token,  2246.89 tokens per second)
llama_print_timings: prompt eval time =   592.58 ms /    55 tokens (   10.77 ms per token,    92.82 tokens per second)
llama_print_timings:        eval time =  2702.06 ms /    32 runs   (   84.44 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  3361.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As a widely spread science fiction, this is something everyone has heard of. "But to become a reality, it still comes as a surprise," said the Agrarian Minister. "I know physicists can manipulate the nine-dimensional structure in the tenth dimension of their microcosm. But we cannot imagine that they could put a pair of tweezers into a proton and build a large-scale integrated circuit inside."



llama_print_timings:        load time =   676.60 ms
llama_print_timings:      sample time =    39.62 ms /    88 runs   (    0.45 ms per token,  2221.04 tokens per second)
llama_print_timings: prompt eval time =   676.55 ms /   103 tokens (    6.57 ms per token,   152.24 tokens per second)
llama_print_timings:        eval time =  7446.07 ms /    87 runs   (   85.59 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  8299.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course not. The etching of a microscopic integrated circuit can only be done on the macro level, and it has to be done on a two-dimensional plane. Therefore, we need to expand a proton into two dimensions.



llama_print_timings:        load time =   643.47 ms
llama_print_timings:      sample time =    22.57 ms /    51 runs   (    0.44 ms per token,  2259.24 tokens per second)
llama_print_timings: prompt eval time =   643.43 ms /    73 tokens (    8.81 ms per token,   113.46 tokens per second)
llama_print_timings:        eval time =  4240.44 ms /    50 runs   (   84.81 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  4983.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Expand a nine-dimensional structure into two dimensions? How big is the area?"



llama_print_timings:        load time =   583.95 ms
llama_print_timings:      sample time =     8.21 ms /    18 runs   (    0.46 ms per token,  2191.38 tokens per second)
llama_print_timings: prompt eval time =   583.91 ms /    43 tokens (   13.58 ms per token,    73.64 tokens per second)
llama_print_timings:        eval time =  1467.28 ms /    17 runs   (   86.31 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  2087.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It's very big, you will see.” The science ruler smiled.



llama_print_timings:        load time =   574.06 ms
llama_print_timings:      sample time =     8.20 ms /    18 runs   (    0.46 ms per token,  2194.59 tokens per second)
llama_print_timings: prompt eval time =   574.02 ms /    44 tokens (   13.05 ms per token,    76.65 tokens per second)
llama_print_timings:        eval time =  1502.64 ms /    17 runs   (   88.39 ms per token,    11.31 tokens per second)
llama_print_timings:       total time =  2113.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   576.03 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2260.98 tokens per second)
llama_print_timings: prompt eval time =   576.01 ms /    45 tokens (   12.80 ms per token,    78.12 tokens per second)
llama_print_timings:        eval time =   503.46 ms /     6 runs   (   83.91 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  1092.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Time flies, and six billion years have passed since the giant accelerator was built. Two billion years after that, in a two-dimensional expansion of protons will take place on the synchronized orbital paths around three planets.



llama_print_timings:        load time =   638.28 ms
llama_print_timings:      sample time =    22.77 ms /    49 runs   (    0.46 ms per token,  2151.77 tokens per second)
llama_print_timings: prompt eval time =   638.26 ms /    73 tokens (    8.74 ms per token,   114.37 tokens per second)
llama_print_timings:        eval time =  4153.73 ms /    48 runs   (   86.54 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  4893.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is a bright and clear day, the sky is pure. Just like eight million years before when the First Fleet set sail, the people of Trisolaran civilization looked up into space, watching the giant circular orbits. The Prime Minister and all of her deputies gathered at the monument dedicated to Gou Mian, a gigantic, immovable force that had once swung from the immense pylons like an eternal rock. It was hard to believe it had ever moved before.



llama_print_timings:        load time =   696.44 ms
llama_print_timings:      sample time =    48.47 ms /   105 runs   (    0.46 ms per token,  2166.33 tokens per second)
llama_print_timings: prompt eval time =   696.41 ms /   114 tokens (    6.11 ms per token,   163.70 tokens per second)
llama_print_timings:        eval time =  9088.40 ms /   104 runs   (   87.39 ms per token,    11.44 tokens per second)
llama_print_timings:       total time = 10005.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The science magistrate issued the two-dimensional unfolding command. In space, there were three cubes around a round ring, which provided energy to the fusion generators. Now, they are shaping themselves into heat sink fins, gradually emitting dark red light. The science magistrate reported that unfolding is underway, and people were nervously looking up at the space accelerator, nothing was happening.



llama_print_timings:        load time =   687.29 ms
llama_print_timings:      sample time =    37.65 ms /    84 runs   (    0.45 ms per token,  2231.37 tokens per second)
llama_print_timings: prompt eval time =   687.24 ms /   106 tokens (    6.48 ms per token,   154.24 tokens per second)
llama_print_timings:        eval time =  7087.48 ms /    83 runs   (   85.39 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  7941.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After one-tenth of a trilogy, the science ruler listened to music for a while and said, "I'm sorry, supreme leader, I'm afraid we can't reach our goal. We have added an extra dimension and the target proton has been reduced to 1D."



llama_print_timings:        load time =   623.62 ms
llama_print_timings:      sample time =    29.86 ms /    66 runs   (    0.45 ms per token,  2210.17 tokens per second)
llama_print_timings: prompt eval time =   623.58 ms /    73 tokens (    8.54 ms per token,   117.07 tokens per second)
llama_print_timings:        eval time =  5542.46 ms /    65 runs   (   85.27 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  6300.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“One dimension? A line?”



llama_print_timings:        load time =   567.64 ms
llama_print_timings:      sample time =     3.63 ms /     8 runs   (    0.45 ms per token,  2202.64 tokens per second)
llama_print_timings: prompt eval time =   567.60 ms /    37 tokens (   15.34 ms per token,    65.19 tokens per second)
llama_print_timings:        eval time =   631.78 ms /     7 runs   (   90.25 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =  1215.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, a line that is infinitely thin and theoretically its length has been calculated to be about five hundred billion light years.



llama_print_timings:        load time =   583.36 ms
llama_print_timings:      sample time =    11.90 ms /    26 runs   (    0.46 ms per token,  2185.06 tokens per second)
llama_print_timings: prompt eval time =   583.32 ms /    51 tokens (   11.44 ms per token,    87.43 tokens per second)
llama_print_timings:        eval time =  2142.42 ms /    25 runs   (   85.70 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2778.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Hmmph!” the military ruler said, “Spending resources on a space fleet and getting this result?”



llama_print_timings:        load time =   585.74 ms
llama_print_timings:      sample time =    11.27 ms /    25 runs   (    0.45 ms per token,  2217.69 tokens per second)
llama_print_timings: prompt eval time =   585.70 ms /    54 tokens (   10.85 ms per token,    92.20 tokens per second)
llama_print_timings:        eval time =  2016.78 ms /    24 runs   (   84.03 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  2652.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is a scientific experiment, and there's always some debugging process involved. That was the first time to expand the quantum.” People were dismayed by this disappointment but the thing did not end here. They had thought that a one-dimensional silk thread spreading from a quantum would always run in synchronous orbit around a planet forever, but due to the resistance of a solar storm it slowed down, and some one-dimensional threads still fell into the atmosphere. Six days later, people who had come outdoors noticed strange flashes that were elusive and flickered quickly, appearing here and there. They soon learned from the news that these one-dimensional silks were falling back to the ground due to the gravitational attraction. Although these one-dimensional silks were infinitely thin, their nuclear field still reflected visible light, and could still be seen.”



llama_print_timings:        load time =   846.04 ms
llama_print_timings:      sample time =    81.69 ms /   182 runs   (    0.45 ms per token,  2227.83 tokens per second)
llama_print_timings: prompt eval time =   845.99 ms /   199 tokens (    4.25 ms per token,   235.23 tokens per second)
llama_print_timings:        eval time = 15848.84 ms /   181 runs   (   87.56 ms per token,    11.42 tokens per second)
llama_print_timings:       total time = 17066.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

These things are so annoying. The leader kept fingering his face as he stood on the wide steps of the government building with the science ruler, “I always feel an itch on my face.”



llama_print_timings:        load time =   630.66 ms
llama_print_timings:      sample time =    20.34 ms /    45 runs   (    0.45 ms per token,  2212.28 tokens per second)
llama_print_timings: prompt eval time =   630.63 ms /    68 tokens (    9.27 ms per token,   107.83 tokens per second)
llama_print_timings:        eval time =  3753.06 ms /    44 runs   (   85.30 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4472.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The president, this is just your mental function. All one-dimensional silk has a quality that is equivalent to a proton, so they have almost no effect on the macrocosm and, of course, do not pose any harm, like they don't exist at all.



llama_print_timings:        load time =   627.49 ms
llama_print_timings:      sample time =    26.11 ms /    59 runs   (    0.44 ms per token,  2259.93 tokens per second)
llama_print_timings: prompt eval time =   627.45 ms /    73 tokens (    8.60 ms per token,   116.34 tokens per second)
llama_print_timings:        eval time =  5049.75 ms /    58 runs   (   87.06 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  5792.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The falling one-dimensional silk is getting denser and denser, the space near the ground filled with tiny sparkles under the sunlight. The sun and stars appear to have silver margins around them surrounded by these one-dimensional silks. People walking outside are covered in a layer of one-dimensional silks; as they move, the sparkling silks around their shapes can be seen while inside, the silk is shining brightly under lights but does not provide any tactile stimulation, nonetheless it's still irritating.



llama_print_timings:        load time =   759.70 ms
llama_print_timings:      sample time =    52.56 ms /   116 runs   (    0.45 ms per token,  2206.83 tokens per second)
llama_print_timings: prompt eval time =   759.65 ms /   152 tokens (    5.00 ms per token,   200.09 tokens per second)
llama_print_timings:        eval time =  9971.19 ms /   115 runs   (   86.71 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10967.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

One dimensional silk rained for more than twenty body times before stopping, not because the silk had all fallen to the ground. Although their mass was very small, still some of them existed under the force of gravity. However, when they entered the atmosphere, they were fully controlled by the air currents and never came down again. But after one dimensional expansion, the strong interaction within silk became weak, resulting in decreased strength for silk to keep intact and eventually breaking into small segments. Reflected light can't be seen with naked eyes, so people believed that they had disappeared. In the space of three-dimensional worlds, silk dust was always floating around forever.



llama_print_timings:        load time =   779.32 ms
llama_print_timings:      sample time =    64.76 ms /   144 runs   (    0.45 ms per token,  2223.56 tokens per second)
llama_print_timings: prompt eval time =   779.29 ms /   163 tokens (    4.78 ms per token,   209.17 tokens per second)
llama_print_timings:        eval time = 12329.88 ms /   143 runs   (   86.22 ms per token,    11.60 tokens per second)
llama_print_timings:       total time = 13404.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   575.37 ms
llama_print_timings:      sample time =     3.12 ms /     7 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   575.33 ms /    45 tokens (   12.79 ms per token,    78.22 tokens per second)
llama_print_timings:        eval time =   488.71 ms /     6 runs   (   81.45 ms per token,    12.28 tokens per second)
llama_print_timings:       total time =  1077.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After fifty years of the two-dimensional expansion of the proton, people on Earth soon saw strange signs. When the cooling plate of the fusion power station began to emit red light, there were several huge objects in the accelerator location that had a regular geometric shape such as ball, cube, pyramid and cone. The surface color was very complicated, but when closely watched, it turned out that there were no colors on the reflection surfaces of these geometries at all; they were just images of distorted planet surfaces reflected by total refraction. “Is this successful?” Chief asked.



llama_print_timings:        load time =   759.51 ms
llama_print_timings:      sample time =    55.34 ms /   121 runs   (    0.46 ms per token,  2186.40 tokens per second)
llama_print_timings: prompt eval time =   759.47 ms /   150 tokens (    5.06 ms per token,   197.51 tokens per second)
llama_print_timings:        eval time = 10513.68 ms /   120 runs   (   87.61 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 11524.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Scientist-in-Charge answered, “Dear Leader, I’m afraid that this attempt is still unsuccessful. As per the report from Accelerator Control Centre, this time a dimension has been missed and the target proton was unfolded into three dimensions.”



llama_print_timings:        load time =   614.59 ms
llama_print_timings:      sample time =    27.27 ms /    60 runs   (    0.45 ms per token,  2200.46 tokens per second)
llama_print_timings: prompt eval time =   614.55 ms /    67 tokens (    9.17 ms per token,   109.02 tokens per second)
llama_print_timings:        eval time =  5021.72 ms /    59 runs   (   85.11 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  5759.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The huge mirrored geometric bodies continued to emerge at high speed, with more varied shapes, including rings and three-dimensional crosses, even a twisted Möbius band. All these geometric shapes floated away from the accelerator position in half a time span, looking like a giant kid in the sky casting blocks of wood. The sunlight reflected by these objects increased the brightness on the ground by one hundred percent, and their shadows, which shook back and forth left and right, casted a random changeable pattern on the ground. Then, as these geometric shapes gradually deformed, they became increasingly complex and unorganized, and now in the sky, people can no longer think of them as blocks but more like a giant who had been chopped up into body parts and internal organs. Due to their irregular shape, the scattered sunlight on the ground evened out a bit but their own surface colors became more peculiar and variable.



llama_print_timings:        load time =   933.00 ms
llama_print_timings:      sample time =    87.76 ms /   197 runs   (    0.45 ms per token,  2244.78 tokens per second)
llama_print_timings: prompt eval time =   932.97 ms /   242 tokens (    3.86 ms per token,   259.39 tokens per second)
llama_print_timings:        eval time = 17234.64 ms /   196 runs   (   87.93 ms per token,    11.37 tokens per second)
llama_print_timings:       total time = 18569.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the disorderly three-dimensional objects that fill the sky, there are a few which have drawn the attention of ground observers. At first, they were recognized because of their striking similarity to one another; when closely examined, people felt an overwhelming fear sweeping through the entire Three Body World.



llama_print_timings:        load time =   682.55 ms
llama_print_timings:      sample time =    29.12 ms /    66 runs   (    0.44 ms per token,  2266.17 tokens per second)
llama_print_timings: prompt eval time =   682.50 ms /    84 tokens (    8.13 ms per token,   123.08 tokens per second)
llama_print_timings:        eval time =  5668.62 ms /    65 runs   (   87.21 ms per token,    11.47 tokens per second)
llama_print_timings:       total time =  6482.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's the eyes! (We don't know what shape the alien's eyes are, but one thing is certain: any intelligent life form is extremely sensitive to images of their eyes.)



llama_print_timings:        load time =   608.77 ms
llama_print_timings:      sample time =    19.81 ms /    43 runs   (    0.46 ms per token,  2171.06 tokens per second)
llama_print_timings: prompt eval time =   608.73 ms /    60 tokens (   10.15 ms per token,    98.57 tokens per second)
llama_print_timings:        eval time =  3568.60 ms /    42 runs   (   84.97 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  4266.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The supreme leader is rarely someone who remains calm, but he asked the science ruler, "What's the most complex structure that a subatomic particle can possess within its interior?"



llama_print_timings:        load time =   615.33 ms
llama_print_timings:      sample time =    17.13 ms /    39 runs   (    0.44 ms per token,  2277.11 tokens per second)
llama_print_timings: prompt eval time =   615.29 ms /    63 tokens (    9.77 ms per token,   102.39 tokens per second)
llama_print_timings:        eval time =  3235.95 ms /    38 runs   (   85.16 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  3927.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It depends on the perspective. From a one-dimensional viewpoint, the particle is only a single point; from two- and three-dimensional perspectives, it begins to have internal structure; at a four-dimensional view, it already constitutes an enormous world.”



llama_print_timings:        load time =   648.59 ms
llama_print_timings:      sample time =    25.87 ms /    58 runs   (    0.45 ms per token,  2241.72 tokens per second)
llama_print_timings: prompt eval time =   648.55 ms /    87 tokens (    7.45 ms per token,   134.15 tokens per second)
llama_print_timings:        eval time =  4848.45 ms /    57 runs   (   85.06 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  5615.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dictator said, “I find it incredible that this word is used for a proton, which is such a microscopic thing.” The science ruler ignored the dictator and continued saying, “At higher dimensions, the complexity and number of structures within a particle increase exponentially, as I can say in an imprecise manner: at a basic particle level, its complexity may be comparable to that of three large galaxies; at eight dimensions, it is similar to a galaxy with its own complex structure; and when we reach nine dimensions, the number of internal structures within a particle may be comparable to the entire Universe.” As for higher dimensions, our physicists are still unable to detect them, and their complexity I cannot even imagine.



llama_print_timings:        load time =   807.84 ms
llama_print_timings:      sample time =    71.25 ms /   157 runs   (    0.45 ms per token,  2203.42 tokens per second)
llama_print_timings: prompt eval time =   807.81 ms /   179 tokens (    4.51 ms per token,   221.59 tokens per second)
llama_print_timings:        eval time = 13692.55 ms /   156 runs   (   87.77 ms per token,    11.39 tokens per second)
llama_print_timings:       total time = 14822.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Chief refers to the huge eyes in space, "Is it clear from what happened here that there is intelligent life within the atomic universe contained by expanding proton?".



llama_print_timings:        load time =   610.05 ms
llama_print_timings:      sample time =    16.70 ms /    37 runs   (    0.45 ms per token,  2215.17 tokens per second)
llama_print_timings: prompt eval time =   610.01 ms /    63 tokens (    9.68 ms per token,   103.28 tokens per second)
llama_print_timings:        eval time =  3045.15 ms /    36 runs   (   84.59 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  3730.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The definition of life, which is used in the high-dimensional microcosm, may not be suitable. More accurately, we can only say that there exists intelligent or clever life within it. Such a possibility has already been predicted by scientists, and it would be abnormal for such a complex and vast world to evolve without intelligent creatures.”



llama_print_timings:        load time =   653.84 ms
llama_print_timings:      sample time =    34.28 ms /    76 runs   (    0.45 ms per token,  2216.84 tokens per second)
llama_print_timings: prompt eval time =   653.80 ms /    86 tokens (    7.60 ms per token,   131.54 tokens per second)
llama_print_timings:        eval time =  6385.08 ms /    75 runs   (   85.13 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  7192.30 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Why are they looking at us?" the leader asked, gazing up at the sky. Those eyes in space were exquisite sculptures that looked so real and made their stares very eerie.



llama_print_timings:        load time =   627.15 ms
llama_print_timings:      sample time =    19.55 ms /    43 runs   (    0.45 ms per token,  2198.93 tokens per second)
llama_print_timings: prompt eval time =   627.11 ms /    71 tokens (    8.83 ms per token,   113.22 tokens per second)
llama_print_timings:        eval time =  3583.30 ms /    42 runs   (   85.32 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4300.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Maybe he just wants to show that he exists.



llama_print_timings:        load time =   581.70 ms
llama_print_timings:      sample time =     5.47 ms /    11 runs   (    0.50 ms per token,  2009.50 tokens per second)
llama_print_timings: prompt eval time =   581.67 ms /    38 tokens (   15.31 ms per token,    65.33 tokens per second)
llama_print_timings:        eval time =   854.09 ms /    10 runs   (   85.41 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1460.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Those things will fall to the ground, right?



llama_print_timings:        load time =   577.72 ms
llama_print_timings:      sample time =     5.56 ms /    12 runs   (    0.46 ms per token,  2159.83 tokens per second)
llama_print_timings: prompt eval time =   577.68 ms /    40 tokens (   14.44 ms per token,    69.24 tokens per second)
llama_print_timings:        eval time =   968.36 ms /    11 runs   (   88.03 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =  1571.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, you can rest assured. Even if they fall down, the thin fibers that expand in one dimension will have the same mass as a proton, so they would have no effect on our world. The only thing people need to do is get used to the spectacle.”



llama_print_timings:        load time =   667.23 ms
llama_print_timings:      sample time =    26.56 ms /    59 runs   (    0.45 ms per token,  2221.55 tokens per second)
llama_print_timings: prompt eval time =   667.19 ms /    96 tokens (    6.95 ms per token,   143.89 tokens per second)
llama_print_timings:        eval time =  4898.20 ms /    58 runs   (   84.45 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =  5682.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

People can now notice that the speed of movement of "eyes" in the sky is obviously faster than those of other geometric shapes. Moreover, they are all converging to the same point. Soon enough, two eyes meet and fuse into a single shape, which is still an eye with just increased volume. More and more "eyes" join them until eventually, all the "eyes" merge into one enormous, representing the entire universe in a beam of light which emanates from its pupil. The details on the surface gradually fade away and disappear eventually, leaving behind only a perfect circle. And then, when it begins to rotate slowly, people find out that it is not flat but a parabolic surface like an oval sectioned from a giant ball.



llama_print_timings:        load time =   930.92 ms
llama_print_timings:      sample time =    72.69 ms /   162 runs   (    0.45 ms per token,  2228.70 tokens per second)
llama_print_timings: prompt eval time =   930.88 ms /   247 tokens (    3.77 ms per token,   265.34 tokens per second)
llama_print_timings:        eval time = 14090.26 ms /   161 runs   (   87.52 ms per token,    11.43 tokens per second)
llama_print_timings:       total time = 15350.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The military commander stared at the slowly rotating giant object and suddenly realized something. He yelled, “Chairman, quick! There are other people; hurry into the bunker.” He pointed to above him, “It’s… ”



llama_print_timings:        load time =   626.24 ms
llama_print_timings:      sample time =    25.42 ms /    53 runs   (    0.48 ms per token,  2084.89 tokens per second)
llama_print_timings: prompt eval time =   626.19 ms /    76 tokens (    8.24 ms per token,   121.37 tokens per second)
llama_print_timings:        eval time =  4480.49 ms /    52 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  5223.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The leader said calmly, “Space defense forces are ordered to destroy it immediately. We will just watch here and not go anywhere else.” The mirror focused the sunlight now. Initially, the area of the spot was large, but its heat power remained harmless. This spot was moving on the continent and looking for a target. The mirror had found the city, which is the largest one, so it moved quickly to cover it up with its field. People under the monument felt hot because of the strong sunlight. At that time, people's eyes were still adapting to the normal lighting when the spot stopped suddenly and everything stopped moving. Later on, people looked up again and saw a column of rays reaching the sky, which was made up by the mirror at the bottom of the conical column that had penetrated the center of the city, turning it into white hotness in an instant. Trembling plumes of dust were formed from there due to not uniformly heating the head of the conical column and being influenced by dragon-li


llama_print_timings:        load time =  1146.18 ms
llama_print_timings:      sample time =   107.23 ms /   239 runs   (    0.45 ms per token,  2228.81 tokens per second)
llama_print_timings: prompt eval time =  1146.12 ms /   343 tokens (    3.34 ms per token,   299.27 tokens per second)
llama_print_timings:        eval time = 21114.91 ms /   238 runs   (   88.72 ms per token,    11.27 tokens per second)
llama_print_timings:       total time = 22755.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

A few bright fireballs appeared in the different parts of the reflector, which emitted different colors from the reflector light. These were nuclear bombs launched by the space defense force in Trisolaris, which exploded outside the atmosphere without any sound. After these fireballs died out, several large holes appeared on the surface of the reflector, and then the entire surface started to rip apart and eventually fragmented into tens of pieces with other geometric objects. Meanwhile, the death ray disappeared, and the world returned to normal lighting conditions for a moment. Those who had lost their intelligence continued deforming quickly and soon merged with other geometric objects in space without being distinguishable from each other.



llama_print_timings:        load time =   809.33 ms
llama_print_timings:      sample time =    64.31 ms /   145 runs   (    0.44 ms per token,  2254.81 tokens per second)
llama_print_timings: prompt eval time =   809.29 ms /   170 tokens (    4.76 ms per token,   210.06 tokens per second)
llama_print_timings:        eval time = 12474.67 ms /   144 runs   (   86.63 ms per token,    11.54 tokens per second)
llama_print_timings:       total time = 13577.58 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What about the next experiment?” asked the dictator with a sardonic smile, “Won't you expand a proton into four dimensions?”



llama_print_timings:        load time =   609.54 ms
llama_print_timings:      sample time =    14.94 ms /    33 runs   (    0.45 ms per token,  2209.57 tokens per second)
llama_print_timings: prompt eval time =   609.50 ms /    62 tokens (    9.83 ms per token,   101.72 tokens per second)
llama_print_timings:        eval time =  2769.90 ms /    32 runs   (   86.56 ms per token,    11.55 tokens per second)
llama_print_timings:       total time =  3449.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The president, even if it is not a problem, the quadricidal expanding of proton volume is small much more. If space defense force prepare enough, to project in three dimensions attack on its expansion in the four dimension, also can destroy it.



llama_print_timings:        load time =   623.93 ms
llama_print_timings:      sample time =    23.83 ms /    53 runs   (    0.45 ms per token,  2223.71 tokens per second)
llama_print_timings: prompt eval time =   623.90 ms /    72 tokens (    8.67 ms per token,   115.40 tokens per second)
llama_print_timings:        eval time =  4463.09 ms /    52 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  5193.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“You are deceiving the supreme leader! ” the military ruler angrily said to the scientific ruler. “You don’t mention the real danger! If the protons are reduced dimensions?”



llama_print_timings:        load time =   611.41 ms
llama_print_timings:      sample time =    20.46 ms /    44 runs   (    0.46 ms per token,  2150.64 tokens per second)
llama_print_timings: prompt eval time =   611.37 ms /    67 tokens (    9.12 ms per token,   109.59 tokens per second)
llama_print_timings:        eval time =  3610.97 ms /    43 runs   (   83.98 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  4317.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Zero dimensions?” the supreme leader asked, “That would be a point with no size.”



llama_print_timings:        load time =   589.35 ms
llama_print_timings:      sample time =     9.34 ms /    21 runs   (    0.44 ms per token,  2249.12 tokens per second)
llama_print_timings: prompt eval time =   589.31 ms /    51 tokens (   11.56 ms per token,    86.54 tokens per second)
llama_print_timings:        eval time =  1684.05 ms /    20 runs   (   84.20 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  2315.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, the Singularity! A proton is infinitely small compared to it, all the mass of that proton will be contained within this singularity. Its density will be infinite.” “Major, you can certainly imagine what that would be like.”



llama_print_timings:        load time =   624.87 ms
llama_print_timings:      sample time =    24.10 ms /    55 runs   (    0.44 ms per token,  2282.54 tokens per second)
llama_print_timings: prompt eval time =   624.83 ms /    76 tokens (    8.22 ms per token,   121.63 tokens per second)
llama_print_timings:        eval time =  4597.06 ms /    54 runs   (   85.13 ms per token,    11.75 tokens per second)
llama_print_timings:       total time =  5330.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Hey, “black hole”?



llama_print_timings:        load time =   575.44 ms
llama_print_timings:      sample time =     4.31 ms /     9 runs   (    0.48 ms per token,  2090.11 tokens per second)
llama_print_timings: prompt eval time =   575.41 ms /    34 tokens (   16.92 ms per token,    59.09 tokens per second)
llama_print_timings:        eval time =   661.55 ms /     8 runs   (   82.69 ms per token,    12.09 tokens per second)
llama_print_timings:       total time =  1255.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes.



llama_print_timings:        load time =   577.44 ms
llama_print_timings:      sample time =     1.31 ms /     3 runs   (    0.44 ms per token,  2283.11 tokens per second)
llama_print_timings: prompt eval time =   577.42 ms /    34 tokens (   16.98 ms per token,    58.88 tokens per second)
llama_print_timings:        eval time =   162.42 ms /     2 runs   (   81.21 ms per token,    12.31 tokens per second)
llama_print_timings:       total time =   745.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“President, this is—” the science ruler explained quickly, “We choose to expand in two dimensions rather than one, in order to avoid such a disastrous scenario. If an expansion in zero dimensions really occurs, then the charge on the proton will be transferred to the black hole created and we can capture and control it using magnetic force.”



llama_print_timings:        load time =   662.91 ms
llama_print_timings:      sample time =    33.11 ms /    74 runs   (    0.45 ms per token,  2234.77 tokens per second)
llama_print_timings: prompt eval time =   662.87 ms /    99 tokens (    6.70 ms per token,   149.35 tokens per second)
llama_print_timings:        eval time =  6158.70 ms /    73 runs   (   84.37 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  6975.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What if you can't find it or control it?” the military commander asked. “It might fall to Earth and absorb everything in its path, grow in mass until it lands in our solar system and eventually draw in the entire Three-Body World!”



llama_print_timings:        load time =   649.18 ms
llama_print_timings:      sample time =    24.69 ms /    54 runs   (    0.46 ms per token,  2187.12 tokens per second)
llama_print_timings: prompt eval time =   649.13 ms /    87 tokens (    7.46 ms per token,   134.03 tokens per second)
llama_print_timings:        eval time =  4557.06 ms /    53 runs   (   85.98 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  5319.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This won't happen, I guarantee! Why do you keep pushing me? I have said before, scientific experiments...”



llama_print_timings:        load time =   598.27 ms
llama_print_timings:      sample time =    12.12 ms /    27 runs   (    0.45 ms per token,  2227.72 tokens per second)
llama_print_timings: prompt eval time =   598.23 ms /    55 tokens (   10.88 ms per token,    91.94 tokens per second)
llama_print_timings:        eval time =  2237.25 ms /    26 runs   (   86.05 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  2888.88 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Enough!” the leader said. “What is the probability of success next time?”



llama_print_timings:        load time =   577.54 ms
llama_print_timings:      sample time =     9.19 ms /    19 runs   (    0.48 ms per token,  2068.36 tokens per second)
llama_print_timings: prompt eval time =   577.51 ms /    44 tokens (   13.13 ms per token,    76.19 tokens per second)
llama_print_timings:        eval time =  1508.71 ms /    18 runs   (   83.82 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  2129.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Almost 100%! Chief, please believe me. We have mastered the regularity of microscope to macrocosm expansion through these two failures.”



llama_print_timings:        load time =   611.09 ms
llama_print_timings:      sample time =    17.10 ms /    38 runs   (    0.45 ms per token,  2222.09 tokens per second)
llama_print_timings: prompt eval time =   611.04 ms /    62 tokens (    9.86 ms per token,   101.47 tokens per second)
llama_print_timings:        eval time =  3079.02 ms /    37 runs   (   83.22 ms per token,    12.02 tokens per second)
llama_print_timings:       total time =  3767.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Okay, for the survival of the Three Civilizations, this risk has to be taken.”



llama_print_timings:        load time =   574.63 ms
llama_print_timings:      sample time =     9.78 ms /    22 runs   (    0.44 ms per token,  2250.41 tokens per second)
llama_print_timings: prompt eval time =   574.62 ms /    44 tokens (   13.06 ms per token,    76.57 tokens per second)
llama_print_timings:        eval time =  1802.58 ms /    21 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2421.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Thank you, Prime Minister!



llama_print_timings:        load time =   575.92 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2258.06 tokens per second)
llama_print_timings: prompt eval time =   575.88 ms /    35 tokens (   16.45 ms per token,    60.78 tokens per second)
llama_print_timings:        eval time =   502.70 ms /     6 runs   (   83.78 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  1091.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But, if it fails again, you and all the scientists involved in the Child's Project will be guilty.



llama_print_timings:        load time =   593.87 ms
llama_print_timings:      sample time =    11.29 ms /    25 runs   (    0.45 ms per token,  2214.15 tokens per second)
llama_print_timings: prompt eval time =   593.83 ms /    52 tokens (   11.42 ms per token,    87.57 tokens per second)
llama_print_timings:        eval time =  2056.18 ms /    24 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2701.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, they are guilty. If the aliens could sweat, the science emperor must have been very nervous.



llama_print_timings:        load time =   597.47 ms
llama_print_timings:      sample time =    12.11 ms /    27 runs   (    0.45 ms per token,  2229.75 tokens per second)
llama_print_timings: prompt eval time =   597.43 ms /    56 tokens (   10.67 ms per token,    93.74 tokens per second)
llama_print_timings:        eval time =  2249.37 ms /    26 runs   (   86.51 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  2901.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$The cleaning of the protons on synchronous orbit is much easier than the ones in one dimension. Small spacecraft can pull them away from near-Earth space so they do not enter the atmosphere. Those mountains of proton material have almost no mass, resemble giant silver ghosts and an infant could easily move them around.



llama_print_timings:        load time =   662.89 ms
llama_print_timings:      sample time =    30.91 ms /    70 runs   (    0.44 ms per token,  2264.86 tokens per second)
llama_print_timings: prompt eval time =   662.84 ms /    99 tokens (    6.70 ms per token,   149.36 tokens per second)
llama_print_timings:        eval time =  5988.30 ms /    69 runs   (   86.79 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  6789.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the experiment, the head of state asked the science ruler: "In this experiment, did we destroy a civilization in the microcosm?"



llama_print_timings:        load time =   598.70 ms
llama_print_timings:      sample time =    14.03 ms /    32 runs   (    0.44 ms per token,  2281.31 tokens per second)
llama_print_timings: prompt eval time =   598.68 ms /    56 tokens (   10.69 ms per token,    93.54 tokens per second)
llama_print_timings:        eval time =  2634.12 ms /    31 runs   (   84.97 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  3295.68 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“At least it is a smart body, and the emperor, we are destroying an entire microworld. That microworld is very large in high dimensions and probably there exist many intelligences or civilizations other than ours that have no chance to present themselves to macrocosm world. Of course, on the microscale of high-dimensional space, smartness and civilization take entirely different forms from what we can imagine, they are simply something else all together.”



llama_print_timings:        load time =   690.73 ms
llama_print_timings:      sample time =    44.02 ms /    98 runs   (    0.45 ms per token,  2226.21 tokens per second)
llama_print_timings: prompt eval time =   690.69 ms /   116 tokens (    5.95 ms per token,   167.95 tokens per second)
llama_print_timings:        eval time =  8198.77 ms /    97 runs   (   84.52 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  9087.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What?!”



llama_print_timings:        load time =   590.48 ms
llama_print_timings:      sample time =     2.25 ms /     5 runs   (    0.45 ms per token,  2222.22 tokens per second)
llama_print_timings: prompt eval time =   590.45 ms /    33 tokens (   17.89 ms per token,    55.89 tokens per second)
llama_print_timings:        eval time =   315.79 ms /     4 runs   (   78.95 ms per token,    12.67 tokens per second)
llama_print_timings:       total time =   917.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the long history of scientific development, physicists have collided countless protons. They have also collided countless neutrons and electrons. It is probably more than a million times. Each collision can be毁灭性的 for that tiny cosmic world in which life or civilization exists. In fact, even in nature, the destruction of this tiny universe occurs constantly, such as the decay of protons and neutrons, as well as the possible destruction of thousands of such microcosms by high-energy cosmic rays entering the atmosphere. Won't it make you sad about this?



llama_print_timings:        load time =   756.24 ms
llama_print_timings:      sample time =    55.52 ms /   124 runs   (    0.45 ms per token,  2233.47 tokens per second)
llama_print_timings: prompt eval time =   756.19 ms /   146 tokens (    5.18 ms per token,   193.07 tokens per second)
llama_print_timings:        eval time = 10594.79 ms /   123 runs   (   86.14 ms per token,    11.61 tokens per second)
llama_print_timings:       total time = 11603.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are very humorous. I will immediately notify the propaganda minister to have this scientific fact reiterated repeatedly to people all over the universe, so that they can understand that the destruction of civilization is nothing new in the universe but a routine occurrence every day and every hour.



llama_print_timings:        load time =   629.04 ms
llama_print_timings:      sample time =    26.19 ms /    58 runs   (    0.45 ms per token,  2214.75 tokens per second)
llama_print_timings: prompt eval time =   629.00 ms /    77 tokens (    8.17 ms per token,   122.42 tokens per second)
llama_print_timings:        eval time =  4846.96 ms /    57 runs   (   85.03 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  5594.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

What's the meaning of this? Is it to make people brave enough to face the possible extinction of the Three-Body Civilization?



llama_print_timings:        load time =   576.71 ms
llama_print_timings:      sample time =    13.52 ms /    30 runs   (    0.45 ms per token,  2219.59 tokens per second)
llama_print_timings: prompt eval time =   576.67 ms /    48 tokens (   12.01 ms per token,    83.24 tokens per second)
llama_print_timings:        eval time =  2417.91 ms /    29 runs   (   83.38 ms per token,    11.99 tokens per second)
llama_print_timings:       total time =  3054.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, we need to make them face the extinction of Earth civilization calmly. As you know, after our basic policy towards Earth civilization was announced, certain peace-loving sentiment was created, which was extremely dangerous. We only realized now that there were many similar people like 1379 Monitor in the Trisolar System who were weak and had to be controlled and eliminated.”



llama_print_timings:        load time =   664.50 ms
llama_print_timings:      sample time =    38.68 ms /    85 runs   (    0.46 ms per token,  2197.29 tokens per second)
llama_print_timings: prompt eval time =   664.46 ms /    94 tokens (    7.07 ms per token,   141.47 tokens per second)
llama_print_timings:        eval time =  7098.50 ms /    84 runs   (   84.51 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  7937.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The leader, this emotion is mainly caused by recent information from Earth. Your prediction has come true, and the alien forces on Earth are indeed developing and have set up their own base to launch and send a large number of messages containing Earth civilization information incessantly to us. I have to admit that Earth civilization is very dangerous for our people. It is heavenly music that will make many Trisolarans go astray spiritually, while Earth civilization may also become a religion on Trisolaran worlds.”



llama_print_timings:        load time =   744.77 ms
llama_print_timings:      sample time =    50.04 ms /   111 runs   (    0.45 ms per token,  2218.18 tokens per second)
llama_print_timings: prompt eval time =   744.72 ms /   140 tokens (    5.32 ms per token,   187.99 tokens per second)
llama_print_timings:        eval time =  9562.69 ms /   110 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time = 10535.96 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You pointed out a huge danger and should strictly limit the flow of information from Earth, especially cultural information.



llama_print_timings:        load time =   597.89 ms
llama_print_timings:      sample time =    10.68 ms /    22 runs   (    0.49 ms per token,  2060.89 tokens per second)
llama_print_timings: prompt eval time =   597.86 ms /    50 tokens (   11.96 ms per token,    83.63 tokens per second)
llama_print_timings:        eval time =  1791.90 ms /    21 runs   (   85.33 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  2438.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   585.63 ms
llama_print_timings:      sample time =     3.12 ms /     7 runs   (    0.45 ms per token,  2240.72 tokens per second)
llama_print_timings: prompt eval time =   585.59 ms /    45 tokens (   13.01 ms per token,    76.85 tokens per second)
llama_print_timings:        eval time =   503.69 ms /     6 runs   (   83.95 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =  1102.59 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The third experiment of proton two-dimensional expansion was conducted at the end of the 30th time, and this time it took place in the night. From the ground we could not see the circular accelerator ring in space, but only the red light indicating its location nearby the fusion power station's radiators. After the acceleration system started working, the scientific ruler announced the expansion success. People looked up into the sky and saw that nothing was visible at first, but soon they noticed a remarkable sign: The stars divided into two parts, and the pattern of the star groups is not matched, as if two stars were superimposed on each other with one smaller one placed above the larger one, while the Milky Way was cut off in between. The smaller part was circular and expanded rapidly against the normal background.



llama_print_timings:        load time =   796.11 ms
llama_print_timings:      sample time =    77.44 ms /   169 runs   (    0.46 ms per token,  2182.31 tokens per second)
llama_print_timings: prompt eval time =   796.06 ms /   170 tokens (    4.68 ms per token,   213.55 tokens per second)
llama_print_timings:        eval time = 14495.51 ms /   168 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time = 15648.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The constellations in it are from the Southern Hemisphere!” said the education minister, pointing at the expanding circular sky. As people were trying to understand how the stars from planet Another could be printed on the northern night sky, a more amazing scene appeared: On the moving edge of the South Hemisphere's starfield, a large sphere appeared, which was brown and was being scanned out like an image in a very slow camera, that was a familiar ball shape. When the display was finished, it occupied the third of the sky, its surface details visible more clearly: the brown land had folds of mountains and pieces of clouds were like tightly attached snow …



llama_print_timings:        load time =   840.90 ms
llama_print_timings:      sample time =    63.43 ms /   142 runs   (    0.45 ms per token,  2238.86 tokens per second)
llama_print_timings: prompt eval time =   840.86 ms /   196 tokens (    4.29 ms per token,   233.09 tokens per second)
llama_print_timings:        eval time = 12446.51 ms /   141 runs   (   88.27 ms per token,    11.33 tokens per second)
llama_print_timings:       total time = 13578.56 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

And then someone said a fact: "That's our planet!"



llama_print_timings:        load time =   585.34 ms
llama_print_timings:      sample time =     6.70 ms /    15 runs   (    0.45 ms per token,  2240.48 tokens per second)
llama_print_timings: prompt eval time =   585.31 ms /    42 tokens (   13.94 ms per token,    71.76 tokens per second)
llama_print_timings:        eval time =  1206.74 ms /    14 runs   (   86.20 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1822.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, another Trisolarian world has appeared in space. Then, the sky became bright again as a second Trisolarian planet joined the expanding southern horizon with an apparent Sun half its size only scanning over the southern hemisphere.



llama_print_timings:        load time =   656.08 ms
llama_print_timings:      sample time =    23.27 ms /    52 runs   (    0.45 ms per token,  2234.73 tokens per second)
llama_print_timings: prompt eval time =   656.03 ms /    88 tokens (    7.45 ms per token,   134.14 tokens per second)
llama_print_timings:        eval time =  4220.63 ms /    51 runs   (   82.76 ms per token,    12.08 tokens per second)
llama_print_timings:       total time =  4982.91 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

At last, someone has figured out the truth: “That’s a mirror!”



llama_print_timings:        load time =   589.58 ms
llama_print_timings:      sample time =     8.34 ms /    18 runs   (    0.46 ms per token,  2157.24 tokens per second)
llama_print_timings: prompt eval time =   589.54 ms /    45 tokens (   13.10 ms per token,    76.33 tokens per second)
llama_print_timings:        eval time =  1477.97 ms /    17 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  2105.86 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This mirror, which appears on the surface of the world in Three-Body, is just the proton that is being expanded into a two-dimensional plane. This is a true meaning without thickness in geometry. When the 2D expansion is completed, the sky is completely covered by the south hemisphere's stars, and the middle of the sky is the solar system and the sun. Next, the stars around the horizon start to distort, the images of stars are stretched and warped like melting and flowing. This distortion is developing from periphery to center.



llama_print_timings:        load time =   720.17 ms
llama_print_timings:      sample time =    55.59 ms /   124 runs   (    0.45 ms per token,  2230.78 tokens per second)
llama_print_timings: prompt eval time =   720.12 ms /   130 tokens (    5.54 ms per token,   180.52 tokens per second)
llama_print_timings:        eval time = 10597.35 ms /   123 runs   (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:       total time = 11569.87 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The Holographic Plane is bending under the gravitational pull of our planet, ” said the science ruler. He then pointed to the many rings that were appearing in the starry sky like someone was shining a strobe light into a cave.



llama_print_timings:        load time =   637.32 ms
llama_print_timings:      sample time =    25.23 ms /    56 runs   (    0.45 ms per token,  2219.40 tokens per second)
llama_print_timings: prompt eval time =   637.27 ms /    77 tokens (    8.28 ms per token,   120.83 tokens per second)
llama_print_timings:        eval time =  4694.19 ms /    55 runs   (   85.35 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  5445.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That's the electromagnetic radiation emitted from the ground, which regulates the gravitational deformation to complete the enveloping of our planet, and then the electromagnetic radiation will continue to radiate, much like many root-like pieces maintaining the stability of this big spherical work platform that becomes a fixed two-dimensional quantum processor. Thus, circuit etching on the two-dimensional surface can begin.



llama_print_timings:        load time =   679.14 ms
llama_print_timings:      sample time =    41.39 ms /    89 runs   (    0.47 ms per token,  2150.33 tokens per second)
llama_print_timings: prompt eval time =   679.10 ms /   109 tokens (    6.23 ms per token,   160.51 tokens per second)
llama_print_timings:        eval time =  7557.83 ms /    88 runs   (   85.88 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  8427.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The two-dimensional plane of protons wrapping a three-body planet is a long process. When the star's deformation approaches the images of the planets, the stars fade away sequentially from top to bottom until the spherical surface of the proton sphere blocks them entirely and the last ray of sunlight disappears. The images of the three-dimensional world in the space's cosmic mirror are distorted as everything becomes dark following this event. When gravity and artificial electromagnetic radiation are balanced, the proton sphere forms a spherical shell of radius equal to the synchronous orbit around the planet center, completely wrapping it.



llama_print_timings:        load time =   808.38 ms
llama_print_timings:      sample time =    60.73 ms /   133 runs   (    0.46 ms per token,  2190.17 tokens per second)
llama_print_timings: prompt eval time =   808.35 ms /   178 tokens (    4.54 ms per token,   220.20 tokens per second)
llama_print_timings:        eval time = 11289.25 ms /   132 runs   (   85.52 ms per token,    11.69 tokens per second)
llama_print_timings:       total time = 12380.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The harsh winter has arrived, and the reflections of all sunlight return to space. The temperature in the Three Body World plunges drastically until it reaches the same level that led to the destruction of multiple civilizations. Most citizens of the Three Body World store up their water and are in a state of silence as the dark envelops the land. In the sky, there is only the faint glow from the electromagnetic radiation induced by the resonating quantum membrane, and occasionally one can see lights on the resonating orbit, which are the ships engraving microcircuits on the quantum membrane.



llama_print_timings:        load time =   733.25 ms
llama_print_timings:      sample time =    59.05 ms /   130 runs   (    0.45 ms per token,  2201.41 tokens per second)
llama_print_timings: prompt eval time =   733.21 ms /   137 tokens (    5.35 ms per token,   186.85 tokens per second)
llama_print_timings:        eval time = 11157.47 ms /   129 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 12163.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The principle of micro-integrated circuits is completely different from that of regular integrated circuits, because the substrate itself is not composed of atoms. The PN junction in a micro-integrated circuit is formed through the torque induced by the strong interaction force between the protons and their antiparticles to connect the wires. Due to its vast size on the macroscopic level, the micro-integrated circuit has thick lines with different thinness levels. If close enough to the quantum plane, you can clearly see a vast plain composed of extremely complex integrated circuits. The total surface area of the integrated circuits on the planet is tens of times larger than that covered by the landmasses of three planets.



llama_print_timings:        load time =   757.26 ms
llama_print_timings:      sample time =    68.32 ms /   151 runs   (    0.45 ms per token,  2210.28 tokens per second)
llama_print_timings: prompt eval time =   757.22 ms /   156 tokens (    4.85 ms per token,   206.02 tokens per second)
llama_print_timings:        eval time = 12875.21 ms /   150 runs   (   85.83 ms per token,    11.65 tokens per second)
llama_print_timings:       total time = 13949.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The proton circuit etching was a massive project, taking thousands of ships and working over ten thousand time units before finally completing. The software debugging took another five thousand time units until the first mind machine trialed.



llama_print_timings:        load time =   627.17 ms
llama_print_timings:      sample time =    20.67 ms /    46 runs   (    0.45 ms per token,  2225.77 tokens per second)
llama_print_timings: prompt eval time =   627.14 ms /    72 tokens (    8.71 ms per token,   114.81 tokens per second)
llama_print_timings:        eval time =  3792.62 ms /    45 runs   (   84.28 ms per token,    11.87 tokens per second)
llama_print_timings:       total time =  4512.94 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"欢迎来到新世界！"



llama_print_timings:        load time =   613.70 ms
llama_print_timings:      sample time =     3.64 ms /     8 runs   (    0.45 ms per token,  2198.41 tokens per second)
llama_print_timings: prompt eval time =   613.66 ms /    69 tokens (    8.89 ms per token,   112.44 tokens per second)
llama_print_timings:        eval time =   603.01 ms /     7 runs   (   86.14 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  1233.47 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

MicroWise 2.10 has been loaded, and the WiseChild-1 is waiting for instructions.



llama_print_timings:        load time =   577.61 ms
llama_print_timings:      sample time =    11.83 ms /    26 runs   (    0.45 ms per token,  2198.73 tokens per second)
llama_print_timings: prompt eval time =   577.57 ms /    47 tokens (   12.29 ms per token,    81.38 tokens per second)
llama_print_timings:        eval time =  2148.22 ms /    25 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2779.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Scientific Ruler said, "Now, a zhizi has been born with wisdom bestowed upon it by us. This is the smallest artificial intelligence that we can make."



llama_print_timings:        load time =   604.12 ms
llama_print_timings:      sample time =    17.42 ms /    38 runs   (    0.46 ms per token,  2180.90 tokens per second)
llama_print_timings: prompt eval time =   604.08 ms /    62 tokens (    9.74 ms per token,   102.64 tokens per second)
llama_print_timings:        eval time =  3184.31 ms /    37 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3867.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"It is the largest artificial intelligence, " said the supreme leader.



llama_print_timings:        load time =   582.41 ms
llama_print_timings:      sample time =     7.15 ms /    16 runs   (    0.45 ms per token,  2239.01 tokens per second)
llama_print_timings: prompt eval time =   582.39 ms /    47 tokens (   12.39 ms per token,    80.70 tokens per second)
llama_print_timings:        eval time =  1293.46 ms /    15 runs   (   86.23 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1907.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What do you mean by adding dimensions?”



llama_print_timings:        load time =   609.94 ms
llama_print_timings:      sample time =     4.69 ms /    10 runs   (    0.47 ms per token,  2133.56 tokens per second)
llama_print_timings: prompt eval time =   609.90 ms /    63 tokens (    9.68 ms per token,   103.30 tokens per second)
llama_print_timings:        eval time =   782.53 ms /     9 runs   (   86.95 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  1415.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Zhizi One, Is the space dimension control function working properly?



llama_print_timings:        load time =   583.72 ms
llama_print_timings:      sample time =     6.64 ms /    15 runs   (    0.44 ms per token,  2260.40 tokens per second)
llama_print_timings: prompt eval time =   583.68 ms /    40 tokens (   14.59 ms per token,    68.53 tokens per second)
llama_print_timings:        eval time =  1176.96 ms /    14 runs   (   84.07 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  1789.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Okay, that's fine. The space dimension control function can be activated anytime by Zhi-zi One.



llama_print_timings:        load time =   573.20 ms
llama_print_timings:      sample time =    11.99 ms /    27 runs   (    0.44 ms per token,  2252.06 tokens per second)
llama_print_timings: prompt eval time =   573.16 ms /    42 tokens (   13.65 ms per token,    73.28 tokens per second)
llama_print_timings:        eval time =  2193.89 ms /    26 runs   (   84.38 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  2821.18 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Reduce the dimensions to three.



llama_print_timings:        load time =   566.63 ms
llama_print_timings:      sample time =     4.06 ms /     9 runs   (    0.45 ms per token,  2217.29 tokens per second)
llama_print_timings: prompt eval time =   566.58 ms /    35 tokens (   16.19 ms per token,    61.77 tokens per second)
llama_print_timings:        eval time =   689.10 ms /     8 runs   (   86.14 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  1274.14 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the command was issued, the two-dimensional proton shell of the Three Body World suddenly contracted rapidly as if a giant hand had opened up this world's curtain. Almost in an instant, sunshine shone over the earth. The protons contracted from two dimensions to three and formed a giant sphere resembling the Moon, it stood on the black side of the sphere but reflected the sunlight, making the darkness become light. At present, the people in the control room can only witness this from the screen.



llama_print_timings:        load time =   739.11 ms
llama_print_timings:      sample time =    50.01 ms /   112 runs   (    0.45 ms per token,  2239.64 tokens per second)
llama_print_timings: prompt eval time =   739.04 ms /   137 tokens (    5.39 ms per token,   185.38 tokens per second)
llama_print_timings:        eval time =  9736.02 ms /   111 runs   (   87.71 ms per token,    11.40 tokens per second)
llama_print_timings:       total time = 10706.28 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dimensionality reduction has been successfully completed, and the first Inteligent Unit (IU) is waiting for instructions.



llama_print_timings:        load time =   571.53 ms
llama_print_timings:      sample time =    11.40 ms /    25 runs   (    0.46 ms per token,  2192.02 tokens per second)
llama_print_timings: prompt eval time =   571.50 ms /    39 tokens (   14.65 ms per token,    68.24 tokens per second)
llama_print_timings:        eval time =  2032.77 ms /    24 runs   (   84.70 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2656.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Reduce the dimensionality of the data to four dimensions.



llama_print_timings:        load time =   576.31 ms
llama_print_timings:      sample time =     6.21 ms /    14 runs   (    0.44 ms per token,  2255.88 tokens per second)
llama_print_timings: prompt eval time =   576.27 ms /    36 tokens (   16.01 ms per token,    62.47 tokens per second)
llama_print_timings:        eval time =  1130.59 ms /    13 runs   (   86.97 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  1735.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In space, the giant ball quickly shrinks until it appears to be no larger than a small star. The night returns on this side of the planet.



llama_print_timings:        load time =   623.59 ms
llama_print_timings:      sample time =    14.37 ms /    32 runs   (    0.45 ms per token,  2226.24 tokens per second)
llama_print_timings: prompt eval time =   623.55 ms /    54 tokens (   11.55 ms per token,    86.60 tokens per second)
llama_print_timings:        eval time =  2682.80 ms /    31 runs   (   86.54 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  3372.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“President, this sphere you are looking at is not really a quantum. It is a four-dimensional giant standing on a two-dimensional piece of paper, and we can only see its bottom foot on contact with the paper.”



llama_print_timings:        load time =   655.10 ms
llama_print_timings:      sample time =    21.94 ms /    48 runs   (    0.46 ms per token,  2187.39 tokens per second)
llama_print_timings: prompt eval time =   655.06 ms /    88 tokens (    7.44 ms per token,   134.34 tokens per second)
llama_print_timings:        eval time =  4075.91 ms /    47 runs   (   86.72 ms per token,    11.53 tokens per second)
llama_print_timings:       total time =  4832.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dimensionality reduction has been successfully completed, and the first Inteligent Unit (IU) is waiting for instructions.



llama_print_timings:        load time =   578.24 ms
llama_print_timings:      sample time =    11.11 ms /    25 runs   (    0.44 ms per token,  2250.83 tokens per second)
llama_print_timings: prompt eval time =   578.22 ms /    39 tokens (   14.83 ms per token,    67.45 tokens per second)
llama_print_timings:        eval time =  2020.02 ms /    24 runs   (   84.17 ms per token,    11.88 tokens per second)
llama_print_timings:       total time =  2650.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Reduce the dimension to six.



llama_print_timings:        load time =   567.51 ms
llama_print_timings:      sample time =     4.04 ms /     9 runs   (    0.45 ms per token,  2227.17 tokens per second)
llama_print_timings: prompt eval time =   567.49 ms /    36 tokens (   15.76 ms per token,    63.44 tokens per second)
llama_print_timings:        eval time =   709.37 ms /     8 runs   (   88.67 ms per token,    11.28 tokens per second)
llama_print_timings:       total time =  1294.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The little ball disappeared in space.



llama_print_timings:        load time =   588.84 ms
llama_print_timings:      sample time =     3.54 ms /     8 runs   (    0.44 ms per token,  2261.16 tokens per second)
llama_print_timings: prompt eval time =   588.82 ms /    36 tokens (   16.36 ms per token,    61.14 tokens per second)
llama_print_timings:        eval time =   605.24 ms /     7 runs   (   86.46 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  1210.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“How big is a six-dimensional proton?” the leader asked.



llama_print_timings:        load time =   576.66 ms
llama_print_timings:      sample time =     7.35 ms /    16 runs   (    0.46 ms per token,  2177.17 tokens per second)
llama_print_timings: prompt eval time =   576.62 ms /    42 tokens (   13.73 ms per token,    72.84 tokens per second)
llama_print_timings:        eval time =  1252.11 ms /    15 runs   (   83.47 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =  1861.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The scientific dictator answered, “About fifty units.”



llama_print_timings:        load time =   583.24 ms
llama_print_timings:      sample time =     5.38 ms /    12 runs   (    0.45 ms per token,  2232.14 tokens per second)
llama_print_timings: prompt eval time =   583.22 ms /    42 tokens (   13.89 ms per token,    72.01 tokens per second)
llama_print_timings:        eval time =   934.01 ms /    11 runs   (   84.91 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1541.66 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The dimensionality reduction has been successfully completed, and the first Inteligent Unit (IU) is waiting for instructions.



llama_print_timings:        load time =   571.14 ms
llama_print_timings:      sample time =    11.10 ms /    25 runs   (    0.44 ms per token,  2252.46 tokens per second)
llama_print_timings: prompt eval time =   571.12 ms /    39 tokens (   14.64 ms per token,    68.29 tokens per second)
llama_print_timings:        eval time =  2032.07 ms /    24 runs   (   84.67 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2654.67 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Chizhiyi One, can you see us?



llama_print_timings:        load time =   580.90 ms
llama_print_timings:      sample time =     5.58 ms /    12 runs   (    0.47 ms per token,  2148.61 tokens per second)
llama_print_timings: prompt eval time =   580.86 ms /    38 tokens (   15.29 ms per token,    65.42 tokens per second)
llama_print_timings:        eval time =   962.15 ms /    11 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  1568.92 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, I can see the control room, see everyone in it, and even see their intestines, you know.



llama_print_timings:        load time =   596.52 ms
llama_print_timings:      sample time =    11.77 ms /    26 runs   (    0.45 ms per token,  2209.19 tokens per second)
llama_print_timings: prompt eval time =   596.50 ms /    54 tokens (   11.05 ms per token,    90.53 tokens per second)
llama_print_timings:        eval time =  2130.74 ms /    25 runs   (   85.23 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  2781.10 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“What is it saying?” the leader asked in wonder.



llama_print_timings:        load time =   580.10 ms
llama_print_timings:      sample time =     6.13 ms /    13 runs   (    0.47 ms per token,  2121.06 tokens per second)
llama_print_timings: prompt eval time =   580.05 ms /    41 tokens (   14.15 ms per token,    70.68 tokens per second)
llama_print_timings:        eval time =  1027.96 ms /    12 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1635.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Zhizi sees three-dimensional space from the sixth dimension, just like we see a two-dimensional plane on a piece of art. We can also see our own interior."



llama_print_timings:        load time =   588.41 ms
llama_print_timings:      sample time =    16.73 ms /    38 runs   (    0.44 ms per token,  2271.23 tokens per second)
llama_print_timings: prompt eval time =   588.37 ms /    56 tokens (   10.51 ms per token,    95.18 tokens per second)
llama_print_timings:        eval time =  3184.80 ms /    37 runs   (   86.08 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  3849.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Chizhi One, enter the control room.



llama_print_timings:        load time =   578.28 ms
llama_print_timings:      sample time =     4.91 ms /    11 runs   (    0.45 ms per token,  2239.87 tokens per second)
llama_print_timings: prompt eval time =   578.25 ms /    37 tokens (   15.63 ms per token,    63.99 tokens per second)
llama_print_timings:        eval time =   860.59 ms /    10 runs   (   86.06 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  1460.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Can it penetrate the earth's crust?" the emperor asked.



llama_print_timings:        load time =   577.79 ms
llama_print_timings:      sample time =     8.49 ms /    19 runs   (    0.45 ms per token,  2238.19 tokens per second)
llama_print_timings: prompt eval time =   577.75 ms /    43 tokens (   13.44 ms per token,    74.43 tokens per second)
llama_print_timings:        eval time =  1583.30 ms /    18 runs   (   87.96 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  2198.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The supreme leader, not penetration, but from the high dimension to enter into any closed space. This is the relationship between three-dimensional and two-dimensional in our world, which makes us easy to enter a circular surface above, while it is impossible for a 2D creature to break through that surface unless it breaks it.”



llama_print_timings:        load time =   649.50 ms
llama_print_timings:      sample time =    31.37 ms /    71 runs   (    0.44 ms per token,  2263.24 tokens per second)
llama_print_timings: prompt eval time =   649.45 ms /    90 tokens (    7.22 ms per token,   138.58 tokens per second)
llama_print_timings:        eval time =  6053.98 ms /    70 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  6845.89 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The words of the science minister were barely spoken when a mirror sphere appeared in mid-air in the control room. The leader walked over to look at his refracted self in the full spectrum sphere. "This is a proton?! " He exclaimed with amazement and wonder.



llama_print_timings:        load time =   653.89 ms
llama_print_timings:      sample time =    26.90 ms /    60 runs   (    0.45 ms per token,  2230.57 tokens per second)
llama_print_timings: prompt eval time =   653.85 ms /    87 tokens (    7.52 ms per token,   133.06 tokens per second)
llama_print_timings:        eval time =  5070.14 ms /    59 runs   (   85.93 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  5846.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The leader, this is just the projection of a six-dimensional entity into three-dimensional space,” the leader extended his hand and looked at the scientific dictator without stopping him.



llama_print_timings:        load time =   650.48 ms
llama_print_timings:      sample time =    17.35 ms /    38 runs   (    0.46 ms per token,  2190.58 tokens per second)
llama_print_timings: prompt eval time =   650.46 ms /    87 tokens (    7.48 ms per token,   133.75 tokens per second)
llama_print_timings:        eval time =  3174.43 ms /    37 runs   (   85.80 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3904.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It seems very smooth. It has only the mass of a proton, but there is a slight resistance on my hand.



llama_print_timings:        load time =   598.31 ms
llama_print_timings:      sample time =    11.84 ms /    26 runs   (    0.46 ms per token,  2195.20 tokens per second)
llama_print_timings: prompt eval time =   598.27 ms /    55 tokens (   10.88 ms per token,    91.93 tokens per second)
llama_print_timings:        eval time =  2141.69 ms /    25 runs   (   85.67 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2793.46 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The reason why air resistance acts on a sphere.



llama_print_timings:        load time =   568.84 ms
llama_print_timings:      sample time =     4.89 ms /    11 runs   (    0.44 ms per token,  2249.03 tokens per second)
llama_print_timings: prompt eval time =   568.80 ms /    39 tokens (   14.58 ms per token,    68.57 tokens per second)
llama_print_timings:        eval time =   861.99 ms /    10 runs   (   86.20 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  1452.95 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte




llama_print_timings:        load time =   637.85 ms
llama_print_timings:      sample time =    33.59 ms /    74 runs   (    0.45 ms per token,  2202.97 tokens per second)
llama_print_timings: prompt eval time =   637.80 ms /    68 tokens (    9.38 ms per token,   106.62 tokens per second)
llama_print_timings:        eval time =  6195.65 ms /    73 runs   (   84.87 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  6987.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Pay attention, this is not an instruction!



llama_print_timings:        load time =   574.10 ms
llama_print_timings:      sample time =     4.71 ms /    10 runs   (    0.47 ms per token,  2125.40 tokens per second)
llama_print_timings: prompt eval time =   574.07 ms /    37 tokens (   15.52 ms per token,    64.45 tokens per second)
llama_print_timings:        eval time =   778.44 ms /     9 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  1374.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I understand.



llama_print_timings:        load time =   567.58 ms
llama_print_timings:      sample time =     1.80 ms /     4 runs   (    0.45 ms per token,  2227.17 tokens per second)
llama_print_timings: prompt eval time =   567.54 ms /    34 tokens (   16.69 ms per token,    59.91 tokens per second)
llama_print_timings:        eval time =   250.87 ms /     3 runs   (   83.62 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =   825.71 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Presidents, if we contract down to the eleventh dimension, we will lose it forever. When a quantum computer shrinks to the size of a single particle, its sensors and input/output interfaces become smaller than all electromagnetic wavelengths. This means that it can't perceive the macro world and receive our instructions.”



llama_print_timings:        load time =   647.95 ms
llama_print_timings:      sample time =    32.96 ms /    73 runs   (    0.45 ms per token,  2214.54 tokens per second)
llama_print_timings: prompt eval time =   647.91 ms /    91 tokens (    7.12 ms per token,   140.45 tokens per second)
llama_print_timings:        eval time =  6202.95 ms /    72 runs   (   86.15 ms per token,    11.61 tokens per second)
llama_print_timings:       total time =  7003.05 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But we're going to end up restoring it back into a microscopic particle eventually.



llama_print_timings:        load time =   578.65 ms
llama_print_timings:      sample time =     9.33 ms /    21 runs   (    0.44 ms per token,  2250.56 tokens per second)
llama_print_timings: prompt eval time =   578.61 ms /    44 tokens (   13.15 ms per token,    76.04 tokens per second)
llama_print_timings:        eval time =  1711.06 ms /    20 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  2331.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yes, but that will have to wait until the second, third, and fourth phases of Yisong are completed. With more than one Yisong, it is possible through certain quantum effects to construct a system capable of sensing the macro world. Take an example: suppose there are two protons inside an atomic nucleus. They follow some sort of motion pattern, like spinning, and it is required that their spins must be opposite to each other. When these protons are separated from the atomic nucleus, regardless of how far they separate from each other, this rule still applies; changing the spin direction of one proton immediately changes the spin direction of the other proton. If we construct these two protons as Yisong, they can interact with each other and form an inductive array that can receive all electromagnetic waves, and therefore sense the macro world. Of course, forming an array of Yisong requires a very complicated quantum effect, which is only a metaphor.”



llama_print_timings:        load time =   921.81 ms
llama_print_timings:      sample time =    92.15 ms /   207 runs   (    0.45 ms per token,  2246.26 tokens per second)
llama_print_timings: prompt eval time =   921.76 ms /   240 tokens (    3.84 ms per token,   260.37 tokens per second)
llama_print_timings:        eval time = 18171.95 ms /   206 runs   (   88.21 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 19522.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   586.29 ms
llama_print_timings:      sample time =     3.12 ms /     7 runs   (    0.45 ms per token,  2243.59 tokens per second)
llama_print_timings: prompt eval time =   586.25 ms /    45 tokens (   13.03 ms per token,    76.76 tokens per second)
llama_print_timings:        eval time =   512.21 ms /     6 runs   (   85.37 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  1111.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After the second, third and fourth proton are successfully deployed, the construction of each subsequent quantum dot takes only half the time of a previous one. The array is completed by combining all four dots to form an inductor-like device.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

The dictator and the council of governors once again arrived at the monument. Above it, four already reduced to six-dimensional wisdom beings were floating in clear glass spheres and in each sphere a rising sun was reflected, reminding them of those three-dimensional bodies which had appeared in space.



llama_print_timings:        load time =   663.26 ms
llama_print_timings:      sample time =    28.32 ms /    63 runs   (    0.45 ms per token,  2224.42 tokens per second)
llama_print_timings: prompt eval time =   663.23 ms /    94 tokens (    7.06 ms per token,   141.73 tokens per second)
llama_print_timings:        eval time =  5189.10 ms /    62 runs   (   83.70 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =  5981.48 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The array of intelligent children, continuous dimension contraction to the eleventh.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Scientific dictator said, “Head of State and No. 1 and 2 will fly to Earth with the knowledge bank they have stored in microcircuits, so that they can make use of space's properties. They can draw energy from empty space in a very short time and turn into high-energy particles which travel at nearly light speed. This appears to be against the law of conservation of energy, but the ‘borrowed’ energies will not be returned until protons decay, and that may happen sooner than anyone can imagine.”



llama_print_timings:        load time =   762.41 ms
llama_print_timings:      sample time =    50.69 ms /   114 runs   (    0.44 ms per token,  2248.92 tokens per second)
llama_print_timings: prompt eval time =   762.37 ms /   157 tokens (    4.86 ms per token,   205.94 tokens per second)
llama_print_timings:        eval time =  9907.10 ms /   113 runs   (   87.67 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 10902.63 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The first task of the two aliens arriving on Earth was to locate the high-energy accelerator used for physics research. Then they buried themselves in it. The method by which humans pursue a deep understanding of material structure is to bombard selected target particles with accelerated high-energy particles, analyze the results and find information reflecting material structure. In practical tests, materials containing target particles are used as targets, and most of the internal components of atoms are empty spaces; if an atom has a theatre the size of a theatre, its nucleus is just suspended in it like a walnut within the theatre. Therefore, successful collisions are rare, and they occur only after long-term bombardment with high-energy particles, which can be compared to finding a raindrop that has a slightly different color among summers' rainstorms.



llama_print_timings:        load time =   841.50 ms
llama_print_timings:      sample time =    78.65 ms /   177 runs   (    0.44 ms per token,  2250.39 tokens per second)
llama_print_timings: prompt eval time =   841.46 ms /   203 tokens (    4.15 ms per token,   241.25 tokens per second)
llama_print_timings:        eval time = 15222.19 ms /   176 runs   (   86.49 ms per token,    11.56 tokens per second)
llama_print_timings:       total time = 16450.24 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This gave Zhi a chance to replace the target particle and receive the impact. Due to its high intelligence, it can accurately judge the trajectory of the impacting particle within a very short period and move to an appropriate location. Therefore, the hit rate on Zhi is tens of billions times higher than that of the ordinary target particles.”



llama_print_timings:        load time =   761.97 ms
llama_print_timings:      sample time =    31.77 ms /    72 runs   (    0.44 ms per token,  2266.65 tokens per second)
llama_print_timings: prompt eval time =   761.93 ms /   148 tokens (    5.15 ms per token,   194.24 tokens per second)
llama_print_timings:        eval time =  6171.89 ms /    71 runs   (   86.93 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =  7080.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

So, isn't Zhizi also consumed? the military ruling officer asked.



llama_print_timings:        load time =   585.57 ms
llama_print_timings:      sample time =     8.74 ms /    20 runs   (    0.44 ms per token,  2289.12 tokens per second)
llama_print_timings: prompt eval time =   585.53 ms /    45 tokens (   13.01 ms per token,    76.85 tokens per second)
llama_print_timings:        eval time =  1634.98 ms /    19 runs   (   86.05 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  2261.01 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“No, the proton is already the basic structure of matter, and it has a fundamentally different nature from macroscopic matter. It can be broken but cannot be destroyed. In fact, when one intelligent body is split into several fragments, those fragments still have strong quantum connections just like how you can disconnect two magnets after breaking them apart and still obtain two magnets.”



llama_print_timings:        load time =   816.59 ms
llama_print_timings:      sample time =    35.97 ms /    81 runs   (    0.44 ms per token,  2252.19 tokens per second)
llama_print_timings: prompt eval time =   816.54 ms /   188 tokens (    4.34 ms per token,   230.24 tokens per second)
llama_print_timings:        eval time =  7007.08 ms /    80 runs   (   87.59 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  7989.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Someone asks, “Is there a possibility that humans could use some method to identify and capture the quarks using a strong magnetic field?” Quarks are positively charged.



llama_print_timings:        load time =   623.90 ms
llama_print_timings:      sample time =    16.48 ms /    36 runs   (    0.46 ms per token,  2183.80 tokens per second)
llama_print_timings: prompt eval time =   623.86 ms /    74 tokens (    8.43 ms per token,   118.62 tokens per second)
llama_print_timings:        eval time =  2968.69 ms /    35 runs   (   84.82 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  3669.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

It is impossible to identify Zhiqi, as the breakthrough in research on matter deep structure is needed. But with high-energy accelerators turned into a pile of junk iron, how can research on such subjects be conducted? The hunter's eyes have been blurred by what he intends to shoot.



llama_print_timings:        load time =   644.15 ms
llama_print_timings:      sample time =    31.02 ms /    68 runs   (    0.46 ms per token,  2192.42 tokens per second)
llama_print_timings: prompt eval time =   644.11 ms /    85 tokens (    7.58 ms per token,   131.97 tokens per second)
llama_print_timings:        eval time =  5674.19 ms /    67 runs   (   84.69 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  6461.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Another way that humans can achieve their goal, said the industrial ruler, is by building more than enough accelerators to exceed the speed of our constructs and always ensure a correct answer from one of them.



llama_print_timings:        load time =   623.78 ms
llama_print_timings:      sample time =    19.84 ms /    44 runs   (    0.45 ms per token,  2217.52 tokens per second)
llama_print_timings: prompt eval time =   623.75 ms /    77 tokens (    8.10 ms per token,   123.45 tokens per second)
llama_print_timings:        eval time =  3667.11 ms /    43 runs   (   85.28 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  4381.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is the most interesting thing about the ZhiZi project!” The science ruler got excited. “Mr. Industrial Ruler, you don't need to worry that manufacturing large amounts of ZhiZis will collapse the economy of the three-dimensional world. We won’t do this at all, and perhaps we may even build a few more. But it is enough to have just these two because each one acts in a multiprocessor manner.”



llama_print_timings:        load time =   675.86 ms
llama_print_timings:      sample time =    43.93 ms /    99 runs   (    0.44 ms per token,  2253.79 tokens per second)
llama_print_timings: prompt eval time =   675.82 ms /   105 tokens (    6.44 ms per token,   155.37 tokens per second)
llama_print_timings:        eval time =  8307.44 ms /    98 runs   (   84.77 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =  9187.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Multi-threading?



llama_print_timings:        load time =   579.04 ms
llama_print_timings:      sample time =     2.96 ms /     6 runs   (    0.49 ms per token,  2023.61 tokens per second)
llama_print_timings: prompt eval time =   579.00 ms /    35 tokens (   16.54 ms per token,    60.45 tokens per second)
llama_print_timings:        eval time =   410.23 ms /     5 runs   (   82.05 ms per token,    12.19 tokens per second)
llama_print_timings:       total time =  1003.64 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“This is an old term for a serial computer. At that time, the central processor could only run a single program at a time, but due to its fast speed and interrupt dispatching, it was perceived by observers at the low-speed level as if the computer were simultaneously running multiple programs.”



llama_print_timings:        load time =   748.17 ms
llama_print_timings:      sample time =    27.94 ms /    63 runs   (    0.44 ms per token,  2254.99 tokens per second)
llama_print_timings: prompt eval time =   748.13 ms /   148 tokens (    5.05 ms per token,   197.83 tokens per second)
llama_print_timings:        eval time =  5324.05 ms /    62 runs   (   85.87 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  6202.29 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They have calculated that each of them can control up to ten thousand high-energy accelerators. It takes about five years for humans to build such an accelerator, and from an economic or resource point of view, it is also not possible to build in large quantities. Of course, they can increase the distance between them by building them on their own planets throughout their galaxy, which would effectively disrupt their parallel operation, but even if ten more Trisolaran trisolarans were created in that planetary system over a long period of time, Earth scientists will never be able to see the secrets of matter at a deeper level. The control of microscopic dimensions by humans will remain constrained at five-dimensional levels or lower, no matter how much time passes, their science would remain at a primitive stage and they would never be able to free themselves from this lock with their own efforts alone.



llama_print_timings:        load time =   922.91 ms
llama_print_timings:      sample time =    84.85 ms /   187 runs   (    0.45 ms per token,  2203.86 tokens per second)
llama_print_timings: prompt eval time =   922.86 ms /   239 tokens (    3.86 ms per token,   258.98 tokens per second)
llama_print_timings:        eval time = 16302.85 ms /   186 runs   (   87.65 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 17622.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I'm sorry for my insult to the Zhixi Project in the past. This is so wonderful!



llama_print_timings:        load time =   593.50 ms
llama_print_timings:      sample time =    10.60 ms /    24 runs   (    0.44 ms per token,  2264.58 tokens per second)
llama_print_timings: prompt eval time =   593.47 ms /    53 tokens (   11.20 ms per token,    89.31 tokens per second)
llama_print_timings:        eval time =  1976.12 ms /    23 runs   (   85.92 ms per token,    11.64 tokens per second)
llama_print_timings:       total time =  2618.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In fact, there are only three accelerators on Earth that can reach the level of research required for groundbreaking achievements. The two intelligent agents arrived on Earth and were virtually unemployed for most of their working capacity. To make better use of their abilities, we scheduled other tasks for them in addition to interfering with their accelerators. They will be the main technological means for carrying out the miracles plan.



llama_print_timings:        load time =   686.88 ms
llama_print_timings:      sample time =    42.61 ms /    93 runs   (    0.46 ms per token,  2182.74 tokens per second)
llama_print_timings: prompt eval time =   686.84 ms /   102 tokens (    6.73 ms per token,   148.51 tokens per second)
llama_print_timings:        eval time =  7867.20 ms /    92 runs   (   85.51 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  8752.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Is Zhizi capable of producing miracles?



llama_print_timings:        load time =   574.66 ms
llama_print_timings:      sample time =     5.45 ms /    12 runs   (    0.45 ms per token,  2201.43 tokens per second)
llama_print_timings: prompt eval time =   574.62 ms /    38 tokens (   15.12 ms per token,    66.13 tokens per second)
llama_print_timings:        eval time =   965.30 ms /    11 runs   (   87.75 ms per token,    11.40 tokens per second)
llama_print_timings:       total time =  1565.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

If you look at the universe, it's easy to see that a high energy particle can make film sensitive. This is one of the ways that primitive accelerators used to make individual particles visible on film. Intelligent beings in high energy state cross each film multiple times per second, producing a dot where each time they pass. These dots are then linked into letters or numbers, even pictures. The speed at which this happens is much faster than the speed of photography with film. In addition, the human retina resembles that of the三体人; hence, intelligent beings can make letters, numbers, or graphics on their retinas using the same method as above… If all these small miracles can lead to confusion and fear among terrestrial humans, then the next giant miracle will frighten away any scientists who study universe background radiation: intelligent beings can cause them to flash.



llama_print_timings:        load time =   866.13 ms
llama_print_timings:      sample time =    83.72 ms /   189 runs   (    0.44 ms per token,  2257.53 tokens per second)
llama_print_timings: prompt eval time =   866.07 ms /   210 tokens (    4.12 ms per token,   242.48 tokens per second)
llama_print_timings:        eval time = 16624.95 ms /   188 runs   (   88.43 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 17881.37 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This also frightens our scientists. How can we do that?



llama_print_timings:        load time =   582.83 ms
llama_print_timings:      sample time =     6.98 ms /    15 runs   (    0.47 ms per token,  2149.61 tokens per second)
llama_print_timings: prompt eval time =   582.80 ms /    41 tokens (   14.21 ms per token,    70.35 tokens per second)
llama_print_timings:        eval time =  1248.92 ms /    14 runs   (   89.21 ms per token,    11.21 tokens per second)
llama_print_timings:       total time =  1862.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“It is very simple, we have compiled software that enables the intelligent children to expand into two dimensions on their own. After the expansion is complete, a huge plane covers the earth. The software can also make the expanded plane transparent in the cosmic background radiation band, which can be adjusted… of course, the intelligent children expanding in various dimensions can display spectacular ‘miracles’, corresponding software is being developed as well. These ‘miracles’ will create a powerful atmospheric pressure on earth world science thoughts other than physics, so we can use them to restrict the development of scientific ideas beyond physics with ‘the miracle project’.”



llama_print_timings:        load time =   769.52 ms
llama_print_timings:      sample time =    61.03 ms /   138 runs   (    0.44 ms per token,  2261.33 tokens per second)
llama_print_timings: prompt eval time =   769.46 ms /   155 tokens (    4.96 ms per token,   201.44 tokens per second)
llama_print_timings:        eval time = 11640.20 ms /   137 runs   (   84.96 ms per token,    11.77 tokens per second)
llama_print_timings:       total time = 12693.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“The last question: Why not send all four of the existing jewels to Earth?”



llama_print_timings:        load time =   579.50 ms
llama_print_timings:      sample time =     9.32 ms /    20 runs   (    0.47 ms per token,  2146.84 tokens per second)
llama_print_timings: prompt eval time =   579.46 ms /    47 tokens (   12.33 ms per token,    81.11 tokens per second)
llama_print_timings:        eval time =  1641.63 ms /    19 runs   (   86.40 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2265.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Quantum entanglement is superluminal, even if the two quantum systems are separated in different parts of the universe, entanglement can still be instantaneous. The quantum array created by these three and four-dimensional minds also exist in real time to receive information from Earth 1 and 2 from here. At the same time, it allows for the monitoring of the entirety of the trilateral world on earth in real time. And, it is also possible to have instantaneous communication with extraterrestrial beings in the human civilization.



llama_print_timings:        load time =   704.30 ms
llama_print_timings:      sample time =    52.57 ms /   117 runs   (    0.45 ms per token,  2225.73 tokens per second)
llama_print_timings: prompt eval time =   704.25 ms /   125 tokens (    5.63 ms per token,   177.49 tokens per second)
llama_print_timings:        eval time =  9928.53 ms /   116 runs   (   85.59 ms per token,    11.68 tokens per second)
llama_print_timings:       total time = 10877.80 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Here is an important strategic step,” the supreme commander said, “We will inform Earthlings of the true intentions of the Three-Body World through a Matrix array. ”



llama_print_timings:        load time =   607.12 ms
llama_print_timings:      sample time =    17.56 ms /    39 runs   (    0.45 ms per token,  2220.70 tokens per second)
llama_print_timings: prompt eval time =   607.07 ms /    63 tokens (    9.64 ms per token,   103.78 tokens per second)
llama_print_timings:        eval time =  3211.97 ms /    38 runs   (   84.53 ms per token,    11.83 tokens per second)
llama_print_timings:       total time =  3899.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That is to say, we will tell them that the Three-Body fleet will permanently forbid human procreation so that this species can disappear from Earth?



llama_print_timings:        load time =   597.83 ms
llama_print_timings:      sample time =    15.02 ms /    34 runs   (    0.44 ms per token,  2264.25 tokens per second)
llama_print_timings: prompt eval time =   597.80 ms /    55 tokens (   10.87 ms per token,    92.00 tokens per second)
llama_print_timings:        eval time =  2709.75 ms /    33 runs   (   82.11 ms per token,    12.18 tokens per second)
llama_print_timings:       total time =  3375.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yes, there are two possible outcomes: either the Earthlings abandon all their illusions and wage a decisive war against us, or they fall into despair and despair in their society and collapse. Based on the information we have received about Earth civilization so far, we believe that the latter possibility is more likely.



llama_print_timings:        load time =   649.05 ms
llama_print_timings:      sample time =    30.72 ms /    69 runs   (    0.45 ms per token,  2246.24 tokens per second)
llama_print_timings: prompt eval time =   649.01 ms /    84 tokens (    7.73 ms per token,   129.43 tokens per second)
llama_print_timings:        eval time =  5978.82 ms /    68 runs   (   87.92 ms per token,    11.37 tokens per second)
llama_print_timings:       total time =  6770.41 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Suddenly, the rising sun disappeared below the horizon and dawn turned into dusk. The third cosmos was in chaos again.



llama_print_timings:        load time =   602.82 ms
llama_print_timings:      sample time =    13.59 ms /    30 runs   (    0.45 ms per token,  2207.99 tokens per second)
llama_print_timings: prompt eval time =   602.78 ms /    59 tokens (   10.22 ms per token,    97.88 tokens per second)
llama_print_timings:        eval time =  2415.31 ms /    29 runs   (   83.29 ms per token,    12.01 tokens per second)
llama_print_timings:       total time =  3080.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   581.86 ms
llama_print_timings:      sample time =     3.09 ms /     7 runs   (    0.44 ms per token,  2264.64 tokens per second)
llama_print_timings: prompt eval time =   581.83 ms /    45 tokens (   12.93 ms per token,    77.34 tokens per second)
llama_print_timings:        eval time =   503.36 ms /     6 runs   (   83.89 ms per token,    11.92 tokens per second)
llama_print_timings:       total time =  1100.22 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As Ye Wenjie was reading the information about the Three-Body World, the Operations Center held a second important meeting to study the stolen information. Before the meeting began, General Chang Weisi warned the comrades present: “Please pay attention. Our meeting may already be under surveillance by Yisuo. From now on, any secrets will no longer exist.”



llama_print_timings:        load time =   659.16 ms
llama_print_timings:      sample time =    37.31 ms /    83 runs   (    0.45 ms per token,  2224.72 tokens per second)
llama_print_timings: prompt eval time =   659.12 ms /    93 tokens (    7.09 ms per token,   141.10 tokens per second)
llama_print_timings:        eval time =  6987.82 ms /    82 runs   (   85.22 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  7816.97 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

He said this while the familiar surroundings still stood before him, and the drawn curtains shook in the summer shadows. But to all the guests present, it was clear that this world had changed, for they felt an omnipresent eye boring into them, one that would permeate their lives for years, even generations to come, forcing humanity to adapt mentally to a new situation.



llama_print_timings:        load time =   695.61 ms
llama_print_timings:      sample time =    38.29 ms /    86 runs   (    0.45 ms per token,  2246.19 tokens per second)
llama_print_timings: prompt eval time =   695.57 ms /   116 tokens (    6.00 ms per token,   166.77 tokens per second)
llama_print_timings:        eval time =  7296.25 ms /    85 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  8167.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the three seconds after Chang Wenzhi said this, the people of Three Body World and humans outside the Rebellion made their first contact. After this, they interrupted their communication with the Rebel forces of Earth Three Body and never sent any messages again throughout all of their lives. At that moment, everyone in the Operations Center saw that message, just as Wang Mu did when he looked at the countdown. The message only lasted for less than two seconds before disappearing, but everyone accurately read its contents: it contained only five characters—



llama_print_timings:        load time =   750.20 ms
llama_print_timings:      sample time =    51.65 ms /   115 runs   (    0.45 ms per token,  2226.61 tokens per second)
llama_print_timings: prompt eval time =   750.15 ms /   139 tokens (    5.40 ms per token,   185.30 tokens per second)
llama_print_timings:        eval time =  9886.85 ms /   114 runs   (   86.73 ms per token,    11.53 tokens per second)
llama_print_timings:       total time = 10877.57 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

You are bugs!



llama_print_timings:        load time =   574.31 ms
llama_print_timings:      sample time =     2.25 ms /     5 runs   (    0.45 ms per token,  2221.24 tokens per second)
llama_print_timings: prompt eval time =   574.28 ms /    34 tokens (   16.89 ms per token,    59.20 tokens per second)
llama_print_timings:        eval time =   334.02 ms /     4 runs   (   83.50 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =   918.39 ms


translated 187.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

“After looking at those, you must have thought of the age when you made your greatest discoveries,” Wang Miao said to Ding Yi, who stood in their spacious and open living room. The billiards table was placed next to them.



llama_print_timings:        load time =   645.50 ms
llama_print_timings:      sample time =    24.35 ms /    55 runs   (    0.44 ms per token,  2258.73 tokens per second)
llama_print_timings: prompt eval time =   645.47 ms /    88 tokens (    7.33 ms per token,   136.33 tokens per second)
llama_print_timings:        eval time =  4657.90 ms /    54 runs   (   86.26 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  5412.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yeah, I’ve been working on the theory of macro-atoms, and now I’m inspired. Macro-atoms are probably ordinary atoms unfolded in low dimensions. The unfolding was caused by an unknown natural force, which took place shortly after the big bang or it may still be taking place currently. Maybe all atoms in this universe eventually expand into low dimensionality and our ultimate destination is a macrocosm composed of low-dimensional atomic matter which can also be understood as a process of entropy growth… I thought that the discovery of macro-atoms could bring a breakthrough to physics, but now it seems they’re not so remarkable after all.”



llama_print_timings:        load time =   784.29 ms
llama_print_timings:      sample time =    63.95 ms /   143 runs   (    0.45 ms per token,  2236.16 tokens per second)
llama_print_timings: prompt eval time =   784.24 ms /   168 tokens (    4.67 ms per token,   214.22 tokens per second)
llama_print_timings:        eval time = 12252.00 ms /   142 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time = 13327.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Why not? Since we can capture and study the fundamental structure of matter from macroscopic atomic nuclei, why can't we do the same with microscopic subatomic particles?”



llama_print_timings:        load time =   601.16 ms
llama_print_timings:      sample time =    18.28 ms /    41 runs   (    0.45 ms per token,  2243.38 tokens per second)
llama_print_timings: prompt eval time =   601.13 ms /    62 tokens (    9.70 ms per token,   103.14 tokens per second)
llama_print_timings:        eval time =  3374.42 ms /    40 runs   (   84.36 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  4057.38 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“At first, ” said Ding Yi from his study as he carried a delicate silver-rimmed photo frame in his hand, “it seemed ridiculous. ” He bent down and picked up a cigarette butt from the messy floor. “But let's consider this filtration canister instead, ” he continued. “We have discussed its two-dimensional area expansion, which is as big as our living room, but if you try to research its three-dimensional structure on that plane, it would be impossible. Clearly, the information about the three-dimensional structures would disappear during the process of unfolding. Just like when the cup breaks apart, you cannot reconstruct the original shape.” Ding Yi paused for a moment before adding, “That's why multibody scientists are so brilliant – they can preserve the information about the high-dimensional structure while unfolding it simultaneously on a two-dimensional plane. That is, we have to start from the ten-dimensional microscopic dimension to study the underlying material structures. In ot


llama_print_timings:        load time =   915.38 ms
llama_print_timings:      sample time =   106.55 ms /   236 runs   (    0.45 ms per token,  2215.01 tokens per second)
llama_print_timings: prompt eval time =   915.33 ms /   240 tokens (    3.81 ms per token,   262.20 tokens per second)
llama_print_timings:        eval time = 20724.05 ms /   235 runs   (   88.19 ms per token,    11.34 tokens per second)
llama_print_timings:       total time = 22133.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi asked Wang Miao to look at the photo. In the photo, a young beautiful female lieutenant colonel stood among a group of children. She looked clear-eyed and gave off an enthralling smile. She was standing on a well-manicured green lawn with some white small animals behind her, next to a very high and industrial-looking building with vividly colored cartoon animals painted on the walls and balloons, flowers and other decorative items hanging from its upper part.



llama_print_timings:        load time =   712.15 ms
llama_print_timings:      sample time =    48.62 ms /   109 runs   (    0.45 ms per token,  2241.83 tokens per second)
llama_print_timings: prompt eval time =   712.12 ms /   116 tokens (    6.14 ms per token,   162.89 tokens per second)
llama_print_timings:        eval time =  9318.31 ms /   108 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time = 10254.40 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"You met Yang Dong before?" Wang Sen looked at the photo. "Your life is so rich."



llama_print_timings:        load time =   584.79 ms
llama_print_timings:      sample time =    10.68 ms /    23 runs   (    0.46 ms per token,  2154.16 tokens per second)
llama_print_timings: prompt eval time =   584.75 ms /    48 tokens (   12.18 ms per token,    82.09 tokens per second)
llama_print_timings:        eval time =  1869.29 ms /    22 runs   (   84.97 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  2502.36 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She is Lin Yun, who made a significant contribution to the research on ball lightning and discovery of giant atomic molecules. Without her, there would be no discovery.



llama_print_timings:        load time =   605.98 ms
llama_print_timings:      sample time =    16.35 ms /    36 runs   (    0.45 ms per token,  2202.10 tokens per second)
llama_print_timings: prompt eval time =   605.93 ms /    61 tokens (    9.93 ms per token,   100.67 tokens per second)
llama_print_timings:        eval time =  2924.90 ms /    35 runs   (   83.57 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =  3606.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I don't know her.



llama_print_timings:        load time =   579.78 ms
llama_print_timings:      sample time =     3.49 ms /     8 runs   (    0.44 ms per token,  2292.26 tokens per second)
llama_print_timings: prompt eval time =   579.74 ms /    36 tokens (   16.10 ms per token,    62.10 tokens per second)
llama_print_timings:        eval time =   569.08 ms /     7 runs   (   81.30 ms per token,    12.30 tokens per second)
llama_print_timings:       total time =  1164.51 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Yeah, because there are some things you don't know about… But I always feel it isn't fair to her.



llama_print_timings:        load time =   590.45 ms
llama_print_timings:      sample time =    12.98 ms /    29 runs   (    0.45 ms per token,  2233.35 tokens per second)
llama_print_timings: prompt eval time =   590.40 ms /    50 tokens (   11.81 ms per token,    84.69 tokens per second)
llama_print_timings:        eval time =  2377.80 ms /    28 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  3027.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

She is where?



llama_print_timings:        load time =   567.02 ms
llama_print_timings:      sample time =     2.21 ms /     5 runs   (    0.44 ms per token,  2266.55 tokens per second)
llama_print_timings: prompt eval time =   567.00 ms /    36 tokens (   15.75 ms per token,    63.49 tokens per second)
llama_print_timings:        eval time =   334.46 ms /     4 runs   (   83.61 ms per token,    11.96 tokens per second)
llama_print_timings:       total time =   911.12 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“In a place, or some places, oh how nice it would be if she could show up.”



llama_print_timings:        load time =   586.64 ms
llama_print_timings:      sample time =     9.72 ms /    22 runs   (    0.44 ms per token,  2263.37 tokens per second)
llama_print_timings: prompt eval time =   586.60 ms /    52 tokens (   11.28 ms per token,    88.65 tokens per second)
llama_print_timings:        eval time =  1794.45 ms /    21 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  2424.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But Wang Miao didn't take it seriously. He looked at the female in the picture but was not interested and handed her back the photo frame saying, “It doesn't matter anyway.”



llama_print_timings:        load time =   632.86 ms
llama_print_timings:      sample time =    18.06 ms /    41 runs   (    0.44 ms per token,  2270.71 tokens per second)
llama_print_timings: prompt eval time =   632.82 ms /    73 tokens (    8.67 ms per token,   115.36 tokens per second)
llama_print_timings:        eval time =  3422.77 ms /    40 runs   (   85.57 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4137.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Yeah, it doesn’t matter anymore.” Ding Yi put the frame on a billiard table and looked at him. She reached for the bottle of wine standing in the corner. …



llama_print_timings:        load time =   620.12 ms
llama_print_timings:      sample time =    20.15 ms /    45 runs   (    0.45 ms per token,  2233.36 tokens per second)
llama_print_timings: prompt eval time =   620.07 ms /    66 tokens (    9.40 ms per token,   106.44 tokens per second)
llama_print_timings:        eval time =  3783.67 ms /    44 runs   (   85.99 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  4496.65 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When Shi Qiang pushed the door open, both of them were already very drunk. They saw that it was Shi Zhi and they were excited. Wang Moxian stood up to wrap his arms around the stranger's shoulders, "Ah, big brother Shi! ..." Ting Yijun staggered a bit as he looked for a cup on the billiard table and poured drinks into it for him.



llama_print_timings:        load time =   705.75 ms
llama_print_timings:      sample time =    42.06 ms /    94 runs   (    0.45 ms per token,  2234.74 tokens per second)
llama_print_timings: prompt eval time =   705.72 ms /   123 tokens (    5.74 ms per token,   174.29 tokens per second)
llama_print_timings:        eval time =  8023.47 ms /    93 runs   (   86.27 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  8923.50 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$Big Story sat down at the billiards table and looked with two bright eyes at the two people: "Is everything really over like you said? Nothing's missing?"$



llama_print_timings:        load time =   612.46 ms
llama_print_timings:      sample time =    16.78 ms /    38 runs   (    0.44 ms per token,  2265.01 tokens per second)
llama_print_timings: prompt eval time =   612.42 ms /    60 tokens (   10.21 ms per token,    97.97 tokens per second)
llama_print_timings:        eval time =  3218.92 ms /    37 runs   (   87.00 ms per token,    11.49 tokens per second)
llama_print_timings:       total time =  3908.09 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, everything is fine.



llama_print_timings:        load time =   574.54 ms
llama_print_timings:      sample time =     3.59 ms /     8 runs   (    0.45 ms per token,  2229.65 tokens per second)
llama_print_timings: prompt eval time =   574.50 ms /    36 tokens (   15.96 ms per token,    62.66 tokens per second)
llama_print_timings:        eval time =   586.04 ms /     7 runs   (   83.72 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  1176.34 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"If the accelerator doesn't work and we can't study matter, then what are we going to do?”



llama_print_timings:        load time =   586.38 ms
llama_print_timings:      sample time =    11.48 ms /    26 runs   (    0.44 ms per token,  2265.60 tokens per second)
llama_print_timings: prompt eval time =   586.34 ms /    45 tokens (   13.03 ms per token,    76.75 tokens per second)
llama_print_timings:        eval time =  2159.87 ms /    25 runs   (   86.39 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =  2798.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"And you -- what about you?"



llama_print_timings:        load time =   580.72 ms
llama_print_timings:      sample time =     4.00 ms /     9 runs   (    0.44 ms per token,  2247.75 tokens per second)
llama_print_timings: prompt eval time =   580.68 ms /    37 tokens (   15.69 ms per token,    63.72 tokens per second)
llama_print_timings:        eval time =   683.79 ms /     8 runs   (   85.47 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =  1282.60 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Technology is still progressing, and Dr. Wang's team has developed nanomaterials...


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Imagine a kingdom in ancient times that is also progressing technologically, making better weapons like swords and spears for their soldiers, even the possibility of developing machine guns, but. . . 🤔



llama_print_timings:        load time =   623.76 ms
llama_print_timings:      sample time =    20.73 ms /    47 runs   (    0.44 ms per token,  2267.79 tokens per second)
llama_print_timings: prompt eval time =   623.72 ms /    75 tokens (    8.32 ms per token,   120.25 tokens per second)
llama_print_timings:        eval time =  3935.76 ms /    46 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  4655.15 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

But if they don't know that matter is composed of atoms and molecules, they will never be able to create missiles or satellites, because the level of science would limit them.



llama_print_timings:        load time =   603.71 ms
llama_print_timings:      sample time =    18.48 ms /    41 runs   (    0.45 ms per token,  2218.97 tokens per second)
llama_print_timings: prompt eval time =   603.69 ms /    64 tokens (    9.43 ms per token,   106.01 tokens per second)
llama_print_timings:        eval time =  3352.76 ms /    40 runs   (   83.82 ms per token,    11.93 tokens per second)
llama_print_timings:       total time =  4040.77 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I've always known that Officer Shi is a very smart person, but watching you...



llama_print_timings:        load time =   600.69 ms
llama_print_timings:      sample time =     9.59 ms /    20 runs   (    0.48 ms per token,  2085.94 tokens per second)
llama_print_timings: prompt eval time =   600.63 ms /    54 tokens (   11.12 ms per token,    89.90 tokens per second)
llama_print_timings:        eval time =  1634.78 ms /    19 runs   (   86.04 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =  2282.27 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Then Wang Sen continued, “The study of material substructure is the basis for all other scientific research. If this does not advance, then everything will be an illusion. In your words: nonsense.”



llama_print_timings:        load time =   614.54 ms
llama_print_timings:      sample time =    19.62 ms /    44 runs   (    0.45 ms per token,  2242.15 tokens per second)
llama_print_timings: prompt eval time =   614.52 ms /    63 tokens (    9.75 ms per token,   102.52 tokens per second)
llama_print_timings:        eval time =  3669.43 ms /    43 runs   (   85.34 ms per token,    11.72 tokens per second)
llama_print_timings:       total time =  4374.98 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ding Yi pointed at Wang Sen and said, "Wang Sir, you won't be idle for the rest of your life. You will continue to improve these weapons. I fucking don't know what I am going to do in the future!". Then he threw an empty bottle on the table, picked up a billiard ball and aimed at Wang Sen, who jumped aside and avoided the attack.



llama_print_timings:        load time =   651.60 ms
llama_print_timings:      sample time =    41.34 ms /    91 runs   (    0.45 ms per token,  2201.42 tokens per second)
llama_print_timings: prompt eval time =   651.56 ms /    88 tokens (    7.40 ms per token,   135.06 tokens per second)
llama_print_timings:        eval time =  7682.59 ms /    90 runs   (   85.36 ms per token,    11.71 tokens per second)
llama_print_timings:       total time =  8521.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's great!" said Wang Mo, lifting up his wine cup. "Now we can spend the rest of our lives in dissipation and depravity without any shame! We are worms. Exterminating insects that will soon become extinct, haha…."



llama_print_timings:        load time =   641.93 ms
llama_print_timings:      sample time =    28.23 ms /    62 runs   (    0.46 ms per token,  2196.09 tokens per second)
llama_print_timings: prompt eval time =   641.89 ms /    80 tokens (    8.02 ms per token,   124.63 tokens per second)
llama_print_timings:        eval time =  5265.98 ms /    61 runs   (   86.33 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  6038.61 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"That's great! "Ding Yi also lifted his cup, "Cheers to the bug! I never thought that the end of the world would be so much fun. To the Bug God and to your future son! Long live the End Time!"



llama_print_timings:        load time =   635.93 ms
llama_print_timings:      sample time =    24.66 ms /    55 runs   (    0.45 ms per token,  2229.97 tokens per second)
llama_print_timings: prompt eval time =   635.89 ms /    75 tokens (    8.48 ms per token,   117.94 tokens per second)
llama_print_timings:        eval time =  4672.39 ms /    54 runs   (   86.53 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =  5420.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Shaking his head, he finished the cup of wine in front of him and then shook his head again, “Muddy bear.”



llama_print_timings:        load time =   602.24 ms
llama_print_timings:      sample time =    13.88 ms /    29 runs   (    0.48 ms per token,  2089.19 tokens per second)
llama_print_timings: prompt eval time =   602.20 ms /    55 tokens (   10.95 ms per token,    91.33 tokens per second)
llama_print_timings:        eval time =  2415.36 ms /    28 runs   (   86.26 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  3084.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"What do you want?" Ding Yi stared at David with her intoxicated eyes. "Can you lift our spirits up?"



llama_print_timings:        load time =   597.71 ms
llama_print_timings:      sample time =    13.27 ms /    30 runs   (    0.44 ms per token,  2260.06 tokens per second)
llama_print_timings: prompt eval time =   597.69 ms /    54 tokens (   11.07 ms per token,    90.35 tokens per second)
llama_print_timings:        eval time =  2466.12 ms /    29 runs   (   85.04 ms per token,    11.76 tokens per second)
llama_print_timings:       total time =  3123.74 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$$Stand up and go.$$



llama_print_timings:        load time =   578.13 ms
llama_print_timings:      sample time =     3.77 ms /     8 runs   (    0.47 ms per token,  2123.14 tokens per second)
llama_print_timings: prompt eval time =   578.09 ms /    39 tokens (   14.82 ms per token,    67.46 tokens per second)
llama_print_timings:        eval time =   606.45 ms /     7 runs   (   86.64 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  1201.52 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Where are you going?”



llama_print_timings:        load time =   573.01 ms
llama_print_timings:      sample time =     3.33 ms /     7 runs   (    0.48 ms per token,  2100.21 tokens per second)
llama_print_timings: prompt eval time =   572.97 ms /    33 tokens (   17.36 ms per token,    57.59 tokens per second)
llama_print_timings:        eval time =   500.01 ms /     6 runs   (   83.34 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  1088.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Be cheerful.”



llama_print_timings:        load time =   571.25 ms
llama_print_timings:      sample time =     2.70 ms /     6 runs   (    0.45 ms per token,  2221.40 tokens per second)
llama_print_timings: prompt eval time =   571.23 ms /    36 tokens (   15.87 ms per token,    63.02 tokens per second)
llama_print_timings:        eval time =   424.33 ms /     5 runs   (   84.87 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  1007.19 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sit down and drink.



llama_print_timings:        load time =   579.62 ms
llama_print_timings:      sample time =     3.06 ms /     7 runs   (    0.44 ms per token,  2286.83 tokens per second)
llama_print_timings: prompt eval time =   579.58 ms /    40 tokens (   14.49 ms per token,    69.02 tokens per second)
llama_print_timings:        eval time =   514.77 ms /     6 runs   (   85.79 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  1108.06 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They were dragged along by the big man, "Come on. If we don't leave now, we'll be late."



llama_print_timings:        load time =   592.11 ms
llama_print_timings:      sample time =    12.80 ms /    29 runs   (    0.44 ms per token,  2266.33 tokens per second)
llama_print_timings: prompt eval time =   592.07 ms /    51 tokens (   11.61 ms per token,    86.14 tokens per second)
llama_print_timings:        eval time =  2402.34 ms /    28 runs   (   85.80 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3052.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After getting down from the building, three people got into the car driven by Dasheng. While the car was moving, Wangmiao asked where they were going and Dasheng replied, “My hometown, not far.”



llama_print_timings:        load time =   619.99 ms
llama_print_timings:      sample time =    21.74 ms /    49 runs   (    0.44 ms per token,  2254.01 tokens per second)
llama_print_timings: prompt eval time =   619.95 ms /    67 tokens (    9.25 ms per token,   108.07 tokens per second)
llama_print_timings:        eval time =  3999.52 ms /    48 runs   (   83.32 ms per token,    12.00 tokens per second)
llama_print_timings:       total time =  4719.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The car drove out of the city and sped westward along the Beijing-Xi'an Expressway. Just when it entered Hebei Province, Dai Shiji stopped the car and dragged them two men outside. Both Dingyi and Wangmou closed their eyes in the bright afternoon sun, with vast wheat fields spread out before them on the North China Plain.



llama_print_timings:        load time =   677.14 ms
llama_print_timings:      sample time =    37.25 ms /    82 runs   (    0.45 ms per token,  2201.52 tokens per second)
llama_print_timings: prompt eval time =   677.09 ms /    99 tokens (    6.84 ms per token,   146.21 tokens per second)
llama_print_timings:        eval time =  6941.58 ms /    81 runs   (   85.70 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  7788.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"I brought us here because I wanted to show you the beauty of nature," Wang Mu said.



llama_print_timings:        load time =   578.68 ms
llama_print_timings:      sample time =     9.36 ms /    21 runs   (    0.45 ms per token,  2243.35 tokens per second)
llama_print_timings: prompt eval time =   578.64 ms /    45 tokens (   12.86 ms per token,    77.77 tokens per second)
llama_print_timings:        eval time =  1751.38 ms /    20 runs   (   87.57 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =  2371.93 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Look at the bugs," said Colonel Stanton, pointing to the wheat fields in front of him with his cigar.



llama_print_timings:        load time =   608.36 ms
llama_print_timings:      sample time =    12.01 ms /    27 runs   (    0.44 ms per token,  2249.06 tokens per second)
llama_print_timings: prompt eval time =   608.32 ms /    61 tokens (    9.97 ms per token,   100.28 tokens per second)
llama_print_timings:        eval time =  2227.24 ms /    26 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  2890.70 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

They realized that the fields were covered with a thick layer of locusts, with several bugs per stem and more moving on the ground like a viscous liquid.



llama_print_timings:        load time =   648.55 ms
llama_print_timings:      sample time =    15.71 ms /    35 runs   (    0.45 ms per token,  2227.45 tokens per second)
llama_print_timings: prompt eval time =   648.51 ms /    81 tokens (    8.01 ms per token,   124.90 tokens per second)
llama_print_timings:        eval time =  2848.25 ms /    34 runs   (   83.77 ms per token,    11.94 tokens per second)
llama_print_timings:       total time =  3569.33 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Is there a locust plague here?” Wang Miao drives away a small area of grasshoppers on the field and then sits down.



llama_print_timings:        load time =   611.11 ms
llama_print_timings:      sample time =    14.42 ms /    32 runs   (    0.45 ms per token,  2218.68 tokens per second)
llama_print_timings: prompt eval time =   611.07 ms /    57 tokens (   10.72 ms per token,    93.28 tokens per second)
llama_print_timings:        eval time =  2621.70 ms /    31 runs   (   84.57 ms per token,    11.82 tokens per second)
llama_print_timings:       total time =  3299.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Sandstorms like this have been around for a decade, but this one is the worst.



llama_print_timings:        load time =   577.07 ms
llama_print_timings:      sample time =    10.57 ms /    23 runs   (    0.46 ms per token,  2176.79 tokens per second)
llama_print_timings: prompt eval time =   577.04 ms /    45 tokens (   12.82 ms per token,    77.98 tokens per second)
llama_print_timings:        eval time =  1889.21 ms /    22 runs   (   85.87 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =  2514.79 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“That doesn’t matter, ” said Ding Yi with a drunkenly indifferent air. “Everything is fine.”



llama_print_timings:        load time =   589.73 ms
llama_print_timings:      sample time =    13.25 ms /    30 runs   (    0.44 ms per token,  2265.01 tokens per second)
llama_print_timings: prompt eval time =   589.69 ms /    53 tokens (   11.13 ms per token,    89.88 tokens per second)
llama_print_timings:        eval time =  2456.53 ms /    29 runs   (   84.71 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  3108.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I just want to ask you a question: "Is the technology level gap between humans and Trisqueligors larger, or is it between humans and Primeval Hordelings?"



llama_print_timings:        load time =   605.09 ms
llama_print_timings:      sample time =    16.91 ms /    38 runs   (    0.45 ms per token,  2247.19 tokens per second)
llama_print_timings: prompt eval time =   605.06 ms /    61 tokens (    9.92 ms per token,   100.82 tokens per second)
llama_print_timings:        eval time =  3172.86 ms /    37 runs   (   85.75 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =  3857.83 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This problem is like a cold water splashed on the heads of two drunken scientists, who are staring at the pile of locusts in front of them, their expressions gradually becoming grave. In no time, both scientists understand what Prof. Dai's words mean.



llama_print_timings:        load time =   617.21 ms
llama_print_timings:      sample time =    27.83 ms /    62 runs   (    0.45 ms per token,  2227.65 tokens per second)
llama_print_timings: prompt eval time =   617.19 ms /    64 tokens (    9.64 ms per token,   103.70 tokens per second)
llama_print_timings:        eval time =  5195.17 ms /    61 runs   (   85.17 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =  5940.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1. How are you?



llama_print_timings:        load time =   586.62 ms
llama_print_timings:      sample time =     3.10 ms /     7 runs   (    0.44 ms per token,  2260.98 tokens per second)
llama_print_timings: prompt eval time =   586.58 ms /    45 tokens (   13.04 ms per token,    76.72 tokens per second)
llama_print_timings:        eval time =   482.88 ms /     6 runs   (   80.48 ms per token,    12.43 tokens per second)
llama_print_timings:       total time =  1083.72 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte





llama_print_timings:        load time =   824.93 ms
llama_print_timings:      sample time =     0.88 ms /     2 runs   (    0.44 ms per token,  2272.73 tokens per second)
llama_print_timings: prompt eval time =   824.88 ms /   190 tokens (    4.34 ms per token,   230.34 tokens per second)
llama_print_timings:        eval time =    85.45 ms /     1 runs   (   85.45 ms per token,    11.70 tokens per second)
llama_print_timings:       total time =   914.76 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The sun was blocked by a small patch of black clouds, and the shadow spread over the earth. It wasn't just an ordinary cloud; it was a group of locusts that had come in soon afterward. They started to land on nearby fields, and three people immersed themselves in life's rain shower, feeling the dignity of earthly life. Ding Yi and Wang Meng put their bottles of wine in their hands onto the ground of Hebei Plain, paying tribute to the locusts.



llama_print_timings:        load time =   697.62 ms
llama_print_timings:      sample time =    50.56 ms /   114 runs   (    0.44 ms per token,  2254.79 tokens per second)
llama_print_timings: prompt eval time =   697.58 ms /   119 tokens (    5.86 ms per token,   170.59 tokens per second)
llama_print_timings:        eval time =  9647.55 ms /   113 runs   (   85.38 ms per token,    11.71 tokens per second)
llama_print_timings:       total time = 10579.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“Thank you, David.” Wang Miao extended her hand to David.



llama_print_timings:        load time =   589.75 ms
llama_print_timings:      sample time =     7.48 ms /    16 runs   (    0.47 ms per token,  2139.32 tokens per second)
llama_print_timings: prompt eval time =   589.70 ms /    48 tokens (   12.29 ms per token,    81.40 tokens per second)
llama_print_timings:        eval time =  1301.52 ms /    15 runs   (   86.77 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  1925.23 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

“I also thank you, ” Ding Yi grabbed the other hand of big history.



llama_print_timings:        load time =   587.58 ms
llama_print_timings:      sample time =     9.91 ms /    22 runs   (    0.45 ms per token,  2220.20 tokens per second)
llama_print_timings: prompt eval time =   587.54 ms /    46 tokens (   12.77 ms per token,    78.29 tokens per second)
llama_print_timings:        eval time =  1790.54 ms /    21 runs   (   85.26 ms per token,    11.73 tokens per second)
llama_print_timings:       total time =  2425.43 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"Let's go back, there are so many things to do," said Wang Miao.



llama_print_timings:        load time =   583.44 ms
llama_print_timings:      sample time =     9.54 ms /    21 runs   (    0.45 ms per token,  2200.10 tokens per second)
llama_print_timings: prompt eval time =   583.42 ms /    48 tokens (   12.15 ms per token,    82.27 tokens per second)
llama_print_timings:        eval time =  1736.79 ms /    20 runs   (   86.84 ms per token,    11.52 tokens per second)
llama_print_timings:       total time =  2367.66 ms


translated 49.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

No one believed that Ye Wenjie could climb Radar Peak again by her own strength, but she still managed to do it without anyone's help. She only rested twice at an abandoned lookout post on the ridge in her journey.



llama_print_timings:        load time =   644.88 ms
llama_print_timings:      sample time =    24.85 ms /    55 runs   (    0.45 ms per token,  2213.37 tokens per second)
llama_print_timings: prompt eval time =   644.85 ms /    86 tokens (    7.50 ms per token,   133.36 tokens per second)
llama_print_timings:        eval time =  4582.02 ms /    54 runs   (   84.85 ms per token,    11.79 tokens per second)
llama_print_timings:       total time =  5339.69 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

After learning the truth about the Trisolarian civilization, Ye Wenjie remained silent and seldom spoke. The only request she made was to visit Red Bottom Base ruins.



llama_print_timings:        load time =   622.63 ms
llama_print_timings:      sample time =    19.11 ms /    42 runs   (    0.45 ms per token,  2198.38 tokens per second)
llama_print_timings: prompt eval time =   622.59 ms /    59 tokens (   10.55 ms per token,    94.76 tokens per second)
llama_print_timings:        eval time =  3481.57 ms /    41 runs   (   84.92 ms per token,    11.78 tokens per second)
llama_print_timings:       total time =  4192.35 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

When the group climbed to the top of Radar Peak, just as a glimpse of the peak was revealed in the cloudy atmosphere. The group traveled all day long and suddenly saw the brilliant sunshine and blue sky that radiated towards the west like entering another world. From the summit of the mountain, we could see the silver white clouds spreading below, resembling abstractly the features of the Great Xiang'an Mountain underneath.



llama_print_timings:        load time =   696.68 ms
llama_print_timings:      sample time =    45.56 ms /   101 runs   (    0.45 ms per token,  2216.61 tokens per second)
llama_print_timings: prompt eval time =   696.65 ms /   122 tokens (    5.71 ms per token,   175.12 tokens per second)
llama_print_timings:        eval time =  8749.51 ms /   100 runs   (   87.50 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  9656.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

As they walked around the site, they came across a few remaining pieces of rock with iron rust on them. It wasn't long before they realized that those seemingly innocent-looking rocks were in fact a massive metal base.



llama_print_timings:        load time =   704.16 ms
llama_print_timings:      sample time =    21.77 ms /    48 runs   (    0.45 ms per token,  2204.46 tokens per second)
llama_print_timings: prompt eval time =   704.11 ms /   122 tokens (    5.77 ms per token,   173.27 tokens per second)
llama_print_timings:        eval time =  4052.04 ms /    47 runs   (   86.21 ms per token,    11.60 tokens per second)
llama_print_timings:       total time =  4856.31 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

This is the base of the antenna, " said Ye Wenjie. "The first voice of human civilization that was heard by extraterrestrial worlds, was sent through this antenna to the sun, amplified and then broadcasted to the whole universe."



llama_print_timings:        load time =   644.66 ms
llama_print_timings:      sample time =    25.76 ms /    58 runs   (    0.44 ms per token,  2251.90 tokens per second)
llama_print_timings: prompt eval time =   644.61 ms /    80 tokens (    8.06 ms per token,   124.11 tokens per second)
llama_print_timings:        eval time =  4900.44 ms /    57 runs   (   85.97 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =  5663.17 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

People found a small stone tablet next to the base, it is almost completely covered by weeds, and reads:



llama_print_timings:        load time =   593.33 ms
llama_print_timings:      sample time =    12.20 ms /    26 runs   (    0.47 ms per token,  2131.50 tokens per second)
llama_print_timings: prompt eval time =   593.26 ms /    54 tokens (   10.99 ms per token,    91.02 tokens per second)
llama_print_timings:        eval time =  2166.32 ms /    25 runs   (   86.65 ms per token,    11.54 tokens per second)
llama_print_timings:       total time =  2817.16 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The original site of Hongbao Base.



llama_print_timings:        load time =   573.37 ms
llama_print_timings:      sample time =     4.45 ms /    10 runs   (    0.44 ms per token,  2248.20 tokens per second)
llama_print_timings: prompt eval time =   573.33 ms /    34 tokens (   16.86 ms per token,    59.30 tokens per second)
llama_print_timings:        eval time =   810.77 ms /     9 runs   (   90.09 ms per token,    11.10 tokens per second)
llama_print_timings:       total time =  1403.54 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

1968-1987



llama_print_timings:        load time =   627.89 ms
llama_print_timings:      sample time =     4.71 ms /    10 runs   (    0.47 ms per token,  2122.24 tokens per second)
llama_print_timings: prompt eval time =   627.85 ms /    40 tokens (   15.70 ms per token,    63.71 tokens per second)
llama_print_timings:        eval time =   770.91 ms /     9 runs   (   85.66 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1420.25 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The Chinese Academy of Sciences



llama_print_timings:        load time =  1061.28 ms
llama_print_timings:      sample time =     2.79 ms /     6 runs   (    0.47 ms per token,  2149.00 tokens per second)
llama_print_timings: prompt eval time =  1061.22 ms /    30 tokens (   35.37 ms per token,    28.27 tokens per second)
llama_print_timings:        eval time =   461.77 ms /     5 runs   (   92.35 ms per token,    10.83 tokens per second)
llama_print_timings:       total time =  1536.78 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

March 21, 1989



llama_print_timings:        load time =   574.51 ms
llama_print_timings:      sample time =     5.31 ms /    12 runs   (    0.44 ms per token,  2259.04 tokens per second)
llama_print_timings: prompt eval time =   574.47 ms /    39 tokens (   14.73 ms per token,    67.89 tokens per second)
llama_print_timings:        eval time =   971.92 ms /    11 runs   (   88.36 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =  1571.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

The monument is so small that it's more like a way to erase the memory.



llama_print_timings:        load time =   579.48 ms
llama_print_timings:      sample time =     8.94 ms /    20 runs   (    0.45 ms per token,  2238.39 tokens per second)
llama_print_timings: prompt eval time =   579.44 ms /    44 tokens (   13.17 ms per token,    75.94 tokens per second)
llama_print_timings:        eval time =  1641.10 ms /    19 runs   (   86.37 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =  2262.02 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Ye Wenjie stood at the edge of the cliff. She had personally ended the lives of two soldiers here before. Unlike her fellow spies, she did not look up at the clouds but looked to a certain direction, where there was a small village called Qijiatun below……



llama_print_timings:        load time =   650.49 ms
llama_print_timings:      sample time =    28.89 ms /    64 runs   (    0.45 ms per token,  2215.30 tokens per second)
llama_print_timings: prompt eval time =   650.44 ms /    81 tokens (    8.03 ms per token,   124.53 tokens per second)
llama_print_timings:        eval time =  5318.48 ms /    63 runs   (   84.42 ms per token,    11.85 tokens per second)
llama_print_timings:       total time =  6103.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

叶文洁's heart was beating with difficulty, like a violin string that is about to break, black fog began appearing in her eyesight. She struggled desperately using the last moments of energy before everything turned dark forever, she wanted to see the sunset once again before the base fell into darkness. The setting sun in the western sky was slowly sinking into the clouds and blooming like a huge blood-red sky.



llama_print_timings:        load time =   745.52 ms
llama_print_timings:      sample time =    41.16 ms /    92 runs   (    0.45 ms per token,  2235.13 tokens per second)
llama_print_timings: prompt eval time =   745.49 ms /   130 tokens (    5.73 ms per token,   174.38 tokens per second)
llama_print_timings:        eval time =  7799.54 ms /    91 runs   (   85.71 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  8732.81 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

"This is the setting sun……" Yefanjie said gently.



llama_print_timings:        load time =   589.43 ms
llama_print_timings:      sample time =     8.41 ms /    19 runs   (    0.44 ms per token,  2260.29 tokens per second)
llama_print_timings: prompt eval time =   589.39 ms /    44 tokens (   13.40 ms per token,    74.65 tokens per second)
llama_print_timings:        eval time =  1574.40 ms /    18 runs   (   87.47 ms per token,    11.43 tokens per second)
llama_print_timings:       total time =  2202.21 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$(1)$



llama_print_timings:        load time =   576.61 ms
llama_print_timings:      sample time =     1.86 ms /     4 runs   (    0.47 ms per token,  2150.54 tokens per second)
llama_print_timings: prompt eval time =   576.58 ms /    34 tokens (   16.96 ms per token,    58.97 tokens per second)
llama_print_timings:        eval time =   256.66 ms /     3 runs   (   85.55 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =   842.51 ms


translated 15.


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

If there is an alien civilization, does the universe have a common moral code? From a narrow perspective, this is an interesting question for sci-fi fans; on a broad perspective, it may be of vital importance to the survival of humanity's civilization.



llama_print_timings:        load time =   624.34 ms
llama_print_timings:      sample time =    26.03 ms /    58 runs   (    0.45 ms per token,  2228.20 tokens per second)
llama_print_timings: prompt eval time =   624.31 ms /    72 tokens (    8.67 ms per token,   115.33 tokens per second)
llama_print_timings:        eval time =  4876.80 ms /    57 runs   (   85.56 ms per token,    11.69 tokens per second)
llama_print_timings:       total time =  5620.75 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

In the 1980s, Chinese science fiction writers tended to favor positive responses. At that time, aliens in sci-fi novels were portrayed as benevolent and tolerant characters who guided humanity these blind sheep and provided comfort to humans who had been injured. In Golden Ocean's The Moonlight Island, the aliens soothe humans' wounded souls; in Faraway Love by Tong Nianzheng, there is a tragic yet majestic love between human beings and aliens. In Earth Mirror by Zheng Wenqiang, even with advanced technological level but possessing菩萨心肠, the alien civilization were frightened away due to the lower morality of humans!



llama_print_timings:        load time =   777.90 ms
llama_print_timings:      sample time =    70.12 ms /   155 runs   (    0.45 ms per token,  2210.46 tokens per second)
llama_print_timings: prompt eval time =   777.86 ms /   161 tokens (    4.83 ms per token,   206.98 tokens per second)
llama_print_timings:        eval time = 13399.84 ms /   154 runs   (   87.01 ms per token,    11.49 tokens per second)
llama_print_timings:       total time = 14504.49 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

However, the saying "human nature is inherently good" is questionable in human society and unacceptable in the universe.



llama_print_timings:        load time =   596.40 ms
llama_print_timings:      sample time =    13.02 ms /    27 runs   (    0.48 ms per token,  2074.05 tokens per second)
llama_print_timings: prompt eval time =   596.37 ms /    55 tokens (   10.84 ms per token,    92.22 tokens per second)
llama_print_timings:        eval time =  2201.73 ms /    26 runs   (   84.68 ms per token,    11.81 tokens per second)
llama_print_timings:       total time =  2859.44 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

To answer the question of cosmic morality, it is necessary to convince people with scientific reasoning. Here we can naturally think about comparing the evolution of human civilizations across the ages to that of the cosmological civilization system. But the research on such a thing in the former is extremely difficult due to the presence of too many unmeasurable factors. On the other hand, the study of the relationship between various civilizations may be more quantitative and mathematics-based since they are all pointlike due to the great distance between them that makes it difficult to see the players' technology in action on the football pitch (a particular point is the ball which is the only sport with a clearly defined mathematical structure consisting of 23 points, representing the matrix of a basketball game).



llama_print_timings:        load time =   833.98 ms
llama_print_timings:      sample time =    72.04 ms /   159 runs   (    0.45 ms per token,  2207.17 tokens per second)
llama_print_timings: prompt eval time =   833.93 ms /   186 tokens (    4.48 ms per token,   223.04 tokens per second)
llama_print_timings:        eval time = 13647.06 ms /   158 runs   (   86.37 ms per token,    11.58 tokens per second)
llama_print_timings:       total time = 14818.03 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I used to be addicted to this game of thinking about civilizations in the universe and could not help myself. In the early 1990s, to kill time, I would often make some mundane but self-consciously amusing software applications. Now online reemerging as e-poems are those from that period. The same period, I also made a simulation software for the evolution of the overall state of civilizations in the universe which reduced civilizations into points and only have 10-20 simple parameters to describe their basic characteristics and set the number of civilizations to be very enormous. To correct my model, I consulted a respectable scholar who is an expert in network theory and also has a fondness for science fiction; he helped me correct some mistakes in my incorrect models. The software ran for several hours on 286 machines, setting ten thousand light years radius to thirty thousand civilizations, the results were interesting. I am only an engineer, not capable of doing this level of resea


llama_print_timings:        load time =  1039.11 ms
llama_print_timings:      sample time =   111.33 ms /   248 runs   (    0.45 ms per token,  2227.65 tokens per second)
llama_print_timings: prompt eval time =  1039.06 ms /   292 tokens (    3.56 ms per token,   281.02 tokens per second)
llama_print_timings:        eval time = 21843.51 ms /   247 runs   (   88.44 ms per token,    11.31 tokens per second)
llama_print_timings:       total time = 23404.90 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I think it is possible for a civilization without morals to exist. How would ethical humanity survive in such a universe? This was the reason I wrote 'The Seventh Sun'.



llama_print_timings:        load time =   628.76 ms
llama_print_timings:      sample time =    19.08 ms /    42 runs   (    0.45 ms per token,  2201.26 tokens per second)
llama_print_timings: prompt eval time =   628.74 ms /    63 tokens (    9.98 ms per token,   100.20 tokens per second)
llama_print_timings:        eval time =  3446.66 ms /    41 runs   (   84.06 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =  4163.32 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Of course, "The Three-Body Problem" did not reveal the picture of this civilization. Both civilizations were unaware of it and only revealed a part of it. For example, since there is a large number of intelligent civilizations near our own star, why does this universe seem so sparse? I hope to continue describing this in the second volume of "The Earth As It Was".



llama_print_timings:        load time =   700.51 ms
llama_print_timings:      sample time =    37.13 ms /    83 runs   (    0.45 ms per token,  2235.15 tokens per second)
llama_print_timings: prompt eval time =   700.46 ms /   108 tokens (    6.49 ms per token,   154.18 tokens per second)
llama_print_timings:        eval time =  7074.83 ms /    82 runs   (   86.28 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =  7945.11 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

That scene that gradually unfolds in "The Final Frontier" is sure to make those readers with a sense of morality uncomfortable, but it's just science fiction. No need to take it personally. 😃



llama_print_timings:        load time =   610.11 ms
llama_print_timings:      sample time =    22.97 ms /    51 runs   (    0.45 ms per token,  2220.00 tokens per second)
llama_print_timings: prompt eval time =   610.09 ms /    64 tokens (    9.53 ms per token,   104.90 tokens per second)
llama_print_timings:        eval time =  4281.25 ms /    50 runs   (   85.62 ms per token,    11.68 tokens per second)
llama_print_timings:       total time =  4998.84 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

I was surprised to learn that Chinese sci-fi readers like to describe the ultimate cosmic picture, which is quite unexpected. I come from the high tide of science fiction in the 1980s, and at that time the authors created a kind of true, futuristic Chinese-style science fiction that has never happened again. The most prominent feature of this science fiction was its complete detailing of technology, with no sense of transcendence whatsoever. Currently, sci-fi lovers have opened their eyes to embrace the entire universe. This also poses a higher requirement on the authors of science fiction novels, and I am very sorry that The Three Body Problem is not an “ultimate” science fiction novel. Creating a science fiction novel like 2001: A Space Odyssey is difficult, especially in length, as it may be neither vivid nor correct in terms of science and become a pile of empty shelves without any style, which I am not confident about yet.



llama_print_timings:        load time =   843.52 ms
llama_print_timings:      sample time =    96.31 ms /   214 runs   (    0.45 ms per token,  2221.97 tokens per second)
llama_print_timings: prompt eval time =   843.49 ms /   194 tokens (    4.35 ms per token,   230.00 tokens per second)
llama_print_timings:        eval time = 18662.31 ms /   213 runs   (   87.62 ms per token,    11.41 tokens per second)
llama_print_timings:       total time = 19959.07 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

$This proposed series is called "The Earth As It Was", and there is not much meaning behind it. The difference between science fiction and fantasy literature lies in the fact that, while both have their roots in imagined worlds, science fiction is closer to reality as a thin thread is tied between the two. This makes science fiction more modern than fairy tales (ancient fairy tales were believed by readers at the time to be real).



llama_print_timings:        load time =   750.49 ms
llama_print_timings:      sample time =    42.30 ms /    92 runs   (    0.46 ms per token,  2174.68 tokens per second)
llama_print_timings: prompt eval time =   750.45 ms /   132 tokens (    5.69 ms per token,   175.89 tokens per second)
llama_print_timings:        eval time =  7729.35 ms /    91 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =  8676.45 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Imagined title for the next book in The Enderverse series is Dark Forest, which is a reference to an 80s catchphrase “the city is a forest, and every man is a hunter, and every woman a trap.”



llama_print_timings:        load time =   657.02 ms
llama_print_timings:      sample time =    23.53 ms /    53 runs   (    0.44 ms per token,  2252.73 tokens per second)
llama_print_timings: prompt eval time =   656.98 ms /    70 tokens (    9.39 ms per token,   106.55 tokens per second)
llama_print_timings:        eval time =  4288.53 ms /    52 runs   (   82.47 ms per token,    12.13 tokens per second)
llama_print_timings:       total time =  5054.39 ms
llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_inte

Oh, and finally, thank you all!


llama.cpp: loading model from /home/brian/github/llama.cpp/models/7B/Chinese-Alpaca-2/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 55296
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 5504
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: using CUDA for GPU acceleration
llama_model_load_internal: mem required  = 42

Luo Cixin



llama_print_timings:        load time =   567.03 ms
llama_print_timings:      sample time =     2.75 ms /     6 runs   (    0.46 ms per token,  2181.82 tokens per second)
llama_print_timings: prompt eval time =   566.99 ms /    32 tokens (   17.72 ms per token,    56.44 tokens per second)
llama_print_timings:        eval time =   428.42 ms /     5 runs   (   85.68 ms per token,    11.67 tokens per second)
llama_print_timings:       total time =  1006.98 ms


translated 13.


In [None]:
# Translate "dark_forest" chapter by chapter (chapter numbers 1 through 50
# inclusive), printing a separator line ahead of each chapter's output.
# Original author's note, presumably the duration of this loop: took 446 minutes.
dark_forest_first_chapter, dark_forest_last_chapter = 1, 50
for i in range(dark_forest_first_chapter, dark_forest_last_chapter + 1):
    print("################")
    translate_chapter(llm, "dark_forest", i)

In [None]:
# Translate "deaths_end" chapter by chapter (chapter numbers 1 through 58
# inclusive), printing a separator line ahead of each chapter's output.
deaths_end_first_chapter, deaths_end_last_chapter = 1, 58
for i in range(deaths_end_first_chapter, deaths_end_last_chapter + 1):
    print("################")
    translate_chapter(llm, "deaths_end", i)