In [None]:
import requests
from pathlib import Path
from tqdm import tqdm

# Destination for the downloaded weights; the parent directory is created
# up front so the open() below cannot fail on a missing folder.
local_path = './models-old/gpt4all-lora-quantized-ggml.bin'
Path(local_path).parent.mkdir(parents=True, exist_ok=True)

url = 'https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized-ggml.bin'

# Stream the file instead of buffering it in memory. The response is used as
# a context manager so the connection is released even if the write fails,
# and the timeout keeps the cell from hanging forever on a stalled server.
with requests.get(url, stream=True, timeout=30) as response:
    # Fail loudly on 4xx/5xx *before* touching the destination file;
    # otherwise an HTML error page would be saved as the model binary.
    response.raise_for_status()

    # Content-Length may be absent; tqdm then falls back to a plain counter.
    total_bytes = int(response.headers.get('content-length', 0)) or None

    with open(local_path, 'wb') as f:
        with tqdm(total=total_bytes, unit='B', unit_scale=True,
                  desc='Downloading model') as progress:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
                    # Advance by bytes written so the bar reports real progress.
                    progress.update(len(chunk))

In [1]:
from langchain.llms import GPT4All
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [2]:
# Prompt text with a single {question} placeholder; the trailing sentence is
# sent to the model as the start of its answer.
template = """Question: {question}

Answer: Let's think step by step."""

# Wrap the raw text so the chain can substitute the `question` variable.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

In [4]:
# Route generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Load the quantized model from disk (the loader prints its parameters below).
llm = GPT4All(
    model="./models/ggml-model-q4_0.bin",
    callback_manager=callback_manager,
    verbose=True,
)

# Bind the prompt defined earlier to the loaded model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

llama_model_load: loading model from './models/ggml-model-q4_0.bin' - please wait ...
llama_model_load: n_vocab = 32001
llama_model_load: n_ctx   = 512
llama_model_load: n_embd  = 4096
llama_model_load: n_mult  = 256
llama_model_load: n_head  = 32
llama_model_load: n_layer = 32
llama_model_load: n_rot   = 128
llama_model_load: f16     = 2
llama_model_load: n_ff    = 11008
llama_model_load: n_parts = 1
llama_model_load: type    = 1
llama_model_load: ggml map size = 4017.70 MB
llama_model_load: ggml ctx size =  81.25 KB
llama_model_load: mem required  = 5809.78 MB (+ 2052.00 MB per state)
llama_model_load: loading tensors from './models/ggml-model-q4_0.bin'
llama_model_load: model size =  4017.27 MB / num tensors = 291
llama_init_from_file: kv self size  =  512.00 MB


In [5]:
# Sample question to substitute into the prompt's {question} placeholder.
question = "What happens when it rains somewhere?"
# Left as the cell's final expression so the notebook displays the returned
# string in addition to the text streamed to stdout during generation.
llm_chain.run(question)

 Question:

llama_generate: seed = 1691727277

system_info: n_threads = 4 / 12 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
sampling: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000
generate: n_ctx = 512, n_batch = 1, n_predict = 256, n_keep = 0




 What happens when it rains somewhere?

Answer: Let's think step by step. Whenever there is rain, the water that comes down from above as a result of condensation or evaporation gets collected in various ways depending upon its pathway and landscapes such as oceans & seas (rivers flowing into them), lakes ,ponds etc., it then flows back to earth by way of rivers, streams(drainage) from the mountains. Ultimately It finds their place downward like waterfall or a river which goes underground and forms aquifers . The rainwater can be collected for agricultural purposes such as irrigation ,hydroelectricity generation etc.,

 [end of text]

llama_print_timings:        load time = 12858.49 ms
llama_print_timings:      sample time =   162.29 ms /   131 runs   (    1.24 ms per run)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token)
llama_print_timings:        eval time = 98149.46 ms /   153 runs   (  641.50 ms per run)
llama_print_timings:       total time = 108447.75 ms


" Question: What happens when it rains somewhere?\n\nAnswer: Let's think step by step. Whenever there is rain, the water that comes down from above as a result of condensation or evaporation gets collected in various ways depending upon its pathway and landscapes such as oceans & seas (rivers flowing into them), lakes ,ponds etc., it then flows back to earth by way of rivers, streams(drainage) from the mountains. Ultimately It finds their place downward like waterfall or a river which goes underground and forms aquifers . The rainwater can be collected for agricultural purposes such as irrigation ,hydroelectricity generation etc.,"

In [10]:
# Variant of the prompt: same {question} placeholder, but the answer is
# seeded with an instruction to be brief and humorous.
template = """Question: {question}

Answer: Let's answer in two sentence while being funny."""

# Rebuild the prompt object so it picks up the new template text.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

In [11]:
# Route generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# NOTE(review): this loads the same model file, with the same arguments, as
# the earlier GPT4All(...) cell. Reusing that existing `llm` would presumably
# avoid a second multi-GB load -- confirm the wrapper keeps no per-instance
# generation state before changing it.
llm = GPT4All(
    model="./models/ggml-model-q4_0.bin",
    callback_manager=callback_manager,
    verbose=True,
)

# Build a fresh chain so the updated `prompt` is used.
llm_chain = LLMChain(llm=llm, prompt=prompt)

llama_model_load: loading model from './models/ggml-model-q4_0.bin' - please wait ...
llama_model_load: n_vocab = 32001
llama_model_load: n_ctx   = 512
llama_model_load: n_embd  = 4096
llama_model_load: n_mult  = 256
llama_model_load: n_head  = 32
llama_model_load: n_layer = 32
llama_model_load: n_rot   = 128
llama_model_load: f16     = 2
llama_model_load: n_ff    = 11008
llama_model_load: n_parts = 1
llama_model_load: type    = 1
llama_model_load: ggml map size = 4017.70 MB
llama_model_load: ggml ctx size =  81.25 KB
llama_model_load: mem required  = 5809.78 MB (+ 2052.00 MB per state)
llama_model_load: loading tensors from './models/ggml-model-q4_0.bin'
llama_model_load: model size =  4017.27 MB / num tensors = 291
llama_init_from_file: kv self size  =  512.00 MB


In [12]:
# Same sample question as before, now run through the chain built with the
# "funny" prompt.
question = "What happens when it rains somewhere?"
# Left as the cell's final expression so the notebook displays the returned
# string in addition to the text streamed to stdout during generation.
llm_chain.run(question)

llama_generate: seed = 1691727705

system_info: n_threads = 4 / 12 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
sampling: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000
generate: n_ctx = 512, n_batch = 1, n_predict = 256, n_keep = 0




 Question: What happens when it rains somewhere?

Answer: Let's answer in two sentence while being funny. When it is raining, water falls from the sky onto surfaces below because God has become angry with us and He wants to punish all living beings for their sinners!

 [end of text]

llama_print_timings:        load time =  5243.61 ms
llama_print_timings:      sample time =    49.27 ms /    37 runs   (    1.33 ms per run)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token)
llama_print_timings:        eval time = 39326.91 ms /    63 runs   (  624.24 ms per run)
llama_print_timings:       total time = 43825.92 ms


" Question: What happens when it rains somewhere?\n\nAnswer: Let's answer in two sentence while being funny. When it is raining, water falls from the sky onto surfaces below because God has become angry with us and He wants to punish all living beings for their sinners!"