# Run Ollama
Modified by Jon Chun 7 Oct 2024

# Install

In [1]:
!sudo apt-get install -y pciutils

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libpci3 pci.ids
The following NEW packages will be installed:
  libpci3 pci.ids pciutils
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 343 kB of archives.
After this operation, 1,581 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 pci.ids all 0.0~2022.01.22-1 [251 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpci3 amd64 1:3.7.0-6 [28.9 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 pciutils amd64 1:3.7.0-6 [63.6 kB]
Fetched 343 kB in 2s (214 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 3.)
debconf: falling back to frontend: Readline
debconf: unable to initializ

In [2]:
!curl -fsSL https://ollama.com/install.sh | sh # download and run the Ollama install script

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> NVIDIA GPU installed.
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


# Import

In [3]:
from IPython.display import clear_output

In [31]:
from google.colab import files

In [30]:
import os
import threading
import subprocess
import requests
import json
from tqdm import tqdm

# Ollama

In [5]:
# Start the Ollama API server as a background process, launched from a separate thread

def ollama():
    """Export the Ollama server settings and spawn `ollama serve`.

    Writes OLLAMA_HOST and OLLAMA_ORIGINS into this process's environment
    (so they are inherited by the child), then launches the server with
    Popen without waiting for it or keeping a handle to it.
    """
    server_settings = {
        'OLLAMA_HOST': '0.0.0.0:11434',
        'OLLAMA_ORIGINS': '*',
    }
    for variable, value in server_settings.items():
        os.environ[variable] = value

    subprocess.Popen(["ollama", "serve"])

# Popen returns as soon as the server process has been spawned, so this thread
# finishes almost immediately; the `ollama serve` process keeps running on its own.
ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()

# Ollama Model

In [6]:
%%time

# NOTE: pulls the llama3.1:8b model used by the cells below, then clears the
# download progress from the cell output. Presumably needs the `ollama serve`
# process started earlier to be reachable — verify if the pull fails on a fresh run.

from IPython.display import clear_output
!ollama pull llama3.1:8b
clear_output()

CPU times: user 1.08 s, sys: 143 ms, total: 1.22 s
Wall time: 1min 33s


# Serve Ollama in Colab

In [7]:
!pip install -U lightrag[ollama]

Collecting lightrag[ollama]
  Downloading lightrag-0.1.0b6-py3-none-any.whl.metadata (14 kB)
Collecting backoff<3.0.0,>=2.2.1 (from lightrag[ollama])
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting jsonlines<5.0.0,>=4.0.0 (from lightrag[ollama])
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Collecting python-dotenv<2.0.0,>=1.0.1 (from lightrag[ollama])
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting tiktoken<0.8.0,>=0.7.0 (from lightrag[ollama])
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting ollama<0.3.0,>=0.2.1 (from lightrag[ollama])
  Downloading ollama-0.2.1-py3-none-any.whl.metadata (4.2 kB)
Collecting httpx<0.28.0,>=0.27.0 (from ollama<0.3.0,>=0.2.1->lightrag[ollama])
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<0.28.0,>=0.27.0->ollama<0.3.0,>=0.2.1->lightrag[ollama])
  Downloading ht

In [9]:
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.core.model_client import ModelClient
from lightrag.components.model_client import OllamaClient, GroqAPIClient

import time

# Queries

In [10]:
# Prompt template used by SimpleQA: a fixed system message followed by the user
# turn. `input_str` is the variable name SimpleQA supplies when it calls the generator.
# NOTE(review): the {{...}} placeholder syntax looks like Jinja2 — confirm against the lightrag docs.
qa_template = r"""<SYS>
You are a helpful assistant.
</SYS>
User: {{input_str}}
You:"""

class SimpleQA(Component):
    """Minimal question-answering component: one Generator driven by `qa_template`.

    Args:
        model_client: Client object handed to the Generator as `model_client`.
        model_kwargs: Dict handed to the Generator as `model_kwargs`
            (this notebook passes {"model": "llama3.1:8b"}).
    """

    def __init__(self, model_client: ModelClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=qa_template,
        )

    # The return annotation is a string because GeneratorOutput
    # (lightrag.core.types) is not imported in this notebook.
    def call(self, input: object) -> "GeneratorOutput":
        """Run the generator with `str(input)` as the template's `input_str`.

        Returns the generator's output object unchanged; the answer text is
        read from its `.data` attribute elsewhere in this notebook.
        """
        return self.generator.call({"input_str": str(input)})

    async def acall(self, input: object) -> "GeneratorOutput":
        """Asynchronous counterpart of `call`; awaits the generator's `acall`."""
        return await self.generator.acall({"input_str": str(input)})

In [11]:
from lightrag.components.model_client import OllamaClient
from IPython.display import Markdown, display

# Keyword arguments for SimpleQA(**model): the Ollama client plus the tag of
# the model pulled earlier in this notebook.
model = {
    "model_client": OllamaClient(),
    "model_kwargs": {"model": "llama3.1:8b"}
}

# Prompt-Response

In [12]:
# Build the QA component from `model` and render the answer text as Markdown.
qa = SimpleQA(**model)
output=qa("what is happiness")
display(Markdown(f"**Answer:** {output.data}"))

**Answer:** A profound question!

Happiness is a complex and multifaceted concept that can be subjective and personal, but I'll try to break it down in simple terms.

Happiness is often described as a positive emotional state characterized by feelings of joy, contentment, and well-being. It's a sense of satisfaction with one's life, circumstances, and experiences. People who experience happiness tend to feel good about themselves, their relationships, and the world around them.

Some common aspects that contribute to happiness include:

1. **Positive emotions**: Feelings of joy, love, excitement, gratitude, and wonder.
2. **Life satisfaction**: A sense of contentment with one's life, including achievements, relationships, and personal growth.
3. **Well-being**: Good physical and mental health, as well as a strong sense of purpose and meaning in life.
4. **Relationships**: Strong connections with others, built on trust, empathy, and love.
5. **Personal growth**: A sense of progress, learning, and self-improvement.

Happiness can manifest in different ways, such as:

1. **Intrinsic happiness**: Feeling happy from within, often due to personal values, relationships, or activities that bring joy.
2. **Extrinsic happiness**: Experiencing happiness through external factors, like travel, achievements, or material possessions.
3. **Flow experiences**: Engaging in activities that fully absorb and challenge one's attention, leading to a sense of complete absorption and enjoyment.

Ultimately, happiness is unique to each individual, and what brings happiness to one person might not be the same for another. Do you have any specific thoughts on happiness?

In [13]:
# Same flow with a different question; `qa` is rebuilt from the same `model` settings.
qa = SimpleQA(**model)
output=qa("Why do fools fall in love?")
display(Markdown(f"**Answer:** {output.data}"))

**Answer:** A classic question!

According to the 1956 song "Fools Rush In" by Tommie Connor, made famous by Elvis Presley and other artists, the lyrics say:

"Why do fools fall in love? That's what I'd like to know. Some things about that fool you can't explain, But there they go again..."

In a more literal sense, people often ask this question when it seems like two individuals who may not be the best match are still drawn to each other.

Would you like me to elaborate on why people might fall in love with someone who's not necessarily "the best" match? Or is there something else I can help with?

In [14]:
# Factual question; `qa` is rebuilt from the same `model` settings.
qa = SimpleQA(**model)
output=qa("Who invented the lightbulb?")
display(Markdown(f"**Answer:** {output.data}"))

**Answer:** The invention of the lightbulb is a bit more complex than just one person. While Thomas Edison is often credited with inventing the lightbulb, he actually improved upon an existing design.

The first incandescent lightbulb was actually developed by Humphry Davy in 1802, using a battery and a thin strip of carbon. Later, scientists like Warren de la Rue and Frederick de Moleyns worked on similar designs.

However, Thomas Edison's contributions were significant. He developed the first commercially practical incandescent lightbulb in 1879, which used a carbon filament and lasted for hours. His design improved upon earlier versions by using a longer-lasting bamboo filament and a more efficient vacuum pump to remove air from the bulb.

So while it's not entirely accurate to say that Thomas Edison invented the lightbulb, his improvements made it a practical reality, paving the way for widespread use of electric lighting in homes and businesses. Would you like to know more about the history of lightbulbs?

In [21]:
# The `qa` and `output` globals left by this cell are reused further down:
# `output` by the inspection cells, `qa` by generate_question_and_query().
qa = SimpleQA(**model)
output=qa("What were the main causes of the US Civil War?")
display(Markdown(f"**Answer:** {output.data}"))

**Answer:** The US Civil War, also known as the American Civil War, was a pivotal event in American history that occurred from 1861 to 1865. The main causes of the war can be attributed to several key factors:

1. **Slavery**: The disagreement over slavery and its expansion into new territories was a primary cause of the conflict. Southern states, which relied heavily on agriculture and slave labor, wanted to protect and expand the institution of slavery, while Northern states, which were more industrialized, wanted it abolished.
2. **States' rights vs. Federal power**: The debate over states' rights versus federal power was also a significant factor in the lead-up to the war. Southern states felt that the federal government was infringing on their rights as sovereign states and sought greater autonomy.
3. **Economic differences**: The North and South had distinct economies, with the North being more industrialized and the South relying heavily on agriculture. This led to disagreements over tariffs, trade policies, and other economic issues.
4. **Westward expansion**: As the United States expanded westward, conflicts arose between Southern states, which wanted to expand slavery into new territories, and Northern states, which opposed this expansion.
5. **The Kansas-Nebraska Act (1854)**: This law allowed new states to decide for themselves whether to allow slavery, effectively repealing the Missouri Compromise of 1820, which had prohibited slavery in certain territories.
6. **Abraham Lincoln's election**: Abraham Lincoln's victory in the 1860 presidential election was seen as a threat by Southern states, who feared he would restrict their rights and abolish slavery.

These factors ultimately led to the secession of 11 Southern states from the Union and the formation of the Confederacy. The war that ensued resulted in the deaths of an estimated 620,000 to 750,000 soldiers and civilians and led to a profound transformation in American society.

Would you like me to elaborate on any of these points or provide more information?

In [22]:
type(output)

lightrag.core.types.GeneratorOutput

In [23]:
dir(output)

['__annotations__',
 '__class__',
 '__class_getitem__',
 '__dataclass_fields__',
 '__dataclass_params__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__match_args__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__post_init__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_is_protocol',
 'data',
 'error',
 'format_class_str',
 'format_example_str',
 'from_dict',
 'from_json',
 'from_yaml',
 'metadata',
 'raw_response',
 'to_dict',
 'to_dict_class',
 'to_json',
 'to_json_obj',
 'to_json_signature',
 'to_schema',
 'to_schema_str',
 'to_yaml',
 'to_yaml_obj',
 'to_yaml_signature',
 'usage']

In [25]:
type(output.data)

str

# Generate Synthetic Data

In [27]:
def make_llama_3_prompt(user, system="", assistant=""):
    """Assemble a prompt string using the Llama 3 header/end-of-turn markers.

    Args:
        user: Text of the user turn (always emitted).
        system: Optional system text; the system turn is omitted when falsy.
        assistant: Optional assistant text. When falsy, the prompt ends with
            an open assistant header; otherwise the assistant turn is emitted
            in full and closed with the end-of-turn marker.

    Returns:
        The concatenated prompt, starting with the begin-of-text marker.
    """
    def _header(role):
        return f"<|start_header_id|>{role}<|end_header_id|>\n\n"

    pieces = ["<|begin_of_text|>"]
    if system:
        pieces.append(f"{_header('system')}{system}<|eot_id|>")
    pieces.append(f"{_header('user')}{user}<|eot_id|>")
    # The assistant header is always present; only a non-empty reply closes the turn.
    pieces.append(_header("assistant"))
    if assistant:
        pieces.append(f"{assistant}<|eot_id|>")
    return "".join(pieces)

def get_movie_schema():
    """Return the text description of the `movies` table columns used in the prompt.

    One line per column in the form `index|name|type eg. example`. Every line
    carries a four-space indent and the text ends with a bare indent after the
    final newline.
    """
    columns = (
        '0|Title|TEXT eg. "Inception"',
        '1|Director|TEXT eg. "Christopher Nolan"',
        '2|Year|INT eg. "2010"',
        '3|Rating|TEXT eg. "PG-13"',
        '4|Runtime|TEXT eg. "148 min" castable to int',
        '5|Genre|TEXT eg. "Sci-Fi"',
        '6|Box_Office|TEXT eg. "$829,895,144" and when null has a value "N/A"',
    )
    indent = "    "
    return "".join(f"{indent}{column}\n" for column in columns) + indent

def generate_question_and_query():
    """Ask the model for one new question/SQL pair about the `movies` table.

    The prompt has a system part (persona, the table schema from
    get_movie_schema(), and one worked question/query example) and a user part
    requesting a similar-but-different pair formatted as JSON. It is sent
    through the notebook-level `qa` component.

    Returns:
        The value parsed from the model's reply (requested shape:
        {"question": str, "sql_query": str}), or {} when the model returned
        no data or the reply is not valid JSON.
    """
    system = "You are a data analyst with 10 years of experience writing complex SQL queries.\n"
    system += (
        "Consider a table called 'movies' with the following schema (columns)\n"
    )
    system += get_movie_schema()
    system += "Consider the following questions, and queries used to answer them:\n"

    question = """What is the highest-grossing movie of all time?"""

    # Box_Office values look like "$829,895,144": both '$' and ',' must be
    # stripped before the cast. With the '$' left in place SQLite's CAST of the
    # text yields 0, which would make the example's ORDER BY meaningless.
    sql = "SELECT Title, Box_Office FROM movies WHERE Box_Office != 'N/A' ORDER BY CAST(REPLACE(REPLACE(Box_Office, '$', ''), ',', '') AS INTEGER) DESC LIMIT 1;"

    system += "Question: " + question + "\n"
    system += "Query: " + sql + "\n"

    user = "Write a question and a query that are similar but different to those above.\n"
    user += "Format the question and query as a JSON object, i.e.\n"
    user += '{"question" : str, "sql_query": str }.\n'

    user += "Make sure to only return me valid sqlite SQL query generated as response to the question. Don't give me any comments. Just return question and query as JSON objects. Make sure query is relevant to the question. Make sure each query is complete and ends with a ;\n"

    prompt = make_llama_3_prompt(user, system)

    # NOTE(review): `qa` is a SimpleQA, which wraps its input in qa_template, so
    # the Llama 3 markers built above end up nested inside that template —
    # confirm this double wrapping is intended.
    result = qa(prompt)

    response_str = result.data
    if response_str is None:
        # No text came back; report the output's error field instead of letting
        # json.loads raise an uncaught TypeError.
        print("Failed to parse response as JSON: model returned no data:", getattr(result, "error", None))
        return {}

    try:
        response_dict = json.loads(response_str)
    except (json.JSONDecodeError, TypeError) as e:
        print("Failed to parse response as JSON:", e)
        response_dict = {}

    return response_dict

def save_to_jsonl(data, file_path):
    """Append every entry of `data` to `file_path`, one JSON document per line.

    The file is opened in append mode, so existing content is kept and
    repeated calls accumulate lines.
    """
    with open(file_path, 'a') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data)

In [32]:
output_file_path = 'questions_queries.jsonl'
num_iterations = 10  # Define how many questions and queries you want to generate
all_questions_queries = []

for _ in tqdm(range(num_iterations), desc="Generating Questions", unit="question"):
    question_and_query = generate_question_and_query()
    # generate_question_and_query() returns {} when the reply could not be
    # parsed; keep those out of the dataset instead of writing empty records.
    if question_and_query:
        all_questions_queries.append(question_and_query)

# NOTE: save_to_jsonl appends, so re-running this cell adds to an existing file.
save_to_jsonl(all_questions_queries, output_file_path)
files.download(output_file_path)

# Report what was actually written rather than the number of attempts.
skipped = num_iterations - len(all_questions_queries)
if skipped:
    print(f"Skipped {skipped} responses that could not be parsed as JSON")
print(f"Saved {len(all_questions_queries)} questions and queries to {output_file_path}")




Generating Questions: 100%|██████████| 10/10 [00:18<00:00,  1.84s/question]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Saved 10 questions and queries to questions_queries.jsonl
