In [None]:
# Import the Llama class
from llama_cpp import Llama

In [None]:
# Relative path handed to Llama as `model_path`
llama_path = "models/llama_model"

# Initialize the Llama model
# (the later cells in this notebook all reuse this single `llm` object)
llm = Llama(model_path=llama_path)

### Tune Parameters

In [None]:
# Prompt sent to the model as a plain text completion
question = "What is the most used database for data storage?"

# Tuning parameters, passed as keyword arguments next to the prompt:
#   max_tokens  - upper bound on the number of generated tokens (short / medium / long answers)
#   temperature - higher values give more creative answers
#   top_k       - higher values give a more diverse response
#   top_p       - lower values give a more focused response (0 to 1)
response = llm(
    prompt=question,
    max_tokens=100,
    temperature=0.5,
    top_k=20,
    top_p=0.4,
)

# Show the whole response object, not just the generated text
print(response)

In [None]:
# Extract the first choice and generated text
# (the response is indexed by "choices", then position 0, then "text")
extracted = response["choices"][0]["text"]

# Print the extracted text
print(extracted)

### Assign Roles

In [None]:
# Chat roles used with the Llama model
#   system : sets the personality and style of the llama
#   user   : represents the person asking the questions

system_message = "You are a business consultant who gives data-driven answers."
user_message = "What are the key factors in a successful marketing strategy?"

# One dict per turn, in conversation order: system instruction first, then the user question
message_list = [
    dict(role="system", content=system_message),
    dict(role="user", content=user_message),
]

# Run a chat completion over the system + user messages built above
response = llm.create_chat_completion(
  messages = message_list,
)

In [None]:
# For a chat completion the reply text sits under "message" -> "content" of the first choice
response_text = response["choices"][0]["message"]["content"]
print(response_text)

### Guide Outputs

##### Zero-shot prompting

Zero-shot prompting gives the model a single instruction to perform a task, without any worked examples.

In [None]:
# Zero-shot: the prompt is a bare instruction with no examples
text = "Summarize recent mergers in the airline industry."
output = llm(text)

# Display the generated text; without this the completion is computed and never shown
print(output["choices"][0]["text"])

In [None]:
# Formatted prompt: labelled Instruction / Question lines, ending on an open "Answer:" label
prompt = (
    "\n"
    "Instruction: Explain the concept of gravity in simple terms.\n"
    "Question: What is gravity?\n"
    "Answer:\n"
)

# Send the prompt to the model: at most 15 tokens, with "Question:" as the stop sequence
output = llm(
    prompt,
    stop=["Question:"],
    max_tokens=15,
)
print(output["choices"][0]["text"])

##### Few-shot prompting

Few-shot prompting includes a few worked examples in the prompt so the model can infer how to perform the task.

In [None]:
# Few-shot: two fully worked aircraft records show the model the format to continue
text = """
Aircraft Model: Boeing 787-9
Passenger Capacity: 296
Fuel Consumption: 2.5 liters per seat per 100 km

Aircraft Model: Airbus A321XLR
Passenger Capacity: 244
Fuel Consumption: 2.9 liters per seat per 100 km
"""

output = llm(text)

# Display the generated text; without this the completion is computed and never shown
print(output["choices"][0]["text"])

##### Using Stop Words

Example: a question-answering application, where a stop word keeps the model from generating past its answer.

In [None]:
text = "Which airlines operate direct flights from London to Singapore?"

# `stop` lists sequences that end generation; "Q:" keeps the answer from running on
# into a new question.
output = llm(text, stop=["Q:"]) # Stop words end generation as soon as the model emits one of them
print(output["choices"][0]["text"])

In [None]:
# Few-shot prompt: three labelled reviews, then a fourth whose sentiment is left open.
# Written line by line so the trailing spaces inside the prompt stay visible.
prompt = (
    "Review 1: I ordered from this place last night, and I'm impressed! \n"
    "Sentiment 1: Positive,\n"
    "Review 2: My order was delayed by over an hour without any updates. Disappointing!  \n"
    "Sentiment 2: Negative,\n"
    "Review 3: The food quality is top-notch. Highly recommend! \n"
    "Sentiment 3: Positive,\n"
    "Review 4: Delicious food, and excellent customer service! \n"
    "Sentiment 4:"
)

# Send the prompt to the model with a stop word: at most 2 tokens, halting on "Review"
output = llm(
    prompt,
    stop=["Review"],
    max_tokens=2,
)
print(output["choices"][0]["text"])

### Structured Output

In [None]:
# JSON responses with chat completion
# "schema" is a JSON Schema describing the object the reply should contain.
response_format = {
  "type" : "json_object",
  "schema" : {
    "type" : "object",
    "properties" : {
      "Product Name" : { "type" : "string" },
      "Category" : { "type" : "string" },
      # JSON Schema has no "float" type; decimal values are declared as "number"
      "Sales Growth" : { "type" : "number" }
    }
  }
}

# Two turns: the system role is defined as the market analyst, the user role passes the request
message_list = [
    dict(
        role="system",
        content="You are a food industry market analyst. You analyze sales data and generate structured JSON reports of top-selling beverages.",
    ),
    dict(
        role="user",
        content="Provide a structured JSON report of the top-selling beverages this year.",
    ),
]

# Request a chat completion with `response_format` attached
output = llm.create_chat_completion(messages=message_list, response_format=response_format)

# Print only the reply text of the first choice
print(output["choices"][0]["message"]["content"])

### Build Multi-Turn Conversations

In [None]:
class Conversation:
  """Multi-turn chat session that keeps the running message history for one model.

  The history always starts with a system message built from ``system_prompt``.
  Each successful ``create_completion`` call appends the user turn and the model's
  reply, so later calls send the whole conversation.
  """

  def __init__(self, llm: "Llama", system_prompt='', history=None):
    """Set up the session.

    Args:
      llm: Object exposing ``create_chat_completion(messages=...)``.
      system_prompt: Content of the leading system message.
      history: Optional earlier messages (dicts with "role" and "content") placed
        after the system message. The sequence is copied, never modified.
    """
    self.llm = llm
    self.system_prompt = system_prompt
    # A `None` default avoids sharing one mutable list object between instances
    earlier_turns = list(history) if history is not None else []
    self.history = [{"role": "system", "content": self.system_prompt}] + earlier_turns

  def create_completion(self, user_prompt=''):
    """Send ``user_prompt`` along with the stored history and return the reply text.

    The user turn and the reply are recorded only after the model call succeeds.
    If the call raises, the exception propagates and the history is unchanged.

    Args:
      user_prompt: Content of the new user message.

    Returns:
      The "content" field of the first choice's message.
    """
    user_turn = {"role": "user", "content": user_prompt}
    output = self.llm.create_chat_completion(messages=self.history + [user_turn])
    conversation_result = output["choices"][0]["message"]

    # Commit both turns together so a failed call cannot leave a dangling user message
    self.history.extend([user_turn, conversation_result])
    return conversation_result["content"]

In [None]:
# Start a session whose system message casts the model as a travel assistant
conversation = Conversation(llm, system_prompt="You are a virtual travel assistant helping with planning trips.")

# First turn
response1 = conversation.create_completion("What are some destinations in France for a short weekend break?")
print(f"Response 1: {response1}")

# Second turn: sent together with the first exchange, which the session keeps in its history
response2 = conversation.create_completion("How about Spain?")
print(f"Response 2: {response2}")