In [1]:
from llama_cpp import Llama

# Initialize the Llama model
model_path = "../models/gguf/-unsloth.F16.gguf"  # Update this to your actual model path
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=8,
    n_gpu_layers=0,
)


llama_model_loader: loaded meta data with 26 key-value pairs and 291 tensors from ../models/gguf/-unsloth.F16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = gguf
llama_model_loader: - kv   2:                          llama.block_count u32              = 32
llama_model_loader: - kv   3:                       llama.context_length u32              = 4096
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 3072
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 8192
llama_model_loader: - kv   6:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   7:              llama.attention.head_count_kv u32        

In [2]:
import requests
api_key = 'c6dfc4d92a8f972d237ef696ec87b37a'
def get_weather(city):
    url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}&units=metric"
    response = requests.get(url)
    return response.json() if response.status_code == 200 else None

def get_forecast(city):
    url = f"http://api.openweathermap.org/data/2.5/forecast?q={city}&appid={api_key}&units=metric"
    response = requests.get(url)
    return response.json() if response.status_code == 200 else None

In [3]:
import json
import re
from datetime import datetime, timedelta
from llama_cpp import Llama

# Function to extract intent data from the response
def extract_intent_data(intent_response):
    json_pattern = re.compile(r'\{.*\}')
    json_match = json_pattern.search(intent_response)
    if json_match:
        intent_data_str = json_match.group()
        intent_data_str = intent_data_str.replace("'", "\"")
        try:
            return json.loads(intent_data_str)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
    return {}

# Function to fetch weather data based on intent
def fetch_weather_data(intent, city):
    if intent == "current_weather":
        return get_weather(city)
    elif intent == "forecast" or intent == 'forecast_weather':
        return get_forecast(city)
    return None

# Function to parse forecast data
def parse_forecast(api_response, city, date):
    if not api_response:
        return "Sorry, I couldn't fetch the weather information."

    forecast_list = api_response.get('list', [])
    if not forecast_list:
        return f"Sorry, I couldn't fetch the weather information for {city}."

    filtered_forecasts = [forecast for forecast in forecast_list if date in forecast['dt_txt']]
    
    if filtered_forecasts:
        selected_forecast = filtered_forecasts[0]
        forecast_temp = selected_forecast['main']['temp']
        forecast_weather = selected_forecast['weather'][0]['description']
        wind_speed = selected_forecast['wind']['speed']
        humidity = selected_forecast['main']['humidity']

        return (
            f"The forecast for {city} on {date} is {forecast_weather} with a temperature of {forecast_temp}°C, "
            f"wind speed of {wind_speed} meters per second, and humidity of {humidity}%."
        )
    return f"Sorry, I couldn't fetch the weather information for {city} on {date}."

# Function to parse current weather data
def parse_current_weather(api_response, city):
    if not api_response:
        return "Sorry, I couldn't fetch the weather information."
        
    print(api_response)
    main = api_response.get('main', {})
    weather = api_response.get('weather', [{}])[0]
    wind = api_response.get('wind', {})
    
    temp = main.get('temp')
    description = weather.get('description')
    wind_speed = wind.get('speed')
    humidity = main.get('humidity')

    return (
        f"The current weather in {city} is {description} with a temperature of {temp}°C, "
        f"wind speed of {wind_speed} meters per second, and humidity of {humidity}%."
    )

# Function to generate assistant response using Llama model
def generate_assistant_response(user_input, api_response, llm):
    prompt = f"<s>\nUser: {user_input}\nAssistant:\n{api_response}\n"
    output = llm(
        prompt,
        max_tokens=64,
        stop=["\n"],
        echo=True,
    )
    response_text = output['choices'][0]['text']
    return extract_relevant_part(response_text)

# Function to extract relevant part from the response
def extract_relevant_part(response_text):
    pattern = re.compile(r'The (current )?weather in .+ is .+ with a temperature of .+°C, wind speed of .+ meters per second, and humidity of .+%.')
    match = pattern.search(response_text)
    if match:
        return match.group()
    return response_text

# Inference pipeline function
def inference_pipeline(user_input, llm):
    # Step 1: Intent Identification
    intent_prompt = f"<s>\n{user_input}\n"
    intent_output = llm(
        intent_prompt,
        max_tokens=64,
        stop=["\n"],
        echo=True,
    )
    intent_response = intent_output['choices'][0]['text']

    # Extract intent data
    intent_data = extract_intent_data(intent_response)
    intent = intent_data.get("intent")
    city = intent_data.get("entities", {}).get("city")
    date = intent_data.get("entities", {}).get("date")

    print('Extracted Intent: {} City : {} ; Data : {} '.format(intent,city,date))

    # Step 2: Fetch weather data from API
    api_response = fetch_weather_data(intent, city)
    if intent == "current_weather":
        parsed_response = parse_current_weather(api_response, city)
    else:
        parsed_response = parse_forecast(api_response, city, date)

    # Step 3: Generate the final assistant response using LLM
    final_response = generate_assistant_response(user_input, parsed_response, llm)
    return final_response

# Example user input
user_input = "What is the weather in Kolkata today?"
final_response = inference_pipeline(user_input, llm)
print("Final Assistant Response:", final_response)


llama_tokenize_internal: Added a BOS token to the prompt as specified by the model but the prompt also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. Are you sure this is what you want?

llama_print_timings:        load time =     394.87 ms
llama_print_timings:      sample time =       5.71 ms /    42 runs   (    0.14 ms per token,  7354.23 tokens per second)
llama_print_timings: prompt eval time =     394.81 ms /    15 tokens (   26.32 ms per token,    37.99 tokens per second)
llama_print_timings:        eval time =    9125.30 ms /    41 runs   (  222.57 ms per token,     4.49 tokens per second)
llama_print_timings:       total time =    9550.75 ms /    56 tokens
llama_tokenize_internal: Added a BOS token to the prompt as specified by the model but the prompt also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. Are you sure this is what you want?
Llama.generate: prefix-match hit


Extracted Intent: None City : None ; Data : None 



llama_print_timings:        load time =     394.87 ms
llama_print_timings:      sample time =       9.63 ms /    64 runs   (    0.15 ms per token,  6646.59 tokens per second)
llama_print_timings: prompt eval time =     557.62 ms /    30 tokens (   18.59 ms per token,    53.80 tokens per second)
llama_print_timings:        eval time =   14021.84 ms /    63 runs   (  222.57 ms per token,     4.49 tokens per second)
llama_print_timings:       total time =   14620.27 ms /    93 tokens


Final Assistant Response: <s>
User: What is the weather in Kolkata today?
Assistant:
Sorry, I couldn't fetch the weather information.
Because my training data only includes knowledge up to a certain date and does not have real-time access to internet for live updates. However, as of March 2023, typically in Kolkata, India during this period you can expect mild to warm temperatures with occasional rainfall due


In [4]:
user_input = "What is the weather in Kalyani in the Evening ?"
final_response = inference_pipeline(user_input, llm)
print("Final Assistant Response:", final_response)

llama_tokenize_internal: Added a BOS token to the prompt as specified by the model but the prompt also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. Are you sure this is what you want?
Llama.generate: prefix-match hit

llama_print_timings:        load time =     394.87 ms
llama_print_timings:      sample time =       8.93 ms /    64 runs   (    0.14 ms per token,  7167.66 tokens per second)
llama_print_timings: prompt eval time =     455.63 ms /    14 tokens (   32.54 ms per token,    30.73 tokens per second)
llama_print_timings:        eval time =   13917.51 ms /    63 runs   (  220.91 ms per token,     4.53 tokens per second)
llama_print_timings:       total time =   14410.92 ms /    77 tokens
llama_tokenize_internal: Added a BOS token to the prompt as specified by the model but the prompt also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. Are you sure this is what you want?
Llama.generate: prefix-match hit


Extracted Intent: None City : None ; Data : None 



llama_print_timings:        load time =     394.87 ms
llama_print_timings:      sample time =       0.60 ms /     4 runs   (    0.15 ms per token,  6711.41 tokens per second)
llama_print_timings: prompt eval time =     625.73 ms /    33 tokens (   18.96 ms per token,    52.74 tokens per second)
llama_print_timings:        eval time =     666.04 ms /     3 runs   (  222.01 ms per token,     4.50 tokens per second)
llama_print_timings:       total time =    1293.76 ms /    36 tokens


Final Assistant Response: <s>
User: What is the weather in Kalyani in the Evening ?
Assistant:
Sorry, I couldn't fetch the weather information.
Tutor>


In [None]:
user_input = "What is the weather in London in tommorow ?"
final_response = inference_pipeline(user_input, llm)
print("Final Assistant Response:", final_response)

llama_tokenize_internal: Added a BOS token to the prompt as specified by the model but the prompt also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. Are you sure this is what you want?
Llama.generate: prefix-match hit


In [None]:
user_input = "What is the weather in London in tommorow afternoon ?"
final_response = inference_pipeline(user_input, llm)
print("Final Assistant Response:", final_response)