# Ollama Inference Notebook

This notebook contains the scripts for LLM inference using an Ollama container running on the Docker host. It is a playground for testing purposes.

## Stage 0 - import libraries
At stage 0 we define all imports necessary to run our subsequent code depending on various libraries.

In [1]:
# this definition exposes all python module imports that should be available in all subsequent commands
import json
import numpy as np
import pandas as pd
import requests
# ...
# global constants
# base URL of the Ollama HTTP service; apply() and compute() append "/api/chat" to it
ollama_url = "http://ollama:11434"
# directory prefix under which save() and load() keep the "<name>.json" model files
MODEL_DIRECTORY = "/srv/app/model/data/"

In [2]:
# THIS CELL IS NOT EXPORTED - free notebook cell for testing or development purposes
# quick sanity check of the library versions available in this environment
print(f"numpy version: {np.__version__}")
print(f"pandas version: {pd.__version__}")

numpy version: 1.22.1
pandas version: 2.0.3


## Stage 1 - get a data sample from Splunk
In Splunk run a search to pipe a dataset into your notebook environment. Note: mode=stage is used in the | fit command to do this.

In [4]:
# this cell is not executed from MLTK and should only be used for staging data into the notebook environment
def stage(name):
    """Load a staged data sample and its parameters from the local data/ folder.

    Reads "data/<name>.csv" into a DataFrame and "data/<name>.json" into a
    Python object, and returns them as the tuple (df, param).
    """
    base_path = "data/" + name

    with open(base_path + ".csv", 'r') as csv_handle:
        frame = pd.read_csv(csv_handle)

    with open(base_path + ".json", 'r') as json_handle:
        settings = json.load(json_handle)

    return frame, settings

In [None]:
# THIS CELL IS NOT EXPORTED - free notebook cell for testing or development purposes
# load the staged sample (data/llm_rag_ollama_text_processing.csv and .json) into the session
df, param = stage("llm_rag_ollama_text_processing")
# quick look at what was staged: summary statistics of the data and the raw parameter object
print(df.describe())
print(param)

## Stage 2 - create and initialize a model

In [None]:
# initialize your model
# available inputs: data and parameters
# returns the model object which will be used as a reference to call fit, apply and summary subsequently
def init(df,param):
    """Create the model object handed to the later stages.

    Neither df nor param is inspected; the returned dict only carries a
    placeholder 'hyperparameter' entry.
    """
    return {'hyperparameter': 42.0}

In [None]:
# THIS CELL IS NOT EXPORTED - free notebook cell for testing or development purposes
# keep the model object in the session: the fit/apply test cells further down reference `model`
model = init(df,param)
print(model)

## Stage 3 - fit the model

In [None]:
# train your model
# returns a fit info json object and may modify the model object
def fit(model,df,param):
    """Placeholder training step: nothing is trained and the model is left untouched.

    Returns the fit info object {"message": "model trained"}.
    """
    return {"message": "model trained"}

In [None]:
# THIS CELL IS NOT EXPORTED - free notebook cell for testing or development purposes
# expects `model`, `df` and `param` to exist in the notebook session (e.g. model = init(df,param))
print(fit(model,df,param))

## Stage 4 - apply the model

In [12]:
# From the fit command, we pass the parameters model_name and prompt.
# sample prompt: You will examine if the email content given by the user is phishing. 
#                Only output **Phishing** if the content is phishing. 
#                Only output **Legit** if the email is legitimate. Do not give extra information.
def apply(model,df,param):
    """Send every value of the 'text' field to the Ollama chat endpoint and collect the replies.

    Args:
        model: model object from init/load; not used by this function.
        df: DataFrame that must contain a 'text' column; one request is sent per row.
        param: parameter object; param['options']['params'] must provide the
            strings 'prompt' and 'model_name' (surrounding double quotes are stripped).

    Returns:
        A DataFrame with the columns 'Result' and 'Duration', one row per input row.
        'Duration' is the reply's total_duration divided by 1e9, rounded to two
        decimals and suffixed with " s". When an input is missing, a single-row
        frame with an explanatory 'Result' and 'Duration' == "ERROR" is returned.
        A row whose request fails or whose reply cannot be parsed gets
        'Duration' == "ERROR" instead of aborting the whole run.
    """
    def error_frame(message):
        # single-row result in the same Result/Duration shape as a successful run
        return pd.DataFrame(data={'Result': [message], 'Duration': ["ERROR"]})

    # KeyError: column/key missing; TypeError: df/param is None or not subscriptable;
    # AttributeError: value has no .values / .strip (e.g. a non-string parameter)
    lookup_errors = (KeyError, TypeError, AttributeError)

    try:
        texts = df["text"].values.tolist()
    except lookup_errors:
        return error_frame("ERROR: Please make sure you have a field in the search result named 'text'")

    try:
        prompt = param['options']['params']['prompt'].strip("\"")
    except lookup_errors:
        return error_frame("ERROR: Please make sure you set the parameter 'prompt'")

    try:
        model_name = param['options']['params']['model_name'].strip("\"")
    except lookup_errors:
        return error_frame("ERROR: Please make sure you set the parameter 'model_name'")

    uri = f"{ollama_url}/api/chat"
    headers = {'Content-Type': 'application/json'}
    outputs_label = []
    outputs_duration = []

    for text in texts:
        payload = json.dumps({
            "model": model_name,
            "messages": [
                {"role": "user", "content": prompt},
                {"role": "user", "content": text},
            ],
            "stream": False,
        })

        # A transport failure or a non-JSON body must only fail this row, not the whole search.
        try:
            response = requests.post(uri, headers=headers, data=payload).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            outputs_label.append(f"ERROR: {e}")
            outputs_duration.append("ERROR")
            continue

        # Parse both fields before appending anything so the two output lists
        # always stay the same length, even when only one field is missing.
        try:
            label = response['message']['content']
            duration = str(round(int(response['total_duration']) / 1000000000, 2)) + " s"
        except (KeyError, TypeError, ValueError) as e:
            # The reply lacks the expected fields (presumably an error payload);
            # surface the raw reply when there is one, otherwise the parse error.
            outputs_label.append(response if response else f"ERROR: {e}")
            outputs_duration.append("ERROR")
            continue

        outputs_label.append(label)
        outputs_duration.append(duration)

    return pd.DataFrame(data={'Result': outputs_label, 'Duration': outputs_duration})

In [None]:
# THIS CELL IS NOT EXPORTED - free notebook cell for testing or development purposes
# expects `model`, `df` and `param` to exist in the notebook session;
# apply() posts one chat request per row of df to the configured Ollama endpoint
print(apply(model,df,param))

## Stage 5 - save the model

In [None]:
# save model to name in expected convention "<algo_name>_<model_name>"
def save(model,name):
    """Write the model as JSON to MODEL_DIRECTORY + name + ".json" and return the model unchanged."""
    target_path = MODEL_DIRECTORY + name + ".json"
    with open(target_path, 'w') as handle:
        json.dump(model, handle)
    return model

## Stage 6 - load the model

In [None]:
# load model from name in expected convention "<algo_name>_<model_name>"
def load(name):
    """Read the JSON file MODEL_DIRECTORY + name + ".json" and return its parsed content."""
    source_path = MODEL_DIRECTORY + name + ".json"
    with open(source_path, 'r') as handle:
        return json.load(handle)

## Stage 7 - provide a summary of the model

In [21]:
# return a model summary
def summary(model=None):
    """Return the numpy and pandas versions under the "version" key; model is not inspected."""
    versions = {"numpy": np.__version__, "pandas": pd.__version__}
    return {"version": versions}

def compute(model,df,param):
    """Run the same per-row Ollama chat inference as apply().

    This function used to be a line-for-line copy of apply(); it now delegates
    to it so the two entry points cannot drift apart.

    Args:
        model: model object; passed through to apply().
        df: DataFrame with a 'text' column.
        param: parameter object providing param['options']['params']['prompt']
            and ['model_name'].

    Returns:
        The DataFrame produced by apply(), with the columns 'Result' and 'Duration'.
    """
    return apply(model, df, param)

After implementing your fit, apply, save and load you can train your model:<br>
| makeresults count=10<br>
| streamstats c as i<br>
| eval s = i%3<br>
| eval feature_{s}=0<br>
| foreach feature_* [eval &lt;&lt;FIELD&gt;&gt;=random()/pow(2,31)]<br>
| fit MLTKContainer algo=barebone s from feature_* into app:barebone_model<br>

Or apply your model:<br>
| makeresults count=10<br>
| streamstats c as i<br>
| eval s = i%3<br>
| eval feature_{s}=0<br>
| foreach feature_* [eval &lt;&lt;FIELD&gt;&gt;=random()/pow(2,31)]<br>
| apply barebone_model as the_meaning_of_life

## End of Stages
All subsequent cells are not tagged and can be used for further freeform code