In [5]:
# Imports
import pandas as pd
from dotenv import load_dotenv
import os
import sqlite3

# Import utils
from utils.functions import llm_process_minibatch,generate_batch
from utils.testing_harness import unit_test

# Load environment variables from the .env file into the process environment
_ = load_dotenv()
# Read the configured paths; os.getenv returns None for any variable that is not set
cdc_tickets_path = os.getenv('CDC_TICKETS_PATH')
mcmn_tickets_path = os.getenv('MCMN_TICKETS_PATH')
system_message_path = os.getenv('SYSTEM_MESSAGE_PATH')
db_path = os.getenv('DB_PATH')

# Load the system message; the `with` block closes the file handle as soon as
# the text has been read instead of leaving it open until garbage collection
with open(system_message_path, 'r', encoding='utf-8') as system_message_file:
    SYSTEM_MESSAGE = system_message_file.read()

# Load a batch of unclassified tickets; `batchsize` sets how many are requested
df = generate_batch(
    db_path=db_path,
    mcmn_tickets_path=mcmn_tickets_path,
    batchsize=10
)

In [6]:
minibatch_size = 2

# Open one connection for the whole run. sqlite3's connection context manager
# only commits or rolls back the pending transaction -- it does not close the
# connection -- so the connection is closed explicitly in the `finally` clause.
conn = sqlite3.connect(db_path)
try:
    # Iterate batch in minibatches; enumerate supplies the 1-based batch counter
    for ctr, i in enumerate(range(0, df.shape[0], minibatch_size), start=1):
        # Clamp the end index so a short final minibatch is reported correctly
        end = min(i + minibatch_size, df.shape[0])
        msg = f'Processing batch {ctr}, records {i}:{end}...'
        print(msg, end='', flush=True)
        # Create minibatch (a copy, so the column added below does not touch df)
        minibatch_df = df.iloc[i:end].copy()
        # Process minibatch
        res = llm_process_minibatch(
            cdf=minibatch_df,
            system_message=SYSTEM_MESSAGE,
            verbose=False
        )
        # Attach results to minibatch dataframe; res is looked up by each row's
        # 'number' value, so a missing entry raises KeyError rather than being
        # silently stored as an empty result
        minibatch_df.loc[:, 'evident_root_cause'] = minibatch_df['number'].apply(
            lambda number: res[number]
        )
        # Store to database; `with conn` commits after every minibatch so rows
        # already written are kept if a later minibatch fails
        with conn:
            minibatch_df.to_sql("inc", conn, if_exists="append", index=False)
        # Carriage return rewrites the in-progress line with the completion marker
        print('\r' + msg + ' Done!')
finally:
    conn.close()


Processing batch 1, records 0:2... Done!
Processing batch 2, records 2:4... Done!
Processing batch 3, records 4:6... Done!
Processing batch 4, records 6:8... Done!
Processing batch 5, records 8:10... Done!
