In [92]:
# imports
import os
from glob import glob                       # creating lists of file in directories
import groq
from groq import Groq
import json                                 # pretty print of groq ChatCompletion object
from dotenv import load_dotenv              # load of .env variables
from datetime import datetime               # for timestamping output


Completed 2024-09-27_13-03


In [None]:
# Import variables from .env file
load_dotenv()
# Global variables
cwd = os.getcwd()

CODEDATA = os.path.join(cwd, 'CodeData')
COMMENTEDATA = os.path.join(cwd, 'CommentData')
SUMMARYDATA = os.path.join(cwd, 'SummaryData')
GROQOUTPUT = os.path.join(cwd, 'GroqOutput')

In [None]:
# File lists
codedata_list = glob(os.path.join(CODEDATA, '*.c'))
codedata_list.sort()
commentdata_list = glob(os.path.join(COMMENTEDATA, '*.txt'))
summarydata_list = glob(os.path.join(SUMMARYDATA, '*.txt'))

In [78]:
# chat_completion input variables
base_prompt = 'output the code below with developer comments for each line, use the header "Part I". add a 200 word summary, use the header "Part II":\n'
message_role = 'system'
chat_model = 'llama3-8b-8192'

In [79]:
test_file_1 = codedata_list[0]
line_list = []
print(f"Testing with file: {os.path.basename(test_file_1)}")
with open(test_file_1, 'r') as f:
    line_list.append(sum(1 for line in f))
    message_content = base_prompt + '\n' + f.read()
# print(f"{message_content}")
line_list

Testing with file: code-001-0013.c


[37]

In [80]:
# fetch chat completion with input prompt
def get_chat_completion(message: str, model: str = 'llama3-8b-8192', role: str = 'system') -> str:
    """ 
    source: https://console.groq.com/docs/libraries#Usage
    source: https://github.com/groq/groq-python
    """
    
    client = Groq(
        api_key=os.environ.get("GROQ_API_KEY"),
    )
        
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": role,
                    "content": message
                },
            ],
            model=model,
        )
        
    except groq.APIConnectionError as e:
        print("The server could not be reached")
        print(e.__cause__)  # an underlying Exception, likely raised within httpx.
    except groq.RateLimitError as e:
        print("A 429 status code was received; we should back off a bit.")
    except groq.APIStatusError as e:
        print("Another non-200-range status code was received")
        print(e.status_code)
        print(e.response)
    
    return chat_completion


In [83]:
# loop through first n=LIMIT CodeData files & get chatcompletion 
LIMIT = 1_000
loop_log = []
chat_completion_list = []
for i, c_file in enumerate(codedata_list[:LIMIT]):
    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M')
    # append log entry (format count | time_stamp | step | filename)
    loop_log.append(f"{i:03,} | {time_stamp} | Processing | {os.path.basename(c_file)}")
    # open file to count lines
    with open(c_file, 'r') as f:
        linecount = sum(1 for _ in f)
        loop_log.append(f"{i:03,d} Opened {os.path.basename(c_file)} with {linecount:,d} lines")
    # open file again to read contents
    with open(c_file, 'r') as f:
        message_content = base_prompt + '\n' + f.read()
        
    # call groq api function
    chat_completion = get_chat_completion(message=message_content, role=message_role, model=chat_model)
    
    # append groq ouput to list for use later
    chat_completion_list.append(chat_completion)
    
    # set output path
    groqout_file = os.path.join(GROQOUTPUT, os.path.splitext(os.path.basename(c_file))[0] + '.txt')
    # write to file
    with open(groqout_file, 'w') as f:
        for line in chat_completion.choices[0].message.content:
            f.write(line)
    # add log entry
    loop_log.append(f"{i:03,d} | {time_stamp} | Write | {os.path.basename(groqout_file)}")

print(f"Completed {datetime.now().strftime('%Y-%m-%d_%H-%M')}")
# loop_log
# TODO: error handling, particularly 429 status code (rate limit error)

A 429 status code was received; we should back off a bit.


UnboundLocalError: cannot access local variable 'chat_completion' where it is not associated with a value

In [None]:
loop_log

In [114]:
print(f"{len(chat_completion_list):_d} outputs in chat_completion")
print(f"{len(loop_log):_d} lines in loop_log")
new_line ='\n'
# declare output file path
chat_completion_out = os.path.join(GROQOUTPUT, 'chat_completion_out.txt')
# write output to file
with open(chat_completion_out, 'w') as f:
    # loop through all chat.completion objects in chat_completion_list
    for chat_completion in chat_completion_list:
        # convert each chat completion to json
        parsed_chat_completion = json.loads(chat_completion.json())
        # pretty format the output for readability, and add a new line at the end of each one
        pretty_chat_completion = json.dumps(parsed_chat_completion, indent=4) + new_line
        # read through the json & write to file
        for line in pretty_chat_completion:
            f.write(line)

664 outputs in chat_completion
1_994 lines in loop_log


In [None]:
groqout_file = os.path.join(GROQOUTPUT, os.path.splitext(os.path.basename(test_file_1))[0] + '.txt')
print(f"{groqout_file}")
with open(groqout_file, 'w') as f:
    for line in grog_output_test:
        f.write(line)
        # f.write('\n')