In [None]:
'''
GPT-4 analysis of the test code.
This notebook calls the GPT-4 API to ask the following question about each test file:

"What type of testing is done in the following code?"
'''

In [1]:
import os
from os.path import join as pjoin
import pandas as pd
from shutil import copytree
from transformers import AutoTokenizer
import openai
import torch
from docx import Document
import json
import markdown
from tqdm import tqdm
from jinja2 import Environment, FileSystemLoader
import time

In [2]:
# Load the OpenAI API key from the local key file.
# The context manager closes the file handle, and strip() drops the trailing
# newline most editors add, which would otherwise become part of the key.
with open("/home/safwat/chatgpt_api.key") as key_file:
    openai.api_key = key_file.read().strip()

In [3]:
# Source tree of component tests; only referenced by the (commented-out)
# copy step that populates the inspection directory.
test_code_repository = (
    "/home/safwat/Documents/IoT_Testing/integration_samples/core/tests/components"
)

# Directory that is walked for each app's .py files and that receives the
# generated per-app HTML reports.
inspection_directory = "gpt_test_code_inspection"

In [4]:
# Load the analysis table and keep the 50 rows with the largest "avg lines"
# value; `apps` is the alphabetically sorted list of their "app" entries.
tc_analysis_df = pd.read_csv("integration_test_codes_analysis.csv", index_col=0)
apps = sorted(
    tc_analysis_df.nlargest(50, "avg lines")["app"].tolist()
)

In [5]:
# #Top 50 apps when sorted by the avg lines
# #Copy the apps to inspection directory
# for app in apps:
#     copytree(pjoin(test_code_repository,app),pjoin(inspection_directory,app),dirs_exist_ok=False)

In [6]:
#GPT Prompts
def getGPTAnswer(question, model="gpt-4"):
    """Send a single-turn chat prompt and return the text of the first reply.

    Args:
        question: Full prompt text, sent as one "user" message.
        model: Chat model name passed to the API. Defaults to "gpt-4",
            which is what this notebook used before the parameter existed.

    Returns:
        The ``content`` string of the first choice in the response.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises; errors are not
        handled here so the caller can decide how to report them.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": question}],
    )
    # The response supports item access directly, so there is no need to
    # serialise it to JSON and parse it back before indexing.
    return response["choices"][0]["message"]["content"]

In [7]:
def _gpt2_tokenizer():
    """Return the GPT-2 tokenizer, loading it only on the first call."""
    if not hasattr(_gpt2_tokenizer, "_instance"):
        _gpt2_tokenizer._instance = AutoTokenizer.from_pretrained("gpt2")
    return _gpt2_tokenizer._instance


def return_file_chunk(file_path, max_tokens=8000):
    """Return the leading lines of a text file that fit in a token budget.

    Lines are taken from the start of the file, in order, and collection
    stops at the first line that brings the running token count up to
    ``max_tokens`` or beyond; that line is not included.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).
        max_tokens: Token budget. Defaults to 8000, the value that was
            previously hard-coded.

    Returns:
        The kept lines joined together, each terminated by a newline.
        Returns an empty string when even the first line exceeds the budget.

    Note:
        Tokens are counted with the GPT-2 tokenizer and newline characters
        are not counted, so the figure is only an estimate of what the chat
        model will see.
    """
    tokenizer = _gpt2_tokenizer()

    # Close the handle deterministically instead of relying on the GC.
    with open(file_path, encoding="utf-8") as source_file:
        file_content = source_file.read()

    kept_lines = []
    totalTokens = 0
    for line in file_content.split('\n'):
        # encode() returns the token ids of the line; its length is the count.
        totalTokens += len(tokenizer.encode(line))
        if totalTokens >= max_tokens:
            break
        kept_lines.append(line + '\n')
    return "".join(kept_lines)

In [8]:
def getCodeDescriptionGPT(fileDir,headerQuestion):
    """Ask GPT a question about the leading chunk of one file.

    The prompt is `headerQuestion`, a newline, then the text returned by
    `return_file_chunk(fileDir)`. The reply from `getGPTAnswer` is returned
    unchanged.
    """
    code_excerpt = return_file_chunk(fileDir)
    return getGPTAnswer("\n".join((headerQuestion, code_excerpt)))

In [9]:
# def createDocx(content,outputFile):
#     # Create a new Document
#     doc = Document()
    
#     # Add the string content to the document
#     doc.add_paragraph(content)
    
#     # Save the document as a .docx file
#     doc.save(outputFile)

In [10]:
def createReport(data,outputFile):
    """Render `data` through ``template.html`` and write it to `outputFile`.

    Args:
        data: Mapping passed to the template as its render context. Its
            "title" entry is Markdown text and is converted to HTML before
            rendering. The mapping itself is left unmodified.
        outputFile: Path of the HTML file to write (UTF-8, overwritten if
            it already exists).

    The template is looked up as ``template.html`` in the current working
    directory.
    """
    # Work on a shallow copy so the caller's dict keeps its raw title and a
    # second call with the same dict does not convert the title twice.
    context = dict(data)
    context["title"] = markdown.markdown(context["title"])

    # NOTE(review): the Environment is created without autoescape, so item
    # text reaches the page as-is unless template.html escapes it - confirm.
    env = Environment(loader=FileSystemLoader("."))
    template = env.get_template("template.html")

    output = template.render(context)

    # Explicit UTF-8 so non-ASCII text in the answers cannot fail to encode
    # under a different platform default.
    with open(outputFile, "w", encoding="utf-8") as file:
        file.write(output)


In [16]:
def combineReports(reports,outputFile):
    """Concatenate report files into a single output file.

    Args:
        reports: Iterable of file names, each resolved relative to
            `inspection_directory`. They are concatenated in the given order.
        outputFile: Path of the combined file to write (overwritten).

    The files are copied byte-for-byte, so the result does not depend on the
    platform's default text encoding. An entry in `reports` that resolves to
    `outputFile` itself is skipped, so a previous combined report is never
    embedded in the new one.
    """
    output_path = os.path.abspath(outputFile)

    # Read everything before opening the output, so the output is only
    # truncated once all inputs have been read successfully.
    parts = []
    for input_file in reports:
        input_path = pjoin(inspection_directory, input_file)
        if os.path.abspath(input_path) == output_path:
            continue
        with open(input_path, 'rb') as file:
            parts.append(file.read())

    with open(outputFile, 'wb') as output_file:
        output_file.write(b''.join(parts))

In [12]:
# data = {
#         "title": "Title with bold font",
#         "items": [
#             "First item",
#             "Second item",
#             "Third item"
#         ]
#     }

# createReport(data,"sample.html")

In [13]:
headerQuestion="What type of testing is done in the following code?"

# Build one HTML report per app: ask GPT the header question about every
# .py file found under the app's folder in the inspection directory.
for app in tqdm(apps):
    report_path = pjoin(inspection_directory, app + ".html")
    # Resume support: an app whose report already exists is not queried again.
    if os.path.exists(report_path):
        continue

    app_dir = pjoin(inspection_directory, app)
    # Sorted so the report lists files in a stable order across runs;
    # os.walk yields entries in whatever order the file system returns them.
    myFileDirs = sorted(
        pjoin(root, name)
        for root, _, names in os.walk(app_dir)
        for name in names
        if name.endswith(".py")
    )

    report = {"title": app, "items": []}
    for fileDir in tqdm(myFileDirs):
        try:
            answer = getCodeDescriptionGPT(fileDir, headerQuestion)
            # Short pause between successful requests.
            time.sleep(1)
        except Exception as e:
            # Best effort: keep going, but label the entry so a failed
            # request cannot be mistaken for a model answer in the report.
            answer = "[request failed] " + type(e).__name__ + ": " + str(e)
        # Path relative to the app folder, always written with "/" so the
        # report looks the same on every platform.
        shortDir = os.path.relpath(fileDir, app_dir).replace(os.sep, "/")
        report["items"].append(shortDir + ": " + answer)
    createReport(report, report_path)
            

  0%|                                                    | 0/50 [00:00<?, ?it/s]
  0%|                                                    | 0/25 [00:00<?, ?it/s][A
  4%|█▊                                          | 1/25 [00:24<09:41, 24.22s/it][A
  8%|███▌                                        | 2/25 [00:32<05:47, 15.09s/it][A
 12%|█████▎                                      | 3/25 [00:42<04:37, 12.63s/it][A
 16%|███████                                     | 4/25 [00:53<04:12, 12.02s/it][A
 20%|████████▊                                   | 5/25 [01:17<05:24, 16.24s/it][A
 24%|██████████▌                                 | 6/25 [01:24<04:08, 13.07s/it][A
 28%|████████████▎                               | 7/25 [01:33<03:30, 11.72s/it][A
 32%|██████████████                              | 8/25 [01:46<03:26, 12.15s/it][A
 36%|███████████████▊                            | 9/25 [01:56<03:03, 11.48s/it][A
 40%|█████████████████▏                         | 10/25 [02:08<02:53, 11.55s/it

FileNotFoundError: [Errno 2] No such file or directory: 'prometheus.html'

In [18]:
# Merge all per-app reports into one file. The combined file lives in the
# same directory, so it must be excluded from its own inputs; the name is
# defined once so the filter and the output path cannot drift apart.
combined_report_name = "reports_combined.html"
# "report_combined.html" was the name the old filter checked for; it is still
# excluded in case a file with that name exists from an earlier run.
excluded_reports = {combined_report_name, "report_combined.html"}
allReports = sorted(
    report
    for report in os.listdir(inspection_directory)
    if report.endswith("html") and report not in excluded_reports
)
combineReports(allReports, pjoin(inspection_directory, combined_report_name))

In [None]:
# Audible "finished" signal.
duration = 1000  # milliseconds
freq = 440  # Hz
try:
    # winsound is part of the standard library on Windows only, so it is
    # imported here rather than in the shared import cell.
    import winsound
except ImportError:
    # Elsewhere, fall back to the terminal bell character instead of failing.
    print("\a", end="", flush=True)
else:
    winsound.Beep(freq, duration)

In [None]:
# myFiles=[pjoin(a,file) for a,_,files in os.walk(pjoin(inspection_directory,"homeassistant")) for file in files if file.endswith(".py")]
# text='/'.join(myFiles[6].split("/")[2:])
# text