In [24]:
%pip install --upgrade pip

%pip install pandas openai requests openpyxl

[0mNote: you may need to restart the kernel to use updated packages.
Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2
[0mNote: you may need to restart the kernel to use updated packages.


In [25]:
import pandas
import openai
import openpyxl
import requests
import time

In [9]:
# Function to load the API key from a file
def load_api_key(api_key_file_path):
    with open(api_key_file_path, 'r') as file:
        api_key = file.read().strip()
    return api_key

# Load your API key from a file
api_key = load_api_key('api-key.txt')

# Set up the OpenAI API client
openai.api_key = api_key

github_pat = load_api_key('github-pat.txt')
headers = {'Authorization': 'token ' + github_pat}

In [10]:
def get_chat_response(user_prompt, system_message, model="gpt-3.5-turbo-16k", max_tokens=1000):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "system", "content": system_message},
                  {"role": "user", "content": user_prompt}],
        max_tokens=max_tokens,
        temperature = 0.2
    )
    return response.choices[0].message['content'].strip()

In [12]:
def generate_build_pipeline(repo_structure, languages, dependencies):
    system_message = "Your name is Dev bot. You are a brilliant and meticulous engineer assigned to write a GitHub Actions workflow in YAML for the following Github Repository. When you write code, the code works on the first try, is syntactically perfect and is fully complete. The workflow should be able to build and run the application and run the tests if present in the repository. Take into account the current repository's language, frameworks, and dependencies. "

    user_prompt = """
    Analyze the github repository structure, language, framework and dependencies provide below to create a github action build workflow. You will provide the github action workflow as the answer. Only include the yaml file in the output. Do not add any other text before or after the code.
    
    Repository structure:
    {repo_structure}

    Languages: 
    {languages}

    Dependencies: 
    {dependencies}
    
    """

    request_input = {
        'repo_structure': repo_structure,
        'languages': languages,
        'dependencies': dependencies
    }

    user_prompt = user_prompt.format(**request_input)

    response = get_chat_response(user_prompt=user_prompt, system_message=system_message)

    return response.strip("```")

In [13]:
def get_repository_tree(repository_identifier, branch='main'):

    repository_tree = []
    response = requests.get(f'https://api.github.com/repos/{repository_identifier}/git/trees/{branch}', headers=headers)
    data = response.json()

    if response.status_code == 200:
        root_tree = data['tree']
        max_depth = 2

        def get_tree_recursive(tree_sha, current_depth):
            if current_depth > max_depth:
                return

            response = requests.get(f'https://api.github.com/repos/{repository_identifier}/git/trees/{tree_sha}', headers=headers)
            data = response.json()

            if response.status_code == 200:
                for item in data['tree']:
                    if item['type'] == 'tree':
                        repository_tree.append(f"Directory: {item['path']}")
                        get_tree_recursive(item['sha'], current_depth + 1)
                    else:
                        repository_tree.append(f"File: {item['path']}")

        get_tree_recursive(data['sha'], 1)
    else:
        print(f"Failed to fetch tree: {data['message']}")

    return repository_tree

In [14]:
def get_list_of_languages(repository_identifier):
    url = 'https://api.github.com/repos/' + repository_identifier + '/languages'

    response = requests.get(url, headers=headers)
    
    return response.json().keys()

In [15]:
def get_list_of_dependencies(repository_identifier):
    url = 'https://api.github.com/repos/' + repository_identifier + '/dependency-graph/sbom'

    response = requests.get(url, headers=headers)
    response.json()['sbom']['packages']
    dependency_names = [package['name'] + ', version = ' + package['versionInfo'] for package in response.json()['sbom']['packages']]
    
    return dependency_names


In [16]:
def get_default_branch(repository_identifier):
    url = 'https://api.github.com/repos/' + repository_identifier

    response = requests.get(url, headers=headers)
    
    return response.json()['default_branch']

In [17]:
def run_experiment(csvFile):
    for i in range(0,len(csvFile)):
        repo_identifier = csvFile.iloc[i]['GitHub_Repo_Link'].split('github.com/')[1]
        print(repo_identifier)
        try:
            repo_structure = get_repository_tree(repo_identifier, get_default_branch(repo_identifier))
            languages = get_list_of_languages(repo_identifier)
            dependencies = get_list_of_dependencies(repo_identifier)
            csvFile.loc[i,'Generated_Build_Pipeline_File_Content'] = generate_build_pipeline(repo_structure, languages, dependencies)
            # Add delay to avoid rate limiting
            time.sleep(30)
        except Exception as e:
            print(e)
            continue

    csvFile.to_csv('DevOps_LLM_Bot_Test_Data - C#.csv', index=False)

In [43]:
excel_file = 'DevOps_LLM_Bot_Test_Data.xlsx'
excel = pandas.ExcelFile(excel_file)
sheet_names = excel.sheet_names

for sheet_name in sheet_names:
    csv_file = sheet_name + '.csv'
    df = pandas.read_excel(excel_file, sheet_name=sheet_name)
    df.to_csv('data/'+csv_file, index=False)

    

In [46]:
for sheet_name in sheet_names:
    csv_file = sheet_name + '.csv'
    df = pandas.read_csv('data/'+csv_file)
    print("Running experiment for sheet: " + sheet_name + " ... ")
    run_experiment(df)

FileNotFoundError: [Errno 2] No such file or directory: 'data'

In [19]:
run_experiment(csvFile)

DapperLib/Dapper
huiyadanli/RevokeMsgPatcher
jasontaylordev/CleanArchitecture
This model's maximum context length is 16385 tokens. However, your messages resulted in 27982 tokens. Please reduce the length of the messages.
nilaoda/N_m3u8DL-CLI
shadowsocks/shadowsocks-windows
ShareX/ShareX
DapperLib/Dapper
aalhour/C-Sharp-Algorithms
Cysharp/UniTask
EduardoPires/EquinoxProject
felixse/FluentTerminal
graphql-dotnet/graphql-dotnet
This model's maximum context length is 16385 tokens. However, your messages resulted in 25300 tokens. Please reduce the length of the messages.
gui-cs/Terminal.Gui
hellzerg/optimizer
JeffreySu/WeiXinMPSDK
jstedfast/MailKit
JustArchiNET/ArchiSteamFarm
kgrzybek/modular-monolith-with-ddd
MahApps/MahApps.Metro
MassTransit/MassTransit
This model's maximum context length is 16385 tokens. However, your messages resulted in 39191 tokens. Please reduce the length of the messages.
