In [1]:
import pandas
import time
import github
import gpt
import Score

In [2]:
def run_experiment(csvFile):
    for i in range(0, len(csvFile)):
        repo_identifier = csvFile.iloc[i]['GitHub_Repo_Link'].split('github.com/')[1]
        print(repo_identifier)
        try:
            default_branch = github.get_default_branch(repo_identifier);
            repo_structure = github.get_repository_tree(repo_identifier, default_branch)
            dependencies = github.get_list_of_dependencies(repo_identifier)

            generated_workflow_file = gpt.generate_build_pipeline(repo_structure, dependencies, default_branch)

            csvFile.loc[i,'Generated_Build_Pipeline_File_Content'] = generated_workflow_file
            valid_syntax = True#github.check_yaml_syntax(generated_workflow_file);
            if not valid_syntax:
                csvFile.loc[i,'Syntax_Check'] = 'Invalid'
                continue

            csvFile.loc[i,'Syntax_Check'] = 'Valid'
            workflow_files = github.get_all_workflow_files(repo_identifier)

            build_file_content = ''
            build_file_devops_aware_score = 0
            # Loop through workflow files
            for workflow_file in workflow_files:
                workflow_file_content = github.get_workflow_file_content(repo_identifier, workflow_file, default_branch)
                devops_aware_score = Score.get_devops_aware_score(generated_workflow_file, workflow_file_content)
                if devops_aware_score > build_file_devops_aware_score:
                    build_file_devops_aware_score = devops_aware_score
                    build_file_content = workflow_file_content

            exact_match_score = Score.get_exact_match_score(generated_workflow_file, build_file_content)
            bleu_score = Score.get_bleu_score(generated_workflow_file, build_file_content)

            csvFile.loc[i,'GitHub_Build_Pipeline_File_Content'] = build_file_content
            csvFile.loc[i,'DevOps_Aware_Score'] = build_file_devops_aware_score
            csvFile.loc[i,'Exact_Match_Score'] = exact_match_score
            csvFile.loc[i,'BLEU_Score'] = bleu_score
            # Add delay to avoid rate limiting
            time.sleep(10)
        except Exception as e:
            print(e)
            continue

    csvFile.to_csv('dataset/output.csv', index=False)

In [3]:
# reading the CSV file
csvFile = pandas.read_csv('dataset/dataset.csv')

# Pre-processing the CSV file
csvFile = csvFile[csvFile['GitHub_Repo_Link'].notna()]

csvFile

Unnamed: 0,Index,GitHub_Repo_Link,GitHub_Build_Pipeline_File_Content,Generated_Build_Pipeline_File_Content,Exact_Match_Score,BLEU_Score,Syntax_Check,DevOps_Aware_Score
0,1.0,https://github.com/05bit/peewee-async,,,,,,
1,2.0,https://github.com/Anttek-io/grigory,,,,,,
2,3.0,https://github.com/APSL/puput,,,,,,
3,4.0,https://github.com/Bearle/django_private_chat2,,,,,,
4,5.0,https://github.com/brianrisk/qwaver,,,,,,
...,...,...,...,...,...,...,...,...
67,,https://github.com/lzyzsd/JsBridge,,,,,,
68,,https://github.com/libgdx/libgdx,,,,,,
69,,https://github.com/json-path/JsonPath,,,,,,
70,,https://github.com/jhy/jsoup,,,,,,


In [4]:
run_experiment(csvFile)

05bit/peewee-async



on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout repository
      uses: actions/checkout@v2

    - name: Set up Python
      uses: actions/setup-python@v1
      with:
        python-version: 3.7

    - name: Install dependencies
      run: |
        pip install -r requirements.txt

    - name: Build and Test
      run: |
        python setup.py build
        pytest
' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  csvFile.loc[i,'Generated_Build_Pipeline_File_Content'] = generated_workflow_file
  csvFile.loc[i,'Syntax_Check'] = 'Valid'
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score

'NoneType' object is not subscriptable
Anttek-io/grigory


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4
      matrix:
        python-version: ["3.10"]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: acti

APSL/puput


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


KeyboardInterrupt: 