In [1]:
import pandas as pd
import requests
import json
import time
from git import Repo
import re
from pathlib import Path
import git
from projects import project_list

In [2]:
# Load the mapping of file category -> list of file extensions.
# A context manager is used so the file handle is closed as soon as the JSON
# has been parsed (the previous bare open() leaked it for the kernel's lifetime).
with open('file_extensions.json', 'r') as fe:
    file_extensions = json.load(fe)

# Category names, in the order they appear in the JSON file.
types = list(file_extensions.keys())

In [3]:
def ignoreTheFormate(string_code):
    """Normalise Java source text so that formatting-only edits compare equal.

    The text is rewritten in three passes:

    1. ``// ...`` line comments and ``/* ... */`` block comments are removed.
    2. Each of the characters ``\\ ] [ + < > = * ( ) { } , ; -`` is placed on
       its own line, so spacing around punctuation does not matter.
    3. Every run of whitespace is collapsed to a single newline.

    Parameters
    ----------
    string_code : str
        Source text of one file.

    Returns
    -------
    str
        The normalised text with leading/trailing whitespace stripped.

    Notes
    -----
    The comment pass is purely textual: a ``//`` or ``/*`` that appears inside
    a string literal is treated as a comment start as well.
    """
    # The patterns were previously written as JavaScript regex literals
    # ('/.../g' with a '$1' replacement). In Python the surrounding '/' and the
    # trailing '/g' are matched literally, so almost nothing was normalised.
    without_comments = re.sub(r'//(.*)|/\*(\*(?!/)|[^*])*\*/', '', string_code)

    # Isolate punctuation/operator characters on their own lines.
    # '\1' is Python's back-reference syntax for the captured character.
    isolated = re.sub(r'([\\\]\[+<>=*(){},;\-])', r'\n\1\n', without_comments)

    # Collapse any whitespace run (spaces, tabs, newlines) to one newline.
    collapsed = re.sub(r'\s+', '\n', isolated)
    return collapsed.strip()


In [4]:
def getType(extension, file_name=None):
    """Classify a file by its extension.

    Parameters
    ----------
    extension : str
        The extension to look up in the module-level ``file_extensions``
        mapping (category name -> collection of extensions).
    file_name : str, optional
        Path of the file. Only consulted when ``extension`` is the empty
        string, to recognise version-control metadata files by their base
        name.

    Returns
    -------
    str
        The first category in ``types`` whose extension collection contains
        ``extension``; ``'meta'`` for an extension-less file whose base name
        starts with ``.git``, ``.bzr``, ``.svn`` or ``.cvs``; otherwise
        ``'unknown'``.
    """
    for type_name in types:
        if extension in file_extensions[type_name]:
            return type_name

    # The previous version of this branch referenced undefined names
    # (`path`, `file`) and the JavaScript method `indexOf`, so it raised
    # whenever it was reached. Without a file name there is nothing to
    # inspect, so fall through to "unknown".
    if extension == '' and file_name:
        base_name = Path(file_name).name
        if base_name.startswith(('.git', '.bzr', '.svn', '.cvs')):
            return 'meta'

    return "unknown"


In [None]:
# For each project: load its Travis build table, open the local clone, and
# decide per triggering commit whether the build could have been skipped
# (verdict 1) or not (verdict 0). The table, extended with a 'verdict' column,
# is written to ci_skip_data/extracted_project_travis/.

# Projects that have no 'master' or 'trunk' builds are recorded here.
discard_list = []
for p in project_list[:3]:
    # `p` is used as '<repo_name>/<csv_file_name>' (see the two split('/') calls below).
    tr_data = pd.read_csv('data/25_1_travis_data/' + p)

    # Prefer builds on 'master'; otherwise look for 'trunk'.
    branch_type = tr_data['git_branch'].tolist()
    if 'master' in branch_type:
        data = tr_data[tr_data['git_branch'] == 'master']
    else:
        data = tr_data[tr_data['git_branch'] == 'trunk']

    if len(data) == 0:
        print('Could not continue for {}'.format(p))
        discard_list.append(p)
        continue

    repo_name = p.split('/')[0]
    clone_dir = 'project_clones/' + repo_name

    repo = Repo(clone_dir)
    verdict_list = []  # final decision, one entry per row of tr_data

    # NOTE(review): this walks every row of `tr_data`, not the branch-filtered
    # `data` computed above; `data` is only used for the emptiness check.
    # Left as-is because the 'verdict' column below is assigned to `tr_data`
    # and must have one entry per row -- confirm this is intended.
    for x in range(len(tr_data)):
        row = tr_data.iloc[x]
        commit_id = row['git_trigger_commit']
        print(commit_id)

        try:
            commit = repo.commit(commit_id)
        except Exception:
            # `except Exception` (not a bare `except`) so that interrupting
            # the kernel still stops this long-running cell.
            print('omitting {}'.format(commit_id))
            verdict_list.append(0)
            continue

        # Per-commit bookkeeping; these lists are filled but not read in this cell.
        file_flag_list = []
        code_flag_list = []
        format_flag_list = []

        # A commit is skippable until some changed file proves otherwise.
        verdict = 1

        for file_name in commit.stats.files:

            extension = '.' + file_name.split('.')[-1]
            t = getType(extension)

            if t in ('meta', 'media', 'doc'):
                # Non-source (meta / media / doc) change: this file can be
                # skipped, so the verdict is left untouched.
                file_flag_list.append(1)

            elif extension == '.java':
                try:
                    # Compare the normalised file at this commit with the
                    # normalised file at its first parent.
                    new_version = ignoreTheFormate(repo.git.show('{}:{}'.format(commit_id, file_name)))
                    old_version = ignoreTheFormate(repo.git.show('{}:{}'.format(commit_id + '~', file_name)))

                    if old_version != new_version:
                        # Real code change: this file cannot be skipped.
                        code_flag_list.append(0)
                        verdict = 0
                    else:
                        # Only formatting or comments changed: this file can
                        # be skipped, so the verdict is left untouched.
                        format_flag_list.append(1)
                except Exception:
                    # `git show` failed for one of the two revisions, which is
                    # treated as the file having been added or deleted.
                    print('File {} added or deleted'.format(file_name))
                    verdict = 0

            else:
                # Out of the skip rules' scope.
                verdict = 0

        verdict_list.append(verdict)

    tr_data['verdict'] = verdict_list
    p_name = p.split('/')[1]
    tr_data.to_csv('ci_skip_data/extracted_project_travis/' + p_name)

    # Skippable count, total count, skippable percentage.
    print(verdict_list.count(1), len(verdict_list), 100*verdict_list.count(1)/len(verdict_list))

70329a4aaa53659e943e96cd5f2238f016489fbd
0730aa491f4ac1fbd29f3866c5601d5415384e2a
ebde06a0cd90d0125835108ade3c713c7cabf203
f4fa14b95685cada4a1d3942e1294c4c047bd319
4def61f4506f1bef2bf9ca714fcb9209efb01f78
decc8ccf10244262638c6936ff4d531b3015c9d7
01bf28a77d3a9d913f5f8417a0f9e658f35876cc
810ff8b07d1a0b36f5a1e11e0fc2620ab2898703
ff66d5d63a6e0da6ba346b15caecb6583ba36a38
aca16785915f880d8b2c877a597b07e650c963bd
e230d9a3f23fa8f23b43716c154ba23e8c583c01
049f9c41bacfb1728919befd9fe0135e2096b5fb
37b765083ce3b4994d67b8cb03faaee0ebc8dea9
61157af356f93a99986e465671ede6c8d0af0cbd
5033dace6cb6e57e0e7845f2148773560345a440
efecafdf9a2c0affe8bc008e1d566fd7183b6767
c31654c864dd333ff24c31543f9b6665bdb1aa23
ae1776e449e06dd1aa7b71fc4afb1a7ecf407f81
aa68c3cd8154cefd632db5185e64688706a3f34a
File api/buildcraft/api/mj/MjAPILegacy.java added or deleted
File common/buildcraft/energy/BlockEnergyConverter.java added or deleted
File common/buildcraft/energy/ItemEnergyConverter.java added or deleted
File common/bui

9b02f71b549f5aaeb0f80551ac11330341fa8485
9b02f71b549f5aaeb0f80551ac11330341fa8485
9b02f71b549f5aaeb0f80551ac11330341fa8485
bc1a7486cd465f5d7a701dff7c7f3a7b717b888e
File common/buildcraft/transport/schematics/SchematicFilteredBuffer.java added or deleted
a7fe30f38af9b673d8071c8e68055ccf8b148d9f
f1d9aee0b8e1c052fff5b14e464e88ec33d15e17
File api/buildcraft/api/blueprints/MappingNotFoundException.java added or deleted
1c90496cde15fa0d85c9db6fed3aa19ca710d68e
c0412cc8311018688570e17c0010ceebd7e1ac56
986390babc7a014160ea228ebe0ee9371ee8d021
159eba4619453810f200caa93140f1bd717c43ce
d584a18c44613a7cf8bf5cf559a88201583f5f1e
c17041ac17af2edb7dd44bb4ed69ef95eab2213b
File api/buildcraft/api/blueprints/MappingNotFoundException.java added or deleted
File api/buildcraft/api/mj/ISidedBatteryProvider.java added or deleted
File common/buildcraft/builders/schematics/SchematicRedstoneLamp.java added or deleted
File common/buildcraft/silicon/schematics/SchematicLaserTableBase.java added or deleted
File com

File api/buildcraft/api/mj/package-info.java added or deleted
173db45f4b7c910989d0970a739a8df77905bd77
d3e3f8bf21a051100a934a92129ceb5524037135
File common/buildcraft/core/network/INBTSerializable.java added or deleted
File common/buildcraft/core/network/serializers/SerializerINBTSerializable.java added or deleted
a3c2854791b0392177f38d765d0d5ec0b33ef57b
a106b17da6281a8640ae579081173f19efc83ec6
File api/buildcraft/api/mj/package-info.java added or deleted
e204fa275d4959621d48a1edc0d2bc10c584206d
File api/buildcraft/api/mj/package-info.java added or deleted
10e7429847956f2aa9f3e462ba5c217ed59eacbe
b7a89c6cc87d4fbf6830bdc9a7ba9da8d1cacc9a
File common/buildcraft/core/network/INBTSerializable.java added or deleted
File common/buildcraft/core/network/serializers/SerializerINBTSerializable.java added or deleted
d8d57e822f9bfb77960855fa24d144be90762b56
ce55fe5eb1ab45012a229d9569ac287c30bd0ea4
3f925ae0e940417f4662d2becd504401e577dd66
File api/buildcraft/api/boards/IRedstoneBoardRobot.java adde

File common/buildcraft/core/utils/WorldPropertyIsLeave.java added or deleted
66bfac926bd730e2163a058f9d140d285ae15dd2
5e6bc27144d03ae37e07e26e17c2d6a3be3bef84
98f7196d0f07e059bd03786a1cfc32cf2d7aa055
b1076d85566d660a48fc2cf29f73812d3577ca49
File api/buildcraft/api/core/BlockIndex.java added or deleted
File api/buildcraft/api/core/IWorldProperty.java added or deleted
File api/buildcraft/api/robots/AIRobot.java added or deleted
File api/buildcraft/api/robots/DockingStationRegistry.java added or deleted
File api/buildcraft/api/robots/EntityRobotBase.java added or deleted
File api/buildcraft/api/robots/IDockingStation.java added or deleted
File api/buildcraft/robots/AIRobot.java added or deleted
File api/buildcraft/robots/DockingStation.java added or deleted
File api/buildcraft/robots/DockingStationRegistry.java added or deleted
File api/buildcraft/robots/EntityRobotBase.java added or deleted
File common/buildcraft/core/BlockIndex.java added or deleted
File common/buildcraft/core/robots/Do