In [1]:
import pandas as pd
import re

In [2]:
# Load the merge dataset, discard the existing `branch_merge` column and
# shorten every merge SHA to its first 10 characters.
# The column is dropped via the `columns=` keyword: passing the axis
# positionally (`drop(label, 1)`) is rejected by pandas >= 2.0.
# Built as a single chain so re-running the cell always starts from the CSV.
df = (
    pd.read_csv("../../data/merge_types_data.csv")
    .drop(columns='branch_merge')
    .assign(merge_SHA=lambda frame: frame['merge_SHA'].str[:10])
)

In [3]:
# Build a reproducible inspection sample:
#   1. shuffle every row (frac=1.0) with a fixed seed,
#   2. keep the first five shuffled rows of each project,
#   3. draw 20 of those rows, again with a fixed seed.
sample = (
    df.sample(frac=1.0, random_state=123)
    .groupby('project')
    .head(5)
    .sample(n=20, random_state=123)
)
display(sample)

Unnamed: 0,chunk_id,project,merge_SHA,devs1,devs2,commit_message
139384,1405151,cismet/cismet-commons,02f552a7ad,1.0,1.0,Merge remote-tracking branch 'origin/pw-enc-op...
92319,1162628,forcedotcom/SalesforceMobileSDK-Android,683b0f306a,3.0,2.0,Merge remote-tracking branch 'upstream/master'...
51394,1026140,BetterShop/BetterShop,ac4fcac8d4,1.0,1.0,Merge branch 'master' of https://github.com/lo...
43477,998523,droolsjbpm/guvnor,76c10c2fa7,1.0,4.0,Merge remote branch 'remotes/upstream/master' ...
89449,1154752,iron-io/iron_mq_java,f6b88e2b6c,3.0,1.0,Merge branch 'master' into v3\n\nConflicts:\n\...
98837,1226168,ladriangb/jSwitch-base-src,a238af2139,1.0,1.0,18-032012
45380,1005133,dropwizard/dropwizard,841f07cd8c,20.0,1.0,Merge branch 'dynamic-ports' of https://github...
26976,941598,aptana/studio3-php,bf7b549fd4,1.0,8.0,Merge branch 'development' into debugger
2116,781374,bpellin/keepassdroid,9141ffc4fc,1.0,3.0,Merge branch 'master' into save_kdbx\n\nConfli...
108889,1313350,Rajawali/RajawaliExamples,5de49a7c87,4.0,1.0,Merge remote-tracking branch 'remotes/origin/a...


### Option 1: devs1 >= 2 and devs2 >= 2 and "merge branch" in commit_message (case-insensitive)

In [4]:
def is_branch_merge1(devs1, devs2, commit_message):
    """Option 1 heuristic for labelling a merge as a branch merge.

    Args:
        devs1: value of the ``devs1`` column for the merge.
        devs2: value of the ``devs2`` column for the merge.
        commit_message: commit message of the merge; it is converted with
            ``str()`` before being inspected.

    Returns:
        A truthy value only when both ``devs1`` and ``devs2`` are at least 2
        and the lower-cased message contains the text ``merge branch``.
    """
    both_sides_shared = devs1 >= 2 and devs2 >= 2
    if not both_sides_shared:
        # Hand back the failed comparison itself, as a chained `and` would.
        return both_sides_shared
    normalized_message = str(commit_message).strip().lower()
    return 'merge branch' in normalized_message

# Label only the sampled rows. Applying the predicate over the full `df`
# evaluated every merge in the dataset and relied on index alignment to keep
# just the sampled ones; applying it to the copy gives the same labels.
option1 = sample.copy()
option1['merge_branch'] = option1.apply(
    lambda row: is_branch_merge1(row['devs1'], row['devs2'], row['commit_message']),
    axis=1,
)
display(option1)

Unnamed: 0,chunk_id,project,merge_SHA,devs1,devs2,commit_message,merge_branch
139384,1405151,cismet/cismet-commons,02f552a7ad,1.0,1.0,Merge remote-tracking branch 'origin/pw-enc-op...,False
92319,1162628,forcedotcom/SalesforceMobileSDK-Android,683b0f306a,3.0,2.0,Merge remote-tracking branch 'upstream/master'...,False
51394,1026140,BetterShop/BetterShop,ac4fcac8d4,1.0,1.0,Merge branch 'master' of https://github.com/lo...,False
43477,998523,droolsjbpm/guvnor,76c10c2fa7,1.0,4.0,Merge remote branch 'remotes/upstream/master' ...,False
89449,1154752,iron-io/iron_mq_java,f6b88e2b6c,3.0,1.0,Merge branch 'master' into v3\n\nConflicts:\n\...,False
98837,1226168,ladriangb/jSwitch-base-src,a238af2139,1.0,1.0,18-032012,False
45380,1005133,dropwizard/dropwizard,841f07cd8c,20.0,1.0,Merge branch 'dynamic-ports' of https://github...,False
26976,941598,aptana/studio3-php,bf7b549fd4,1.0,8.0,Merge branch 'development' into debugger,False
2116,781374,bpellin/keepassdroid,9141ffc4fc,1.0,3.0,Merge branch 'master' into save_kdbx\n\nConfli...,False
108889,1313350,Rajawali/RajawaliExamples,5de49a7c87,4.0,1.0,Merge remote-tracking branch 'remotes/origin/a...,False


### Option 2: commit_message matches "merge[d] branch <name> into|onto" (case-insensitive), or (devs1 >= 2 and devs2 >= 2)

In [5]:
# Matches "merge branch <name> into/onto" and "merged branch <name> into/onto",
# case-insensitively. Plain letters are used instead of classes such as
# `[m|M]`: inside brackets `|` is a literal character, so those classes also
# accepted text like "|erge", and IGNORECASE already covers both cases.
pattern1 = re.compile(r"merged? branch (.*) [io]nto", re.IGNORECASE)


def is_branch_merge2(devs1, devs2, commit_message):
    """Option 2 heuristic for labelling a merge as a branch merge.

    Args:
        devs1: value of the ``devs1`` column for the merge.
        devs2: value of the ``devs2`` column for the merge.
        commit_message: commit message of the merge; it is converted with
            ``str()`` before being searched.

    Returns:
        ``True`` when the message matches ``pattern1`` or when both
        ``devs1`` and ``devs2`` are at least 2; ``False`` otherwise.
    """
    has_message_evidence = pattern1.search(str(commit_message)) is not None
    if has_message_evidence:
        return True
    # Compare as floats rather than ints: `int(nan)` raises, whereas
    # `nan >= 2` is simply False, so a missing count means "no evidence"
    # instead of crashing (and only crashing when the message did not match).
    return float(devs1) >= 2 and float(devs2) >= 2
    
# Label only the sampled rows. Applying the predicate over the full `df`
# evaluated every merge in the dataset (and could fail on rows that are not
# even part of the sample) before index alignment discarded the extras.
option2 = sample.copy()
option2['merge_branch'] = option2.apply(
    lambda row: is_branch_merge2(row['devs1'], row['devs2'], row['commit_message']),
    axis=1,
)
display(option2)

Unnamed: 0,chunk_id,project,merge_SHA,devs1,devs2,commit_message,merge_branch
139384,1405151,cismet/cismet-commons,02f552a7ad,1.0,1.0,Merge remote-tracking branch 'origin/pw-enc-op...,False
92319,1162628,forcedotcom/SalesforceMobileSDK-Android,683b0f306a,3.0,2.0,Merge remote-tracking branch 'upstream/master'...,True
51394,1026140,BetterShop/BetterShop,ac4fcac8d4,1.0,1.0,Merge branch 'master' of https://github.com/lo...,False
43477,998523,droolsjbpm/guvnor,76c10c2fa7,1.0,4.0,Merge remote branch 'remotes/upstream/master' ...,False
89449,1154752,iron-io/iron_mq_java,f6b88e2b6c,3.0,1.0,Merge branch 'master' into v3\n\nConflicts:\n\...,True
98837,1226168,ladriangb/jSwitch-base-src,a238af2139,1.0,1.0,18-032012,False
45380,1005133,dropwizard/dropwizard,841f07cd8c,20.0,1.0,Merge branch 'dynamic-ports' of https://github...,True
26976,941598,aptana/studio3-php,bf7b549fd4,1.0,8.0,Merge branch 'development' into debugger,True
2116,781374,bpellin/keepassdroid,9141ffc4fc,1.0,3.0,Merge branch 'master' into save_kdbx\n\nConfli...,True
108889,1313350,Rajawali/RajawaliExamples,5de49a7c87,4.0,1.0,Merge remote-tracking branch 'remotes/origin/a...,False
