In [15]:
import pandas as pd
import numpy as np
import os.path as osp
import ast
import utils.helpers as hpr

In [11]:
# Build the change-level table through the project helper. The cells below read
# its 'status', 'number' and 'project' columns.
# NOTE(review): `filter_merged=False` presumably keeps changes of every status
# (later cells select both 'MERGED' and 'ABANDONED') — confirm in utils.helpers.
df = hpr.combine_openstack_data(filter_merged=False)

In [43]:
# Fraction of all rows in `df` whose status is MERGED.
merged_rows = df.loc[df['status'].eq('MERGED')]
len(merged_rows) / len(df)

0.7905754033547976

In [33]:
# Load the dependency pairs and keep only rows where both bot flags are 0
# (presumably: neither the source nor the target change was authored by a bot).
df_all_dependencies = pd.read_csv("./Files/all_dependencies.csv")
source_not_bot = df_all_dependencies['is_source_bot'] == 0
target_not_bot = df_all_dependencies['is_target_bot'] == 0
df_all_dependencies = df_all_dependencies[source_not_bot & target_not_bot]

### Cross/within-project abandoned changes

In [39]:
# Row-level status masks over `df`, computed once and reused by every selection below.
merged_mask = df['status'] == 'MERGED'
abandoned_mask = df['status'] == 'ABANDONED'

# ALL MERGED CHANGES
all_merged_changes = df.loc[merged_mask, 'number'].tolist()

# ALL ABANDONED CHANGES
all_abandoned_changes = df.loc[abandoned_mask, 'number'].tolist()

# ALL DEPENDENT CHANGES (every number that appears as Source or Target of a dependency)
all_depedent_changes = set(hpr.flatten_list(df_all_dependencies[['Source', 'Target']].values))
dependent_mask = df['number'].isin(all_depedent_changes)

# ALL NON-DEPENDENT CHANGES
all_non_depedent_changes = df.loc[~dependent_mask, 'number'].tolist()

# ABANDONED NON-DEPENDENT CHANGES
# Reuses the mask instead of a second isin() over the (large) non-dependent list;
# the selected rows are the same.
abandoned_non_depedent_changes = df.loc[abandoned_mask & ~dependent_mask, 'number'].tolist()

# ALL CROSS-PROJECT CHANGES (take part in at least one dependency flagged 'Cross')
all_cro_pro_cha = set(hpr.flatten_list(df_all_dependencies.loc[df_all_dependencies.is_cross=='Cross', ['Source', 'Target']].values))

# ALL WITHIN-PROJECT CHANGES (dependent changes that take part in no cross-project dependency)
all_wit_pro_cha = [change for change in all_depedent_changes if change not in all_cro_pro_cha]

cross_project_mask = df['number'].isin(all_cro_pro_cha)
within_project_mask = df['number'].isin(all_wit_pro_cha)

# MERGED CROSS-PROJECT CHANGES
mer_cros_pro_cha = df.loc[merged_mask & cross_project_mask, 'number'].tolist()

# ABANDONED CROSS-PROJECT CHANGES
aba_cros_pro_cha = df.loc[abandoned_mask & cross_project_mask, 'number'].tolist()

# MERGED WITHIN-PROJECT CHANGES
mer_wit_pro_cha = df.loc[merged_mask & within_project_mask, 'number'].tolist()

# ABANDONED WITHIN-PROJECT CHANGES
aba_wit_pro_cha = df.loc[abandoned_mask & within_project_mask, 'number'].tolist()

In [41]:
# Abandoned within-project changes as a share of all within-project changes.
aba_wit_pro_share = len(aba_wit_pro_cha) / len(all_wit_pro_cha)
aba_wit_pro_share

0.07908998181737549

### Cross/within-team abandoned changes

In [42]:
# ALL CROSS-TEAM CHANGES (take part in at least one dependency whose is_cross_team is 'Cross')
all_cro_team_cha = set(hpr.flatten_list(df_all_dependencies.loc[df_all_dependencies.is_cross_team=='Cross', ['Source', 'Target']].values))

# ALL WITHIN-TEAM CHANGES (appear in a 'Within' dependency and in no 'Cross' one)
within_team_candidates = set(hpr.flatten_list(df_all_dependencies.loc[df_all_dependencies['is_cross_team']=='Within', ['Source', 'Target']].values))
all_wit_team_cha = [change for change in within_team_candidates if change not in all_cro_team_cha]

# Row-level masks over `df`, computed once and combined below.
team_merged_mask = df['status'] == 'MERGED'
team_abandoned_mask = df['status'] == 'ABANDONED'
cross_team_mask = df['number'].isin(all_cro_team_cha)
within_team_mask = df['number'].isin(all_wit_team_cha)

# MERGED CROSS-TEAM CHANGES
mer_cros_team_cha = df.loc[team_merged_mask & cross_team_mask, 'number'].tolist()

# ABANDONED CROSS-TEAM CHANGES
aba_cros_team_cha = df.loc[team_abandoned_mask & cross_team_mask, 'number'].tolist()

# MERGED WITHIN-TEAM CHANGES
mer_wit_team_cha = df.loc[team_merged_mask & within_team_mask, 'number'].tolist()

# ABANDONED WITHIN-TEAM CHANGES
aba_wit_team_cha = df.loc[team_abandoned_mask & within_team_mask, 'number'].tolist()

In [44]:
# Abandoned within-team changes as a share of all within-team changes.
aba_wit_team_share = len(aba_wit_team_cha) / len(all_wit_team_cha)
aba_wit_team_share

0.09710039098967528

### Cross/within-team abandoned chains of dependent changes

In [None]:
# Lookup table whose 'project' and 'team' columns are read by identify_teams().
os_teams_csv = osp.join('.', 'RQs', 'PQ', 'Files', 'inv_pro_team.csv')
df_os_teams = pd.read_csv(os_teams_csv)

In [25]:
def identify_projects(path):
    """Return the 'project' of every row of the global ``df`` whose 'number' is in ``path``.

    Args:
        path: list-like of change numbers.

    Returns:
        list: project values in ``df`` row order (not in ``path`` order); one
        entry per matching row, empty when nothing matches.
    """
    on_path = df['number'].isin(path)
    return df.loc[on_path, 'project'].tolist()

def identify_teams(projects):
    """Return the 'team' of every row of the global ``df_os_teams`` whose 'project' is in ``projects``.

    Args:
        projects: list-like of project names; repeated names do not produce
            repeated matches, since rows are selected by membership.

    Returns:
        list: team values in ``df_os_teams`` row order, one per matching row.
    """
    owned = df_os_teams['project'].isin(projects)
    return df_os_teams.loc[owned, 'team'].tolist()

def is_cross_team(row):
    """Label a chain as 'Cross', 'Within' or 'None' from its teams and projects.

    Args:
        row: mapping (e.g. a DataFrame row) with sized iterables under the
            keys 'teams' and 'projects'.

    Returns:
        str: 'Cross' when more than one distinct team is present; 'Within'
        when there is exactly one distinct team and either exactly one
        distinct project or as many team entries as project entries;
        otherwise the literal string 'None' (e.g. when no team was found).
    """
    teams, projects = row['teams'], row['projects']
    distinct_teams = len(set(teams))

    if distinct_teams > 1:
        return 'Cross'

    # With exactly one distinct team, "distinct projects == distinct teams"
    # reduces to "exactly one distinct project". Plain `or` replaces the
    # bitwise `|` previously applied to two Python booleans.
    if distinct_teams == 1 and (
        len(set(projects)) == 1 or len(teams) == len(projects)
    ):
        return 'Within'

    return 'None'

In [None]:
chains_csv = osp.join('.', 'Files', 'Number', 'extended_merged.csv')
df_chains = pd.read_csv(chains_csv)
# Each 'Path' cell is parsed from its text form back into a Python literal
# (presumably a list of change numbers, given how identify_projects() uses it).
df_chains['Path'] = df_chains['Path'].apply(ast.literal_eval)

In [None]:
# For every chain, collect the projects of the changes listed in its Path.
chain_projects = df_chains['Path'].apply(identify_projects)
df_chains['projects'] = chain_projects

In [22]:
# For every chain, look up the teams of the projects found for it.
chain_teams = df_chains['projects'].apply(identify_teams)
df_chains['teams'] = chain_teams

In [50]:
def count_distinct(values):
    """Return how many distinct items ``values`` contains."""
    return len(set(values))

# Number of distinct teams / projects recorded for each chain.
df_chains['team_size'] = df_chains['teams'].map(count_distinct)
df_chains['proj_size'] = df_chains['projects'].map(count_distinct)

In [27]:
# Row-wise label ('Cross' / 'Within' / 'None') derived from each chain's teams and projects.
chain_team_labels = df_chains.apply(is_cross_team, axis=1)
df_chains['is_cross_team'] = chain_team_labels

In [44]:
# Among cross-team chains, the share whose `all_abandoned` flag equals 1.
cross_team_chains = df_chains[df_chains['is_cross_team'] == 'Cross']
fully_abandoned_cross = cross_team_chains[cross_team_chains['all_abandoned'] == 1]
len(fully_abandoned_cross) / len(cross_team_chains)

0.10741602926504822

In [58]:
# Persist the enriched chains without the index column (`index` is documented
# as a bool, so pass False rather than None).
# NOTE(review): this writes to the same file the chains were loaded from, so the
# input is overwritten with the added columns — confirm that is intended.
df_chains.to_csv(osp.join('.', 'Files', 'Number', 'extended_merged.csv'), index=False)

### Choosing a representative sample for manual analysis (i.e., abandoned changes)

In [None]:
# Sampling parameters, named so the draw is tunable and reproducible.
SAMPLE_SIZE = 364  # NOTE(review): size carried over from the original cell; its derivation is not recorded here
SAMPLE_SEED = 42   # fixed seed so Restart & Run All yields the same sample

# NOTE(review): earlier cells filter on `is_source_bot` / `is_target_bot` and
# `is_cross`, while this cell reads `is_owner_bot_source` / `is_owner_bot_target`
# and `cross_project` — confirm these columns exist in all_dependencies.csv.
source_status = df_all_dependencies['status_source']
target_status = df_all_dependencies['status_target']

# Exactly one side of the pair was abandoned while the other was merged.
one_side_abandoned = (
    ((source_status == 'ABANDONED') & (target_status == 'MERGED'))
    | ((source_status == 'MERGED') & (target_status == 'ABANDONED'))
)

mixed_outcome_pairs = df_all_dependencies[
    (df_all_dependencies['cross_project'] == 1)
    & (df_all_dependencies['is_owner_bot_source'] == 0)
    & (df_all_dependencies['is_owner_bot_target'] == 0)
    & one_side_abandoned
]

# Take the abandoned side of every pair. Computed as a standalone Series, not
# assigned as a new column on the filtered slice, which would trigger pandas'
# SettingWithCopyWarning.
abandoned_numbers = mixed_outcome_pairs['Source'].where(
    mixed_outcome_pairs['status_source'] == 'ABANDONED',
    mixed_outcome_pairs['Target'],
).unique()

# Seeded draw without replacement; capped at the population size so the cell
# does not raise when fewer than SAMPLE_SIZE candidates exist.
sample_rng = np.random.default_rng(SAMPLE_SEED)
repr_samp = sample_rng.choice(
    abandoned_numbers,
    size=min(SAMPLE_SIZE, len(abandoned_numbers)),
    replace=False,
)

In [241]:
# Save the sampled change numbers as a single-column CSV without the index
# (`index` is documented as a bool, so pass False rather than None).
sample_csv = osp.join('.', 'Files', 'aba_cha_rep_sam.csv')
pd.DataFrame({'Change': repr_samp}).to_csv(sample_csv, index=False)