# RQ3 - How are software artifacts used in software development?

In [None]:
import pandas as pd
import json
from upsetplot import plot
from matplotlib import pyplot
from upsetplot import from_contents
from matplotlib import pyplot as plt
from upsetplot import UpSet
from helpers import *

# Collapses a detailed interface-subtype label into its interface family
# ('api', 'gui' or 'cli'). The mapping is indexed with DataFrame column names
# further down, which is why the pass-through 'id' entry exists.
int_types_parser = {
    '': '',
    'id': 'id',
    'cli (standard input)': 'cli',
    'api': 'api',
    'api (plugin for github)': 'api',
    'gui (plugin for another automation artifact)': 'gui',
    'gui': 'gui',
    'api (integrates with ci)': 'api',
    'gui (plugin for ide)': 'gui',
}

# Location of the dataset, relative to the directory this code is run from.
path = '../data/studies-and-artifacts.json'

# orient='index' makes pandas treat the top-level JSON keys as row labels and
# the nested keys as columns (presumably one row per artifact - verify against
# the data file).
artifacts_df = pd.read_json(path, orient='index')

# Labels tested against each artifact's 'tdma' field; one 0/1 column is
# generated per entry. Spellings (e.g. 'priorization') are kept verbatim
# because they are matched against the data - presumably the dataset uses the
# same spelling; verify before "fixing" it.
tdma_list = [
    'repayment',
    'monitoring',
    'measurement',
    'identification',
    'communication',
    'prevention',
    'priorization',
    'representation/documentation',
]

# Labels tested against each artifact's 'td_type' field, again one 0/1 column
# per entry.
td_types_list = [
    'code',
    'design',
    'architectural',
    'test',
    'documentation',
    'requirements',
    'build',
    'infrastructure',
    'versioning',
    'satd',
]

### RQ3.1 - What triggers are used to initiate the automation process?

In [None]:
# Build a dataset with one row per artifact: its id, its trigger, then a 0/1
# flag for every entry of tdma_list followed by a 0/1 flag for every entry of
# td_types_list.
col = ['id', 'trigger'] + tdma_list + td_types_list

data = []
for i in artifacts_df.index:
    activities = artifacts_df['tdma'][i]
    debt_types = artifacts_df['td_type'][i]

    # int(bool) yields the same 1 / 0 markers an explicit if/else would.
    activity_flags = [int(tda in activities) for tda in tdma_list]
    type_flags = [int(tdt in debt_types) for tdt in td_types_list]

    data.append([i, artifacts_df['trigger'][i]] + activity_flags + type_flags)

tdt_tdma_triggers = pd.DataFrame(data, columns=col)

In [None]:
# Trigger: count of (non-null) artifact names per trigger value
(
    artifacts_df[['name', 'trigger']]
    .groupby('trigger')
    .count()
)

In [None]:
# Software Type x Trigger: count of names per (trigger, type) pair
(
    artifacts_df[['name', 'trigger', 'type']]
    .groupby(['trigger', 'type'])
    .count()
)

In [None]:
# Software Type x Trigger x Could Integrate?
(
    artifacts_df[['name', 'type', 'can-integrated', 'trigger']]
    .groupby(['type', 'trigger', 'can-integrated'])
    .count()
)

In [None]:
# Development Activity x Trigger
# ('type' is selected but not grouped on, so it appears as a counted column.)
(
    artifacts_df[['name', 'type', 'devlopment-activity-mapping', 'trigger']]
    .groupby(['devlopment-activity-mapping', 'trigger'])
    .count()
)

In [None]:
# Development Activity, restricted to artifacts whose 'is-integrated' is 'Yes'
is_integrated_mask = artifacts_df['is-integrated'] == 'Yes'
(
    artifacts_df.loc[is_integrated_mask, ['name', 'type', 'devlopment-activity-mapping']]
    .groupby('devlopment-activity-mapping')
    .count()
)

### RQ3.2 - Can the artifacts be integrated?

In [None]:
# Is Integrated?: count of names per 'is-integrated' value
(
    artifacts_df[['name', 'is-integrated']]
    .groupby('is-integrated')
    .count()
)

In [None]:
# Trigger x Is Integrated?
(
    artifacts_df[['name', 'trigger', 'is-integrated']]
    .groupby(['trigger', 'is-integrated'])
    .count()
)

In [None]:
# Trigger x Could Be Integrated?
(
    artifacts_df[['name', 'trigger', 'can-integrated']]
    .groupby(['trigger', 'can-integrated'])
    .count()
)

In [None]:
# Could Be Integrated?: count of names per 'can-integrated' value
(
    artifacts_df[['name', 'can-integrated']]
    .groupby('can-integrated')
    .count()
)

In [None]:
# Build the dataset for the UpSet plot of interfaces: for every artifact, one
# boolean flag per interface family (api / gui / cli).
columns = ['id', 'api', 'gui', 'cli']
rows = []
for i in artifacts_df.index:
    interface_type = artifacts_df['interface-type'][i]

    if interface_type == 'multiple':
        # Several interfaces: a family is present as soon as any entry of the
        # artifact's 'interface-subtype' value mentions it.
        subtypes = artifacts_df['interface-subtype'][i]
        api = any('api' in x for x in subtypes)
        gui = any('gui' in x for x in subtypes)
        cli = any('cli' in x for x in subtypes)
    else:
        # Single interface: the type itself names the family.
        api = interface_type == 'api'
        gui = interface_type == 'gui'
        cli = interface_type == 'cli'

    rows.append([i, api, gui, cli])

uppset_interfaces_df = pd.DataFrame(rows, columns=columns)


# For each interface family, collect the ids of the artifacts flagged with it.
artifacts_interfaces = {}
for i in ['api', 'gui', 'cli']:
    ids = uppset_interfaces_df.loc[uppset_interfaces_df[i] == True, 'id'].to_list()
    artifacts_interfaces[i] = ids
plot_upset(items_dict = artifacts_interfaces, element_size = 50, color = 'grey', output_name = 'rq3-upset-interfaces')

In [None]:
# Build the dataset with the interfaces used by existing integrations:
# artifacts whose 'is-integrated' is 'Yes' and whose interface type is not 'gui'.
artifacts_is_integrated = artifacts_df.loc[(artifacts_df['is-integrated'] == 'Yes') & (artifacts_df['interface-type'] != 'gui')]
integrations_types_existing = to_1D(artifacts_is_integrated['is-integrated-using-interface-subtype']).unique().tolist()
integrations_types_existing.sort()
columns = ['id'] + integrations_types_existing

# One row per artifact: its id followed by a boolean per known subtype telling
# whether the artifact's integration uses that subtype.
rows = []
for i in artifacts_is_integrated.index:
    used_subtypes = artifacts_is_integrated['is-integrated-using-interface-subtype'][i]
    rows.append([i] + [a in used_subtypes for a in integrations_types_existing])

uppset_is_integrated_using_interface_subtype = pd.DataFrame(rows, columns=columns)

# Group artifact ids by interface family. Iterating the DataFrame yields its
# column labels, including 'id'; that column is not boolean, so the explicit
# `== True` comparison is required to obtain a mask. The resulting 'id' entry
# is discarded right after the loop.
artifacts_interface_exisiting = {}
for i in uppset_is_integrated_using_interface_subtype:
    parsed_type = int_types_parser[i]
    ids = uppset_is_integrated_using_interface_subtype.loc[uppset_is_integrated_using_interface_subtype[i] == True, 'id'].to_list()
    # add_list_to_dict is not defined in this file (presumably from helpers);
    # it is expected to merge `ids` under `parsed_type` - verify.
    artifacts_interface_exisiting = add_list_to_dict(dict_variable = artifacts_interface_exisiting,
                                                     key = parsed_type,
                                                     items_list = ids)

artifacts_interface_exisiting.pop('id')
plot_upset(items_dict = artifacts_interface_exisiting, element_size = 50, color = 'grey', output_name = 'rq3-upset-existing-integration')

In [None]:
# Build the dataset with the interfaces available for a possible integration:
# artifacts whose 'can-integrated' is 'Yes'.
can_integrate_mask = artifacts_df['can-integrated'] == 'Yes'
artifacts_can_integrated = artifacts_df.loc[can_integrate_mask]
integrations_types_possible = sorted(
    to_1D(artifacts_can_integrated['interface-subtype']).unique().tolist()
)
columns = ['id'] + integrations_types_possible

# One row per artifact: its id followed by a boolean per subtype that is True
# when the artifact offers the subtype but does not already integrate with it.
rows = []
for i in artifacts_can_integrated.index:
    offered = artifacts_can_integrated['interface-subtype'][i]
    already_used = artifacts_can_integrated['is-integrated-using-interface-subtype'][i]
    flags = [
        a in offered and a not in already_used
        for a in integrations_types_possible
    ]
    rows.append([i] + flags)

uppset_interface_subtype = pd.DataFrame(rows, columns=columns)

# Group artifact ids by interface family. Every column label is visited,
# including the non-boolean 'id' column, hence the explicit `== True` mask;
# the resulting 'id' entry is removed after the loop.
artifacts_interface = {}
for column_name in uppset_interface_subtype.columns:
    parsed_type = int_types_parser[column_name]
    flagged = uppset_interface_subtype[column_name] == True
    artifacts_interface = add_list_to_dict(
        dict_variable=artifacts_interface,
        key=parsed_type,
        items_list=uppset_interface_subtype.loc[flagged, 'id'].to_list(),
    )

artifacts_interface.pop('id')
plot_upset(
    items_dict=artifacts_interface,
    element_size=50,
    color='grey',
    output_name='rq3-upset-possible-integration',
)