# RQ1 - What software artifacts are available for the automation of TD management?

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Relative path to the JSON file holding the artifact records.
path = "../data/Step1_artifacts.json"

# orient="index" treats every top-level JSON key as a row label.
artifacts_df = pd.read_json(path, orient="index")

# Labels of the TD management activities (presumably what "tdma" stands for).
# NOTE(review): 'priorization' looks like a misspelling of 'prioritization';
# it is kept verbatim because it may have to match labels stored in the
# data -- confirm before changing.
tdma_list = [
    "repayment",
    "monitoring",
    "measurement",
    "identification",
    "communication",
    "prevention",
    "priorization",
    "representation/documentation",
]

# Labels of the technical-debt types.
td_types_list = [
    "code",
    "design",
    "architectural",
    "test",
    "documentation",
    "requirements",
    "build",
    "infrastructure",
    "versioning",
    "satd",
]

def to_1D(series):
    """Flatten an iterable of iterables into a single ``pd.Series``.

    Each element of ``series`` is iterated in turn and its items are
    collected, in order, into one flat list. The list is then wrapped in a
    new Series with a default integer index.
    """
    flattened = []
    for _list in series:
        flattened.extend(_list)
    return pd.Series(flattened)

### RQ1.1 - What is the type of software of the artifact (e.g., script, tool, library)?

In [None]:
# Software types: number of non-null 'name' values per 'type',
# listed from the most to the least frequent type.
(
    artifacts_df[['type', 'name']]
    .groupby('type')
    .count()
    .sort_values(by='name', ascending=False)
)

In [None]:
# Number of plugins that are versions of tools.
# Literally: rows are grouped by 'name', the non-null 'type' values are
# counted per name, and the names with a count above one are tallied.
values = artifacts_df[['type', 'name']].groupby('name').count()
len(values[values['type'] > 1])

### RQ1.2 - What are the inputs and outputs of the artifact?

In [None]:
# Bar chart of how often each 'input_info' value occurs across artifacts.
input_info = to_1D(artifacts_df['input_info']).value_counts()

# Bars are `width` wide and are placed 2 * width apart, starting at 0.
width = 0.25
x_pos = [2 * width * slot for slot in range(len(input_info))]

plt.rcParams["figure.figsize"] = (5, 4)
fig, ax = plt.subplots(layout='tight')
ax.bar(x_pos, height=input_info, width=width, color='grey')

# Only the bottom axis line stays visible.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(side == 'bottom')

# The y-axis is capped at `scale` and carries no ticks; the counts are
# written as text above the bars instead.
scale = 30

ax.set_ylim([0, scale])
ax.set_yticks([])

for index, value in enumerate(input_info):
    # Larger counts get a larger leftward shift of the label.
    factor = 0.5 if value > 100 else 0.32 if value > 10 else 0.2

    # Labels of bars taller than the axis limit are pinned just above it.
    ax.text(x_pos[index] - factor * width, min(value, scale) + 1, f'{value}')

plt.xticks(x_pos, input_info.index, rotation=30, ha='right')
plt.savefig('../figures/rq1-inuput-info.pdf', bbox_inches='tight')

In [None]:
# Bar chart of how often each 'input_fmt' value occurs across artifacts.
input_format = to_1D(artifacts_df['input_fmt']).value_counts()

# Bars are `width` wide and are placed 2 * width apart, starting at 0.
width = 0.25
x_pos = [2 * width * slot for slot in range(len(input_format))]

plt.rcParams["figure.figsize"] = (5, 4)
fig, ax = plt.subplots(layout='tight')
ax.bar(x_pos, height=input_format, width=width, color='grey')

# Only the bottom axis line stays visible.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(side == 'bottom')

# The y-axis is capped at `scale` and carries no ticks; the counts are
# written as text above the bars instead.
scale = 30

ax.set_ylim([0, scale])
ax.set_yticks([])

for index, value in enumerate(input_format):
    # Larger counts get a larger leftward shift of the label.
    factor = 0.5 if value > 100 else 0.32 if value > 10 else 0.2

    # Labels of bars taller than the axis limit are pinned just above it.
    ax.text(x_pos[index] - factor * width, min(value, scale) + 1, f'{value}')


# NOTE(review): the tick labels are hard-coded rather than taken from
# input_format.index, so they presumably have to list one label per bar in
# the same order as the value counts -- confirm against the data.
fmts = ['source code', 'json', 'csv', 'uml', 'graphml', 'sql', 'issues', 'xml']
plt.xticks(x_pos, fmts, rotation=30, ha='right')
plt.savefig('../figures/rq1-inuput-fmt.pdf', bbox_inches='tight')

In [None]:
# Output info: occurrences of each 'output_info' value, exported and printed.
output_info = to_1D(artifacts_df['output_info']).value_counts()

# One row per (value, count) pair; to_csv is called with its defaults.
output_info_table = pd.DataFrame(
    output_info.items(),
    columns=['output-info', 'n-artifacts'],
)
output_info_table.to_csv('../data/rq1-output-infos.csv')
print(output_info)

In [None]:
# Output formats: occurrences of each 'output_fmt' value, exported and printed.
output_fmts = to_1D(artifacts_df['output_fmt']).value_counts()

# One row per (value, count) pair; to_csv is called with its defaults.
output_fmts_table = pd.DataFrame(
    output_fmts.items(),
    columns=['output-fmt', 'n-artifacts'],
)
output_fmts_table.to_csv('../data/rq1-output-fmts.csv')
print(output_fmts)

### RQ1.3 - What is the maturity level of the software artifact?

In [None]:
# Maturity: number of non-null 'name' values per maturity level.
(
    artifacts_df[['maturity', 'name']]
    .groupby(['maturity'])
    .count()
)

In [None]:
# Maturity x type: number of non-null 'name' values per ('type', 'maturity') pair.
# NOTE(review): the original comment said "TD Type", but the grouping uses the
# 'type' column, which the RQ1.1 cell labels as software types -- confirm.
(
    artifacts_df[['maturity', 'name', 'type']]
    .groupby(['type', 'maturity'])
    .count()
)

In [None]:
# Maturity x Input Format
# Collects the distinct values found in the per-artifact 'input_info' lists.
# NOTE(review): the heading and the variable name say "format", yet the values
# are read from 'input_info' rather than 'input_fmt' -- confirm which column
# is intended. This also rebinds `input_format`, which held value counts in
# an earlier cell.
input_format = {
    entry
    for label in artifacts_df.index
    for entry in artifacts_df['input_info'][label]
}

In [None]:
# For every distinct input value, tally the artifacts that list it per
# maturity level and print the tally.
for x in input_format:
    # Fresh zeroed counters per value; a maturity label outside these four
    # keys raises KeyError in the increment below.
    maturity = dict.fromkeys(
        ['Examples', 'Academic Studies', 'Industrial Studies', 'Industrial Applications'],
        0,
    )

    for i in artifacts_df.index:
        if x not in artifacts_df['input_info'][i]:
            continue
        maturity[artifacts_df['maturity'][i]] += 1
    print(f'{x} : {maturity}')