## Required imports

In [1]:
# For plots.
from tkinter.messagebox import NO
from pip import main

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import plotly.io as pio
pio.kaleido.scope.mathjax = None
pio.templates.default = "gridon"

WIDTH = 1000
HALF_WIDTH = WIDTH/2
QUARTER_WIDTH = WIDTH/4
HEIGHT = 1000
HALF_HEIGHT = HEIGHT/2
QUARTER_HEIGHT = HEIGHT/4

# For csv reading.
import pandas as pd

# For maths.
import numpy as np

# For fuzzy markers
import random

# Other constants
NOTASSIGNED = 'Not assigned'

## Functions to retrieve journal and conference rankings

In [2]:
def retrieve_sjr(row, dic_sjr_dfs):
    if row['Publication Source'] != 'Journal':
        return None
    title = row['Publication Title']
    sourceID = row['SourceID']
    year = row['Publication Year']
    if (year == 2022):
        return NOTASSIGNED

    print(f'Title: {title}\nSourceID: {sourceID}')
    df_sjr = dic_sjr_dfs[f'{year}']

    df_title_contains = df_sjr[df_sjr['Sourceid'] == sourceID]

    if df_title_contains.empty:
        print('Empty!')
        return NOTASSIGNED

    print(f'Article from year {year}')

    if pd.isna(df_title_contains.iloc[0]['SJR']) or df_title_contains.iloc[0]['SJR'] == '-':
        return NOTASSIGNED
    print(df_title_contains.iloc[0]['SJR'])


    return df_title_contains.iloc[0]['SJR']

def retrieve_quartile(row, dic_quartiles_dfs):
    if row['Publication Source'] != 'Journal':
        return None
    title = row['Publication Title']
    sourceID = row['SourceID']
    year = row['Publication Year']
    if (year == 2022):
        return NOTASSIGNED

    print(f'Title: {title}\nSourceID: {sourceID}')
    df_quartiles = dic_quartiles_dfs[f'{year}']

    df_title_contains = df_quartiles[df_quartiles['Sourceid'] == sourceID]

    if df_title_contains.empty:
        print('Empty!')
        return NOTASSIGNED

    print(f'Article from year {year}')
    if pd.isna(df_title_contains.iloc[0]['SJR Best Quartile']) or df_title_contains.iloc[0]['SJR Best Quartile'] == '-':
        return NOTASSIGNED
    print(df_title_contains.iloc[0]['SJR Best Quartile'])


    return df_title_contains.iloc[0]['SJR Best Quartile']

def retrieve_conference(row, dic_conferences_dfs):
    if row['Publication Source'] != 'Conference':
        return None

    title = row['Publication Title']
    conferenceID = row['ConferenceID']
    year = row['Publication Year']
    if (year == 2022):
        return NOTASSIGNED

    print(f'Title: {title}\ConferenceID: {conferenceID}')
    df_conferences = dic_conferences_dfs[f'{year}']

    df_title_contains = df_conferences[df_conferences[0] == conferenceID]

    if df_title_contains.empty:
        print('Empty!')
        return NOTASSIGNED

    print(f'Article from year {year}')
    if pd.isna(df_title_contains.iloc[0][4]) or df_title_contains.iloc[0][4] == '-':
        return NOTASSIGNED
    print(df_title_contains.iloc[0][4])

    return df_title_contains.iloc[0][4]

# Retrieve jornal rankings from scimagojr databases

In [3]:
df_quality = pd.read_csv('QualityChecklists.csv', sep=',', decimal='.', converters = {'ISIC Section': str, 'ISIC Division': str, 'ISIC Group': str, 'ISIC Class': str})

dic_scimagojr_dfs = {}
dic_scimagojr_dfs['2021'] = pd.read_csv(
    'scimagojr 2021.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2020'] = pd.read_csv(
    'scimagojr 2020.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2019'] = pd.read_csv(
    'scimagojr 2019.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2018'] = pd.read_csv(
    'scimagojr 2018.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2017'] = pd.read_csv(
    'scimagojr 2017.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2016'] = pd.read_csv(
    'scimagojr 2016.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2015'] = pd.read_csv(
    'scimagojr 2015.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2014'] = pd.read_csv(
    'scimagojr 2014.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2013'] = pd.read_csv(
    'scimagojr 2013.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2012'] = pd.read_csv(
    'scimagojr 2012.csv', sep=';', decimal=',', quotechar="\"")
dic_scimagojr_dfs['2011'] = pd.read_csv(
    'scimagojr 2011.csv', sep=';', decimal=',', quotechar="\"")

df_quality['SJCR Quartile'] = df_quality.apply(
    lambda row: retrieve_quartile(row, dic_scimagojr_dfs), axis=1)

df_quality['SJR Indicator'] = df_quality.apply(
    lambda row: retrieve_sjr(row, dic_scimagojr_dfs), axis=1)

Title: IEEE Transactions on Quantum Engineering
SourceID: nan
Empty!
Title: Neural Computing and Applications
SourceID: 24800.0
Article from year 2019
Q1
Title: Energies
SourceID: 62932.0
Article from year 2016
Q1
Title: International Journal of Intelligent Systems and Applications
SourceID: 21101021990.0
Empty!
Title: International Journal of Systems Assurance Engineering and Management
SourceID: 19700177002.0
Article from year 2020
Q3
Title: Applied Thermal Engineering
SourceID: 13688.0
Article from year 2020
Q1
Title: International Transactions on Electrical Energy Systems
SourceID: 21100241220.0
Article from year 2021
Q2
Title: Journal of Cloud Computing
SourceID: 21100383744.0
Article from year 2020
Q2
Title: Energy Conversion and Management
SourceID: 29372.0
Article from year 2020
Q1
Title: IEEE Transactions on Systems, Man, and Cybernetics: Systems
SourceID: 21100262320.0
Article from year 2013
Q1
Title: Frontiers in ICT
SourceID: 21100893766.0
Article from year 2017
Title: Fron

## Retrieve CORE rankings from CORE databases

In [4]:
dic_CORE_dfs = {}
dic_CORE_dfs['2021'] = pd.read_csv(
    'CORE 2021.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2020'] = pd.read_csv(
    'CORE 2020.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2019'] = pd.read_csv(
    'CORE 2020.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2018'] = pd.read_csv(
    'CORE 2018.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2017'] = pd.read_csv(
    'CORE 2017.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2016'] = pd.read_csv(
    'CORE 2017.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2015'] = pd.read_csv(
    'CORE 2017.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2014'] = pd.read_csv(
    'CORE 2014.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2013'] = pd.read_csv(
    'CORE 2013.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2012'] = pd.read_csv(
    'CORE 2013.csv', sep=',', quotechar="\"", header=None)
dic_CORE_dfs['2011'] = pd.read_csv(
    'CORE 2013.csv', sep=',', quotechar="\"", header=None)

df_quality['CORE Ranking'] = df_quality.apply(
    lambda row: retrieve_conference(row, dic_CORE_dfs), axis=1)

Title: Proceedings of the Genetic and Evolutionary Computation Conference Companion\ConferenceID: 556.0
Article from year 2018
A
Title: 15th ACM Conference on Recommender Systems, RecSys 2021, September 27, 2021  -  October 1, 2021\ConferenceID: 28.0
Article from year 2021
A
Title: 2020 International Conference for Emerging Technology, INCET 2020, June 5, 2020  -  June 7, 2020\ConferenceID: nan
Empty!
Title: nan\ConferenceID: nan
Empty!
Title: nan\ConferenceID: nan
Empty!
Title: nan\ConferenceID: nan
Empty!
Title: International Conference on Renewable Power Generation (RPG 2015)\ConferenceID: nan
Empty!
Title: 2019 IEEE Congress on Evolutionary Computation (CEC)\ConferenceID: 2061.0
Article from year 2019
B
Title: IEEE-International Conference On Advances In Engineering, Science And Management (ICAESM -2012)\ConferenceID: nan
Empty!
Title: 2020 Chinese Automation Congress (CAC)\ConferenceID: nan
Empty!
Title: 2019 2nd World Conference on Mechanical Engineering and Intelligent Manufactu

In [5]:
df = df_quality.groupby(by=['Publication Source']).size().reset_index(name='# of studies')

fig = px.bar(data_frame = df, x='Publication Source', y='# of studies', text_auto=True)
fig.write_image('searchandstudyselectionvalidation/validation_journals_and_conferences.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

## Validate study selection with distributions of studies according to their rankings

In [6]:
df_journals = df_quality[df_quality['Publication Source'] == 'Journal'] 
print(df_journals.shape)

RANGE_X = [0,3.2]

fig = px.histogram(data_frame = df_journals, x='SJR Indicator', text_auto=True, nbins=16, range_x=RANGE_X)
fig.add_vline(x=1, line_dash="dash", line_color="red")
fig.update_layout(bargap=0.2)
fig.write_image('searchandstudyselectionvalidation/journals_histogram_sjr.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

(111, 95)


In [7]:
df_journals = df_quality[df_quality['Publication Source'] == 'Journal'] 
print(df_journals.shape)

fig = px.box(data_frame = df_journals, x='SJR Indicator', points='all',range_x=RANGE_X)
fig.add_vline(x=1, line_dash="dash", line_color="red")
fig.write_image('searchandstudyselectionvalidation/journals_boxplot_sjr.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

(111, 95)


In [8]:
df_journals = df_quality[df_quality['Publication Source'] == 'Journal'] 

df = df_journals.groupby(by=['SJCR Quartile'], dropna=False).size().reset_index(name='# of studies')

print(df_journals.shape)
print(df)

fig = px.bar(data_frame = df, x='SJCR Quartile', y='# of studies', text_auto=True, width=WIDTH, height=QUARTER_HEIGHT)
fig.write_image('searchandstudyselectionvalidation/journals_bar_sjcr_quartile.pdf')
fig.show()

(111, 95)
  SJCR Quartile  # of studies
0  Not assigned            17
1            Q1            60
2            Q2            24
3            Q3             8
4            Q4             2


In [9]:
df_conferences = df_quality[df_quality['Publication Source'] == 'Conference'] 
print(df_conferences.shape)

df = df_conferences.groupby(by=['CORE Ranking']).size().reset_index(name='# of studies')

fig = px.bar(data_frame = df, x='CORE Ranking', y='# of studies', text_auto=True)
fig.write_image('searchandstudyselectionvalidation/conference_bar_core.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

(36, 95)


## Quality Assessment

In [10]:
fig = px.box(data_frame = df_quality, x='SSTOTAL', points='all',range_x=[0,1])
fig.write_image('qualityassessment/sstotal_boxplot.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

mean = np.mean(df_quality['SSTOTAL'])
print(f'mean: {mean}')

std = np.std(df_quality['SSTOTAL'])
print(f'std: {std}')

variance = np.var(df_quality['SSTOTAL'])
print(f'variance: {variance}')

min = np.min(df_quality['SSTOTAL'])
print(f'min: {min}')

max = np.max(df_quality['SSTOTAL'])
print(f'max: {max}')

median = np.median(df_quality['SSTOTAL'])
print(f'median: {median}')


mean: 0.6130765953355707
std: 0.1481334441050715
variance: 0.021943517262430344
min: 0.142857143
max: 0.875
median: 0.625


In [11]:
fig = px.box(data_frame = df_quality, x='SRTOTAL', points='all',range_x=[0,1])
fig.write_image('qualityassessment/srtotal_boxplot.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

mean = np.mean(df_quality['SRTOTAL'])
print(f'mean: {mean}')

std = np.std(df_quality['SRTOTAL'])
print(f'std: {std}')

variance = np.var(df_quality['SRTOTAL'])
print(f'variance: {variance}')

min = np.min(df_quality['SRTOTAL'])
print(f'min: {min}')

max = np.max(df_quality['SRTOTAL'])
print(f'max: {max}')

median = np.median(df_quality['SRTOTAL'])
print(f'median: {median}')

mean: 0.6661971138724835
std: 0.10162169300183199
variance: 0.010326968488558588
min: 0.230769231
max: 0.866666667
median: 0.666666667


In [12]:
df_fuzzed = df_quality.copy()

df_fuzzed['SSTOTAL'] = df_fuzzed.apply(
    lambda row: row['SSTOTAL'] + random.uniform(-0.02, 0.02), axis=1)

df_fuzzed['SRTOTAL'] = df_fuzzed.apply(
    lambda row: row['SRTOTAL'] + random.uniform(-0.02, 0.02), axis=1)

fig = px.scatter(df_fuzzed, x='SSTOTAL', y='SRTOTAL')

fig.update_xaxes(range=[0, 1], title="Quality of Study")
fig.update_yaxes(range=[0, 1], title="Quality of Reporting")

fig.add_shape(type='line',
              x0=0,
              y0=0,
              x1=1,
              y1=1,
              line=dict(color='red',),
              xref='x',
              yref='y'
              )

fig.write_image('qualityassessment/sstotal_vs_srtotal.pdf', width=WIDTH, height=HEIGHT)
fig.show()


fig = px.scatter(x=df_quality['SSTOTAL'], y=df_quality['SJR Indicator'])
fig.update_xaxes(range=[0, 1], title="Quality of Study")
fig.update_yaxes(title="SJR Indicator")
fig.write_image('qualityassessment/sstotal_vs_sjr.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

fig = px.scatter(x=df_quality['SRTOTAL'], y=df_quality['SJR Indicator'])
fig.update_xaxes(range=[0, 1], title="Quality of Reporting")
fig.update_yaxes(title="SJR Indicator")
fig.write_image('qualityassessment/srtotal_vs_sjr.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

In [13]:
correlate = np.correlate(df_quality['SSTOTAL'], df_quality['SRTOTAL'])
print(f'correlate: {correlate}')

correlate: [61.58031064]


In [14]:
from scipy.stats import spearmanr, pearsonr

cor, pval = spearmanr(a=df_quality['SSTOTAL'], b=df_quality['SRTOTAL'], alternative='greater')

print(f'cor: {cor}\npval: {pval}')

cor: 0.3114409521572176
pval: 5.5269888923878485e-05


In [15]:
def not_assigned_to_int(row):
    if type(row['SJR Indicator']) == str:
        return -1
    return row['SJR Indicator']

df_journals['SJR Indicator'] = df_journals.apply (lambda row: not_assigned_to_int(row), axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
def core_ranking_to_int(row):
    if row['CORE Ranking'] == 'A':
        return 1
    if row['CORE Ranking'] == 'B':
        return 2
    if row['CORE Ranking'] == 'C':
        return 3
    return 4

df_conferences['int_core_ranking'] = df_conferences.apply (lambda row: core_ranking_to_int(row), axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [17]:
type(df_journals['SJR Indicator'].iloc[0])
df_journals['SJR Indicator'].iloc[0]

-1.0

In [18]:
from scipy.stats import spearmanr

cor, pval = spearmanr(a=df_journals['SSTOTAL'], b=df_journals['SJR Indicator'], alternative='greater')

print(f'cor: {cor}\npval: {pval}')

cor: 0.1115339647775459
pval: 0.12192419650316626


In [19]:
from scipy.stats import spearmanr

cor, pval = spearmanr(a=df_journals['SRTOTAL'], b=df_journals['SJR Indicator'], alternative='greater')

print(f'cor: {cor}\npval: {pval}')

cor: 0.21574777139574017
pval: 0.011478013561074661


In [20]:
from scipy.stats import spearmanr

cor, pval = spearmanr(a=df_conferences['SSTOTAL'], b=df_conferences['int_core_ranking'], alternative='less')

print(f'cor: {cor}\npval: {pval}')

cor: -0.3770284813643129
pval: 0.011703861911782952


In [21]:
from scipy.stats import spearmanr

cor, pval = spearmanr(a=df_conferences['SRTOTAL'], b=df_conferences['int_core_ranking'], alternative='less')

print(f'cor: {cor}\npval: {pval}')

cor: -0.4044414647345252
pval: 0.007211456781739799


In [22]:
fig = px.box(x=df_conferences['SSTOTAL'], y=df_conferences['CORE Ranking'], points='all', category_orders={'Core Ranking': ['A', 'B', 'C', 'National: Poland', 'Not assigned']})
fig.update_xaxes(range=[0, 1], title="Quality of Study")
fig.update_yaxes(title="CORE Ranking", categoryorder='category descending')
fig.update_layout(margin={"l": 120})
fig.write_image('qualityassessment/sstotal_vs_core.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

fig = px.box(x=df_conferences['SRTOTAL'], y=df_conferences['CORE Ranking'], points='all', category_orders={'Core Ranking': ['A', 'B', 'C', 'National: Poland', 'Not assigned']})
fig.update_xaxes(range=[0, 1], title="Quality of Reporting")
fig.update_yaxes(title="CORE Ranking", categoryorder='category descending')
fig.update_layout(margin={"l": 120})
fig.write_image('qualityassessment/srtotal_vs_core.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

In [23]:
fig = px.box(x=df_journals['SSTOTAL'], y=df_journals['SJCR Quartile'], points='all', category_orders={'SJCR Quartile': ['Q1', 'Q2', 'Q3', 'Q4', 'Not assigned']})
fig.update_xaxes(range=[0, 1], title="Quality of Study")
fig.update_yaxes(title="SJR Best Quartile", categoryorder='category descending')
fig.update_layout(margin={"l": 120})
fig.write_image('qualityassessment/sstotal_vs_sjrquartile.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

fig = px.box(x=df_journals['SRTOTAL'], y=df_journals['SJCR Quartile'], points='all', category_orders={'SJCR Quartile': ['Q1', 'Q2', 'Q3', 'Q4', 'Not assigned']})
fig.update_xaxes(range=[0, 1], title="Quality of Reporting")
fig.update_yaxes(title="SJR Best Quartile", categoryorder='category descending')
fig.update_layout(margin={"l": 120})
fig.write_image('qualityassessment/srtotal_vs_sjrquartile.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

In [24]:
print(df_quality['Publication Source'])

0        Workshop
1      Conference
2         Journal
3      Conference
4      Conference
          ...    
144       Journal
145       Journal
146    Conference
147       Journal
148       Journal
Name: Publication Source, Length: 149, dtype: object


## Individual Feature Analysis

### Boxplots of SS00 and SR00

In [25]:
fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Box(x=df_quality['SRTOTAL'], name='SR00'), row=1, col=1)
fig.add_trace(go.Box(x=df_quality['SSTOTAL'], name='SS00'), row=1, col=1)
fig.update_xaxes(range=[0, 1], title='Score')
fig.update_yaxes(title="Feature ID")
fig.update_layout(showlegend=False)
fig.write_image('analysis/individual_ss00_sr00.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

### Stacked charts SS01 to SS10

In [26]:
def get_feature_counts(dataframe: pd.DataFrame, feature_name: str, feature_id: str) -> pd.DataFrame:
    counts = dataframe.value_counts(subset=[feature_name], dropna=False)
    row_yes = pd.DataFrame({'Feature ID': feature_id, 'Response': 'Yes', 'Count': counts.get('YES', 0)}, pd.Index([feature_id], name='Feature ID'))
    row_no = pd.DataFrame({'Feature ID': feature_id, 'Response': 'No', 'Count': counts.get('NO', 0)}, pd.Index([feature_id], name='Feature ID'))
    row_nan = pd.DataFrame({'Feature ID': feature_id, 'Response': 'N/A', 'Count': counts.get(np.nan, 0)}, pd.Index([feature_id], name='Feature ID'))
    return pd.concat([row_yes, row_no, row_nan])


In [27]:
df_ss_counts = pd.DataFrame([], columns=['Feature ID', 'Response', 'Count'])
df_ss_counts.set_index(['Feature ID'], inplace=True)

df_ss_counts = pd.concat([df_ss_counts,
    get_feature_counts(df_quality, 'SS1', 'SS01'),
    get_feature_counts(df_quality, 'SS2', 'SS02'),
    get_feature_counts(df_quality, 'SS3', 'SS03'),
    get_feature_counts(df_quality, 'SS4', 'SS04'),
    get_feature_counts(df_quality, 'SS5', 'SS05'),
    get_feature_counts(df_quality, 'SS6', 'SS06'),
    get_feature_counts(df_quality, 'SS7', 'SS07'),
    get_feature_counts(df_quality, 'SS8', 'SS08'),
    get_feature_counts(df_quality, 'SS9', 'SS09'),
    get_feature_counts(df_quality, 'SS10', 'SS10')])

df_ss_counts

Unnamed: 0_level_0,Response,Count,Feature ID
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SS01,Yes,132,SS01
SS01,No,17,SS01
SS01,,0,SS01
SS02,Yes,117,SS02
SS02,No,15,SS02
SS02,,17,SS02
SS03,Yes,70,SS03
SS03,No,79,SS03
SS03,,0,SS03
SS04,Yes,76,SS04


In [28]:
fig = px.bar(df_ss_counts, x='Feature ID', y='Count', color='Response', text='Count')
fig.write_image('analysis/individual_ss_counts.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

In [29]:
df_sr_counts = pd.DataFrame([], columns=['Feature ID', 'Response', 'Count'])
df_sr_counts.set_index(['Feature ID'], inplace=True)

df_sr_counts = pd.concat([df_sr_counts,
    get_feature_counts(df_quality, 'SR1', 'SR01'),
    get_feature_counts(df_quality, 'SR2', 'SR02'),
    get_feature_counts(df_quality, 'SR3', 'SR03'),
    get_feature_counts(df_quality, 'SR4', 'SR04'),
    get_feature_counts(df_quality, 'SR5', 'SR05'),
    get_feature_counts(df_quality, 'SR6', 'SR06'),
    get_feature_counts(df_quality, 'SR7', 'SR07'),
    get_feature_counts(df_quality, 'SR8', 'SR08'),
    get_feature_counts(df_quality, 'SR9', 'SR09'),
    get_feature_counts(df_quality, 'SR10', 'SR10'),
    get_feature_counts(df_quality, 'SR11', 'SR11'),
    get_feature_counts(df_quality, 'SR12', 'SR12'),
    get_feature_counts(df_quality, 'SR13', 'SR13'),
    get_feature_counts(df_quality, 'SR14', 'SR14'),
    get_feature_counts(df_quality, 'SR15', 'SR15'),
    get_feature_counts(df_quality, 'SR16', 'SR16')])

df_sr_counts

Unnamed: 0_level_0,Response,Count,Feature ID
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SR01,Yes,93,SR01
SR01,No,56,SR01
SR01,,0,SR01
SR02,Yes,147,SR02
SR02,No,2,SR02
SR02,,0,SR02
SR03,Yes,131,SR03
SR03,No,18,SR03
SR03,,0,SR03
SR04,Yes,28,SR04


In [30]:
fig = px.bar(df_sr_counts, x='Feature ID', y='Count', color='Response', text='Count')
fig.write_image('analysis/individual_sr_counts.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

In [31]:
counts = df_quality.value_counts(subset=['Type of Approach'], dropna=False)
row_yes = pd.DataFrame({'Response': 'Classical', 'Count': counts.get('Classical', 0)}, pd.Index(['D010'], name='Feature ID'))
row_no = pd.DataFrame({'Response': 'Quantum', 'Count': counts.get('Quantum', 0)}, pd.Index(['D010'], name='Feature ID'))
row_nan = pd.DataFrame({'Response': 'Hybrid', 'Count': counts.get('Hybrid', 0)}, pd.Index(['D010'], name='Feature ID'))
df_d010_counts = pd.concat([row_yes, row_no, row_nan])
df_d010_counts

Unnamed: 0_level_0,Response,Count
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
D010,Classical,120
D010,Quantum,0
D010,Hybrid,29


In [32]:
fig = px.bar(df_d010_counts, x='Count', color='Response', text='Count')
fig.write_image('analysis/individual_D010.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

In [33]:
counts = df_quality.value_counts(subset=['Does the study use a simulator or a real quantum computer?'], dropna=False)
row_yes = pd.DataFrame({'Response': 'Real Quantum Computer', 'Count': counts.get('Real Quantum Computer', 0)}, pd.Index(['D030'], name='Feature ID'))
row_no = pd.DataFrame({'Response': 'Simulator', 'Count': counts.get('Simulator', 0)}, pd.Index(['D030'], name='Feature ID'))
row_nan = pd.DataFrame({'Response': 'N/A', 'Count': counts.get(np.nan, 0)}, pd.Index(['D030'], name='Feature ID'))
df_d030_counts = pd.concat([row_yes, row_no, row_nan])
df_d030_counts

Unnamed: 0_level_0,Response,Count
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
D030,Real Quantum Computer,24
D030,Simulator,1
D030,,124


In [34]:
fig = px.bar(df_d030_counts, x='Count', color='Response', text='Count')
fig.write_image('analysis/individual_D030.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

In [35]:
counts = df_quality.value_counts(subset=['Type of Quantum Computer'], dropna=False)
row_yes = pd.DataFrame({'Response': 'Gate-based', 'Count': counts.get('Gate-based', 0)}, pd.Index(['D040'], name='Feature ID'))
row_no = pd.DataFrame({'Response': 'Annealer', 'Count': counts.get('Annealer', 0)}, pd.Index(['D040'], name='Feature ID'))
row_nan = pd.DataFrame({'Response': 'N/A', 'Count': counts.get(np.nan, 0)}, pd.Index(['D040'], name='Feature ID'))
df_d040_counts = pd.concat([row_yes, row_no, row_nan])
df_d040_counts

Unnamed: 0_level_0,Response,Count
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
D040,Gate-based,2
D040,Annealer,23
D040,,124


In [36]:
fig = px.bar(df_d040_counts, x='Count', color='Response', text='Count')
fig.write_image('analysis/individual_D040.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

In [37]:
counts = df_quality.value_counts(subset=['Publication Source'], dropna=False)
row_yes = pd.DataFrame({'Response': 'Journal', 'Count': counts.get('Journal', 0)}, pd.Index(['D070'], name='Feature ID'))
row_no = pd.DataFrame({'Response': 'Conference', 'Count': counts.get('Conference', 0)}, pd.Index(['D070'], name='Feature ID'))
row_nan = pd.DataFrame({'Response': 'Workshop', 'Count': counts.get('Workshop', 0)}, pd.Index(['D070'], name='Feature ID'))
df_d070_counts = pd.concat([row_yes, row_no, row_nan])
df_d070_counts

Unnamed: 0_level_0,Response,Count
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
D070,Journal,111
D070,Conference,36
D070,Workshop,2


In [38]:
fig = px.bar(df_d070_counts, x='Count', color='Response', text='Count')
fig.write_image('analysis/individual_D070.pdf', width=WIDTH, height=QUARTER_HEIGHT)
fig.show()

In [39]:
df_d020_counts = df_quality.value_counts(subset=['Algorithm Name'], dropna=False).to_frame()
df_d020_counts.reset_index(inplace=True)
df_d020_counts

Unnamed: 0,Algorithm Name,0
0,Quantum Particle Swarm Optimization,64
1,Quadratic Unconstrained Binary Optimization,25
2,Quantum Genetic Algorithm,17
3,Quantum-inspired Evolutionary Algorithm,10
4,Quantum Bat Algorithm,5
5,Quantum Generative Training,3
6,Quantum-inspired Shuffled Frog Leaping Algorithm,2
7,Quantum Ant Colony Algorithm,2
8,Quantum-behaved Pigeon-inspired Optimization,2
9,Quantum Circuit,2


In [40]:
fig = px.bar(df_d020_counts, y='Algorithm Name', x=0, text=0)
fig.update_layout(margin={"l": 440})
fig.update_xaxes(title='#')
fig.write_image('analysis/individual_D020.pdf', width=WIDTH, height=HEIGHT)
fig.show()

In [41]:
df_d060_counts = df_quality.value_counts(subset=['Publication Year'], dropna=False).to_frame()
df_d060_counts.reset_index(inplace=True)
df_d060_counts['Publication Year'] = df_d060_counts['Publication Year'].astype(str)
df_d060_counts.sort_values(by=['Publication Year'], inplace=True)
df_d060_counts

Unnamed: 0,Publication Year,0
10,2011,3
7,2012,6
11,2013,3
6,2014,8
8,2015,6
3,2016,11
4,2017,11
5,2018,9
2,2019,20
0,2020,38


In [42]:
fig = px.bar(df_d060_counts, x='Publication Year', y=0, text=0)
fig.update_xaxes(dtick=1)
fig.update_yaxes(title='#')
fig.write_image('analysis/individual_D060.pdf', width=WIDTH, height=HALF_HEIGHT)
fig.show()

### ISIC Section

In [43]:
df_isic_dict = pd.read_csv('ISIC_Rev_4_english_structure.csv', sep=',', converters={
                           'Code': str, 'Description': str})
df_isic_dict.set_index('Code', inplace=True)

def isic_code_to_description(code, df_isic_dict):
    return df_isic_dict['Description'][code]

df_quality['ISIC Section Name'] = df_quality.apply(lambda row: isic_code_to_description(
    code=row['ISIC Section'], df_isic_dict=df_isic_dict), axis=1)
df_quality['ISIC Division Name'] = df_quality.apply(lambda row: isic_code_to_description(
    code=row['ISIC Division'], df_isic_dict=df_isic_dict), axis=1)
df_quality['ISIC Group Name'] = df_quality.apply(lambda row: isic_code_to_description(
    code=row['ISIC Group'], df_isic_dict=df_isic_dict), axis=1)
df_quality['ISIC Class Name'] = df_quality.apply(lambda row: isic_code_to_description(
    code=row['ISIC Class'], df_isic_dict=df_isic_dict), axis=1)


In [60]:
fig = px.treemap(df_quality, path=[px.Constant("All"), 'ISIC Section Name', 'ISIC Division Name', 'ISIC Group Name', 'ISIC Class Name'])
fig.write_image('analysis/individual_D050_to_D053.pdf', width=WIDTH, height=HEIGHT)
fig.show()

In [61]:
fig = px.treemap(df_quality, path=[px.Constant("All"), 'ISIC Section Name'])
fig.write_image('analysis/individual_D050.pdf', width=WIDTH, height=HEIGHT)
fig.show()