# Visualizations for the Literature Review

This notebook collects the code for all visualizations of the literature review in one place.

## Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# show every row when a Series/DataFrame is displayed (no truncation)
pd.options.display.max_rows = None

# chart theme shared by all figures below;
# 'ggplot' and 'seaborn-v0_8-colorblind' are also good
style = 'seaborn-v0_8'
plt.style.use(style)

## Loading Data

In [None]:
def create_sorted_elem_list(col_name: str, na_value: object, lower: bool = True,
                            frame: 'pd.DataFrame' = None):
  '''Replaces a comma-separated text column by sorted lists of unique entries.

     Missing cells are first filled with na_value. Every cell is then split at
     commas, each entry is stripped of surrounding whitespace (and lower-cased
     if lower is True), duplicates are dropped and the entries are sorted.

     The column is modified in place on frame. If frame is None the notebook's
     global df is used, which is how the existing calls behave.

     The result is assigned back to the column instead of calling
     fillna(..., inplace=True) on the column selection: that chained in-place
     form is deprecated and does not update the frame under Copy-on-Write.'''
  target = df if frame is None else frame

  def split_cell(cell):
    # str() keeps cells that pandas parsed as numbers from crashing on .strip()
    entries = (entry.strip() for entry in str(cell).split(','))
    if lower:
      entries = (entry.lower() for entry in entries)
    return sorted(set(entries))

  target[col_name] = target[col_name].fillna(na_value).apply(split_cell)

In [None]:
# raw data as is
df_raw = pd.read_csv('MA_Questionnare_Answers.csv')

# indexes to remove
rem_ids: list = [91, 92, 93, 96, 118]  # Google Sheets row number - 2

# questionnaire question -> short column name for easy access
name_dict = {'What is the app called?': 'app_name',
             'What year was this paper published?': 'publication_year',
             'Is the software a generic tool or a domain specific app?': 'is_generic',
             'Which OSs does the application support?': 'supported_OSs',
             'Which devices does the application support?': 'supported_devices',
             'Is the code available somewhere publicly?': 'code_availability',
             'What data collection techniques are employed in-app?': 'data_collection_techniques',
             'What EMA sampling strategy is being employed?': 'sampling_strategies',
             'Which notifications does the software use?': 'supported_notification_types',
             'What domain is the intervention being done for?': 'application_domains',
             'What static intervention content is being delivered to the user?': 'static_intervention_contents',
             'What dynamic intervention content is being delivered to the user?': 'dynamic_intervention_contents',
             'What JITAI components are being used in the interventions?': 'jitai_components',
             'Which AI technologies are being used?': 'ai_technologies',
             'What is the title of this paper?': 'paper_title',
             'Did the intervention according to the authors have benefits that are statistically relevant?': 'is_intervention_benefitial',
             'How does the publication explain the code or system?': 'code_explanations',
             'Notes': 'notes'}

# one pipeline: drop the excluded rows and the timestamp column, renumber the
# remaining rows from 0 and apply the short column names
df = (df_raw.drop(index=rem_ids)
            .drop(columns='Zeitstempel')
            .reset_index(drop=True)
            .rename(columns=name_dict))

# converting boolean columns to actual booleans
# (everything that is not 'specific app' counts as generic)
df['is_generic'] = (df['is_generic'] != 'specific app').astype(bool)
df['is_intervention_benefitial'] = (df['is_intervention_benefitial'] == 'yes').astype(bool)

# converting year numbers to integers
df['publication_year'] = df['publication_year'].astype(int)

# converting all comma-separated entries of list based features to actual lists
# each entry: (column, value used for missing cells, lower-case the entries?)
lst_features = [dict(col_name=col, na_value=fill, lower=to_lower)
                for col, fill, to_lower in (
                    ('supported_OSs', 'unknown', True),
                    ('app_name', 'not named', False),
                    ('supported_devices', 'Smartphone', False),
                    ('code_availability', 'private', False),
                    ('data_collection_techniques', 'questionnaire', True),
                    ('sampling_strategies', 'event-contingent', True),
                    ('supported_notification_types', 'no notifications', True),
                    ('jitai_components', 'no components', True),
                    ('code_explanations', 'no explanations', True),
                    ('notes', 'no comments', False),
                    ('application_domains', 'no specific domain', True),
                    ('static_intervention_contents', 'no static content', True),
                    ('ai_technologies', 'no AI methods', True),
                    ('dynamic_intervention_contents', 'no dynamic content', True))]
for config in lst_features:
  create_sorted_elem_list(**config)

In [None]:
def get_supported_X(df: pd.DataFrame, required_X: list,
                    col_name: str, only: bool = False):
  '''Returns the index labels of all rows whose list-valued column col_name
     matches the required feature values. Matching is case-insensitive.

     df:         data frame whose column col_name holds one list per row.
     required_X: feature values to look for; a single string is treated as a
                 one-element list.
     col_name:   name of the list-based column to inspect.
     only:       if False (default) a row matches when it contains all required
                 values (it may contain more). If True a row matches only when
                 its values are exactly the required ones.

     The exact match compares sets rather than list lengths, so duplicate
     entries in required_X do not distort the result.'''
  if isinstance(required_X, str):
    # a bare string would otherwise be iterated character by character
    required_X = [required_X]
  # built once instead of once per row and required value
  required = {str(x).lower() for x in required_X}

  def matches(supported_X):
    supported = {str(x).lower() for x in supported_X}
    if only:
      return supported == required
    return required <= supported

  mask = df[col_name].apply(matches).astype(bool)
  return df.index[mask.to_numpy()].tolist()


def get_all_X(df: pd.DataFrame, col_name: str, with_usage: bool = False):
  '''Given a column name this returns all feature values present in that
     column. This should not be used for non list-based columns such as
     publication_year, is_generic, is_intervention_benefitial and paper_title.

     If with_usage is set to True this returns a plain dict with feature values
     as keys and the number of occurrences as values. Otherwise it returns a
     list of the distinct feature values.

     In both cases the values appear in the order in which they are first seen
     in the column, so the result is the same on every run (a set-based result
     would change order between kernel restarts).'''
  from collections import Counter
  from itertools import chain

  # flatten the per-row lists into one stream of values
  all_values = chain.from_iterable(df[col_name])
  if with_usage:
    # plain dict, so that a missing key still raises KeyError for callers
    return dict(Counter(all_values))
  return list(dict.fromkeys(all_values))
  

## Analysis

In [None]:
# getting data: number of publications per app name
app_usage: dict = get_all_X(df, 'app_name', True)
app_usage_mult: dict = {k: v for k, v in app_usage.items() if v > 1}

# preparing data
# all apps that are not named; .get() avoids a KeyError if every app is named
num_unnamed = app_usage.get('not named', 0)
# all apps that are named and used >1 times; 'not named' is skipped explicitly
# because it is only part of app_usage_mult when it occurs more than once
num_mult_use = sum(v for k, v in app_usage_mult.items() if k != 'not named')
# all remaining apps are only used once (so far)
num_single_use = sum(app_usage.values()) - num_unnamed - num_mult_use

# plotting the pie chart
xs = [f'not named ({num_unnamed})',
      f'multiple uses ({num_mult_use})',
      f'single use ({num_single_use})']
ys = [num_unnamed, num_mult_use, num_single_use]
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.pie(ys, labels=xs, autopct='%1.1f%%')
fig.tight_layout()

In [None]:
# plotting the apps used multiple times
# 'not named' is filtered out while sorting (most used first); building a new
# dict instead of deleting a key keeps this cell safe to re-run
app_usage_mult = dict(sorted(((k, v) for k, v in app_usage_mult.items()
                              if k != 'not named'),
                             key=lambda x: x[1], reverse=True))
fig, ax = plt.subplots()
xs = list(app_usage_mult.keys())
ys = list(app_usage_mult.values())
ax.bar(xs, ys)
ax.set_ylabel('Count')
# integer ticks from 0 up to the tallest bar instead of a fixed 0..4 range
ax.set_yticks(range(0, max(ys, default=0) + 1))
ax.set_xlabel('Application Name')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('EMA/EMI Applications used Multiple Times')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.05),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# plotting code explanations as a stacked bar chart:
# bottom = publications using only this method, top = publications that
# combine it with at least one other method
xs = ('no explanations', 'external links', 'diagrams', 'screenshots')
# values as stored in the code_explanations column, in the order of xs
explanation_values = ('no explanations', 'external links or papers',
                      'diagrams', 'screenshots')
ys_only = [len(get_supported_X(df, [value], 'code_explanations', only=True))
           for value in explanation_values]
ys_partly = [len(get_supported_X(df, [value], 'code_explanations', only=False)) - n_only
             for value, n_only in zip(explanation_values, ys_only)]
width = 0.5
fig, ax = plt.subplots()
r = range(len(xs))
p1 = ax.bar(r, ys_only, width=width, label='Only this method')
p2 = ax.bar(r, ys_partly, bottom=ys_only, width=width, label='At least one other method')
# segment counts in white inside the bars, the total in black on top
for pos, n_only, n_partly in zip(r, ys_only, ys_partly):
    ax.text(pos, n_only / 2, str(n_only), ha='center', va='center', color='white')
    ax.text(pos, n_only + n_partly / 2, str(n_partly), ha='center', va='center', color='white')
    ax.text(pos, n_only + n_partly, str(n_only + n_partly), ha='center', va='bottom')
ax.set_xlabel('Explanations')
ax.set_ylabel('Count')
ax.set_xticks(r)
ax.set_xticklabels(xs)
ax.set_title('Code Explanation Methods')
ax.legend()

In [None]:
# how often each combination of explanation methods occurs (for the text)
df['code_explanations'].value_counts()

In [None]:
# getting data: every row that is not marked 'private' counts as public
private_ids = get_supported_X(df, ['private'], 'code_availability', False)
n_private_code = len(private_ids)
n_public_code = len(df) - n_private_code

# plotting code availability
availability = {'private': n_private_code, 'public': n_public_code}
xs = [f'{name} ({count})' for name, count in availability.items()]
ys = list(availability.values())
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.pie(ys, labels=xs, autopct='%1.1f%%')
fig.tight_layout()

In [None]:
# how often each code availability entry occurs (for the text)
df['code_availability'].value_counts()

In [None]:
# collecting domain data: one bar per domain listed here
xs = ['general mental health',
      'mood tracking',
      'addiction cessation',
      'physical activity',
      'dietary habits',
      'stress levels',
      'cardio-vascular problems',
      'sexual health and urology',
      'social interactions',
      'cancer',
      'diabetes',
      'orthopedic applications',
      'pregnancy and parenthood']
# a publication with several domains is counted once in each of them
ys = [len(get_supported_X(df, [x], 'application_domains', False)) for x in xs]

# appending other data: these domains are summed into a single 'other' bar
y_other = 0
xs_other = ['fruit and vegetable consumption', 'empowering',
            'motivational deficit', 'tinnitus or hearing loss',
            'copd', 'cognitive‐affective therapy',
            'goal setting and motivation']
print('data from the other category')
for x in xs_other:
  n = len(get_supported_X(df, [x], 'application_domains', False))
  print('\t', x, n)
  y_other += n
xs.append('other')
ys.append(y_other)

# plotting the data
fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
ax.set_xlabel('Intervention Domain')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('Support for Techniques in Intervention Domains')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# printing of data for text: one "count label" line per bar
for label, count in zip(xs, ys):
  print(count, label)

In [None]:
# tests for text descriptions: which other domains occur together with
# 'physical activity'
physical_activity_ids = get_supported_X(df, ['physical activity'], 'application_domains')
df.loc[physical_activity_ids, 'application_domains'].tolist()

In [None]:
# textual data for is_generic: generic tools vs. domain specific apps
df['is_generic'].value_counts()

In [None]:
# data_collection_techniques
# sampling_strategies
# (only the sampling strategy combinations are shown here)
df['sampling_strategies'].value_counts()

In [None]:
# plotting data collection techniques as a stacked bar chart:
# bottom = publications using only this method, top = publications that
# combine it with at least one other method
xs = ('diary', 'interview', 'questionnaire', 'microphone',
      'camera', 'other sensing')
# rows relying on sensing alone; these were picked by hand and are not
# derived from the data
sensing_only_ids = [44, 56, 79, 109, 111, 113]  # had to do this manually
n_sensing_only = len(sensing_only_ids)

# "only" counts come from the data for every method except 'other sensing'
ys_only = [len(get_supported_X(df, [x], 'data_collection_techniques', only=True))
           for x in xs[:-1]]
ys_only.append(n_sensing_only)
ys_partly = [len(get_supported_X(df, [x], 'data_collection_techniques', only=False)) - n_only
             for x, n_only in zip(xs, ys_only)]

width = 0.5
fig, ax = plt.subplots()
r = range(len(xs))
p1 = ax.bar(r, ys_only, width=width, label='Only this method')
p2 = ax.bar(r, ys_partly, bottom=ys_only, width=width, label='At least one other method')
for pos, n_only, n_partly in zip(r, ys_only, ys_partly):
    # segment counts are only written when the bar has an "only" segment;
    # the total on top is always written
    if n_only > 0:
      ax.text(pos, n_only / 2, str(n_only), ha='center', va='center', color='white')
      if n_partly > 0:
        ax.text(pos, n_only + n_partly / 2, str(n_partly), ha='center', va='center', color='white')
    ax.text(pos, n_only + n_partly, str(n_only + n_partly), ha='center', va='bottom')
ax.set_xlabel('Method')
ax.set_ylabel('Count')
ax.set_title('Data Input Methods used across all Publications')
ax.set_xticks(r)
ax.set_xticklabels(xs)
ax.legend()

In [None]:
# checking the sensing only techniques for text: techniques of the
# hand-picked sensing-only rows
df.loc[sensing_only_ids, 'data_collection_techniques'].tolist()

In [None]:
# checking combinations for texts: notes of all publications that use both
# 'other sensing' and 'external hardware sensing'
combined_sensing_ids = get_supported_X(
    df, ['other sensing', 'external hardware sensing'],
    'data_collection_techniques', False)
df.loc[combined_sensing_ids, 'notes'].tolist()

In [None]:
# how often each combination of sampling strategies occurs
df['sampling_strategies'].value_counts()

In [None]:
# plotting sampling strats as a stacked bar chart:
# bottom = publications using only this strategy, top = publications that
# combine it with at least one other strategy
xs = ('Event-Contingent', 'Signal-Contingent', 'Continuous')
# values as stored in the sampling_strategies column, in the order of xs
strategy_values = ('event-contingent', 'signal-contingent', 'continuous')
ys_only = [len(get_supported_X(df, [value], 'sampling_strategies', only=True))
           for value in strategy_values]
ys_partly = [len(get_supported_X(df, [value], 'sampling_strategies', only=False)) - n_only
             for value, n_only in zip(strategy_values, ys_only)]

width = 0.5
fig, ax = plt.subplots()
r = range(len(xs))
p1 = ax.bar(r, ys_only, width=width, label='Only this strategy')
p2 = ax.bar(r, ys_partly, bottom=ys_only, width=width, label='At least one other strategy')
for pos, n_only, n_partly in zip(r, ys_only, ys_partly):
    # segment counts are only written when the bar has an "only" segment;
    # the total on top is always written
    if n_only > 0:
      ax.text(pos, n_only / 2, str(n_only), ha='center', va='center', color='white')
      if n_partly > 0:
        ax.text(pos, n_only + n_partly / 2, str(n_partly), ha='center', va='center', color='white')
    ax.text(pos, n_only + n_partly, str(n_only + n_partly), ha='center', va='bottom')
# axis labels aligned with the chart content and with the other charts
# (previously 'Method' and 'Counts', left over from the copied cell)
ax.set_xlabel('Strategy')
ax.set_ylabel('Count')
ax.set_title('Employed Sampling Strategies among all Publications')
ax.set_xticks(r)
ax.set_xticklabels(xs)
ax.legend()

In [None]:
# tests for texts: sampling strategy combinations and their frequencies
df['sampling_strategies'].value_counts()

In [None]:
# supported notifications data
# publications without any notifications (exact match on 'no notifications')
n_no_notifs = len(get_supported_X(df, ['no notifications'], 'supported_notification_types', True))
# publications using the respective type, possibly together with the other one
n_inapp_notifs = len(get_supported_X(df, ['in-app notifications'], 'supported_notification_types', False))
n_device_notifs = len(get_supported_X(df, ['device notifications'], 'supported_notification_types', False))
# publications using exactly both types (not plotted, kept for the text)
n_both_notifs = len(get_supported_X(df, ['device notifications', 'in-app notifications'], 'supported_notification_types', True))

# plotting: three pies side by side, each showing supported vs. not supported
# the figure is bound to `fig` (it was `fix`), so tight_layout below acts on
# this figure and not on the one left over from an earlier cell
fig, axes = plt.subplots(ncols=3, figsize=(18, 4))
m = df.shape[0]
panels = [('Notifications', m - n_no_notifs),
          ('In-App Notifications', n_inapp_notifs),
          ('Device Notifications', n_device_notifs)]
for panel_ax, (title, n_supported) in zip(axes, panels):
  k = m - n_supported  # publications without this kind of notification
  panel_ax.pie([n_supported, k],
               labels=[f'supported ({n_supported})', f'not supported ({k})'],
               autopct='%1.1f%%')
  panel_ax.set_title(title)
fig.tight_layout()

In [None]:
# for texts: notes of all publications that use device notifications
device_notif_ids = get_supported_X(df, ['device notifications'],
                                   'supported_notification_types', False)
df.loc[device_notif_ids, 'notes'].tolist()

In [None]:
# static intervention content: (bar label, value as stored in the column)
static_contents = [('Educational Material', 'educational material'),
                   ('Exercises', 'exercises'),
                   ('Reminders', 'reminders'),
                   ('Listening to Audio', 'listening to audio'),
                   ('Watching Video', 'watching video'),
                   ('Motivational Messages', 'motivational messages'),
                   ('Links or Apps', 'links and apps'),
                   ('Support Calls', 'support calls')]
xs = [label for label, _ in static_contents]
# a publication offering several content types is counted once in each
ys = [len(get_supported_X(df, [value], 'static_intervention_contents', False))
      for _, value in static_contents]
# plotting a bar chart
fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
ax.set_xlabel('Content Type')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('Use of Static Intervention Content Types in Literature')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# for text: static contents of all publications that offer 'links and apps'
links_and_apps_ids = get_supported_X(df, ['links and apps'],
                                     'static_intervention_contents', False)
df.loc[links_and_apps_ids, 'static_intervention_contents'].tolist()

In [None]:
# dynamic intervention content: (bar label, value as stored in the column)
dynamic_contents = [('Personalized Messages/Feedback', 'personalized messages/feedback'),
                    ('Context-Aware Prompts/Notifications', 'context-aware prompts/notifications'),
                    ('Alerting Others', 'alerting others'),
                    ('Coaching', 'coaching')]
xs = [label for label, _ in dynamic_contents]
# a publication offering several content types is counted once in each
ys = [len(get_supported_X(df, [value], 'dynamic_intervention_contents', False))
      for _, value in dynamic_contents]
# plotting a bar chart
fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
ax.set_xlabel('Content Type')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('Usage of Dynamic Intervention Content Types in Literature')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# raw bar heights for the text; ys is whatever the most recently executed
# plotting cell left behind (the dynamic intervention content counts when the
# notebook is run top to bottom)
print(ys)

In [None]:
# for text: contents of all publications that offer dynamic 'coaching'
# NOTE(review): rows are selected via dynamic_intervention_contents but the
# static_intervention_contents column is shown — confirm this is intended and
# not a leftover from the 'links and apps' cell
coaching_ids = get_supported_X(df, ['coaching'],
                               'dynamic_intervention_contents', False)
df.loc[coaching_ids, 'static_intervention_contents'].tolist()

In [None]:
# JITAI components: (bar label, exact set of components a publication must have)
jitai_groups = [('No JITAI Components', ['no components']),
                ('Triggers', ['triggers']),
                ('Adaptable Content', ['adaptible intervention content']),
                ('Both', ['adaptible intervention content', 'triggers'])]
xs = [label for label, _ in jitai_groups]
# exact matches (only=True), so every publication falls into at most one bar
ys = [len(get_supported_X(df, components, 'jitai_components', True))
      for _, components in jitai_groups]
# plotting a bar chart
fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
ax.set_xlabel('Component')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('JITAI Components supported across all Publications')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# for text: notes of all publications that use exactly both JITAI components
jitai_both_ids = get_supported_X(
    df, ['adaptible intervention content', 'triggers'],
    'jitai_components', True)
df.loc[jitai_both_ids, 'notes'].to_list()

In [None]:
# viz of this is rather pointless given the eligibility criteria,
# so only the raw counts are shown
df['supported_devices'].value_counts()

In [None]:
# supported OSs: (bar label, required values, exact match?)
# 'cross-platform' is matched non-exclusively, the other bars are exact matches
os_groups = [('Android Only', ['android'], True),
             ('iOS Only', ['ios'], True),
             ('Cross-Platform', ['cross-platform'], False),
             ('Unknown', ['unknown'], True)]
xs = [label for label, _, _ in os_groups]
ys = [len(get_supported_X(df, required, 'supported_OSs', exact))
      for _, required, exact in os_groups]
# plotting a bar chart
fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
ax.set_xlabel('OS')
# rotate the existing tick labels; set_xticklabels without fixed tick
# positions is discouraged by matplotlib and triggers a warning
plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
ax.set_title('Supported Operating Systems in EMA/EMI Literature')
# write each bar's count just above its top edge
for p in ax.patches:
  ax.annotate(str(int(p.get_height())),
              (p.get_x() + p.get_width() / 2.0, p.get_height() - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# for text: publications that support the OS at all (alone or with others)
# NOTE(review): the "+ 2" is a manual correction whose origin is not visible
# in this notebook — confirm it still matches the current data
for os_name in ('Android', 'iOS'):
  print(f'uses {os_name}',
        len(get_supported_X(df, [os_name], 'supported_OSs', False)) + 2)

In [None]:
# publications per year, including years without any publication
min_year = int(df.publication_year.min())
max_year = int(df.publication_year.max())
xs = list(range(min_year, max_year+1))
ys = [int((df.publication_year == year).sum()) for year in xs]
# smallest/largest yearly count; only needed if fixed y ticks are wanted
min_y = min(ys)
max_y = max(ys)

fig, ax = plt.subplots()
ax.bar(xs, ys)
ax.set_ylabel('Count')
# ax.set_yticks(range(min_y, max_y))
ax.set_xlabel('Year')
ax.set_title('Number of new EMA/EMI Publications over the Years')
# one tick per year
ax.set_xticks(range(min_year, max_year+1))
# write each bar's count just above its top edge
for bar in ax.patches:
  height = bar.get_height()
  center_x = bar.get_x() + bar.get_width() / 2.0
  ax.annotate(str(int(height)),
              (center_x, height - 0.25),
              ha='center', va='center',
              xytext=(0, 10), textcoords='offset points')
fig.tight_layout()

In [None]:
# devices for text: how often each combination of devices occurs
df['supported_devices'].value_counts()

In [None]:
# ai_technologies for text: how often each combination of AI methods occurs
df['ai_technologies'].value_counts()

# len(get_supported_X(df, ['predict potential triggers'], 'ai_technologies', False))