In [1]:
import pandas as pd
import ast
df = pd.read_csv('data/pulled_ctg_studies.csv')
df.head()

Unnamed: 0,nct_id,org_name,org_class,brief_title,overall_status,why_stopped,collaborators,has_dmc,fda_regulated_drug,fda_regulated_device,phase,conditions,keywords
0,NCT05838625,"Click Therapeutics, Inc.",INDUSTRY,Study of Two Digital Therapeutics for the Trea...,ACTIVE_NOT_RECRUITING,,"[{'name': 'Boehringer Ingelheim', 'class': 'IN...",True,False,True,PHASE3,['Schizophrenia'],"['Prescription digital therapeutic (PDT)', 'So..."
1,NCT06791122,Boehringer Ingelheim,INDUSTRY,A Prospective Cohort Study to Assess Clinical ...,RECRUITING,,,False,False,True,,['Schizophrenia'],
2,NCT06230445,Sun Yat-sen University,OTHER,Effects of Digital Therapeutic in Whole Proces...,RECRUITING,,,False,False,False,PHASE3,['Application of Digital Therapeutic in Lung C...,"['Digital therapeutic', 'Whole process managem..."
3,NCT06004388,"Click Therapeutics, Inc.",INDUSTRY,Study of Two Digital Therapeutics for the Prev...,COMPLETED,,,True,False,True,PHASE3,"['Migraine', 'Episodic Migraine', 'Headache', ...","['Prescription digital therapeutic (PDT)', 'So..."
4,NCT05375500,Karuna Labs Inc.,INDUSTRY,Embodiment in Virtual Reality as a Telemedicin...,COMPLETED,,,False,False,True,,"['Chronic Pain', 'Low Back Pain', 'Shoulder Pa...",


### General Descriptive Statistics


In [72]:
# Organizations that have their digital therapeutics listed in ClinicalTrials.gov
df['org_name'].value_counts()

org_name
Click Therapeutics, Inc.                          10
Woebot Health                                      6
Better Therapeutics                                4
NightWare                                          4
Limbix Health, Inc.                                3
                                                  ..
University of Utah                                 1
Norwegian University of Science and Technology     1
Northwestern University                            1
Tesu Saglik Teknolojileri A.S.                     1
Fundació Sant Joan de Déu                          1
Name: count, Length: 87, dtype: int64

In [None]:
# Organizational classification 
df['org_class'].value_counts()

org_class
INDUSTRY    75
OTHER       53
Name: count, dtype: int64

In [68]:
# Proportion of status of digital therapeutics
df['overall_status'].value_counts()

overall_status
COMPLETED                  64
ACTIVE_NOT_RECRUITING      16
RECRUITING                 15
NOT_YET_RECRUITING         10
TERMINATED                 10
UNKNOWN                     9
WITHDRAWN                   3
ENROLLING_BY_INVITATION     1
Name: count, dtype: int64

In [124]:
# Reasons for termination of clinical trials
termination_reason = df['why_stopped'].value_counts()
termination_reason = termination_reason.reset_index()
termination_reason.columns = ['Reason', 'Count']

termination_reason

Unnamed: 0,Reason,Count
0,recruitment challenges,1
1,COVID and the Study was withdrawn (limited rec...,1
2,Terminated due to difficulty enrolling patient...,1
3,Enrollment too low,1
4,Internal company decision,1
5,Change of company's priority; learning from a ...,1
6,loss of funding,1
7,Low Enrollment,1
8,Data Safety Monitoring Board (DSMB) recommenda...,1
9,Sponsor/Investigator Decision,1


In [110]:
# Interesting to see the collaborators. Potentially useful.
collabs = dict()
for i in df['collaborators']:
    if pd.isna(i):
        continue   
    else:
        i = ast.literal_eval(i)
        for j in i:
            name = j['name']
            if name in collabs:
                collabs[name] += 1
            else:
                collabs[name] = 1
             
collabs = pd.DataFrame(list(collabs.items()), columns=['Collaborators', 'Count']).sort_values(by='Count', ascending=False)
collabs

Unnamed: 0,Collaborators,Count
13,National Institute on Drug Abuse (NIDA),11
2,National Institute of Mental Health (NIMH),5
0,Boehringer Ingelheim,4
70,Minneapolis Veterans Affairs Medical Center,2
58,RationalPsych,2
...,...,...
31,"Akili Interactive Labs, Inc.",1
30,Brown University,1
29,Iqvia Pty Ltd,1
28,LifeScan,1


In [140]:
# Number of studies with DMC 
# DMC = Data Monitoring Committee and is a group important in clinical research in psychiatry because they provide an added layer of protection for vulnerable populations 
# https://pmc.ncbi.nlm.nih.gov/articles/PMC4118004/
dmc = df['has_dmc'].value_counts()
dmc = dmc.reset_index()
dmc.columns = ['DMC', 'Count']
dmc['DMC'] = dmc['DMC'].replace({True: 'Yes', False: 'No'})
dmc

Unnamed: 0,DMC,Count
0,No,79
1,Yes,36


In [141]:
# Number of FDA Regulated Device 
fda_device = df['fda_regulated_device'].value_counts() 
fda_device = fda_device.reset_index()
fda_device.columns = ['FDA Regulated Device', 'Count']
fda_device['FDA Regulated Device'] = fda_device['FDA Regulated Device'].replace({True: 'Yes', False: 'No'})
fda_device



Unnamed: 0,FDA Regulated Device,Count
0,No,69
1,Yes,58


In [143]:
phases = df['phase'].value_counts(dropna=False)
phases = phases.reset_index()
phases.columns = ['Phase', 'Count']
phases['Phase'] = phases['Phase'].replace({None: 'N/A'})

phases

Unnamed: 0,Phase,Count
0,,114
1,PHASE3,9
2,PHASE2,4
3,PHASE1,1


In [None]:
# Conditions that Dtx are being tested for
conditions = dict()
for i in df['conditions']:
    if pd.isna(i):
        continue   
    else:
        i = ast.literal_eval(i)
        for j in i:
            if j in conditions:
                conditions[j] += 1
            else:
                conditions[j] = 1

conditions = pd.DataFrame(list(conditions.items()), columns=['Conditions', 'Count']).sort_values(by='Count', ascending=False)
conditions

Unnamed: 0,Conditions,Count
0,Schizophrenia,8
43,Depression,8
132,Nightmare,5
19,"Diabetes Mellitus, Type 2",4
20,Substance Use Disorders,4
...,...,...
73,Dysarthria as Late Effect of Stroke,1
72,Pain Perception,1
71,Anxiety Disorders,1
70,Postpartum Depression,1


In [148]:
# Conditions that Dtx are being tested for
keywords_dict = dict()
for i in df['keywords']:
    if pd.isna(i):
        continue   
    else:
        i = ast.literal_eval(i)
        for j in i:
            if j in conditions:
                keywords_dict[j] += 1
            else:
                keywords_dict[j] = 1

keywords_dict = pd.DataFrame(list(keywords_dict.items()), columns=['Conditions', 'Count']).sort_values(by='Count', ascending=False)

# for condition in keywords_dict['Conditions']:
#     print(condition)

Prescription digital therapeutic (PDT)
Therapeutic Education Application
Self Help
immunotherapy
checkpoint inhibitors
immune-related adverse events
Clinical Trials, Randomized
Prevention
eHealth
Anxiety Sensitivity
Adolescence
Behavioral Activation
antithrombotic treatment
digital therapeutic application assistance
Nightmare Disorder
Post-traumatic stress disorder
Veteran
Digital medicine
Nightmares
Overactive bladder
Urge incontinence
Pelvic floor disorders
Nervous System Diseases
Movement Disorders
Neurodegenerative Diseases
Brain Diseases
Central Nervous System Diseases
Nightmare
Problematic Screen Use
Cognitive Behavioral Therapy
Irritable Bowel Syndrome
reduced emotional expression
sildenafil
Schizoaffective disorder
Major Depressive Disorder
MDD
Augmented Reality
behavioral
critical care
behavior modification
Mindfulness
Virtual Reality
Opioid Use Disorder
iontophoresis
cutaneous microcirculation
Healthy volunteers
reduced social engagement
CBT
smartphone app
mental disorder
psy

In [2]:
for i in df['keywords']:
    print(i)

['Prescription digital therapeutic (PDT)', 'Software as a Medical Device (SaMD)', 'Smartphone app', 'Schizophrenia', 'Negative Symptoms']
nan
['Digital therapeutic', 'Whole process management', 'Lung cancer', 'Patient follow-up']
['Prescription digital therapeutic (PDT)', 'Software as a Medical Device (SaMD)', 'Smartphone app', 'CGRP', 'Calcitonin Gene-Related Peptide Inhibitor Therapy']
nan
nan
['PTSD']
['Prescription digital therapeutic (PDT)', 'Software as a Medical Device (SaMD)', 'Smartphone app']
['Exercise']
['postpartum pelvic muscle training', 'postpartum pelvic floor', 'postpartum pelvic floor physical therapy', 'postpartum urinary incontinence', 'postpartum bowel incontinence', 'postpartum pelvic floor recovery']
['digital', 'mobile health', 'autism', 'pediatrics']
nan
['depression', 'anxiety']
nan
nan
nan
nan
['chronic pain', 'primary care', 'digital therapeutic', 'mobile app', 'pain function', 'pain interference', 'psychotherapy']
['cancer', 'digital solution', 'lifestyle 