# Clinical Trial Completion Rates

This analysis assesses studies posted on clinicaltrials.gov to determine their success rates. Here, a trial counts as a success if it completed as planned, as indicated by its `overall_status` flag in the clinicaltrials.gov data set.

In [None]:
require 'dbi'
require 'daru'
require 'nyaplot'
require 'yaml'

# Read the 'database' section of the local secrets file.
# Change this path to point to your aact_analysis directory.
secrets_path = "/home/dan/workspace/aact_analysis/secrets.yml"
database = YAML.load_file(secrets_path)['database']
nil # end the cell on nil so the loaded settings are not the cell's result

In [None]:
# Open a DBI connection using the name, host, user and password from `database`.
dsn = "dbi:Mysql:#{database['name']}:#{database['host']}"
db = DBI.connect(dsn, database['user'], database['password'])
nil # end the cell on nil rather than on the connection handle

## Extract Data

In [None]:
# Studies with an actual completion date, started after 2008-09-01 and
# completed before 2014-01-01, excluding 'Expanded Access' studies.
study_query = "select 
    nct_id, 
    overall_status, 
    start_date, 
    completion_date, 
    completion_date_type, 
    phase
   from clinical_study
   where
    start_date is not null and completion_date is not null
    and start_date > '2008-09-01' 
    and completion_date < '2014-01-01' 
    and completion_date_type = 'Actual'
    and study_type != 'Expanded Access'"

# Load the query result into a data frame and report its size.
studies = Daru::DataFrame.from_sql(db, study_query)

studies.size

In [None]:
# Denominator for the percentage column: the size of the full study frame.
total_studies = studies.size.to_f

# Count studies per overall_status, keeping a single count column.
status_counts = studies.group_by(:overall_status).count[1..1]
status_counts.vectors = Daru::Index.new([:total_study_count])

# Copy the status labels out of the index into a regular column.
status_counts[:status] = status_counts.index.to_a

# Each status's share of all studies, as a percentage rounded to 4 decimals.
status_counts[:percent_complete] = status_counts.map(:row) { |row| (row[:total_study_count].to_f / total_studies * 100.0).round(4) }
status_counts

In [None]:
# Bar chart of the number of studies for each terminal status.
status_counts.plot(type: :bar, x: :status, y: :total_study_count) do |chart, bars|
  chart.x_label('Terminal Status')
  chart.y_label('# of studies')
  chart.rotate_x_label(-45)

  margins = { top: 30, bottom: 140, left: 100, right: 30 }
  chart.margin(margins)

  bars.color(['#84C76D'])
end

## Completion Rates by Phase

In [None]:
# Build a data frame where each row is a study phase, then attach the total
# study count, the 'Completed' study count and the resulting completion percentage.
phase_completion_rates = Daru::DataFrame.new({ phase: studies[:phase].uniq.to_a })

# get the # of studies in each phase
studies_by_phase = studies.group_by(:phase).count[1..1]
studies_by_phase.vectors = Daru::Index.new([:total_study_count])
studies_by_phase[:phase] = studies_by_phase.index.to_a

# join total study counts to the phase data frame
phase_completion_rates = phase_completion_rates.join studies_by_phase, on: [:phase], how: :inner

# get the # of completed studies in each phase
completed_studies_by_phase = studies.filter_rows { |r| r[:overall_status] == 'Completed' }.group_by(:phase).count[1..1]
completed_studies_by_phase.vectors = Daru::Index.new([:completed_study_count])
# Label each count with the phase from THIS frame's own index. Using the index of
# studies_by_phase is only correct when every phase has at least one 'Completed'
# study; otherwise the label list has the wrong length/order for this frame.
completed_studies_by_phase[:phase] = completed_studies_by_phase.index.to_a

# join the completed study counts to the phase data frame
# NOTE: this is an inner join, so a phase with no 'Completed' studies has no
# matching row here and does not appear in the final frame.
phase_completion_rates = phase_completion_rates.join completed_studies_by_phase, on: [:phase], how: :inner

# calculate the % of studies for each phase which terminated with a status of 'Completed'
phase_completion_rates[:percent_complete] = phase_completion_rates.map(:row) { |r| ((r[:completed_study_count].to_f / r[:total_study_count].to_f) * 100.0).round(4) }
phase_completion_rates

In [None]:
# Bar chart of the completion percentage for each study phase, on a fixed 0-100 y axis.
phase_completion_rates.plot(type: :bar, x: :phase, y: :percent_complete) do |chart, bars|
  chart.x_label('Study Phase')
  chart.y_label('% of studies completed')
  chart.rotate_x_label(-45)
  chart.yrange([0, 100])

  margins = { top: 30, bottom: 180, left: 80, right: 30 }
  chart.margin(margins)

  bars.color(['#84C76D'])
end