In [1]:
%matplotlib inline

In [2]:
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.ticker import MaxNLocator
from cycler import cycler

import numpy as np
import pandas as pd

# Let pandas display up to 30 columns before eliding the middle ones with '...'.
pd.set_option('display.max_columns', 30)
# Apply the matplotlib style sheet at this relative path; the path is resolved
# against the kernel's working directory, so it must exist relative to where
# the notebook is run.
plt.style.use('../../publication-guidelines/Code/tempus.mplstyle')

**Read in PTS and archive**

In [30]:
# Read the current PTS export and the archived PTS export. Both files are
# tab-separated and take their column names from the second line (header=1).
# A third export ('../Data/PTS_archive_rejcted_01_06_21.tsv') used to be read
# here as df3 but is currently disabled.
frames = []
for tsv_path in ('../Data/PTS_01_18_22.tsv', '../Data/PTS_archive_01_18_22.tsv'):
    frame = pd.read_csv(tsv_path, sep='\t', header=1)
    print(frame.shape)
    frames.append(frame)
df1, df2 = frames

# Stack the two exports into one frame with a fresh 0..n-1 index.
df = pd.concat(frames, ignore_index=True)
print(df.shape)

# These free-text date columns are searched with .str.contains later on, so
# replace missing values with empty strings up front.
cols_to_fill = ['Date of Most Recent Resubmission (Yr, Quarter)',
                'Date Actual Submission Occurred (Yr, Quarter, Date)',
                'Year Quarter of Publication']
df[cols_to_fill] = df[cols_to_fill].fillna('')

# Optional filter (currently disabled): drop acknowledged-only work by keeping
# just the Tempus-authored rows.
# df = df[df['Authorship Lead'].isin(['Tempus 1st &/or Last Author', 'Collab 1st and Last Author (Tempus Authored)'])]
print(df.shape)

df.head()

(162, 37)
(185, 37)
(347, 37)
(347, 37)


Unnamed: 0,Publication Stage,Manuscript Pipeline,Disclosure Categories,Authorship Lead,Idea Origin,Product,Title/Topic/Link,Publication Type,Disclosure Type Detail,Study Type,Tempus Lead(s),SciComm Lead / Point Person,Tempus Authors,Last Author,External Collaborator (PI and Institution),...,"Date of Most Recent Resubmission (Yr, Quarter)",Date of Publication (Date),Year Quarter of Publication,Cancer Type,Disease Area,CLQ data usage,LENS Usage,IRB,IRB Protocol No.,IP/Legal,Main Org Work Derived From,DOI,PMID,Published Info Uploaded to Website,Notes
0,Stage 6: Publication Accepted,Topic / Project-specific,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,xT,Crizotinib in patients with tumors harboring A...,manuscript,research article,Clinical Utility,"Kevin White, David King, Naveen Malik, Bob Tel...",pp: Matthew Kase,"Kevin White, David King, Naveen Malik, Bob Tel...",Keith T. Flaherty,Keith T. Flaherty - NIH/NCI,...,,(2021 Q3 9/30 Accepted),,Multiple cancer types,Oncology,No CLQ data included,No LENS usage,No IRB Required,,Contacted,R&D,,,,Originally submitted to Annals of Oncology but...
1,Stage 6: Publication Accepted,Topic / Project-specific,2b. Tempus authored Posters,Tempus 1st &/or Last Author,Internal,AI - Other,Effects of Color Calibration via ICC Profile o...,abstract,poster,Research/Discovery Foundation,Kshitij Ingale,Adam Hockenberry,Kshitij Ingale; Rohan Joshi; Irvin Ho; Aicha B...,Martin Stumpe,,...,,(2021 Q4 Accepted),,Multiple cancer types,,CLQ data included,No LENS usage,No IRB Required,,Contacted,R&D,,,,
2,Stage 6: Publication Accepted,Topic / Project-specific,2b. Tempus authored Posters,Tempus 1st &/or Last Author,Internal,AI - Other,Deep Learning Identifies Microsatellite Instab...,abstract,poster,Research/Discovery Foundation,Rohan Joshi,Adam Hockenberry,Rohan Joshi; Andrew Kruger; Elle Moore; Ryan J...,Martin Stumpe,,...,,(2021 Q4 Accepted),,Multiple cancer types,Oncology,No CLQ data included,No LENS usage,No IRB Required,,Contacted,R&D,,,,
3,Stage 6: Publication Accepted,Topic / Project-specific,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,Algos - HRD,Harmonizing on the Definition of Homologous Re...,manuscript,research article,Research/Discovery Foundation,"Alain Silk, Jerod Parsons, and Nike Beaubier",pp: Alex Bobe,"Alain Silk, Jerod Parsons, and Nike Beaubier",Mark Stewart,Mark Stewart - Friends of Cancer Research,...,,(2021 Q4 Accepted),,,Oncology,No CLQ data included,No LENS usage,No IRB Required,,Contacted,Medical Science,,,,
4,Stage 5: In Revision,Topic / Project-specific,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,RNA,Redefining tumor classification and clinical s...,manuscript,research article,Research/Discovery Foundation,Ameen Salahudeen,pp: Matthew Kase,"Sonal Khare, Ameen Salahudeen, Tim Rand",Ashiq Masood,Ashiq Masood - Rush University Medical Center,...,,,,Colorectal cancer,Oncology,No CLQ data included,No LENS usage,No IRB Required,,Contacted,R&D,,,,"Rejected from: Cell Reports, Cancer Cell, and Gut"


In [31]:
# Tally how many tracker rows come from each originating organisation.
# Labels that differ only by capitalisation are counted as separate categories.
org_col = 'Main Org Work Derived From'
df[org_col].value_counts()

R&D                         115
Medical Affairs              93
Medical Science              24
acknowlegement - unknown      9
Acknowlegement - Unknown      7
Therapies                     4
Pharma                        3
Sales                         2
Medical Affairs/Sales         1
Name: Main Org Work Derived From, dtype: int64

# Extract (re-)submission quarters

In [32]:
# Short names for verbose tracker columns. Only keys that are actually present
# in the selected columns have any effect; the rest are ignored by rename().
renaming_dict = {
    'Title/Topic/Link': 'Title',
    'Origin (internal vs. external)': 'Origin',
    'Key BU Stakeholder Publication Supports': 'Stakeholder',
}

# The handful of columns needed for the per-quarter summary.
cols_to_keep = [
    'Title/Topic/Link',
    'Authorship Lead',
    'Publication Type',
    'Main Org Work Derived From',
    'Tempus Lead(s)',
]

# Slim working copy of df: same rows and index, fewer (renamed) columns.
tiny_df = df[cols_to_keep].rename(columns=renaming_dict)
print(tiny_df.shape)
tiny_df.head()

(347, 5)


Unnamed: 0,Title,Authorship Lead,Publication Type,Main Org Work Derived From,Tempus Lead(s)
0,Crizotinib in patients with tumors harboring A...,Collab 1st and Last Author (Tempus Authored),manuscript,R&D,"Kevin White, David King, Naveen Malik, Bob Tel..."
1,Effects of Color Calibration via ICC Profile o...,Tempus 1st &/or Last Author,abstract,R&D,Kshitij Ingale
2,Deep Learning Identifies Microsatellite Instab...,Tempus 1st &/or Last Author,abstract,R&D,Rohan Joshi
3,Harmonizing on the Definition of Homologous Re...,Collab 1st and Last Author (Tempus Authored),manuscript,Medical Science,"Alain Silk, Jerod Parsons, and Nike Beaubier"
4,Redefining tumor classification and clinical s...,Collab 1st and Last Author (Tempus Authored),manuscript,R&D,Ameen Salahudeen


In [33]:
# Quarters to report on, as 'YYYY Qn' labels in chronological order.
all_quarters = ['{} {}'.format(year, quarter)
                for year in ['2020', '2021']
                for quarter in ['Q1', 'Q2', 'Q3', 'Q4']]

In [34]:
# Add blank status/quarter columns to tiny_df. Rows whose date text never
# mentions one of the quarters in all_quarters keep the empty string.
for new_col in ['Submission_status', 'Resubmission_status', 'Publication_status',
                'Submission_quarter', 'Resubmission_quarter', 'Publication_quarter']:
    tiny_df[new_col] = ''

# (free-text date column in df, status column, quarter column, status label):
# when the text in the df column mentions a quarter, the label and the quarter
# are written to the two tiny_df columns for that row.
quarter_sources = [
    ('Year Quarter of Publication',
     'Publication_status', 'Publication_quarter', 'Published'),
    ('Date Actual Submission Occurred (Yr, Quarter, Date)',
     'Submission_status', 'Submission_quarter', 'Submitted'),
    ('Date of Most Recent Resubmission (Yr, Quarter)',
     'Resubmission_status', 'Resubmission_quarter', 'Submitted'),
]

# Walk the quarters in order. If a cell mentions more than one quarter, the
# later quarter overwrites the earlier one.
for quarter in all_quarters:
    print(quarter)
    for source_col, status_col, quarter_col, status_label in quarter_sources:
        # Literal substring match; na=False treats missing text as "no match".
        mentions_quarter = df[source_col].str.contains(quarter, regex=False, na=False)
        # Turn the hits into a row mask on tiny_df by index label, so only rows
        # that exist in tiny_df are touched.
        rows = tiny_df.index.isin(df.index[mentions_quarter.to_numpy()])
        # Use .loc here: .at is the scalar accessor and is not meant to take a
        # collection of row labels.
        tiny_df.loc[rows, status_col] = status_label
        tiny_df.loc[rows, quarter_col] = quarter

# Optional filter (currently disabled): keep only projects that were submitted,
# resubmitted or published in one of the quarters above.
# tiny_df = tiny_df[(tiny_df['Submission_status']=='Submitted') |\
#                   (tiny_df['Resubmission_status']=='Submitted') |\
#                   (tiny_df['Publication_status']=='Published')]
# print(tiny_df.shape)
# tiny_df.head()

2020 Q1
2020 Q2
2020 Q3
2020 Q4
2021 Q1
2021 Q2
2021 Q3
2021 Q4


In [9]:
# Quarter(s) to report on. Swap in another list to report on a different
# period, e.g. ['2020 Q1', '2020 Q2', '2020 Q3', '2020 Q4'] or
# ['2021 Q1', '2021 Q2', '2021 Q3', '2021 Q4'].
quarters = ['2021 Q4']

# Rows submitted or resubmitted in the chosen quarter(s).
in_quarters = (tiny_df['Submission_quarter'].isin(quarters)
               | tiny_df['Resubmission_quarter'].isin(quarters))
# Rows whose originating org mentions 'Medical'. na=False makes rows with no
# org recorded an explicit non-match instead of leaving NaN in the mask.
from_medical_org = tiny_df['Main Org Work Derived From'].str.contains(
    'Medical', regex=False, na=False)

# The masks are computed on tiny_df but applied to df to get every column back;
# boolean-Series indexing aligns on index labels, so this relies on tiny_df
# carrying the same index as df.
tempy = df[in_quarters & from_medical_org]
print(tempy.shape)
# Copy the manuscripts to the system clipboard. For abstracts use:
# tempy[tempy['Publication Type']=='abstract'].to_clipboard()
tempy[tempy['Publication Type']=='manuscript'].to_clipboard()
# tempy.head()

(11, 37)


In [10]:
# Display the current contents of tempy as the cell output.
tempy

Unnamed: 0,Publication Stage,Manuscript Pipeline,Disclosure Categories,Authorship Lead,Idea Origin,Product,Title/Topic/Link,Publication Type,Disclosure Type Detail,Study Type,Tempus Lead(s),SciComm Lead / Point Person,Tempus Authors,Last Author,External Collaborator (PI and Institution),...,"Date of Most Recent Resubmission (Yr, Quarter)",Date of Publication (Date),Year Quarter of Publication,Cancer Type,Disease Area,CLQ data usage,LENS Usage,IRB,IRB Protocol No.,IP/Legal,Main Org Work Derived From,DOI,PMID,Published Info Uploaded to Website,Notes
7,Stage 4: Publication Submitted,External Research: KOL Development,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,"xT, xF",Clinicopathological characteristics of KRAS G1...,manuscript,research article,Research/Discovery Foundation,Sherif El-Refai,Matthew Kase / Adam Hockenberry,"Sherif El-Refai, Prerna Jain, Denise Lau",Mohamed Salem,Mohamed Salem - LCI,...,2021 Q4 11/1/21,,,Multiple cancer types,Oncology,No CLQ data included,LENS cohort selection & partial analysis,No IRB Required,,Contacted,Medical Affairs,,,,
14,Stage 5: In Revision,External Research: Other,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,xT,NTRK gene fusions in thyroid cancer,manuscript,research article,Research/Discovery Foundation,Arya Ashok,pp: Vanessa Nepomuceno,Arya Ashok,Hyunseok Kang,Hyunseok Kang - UCSF,...,,,,Reproductive or hormone-related cancer,Oncology,No CLQ data included,LENS cohort selection & partial analysis,No IRB Required,,Not contacted,Medical Affairs,,,,previous rejected submissions to Cancer (Sept ...
17,Stage 4: Publication Submitted,External Research: KOL Development,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),External,RNA,"Clinical, genomic and transcriptomic data prof...",manuscript,research article,Research/Discovery Foundation,Sherif El-Refai / Denise Lau,Adam Hockenberry,"Prerna Jain, Denise Lau, Sherif El-Refai, Ada...",Mark Yarchoan,"Mark Yarchoan - Johns Hopkins University, Kabi...",...,,,,Biliary tract cancer,Oncology,No CLQ data included,No LENS usage,,,Contacted,Medical Affairs,,,,"Originally submitted to JITC, then Cancer Immu..."
23,Stage 6: Publication Accepted,External Research: Discovery Data-Sharing,2b. Tempus authored Posters,Collab 1st and Last Author (Tempus Authored),External,Algos - HRD,Interrelation of Functional Homologous Recombi...,abstract,poster,Research/Discovery Foundation,Sherif El Refai,Adam Hockenberry,"Benjamin Leibowitz, Elizabeth Mauer, Sherif El...",Jason Zhu,Levin Cancer Insititute,...,,(2021 Q4 Accepted),,Prostate cancer,Oncology,No CLQ data included,LENS cohort selection & partial analysis,No IRB Required,,Contacted,Medical Affairs,,,,
24,Stage 6: Publication Accepted,External Research: Discovery Data-Sharing,2b. Tempus authored Posters,Collab 1st and Last Author (Tempus Authored),External,xT,Actionable genomic landscapes from a real-worl...,abstract,poster,Research/Discovery Foundation,Greg Call,Adam Hockenberry,"Elizabeth Mauer, Greg Call",Solomon Woldu,UTSW,...,,(2021 Q4 Accepted),,Reproductive or hormone-related cancer,Oncology,No CLQ data included,LENS cohort selection & partial analysis,No IRB Required,,Contacted,Medical Affairs,,,,
25,Stage 6: Publication Accepted,External Research: Discovery Data-Sharing,2b. Tempus authored Posters,Collab 1st and Last Author (Tempus Authored),External,xT,Renal cell carcinoma (RCC) metastatic to pancr...,abstract,poster,Research/Discovery Foundation,Arya Ashok,Adam Hockenberry,"Elizabeth Mauer, Alex Barrett, Arya Ashok",Ali Khaki,Stanford,...,,(2021 Q4 Accepted),,Other,Oncology,No CLQ data included,LENS cohort selection & partial analysis,No IRB Required,,Contacted,Medical Affairs,,,,
26,Stage 4: Publication Submitted,External Research: Discovery Data-Sharing,,Collab 1st and Last Author (Tempus Authored),External,xT,Actionable genomic landscapes from a real-worl...,abstract,,Research/Discovery Foundation,Greg Call,Adam Hockenberry,"Elizabeth Mauer, Greg Call",Solomon Woldu,UTSW,...,,,,Reproductive or hormone-related cancer,Oncology,,,,,Contacted,Medical Affairs,,,,
34,Stage 4: Publication Submitted,External Research: KOL Development,,Collab 1st and Last Author (Tempus Authored),External,RNA,Comprehensive genomic and transcriptomic profi...,abstract,,Research/Discovery Foundation,"Josh SK Bell, Kristiyana Kaneva, Brooke Rhead,...",Matthew Kase,"Josh SK Bell, Kristiyana Kaneva, Brooke Rhead,...",Funmi Olopade,Olopade - U of C,...,,,,Breast cancer,Oncology,,,,,,Medical Affairs,,,,Control/tracking number: 22-A-5049-AACR
36,Stage 4: Publication Submitted,External Research: Other,,Collab 1st and Last Author (Tempus Authored),External,xT,"Multi-institutional study of the incidence, ge...",manuscript,research article,Clinical Utility,Kristiyana Kaneva,pp: Alex Bobe,Kristiyana Kaneva,Karen D. Wright,Karen D. Wright - Boston Children’s Cancer and...,...,,,,Other,Oncology,No CLQ data included,No LENS usage,Covered under external collaborator's IRB,,Contacted - No IP,Medical Affairs,,,,
37,Stage -3: Rejected,External Research: Other,1a. Authored peer-reviewed article in top tie...,Tempus 1st &/or Last Author,Internal,xT,Rate of germline findings detected during NGS ...,manuscript,research article,Research/Discovery Foundation,"Arya Ashok, Jessica Stoll",Vanessa Nepomuceno,"Arya Ashok, Jessica Stoll, Elizabeth Mauer",Funda Meric-Bernstam,Timothy Yap and Funda Meric-Bernstam - MD Ande...,...,,,,Multiple cancer types,Oncology,No CLQ data included,No LENS usage,Covered under IRB Protocol - Exemption,Pro00042950,Contacted,Medical Affairs,,,,ASCO '21 conversion 11/2021 - Reject from JAMA...


In [15]:
# Projects still in the idea/development stages that derive from an org whose
# name mentions 'Medical'.
early_stages = ['Stage 1: Publication Idea Approved by Dept. Head',
                'Stage 2: Publication in Development']
is_early_stage = df['Publication Stage'].isin(early_stages)
# na=False makes rows with no org recorded an explicit non-match instead of
# leaving NaN in the boolean mask.
from_medical_org = df['Main Org Work Derived From'].str.contains(
    'Medical', regex=False, na=False)
tempy = df[is_early_stage & from_medical_org]

In [37]:
# Manuscripts whose publication quarter is recorded as exactly '2021 Q4',
# reduced to the columns needed for the summary.
summary_cols = [
    'Title/Topic/Link',
    'Disclosure Categories',
    'Authorship Lead',
    'Publication Type',
    'Year Quarter of Publication',
]
is_q4_manuscript = (
    (df['Year Quarter of Publication'] == '2021 Q4')
    & (df['Publication Type'] == 'manuscript')
)
df.loc[is_q4_manuscript, summary_cols]

Unnamed: 0,Title/Topic/Link,Disclosure Categories,Authorship Lead,Publication Type,Year Quarter of Publication
162,Validation of Genomic and Transcriptomic Model...,3. Pre-print,Tempus 1st &/or Last Author,manuscript,2021 Q4
174,Viral dynamics of SARS-CoV-2 variants in vacci...,1a. Authored peer-reviewed article in top tie...,Collab 1st and Last Author (Tempus Authored),manuscript,2021 Q4
176,Systematic Review and Meta-Analysis of L-Methy...,1b. Authored peer-reviewed article in mid tie...,Collab 1st and Last Author (Tempus Authored),manuscript,2021 Q4
187,Mouse-INtraDuctal (MIND): an in vivo model for...,3. Acknowledged/Tempus utilized,Tempus acknowledged or services used,manuscript,2021 Q4
188,Clinical outcomes and longitudinal circulating...,3. Acknowledged/Tempus utilized,Tempus acknowledged or services used,manuscript,2021 Q4
190,Review of the genomic landscape of common pedi...,1c. Authored peer-reviewed article in lower t...,Tempus Employee (unaffiliated work),manuscript,2021 Q4
191,rECHOmmend: an ECG-based machine-learning appr...,3. Pre-print,Collab 1st and Last Author (Tempus Authored),manuscript,2021 Q4


In [27]:
# Abstracts whose publication quarter is recorded as exactly '2021 Q4', with
# the disclosure detail and venue columns, copied to the system clipboard.
abstract_cols = [
    'Title/Topic/Link',
    'Disclosure Categories',
    'Authorship Lead',
    'Publication Type',
    'Year Quarter of Publication',
    'Disclosure Type Detail',
    'Journal / Conference',
]
is_q4_abstract = (
    (df['Year Quarter of Publication'] == '2021 Q4')
    & (df['Publication Type'] == 'abstract')
)
df.loc[is_q4_abstract, abstract_cols].to_clipboard()

In [28]:
# List every column name in df, e.g. to check exact spellings before selecting columns.
df.columns

Index(['Publication Stage', 'Manuscript Pipeline', 'Disclosure Categories',
       'Authorship Lead', 'Idea Origin', 'Product', 'Title/Topic/Link',
       'Publication Type', 'Disclosure Type Detail', 'Study Type',
       'Tempus Lead(s)', 'SciComm Lead / Point Person', 'Tempus Authors',
       'Last Author', 'External Collaborator (PI and Institution)',
       'Exec. Sponsor / Department Head',
       'Key BU Stakeholder Publication Supports', 'Business Purpose',
       'Journal / Conference', 'Journal Impact Factor (JIF)',
       'Date of Target Submission (Yr, Quarter, Date)',
       'Date Actual Submission Occurred (Yr, Quarter, Date)',
       'Date of Most Recent Resubmission (Yr, Quarter)',
       'Date of Publication (Date)', 'Year Quarter of Publication',
       'Cancer Type', 'Disease Area', 'CLQ data usage', 'LENS Usage', 'IRB',
       'IRB Protocol No.', 'IP/Legal', 'Main Org Work Derived From', 'DOI',
       'PMID', 'Published Info Uploaded to Website', 'Notes'],
      dtyp

In [36]:
# Abstracts whose actual-submission text mentions '2021 Q4', with the
# disclosure detail and venue columns, copied to the system clipboard.
submitted_cols = [
    'Title/Topic/Link',
    'Disclosure Categories',
    'Authorship Lead',
    'Publication Type',
    'Year Quarter of Publication',
    'Disclosure Type Detail',
    'Journal / Conference',
]
submitted_in_q4 = df['Date Actual Submission Occurred (Yr, Quarter, Date)'].str.contains('2021 Q4')
is_abstract = df['Publication Type'] == 'abstract'
df.loc[submitted_in_q4 & is_abstract, submitted_cols].to_clipboard()