In [85]:
import pandas as pd
import numpy as np

from termcolor import colored
import os

%matplotlib inline
%config matplotlib_inline.matplotlib_formats = 'retina'
%config InlineBackend.figure_format = 'retina'
import matplotlib
import matplotlib.pyplot as plt

# Earliest publication year to keep: each search export loaded below is filtered to
# rows with 'Publication Year' >= year_min.
year_min = 2017

In [86]:
# Relative path to the folder holding the exported search CSVs; the merged,
# de-duplicated reference list is written back into the same folder.
litsearch_folder = '../../literature search'

In [87]:
!ls ../../literature\ search

20240812_EMBASE direct search.csv
20240812_Pubmed MeSH search.csv
20240812_Pubmed direct search.csv
20240812_litsearch_final.csv
Yu-Yun Shao_After 2017 70 selected references.xlsx
Yu-Yun Shao_After 2017 911 articles.xlsx
~$20240812_litsearch_final_inspected.xlsx
~$Yu-Yun Shao_After 2017 70 selected references.xlsx


# A. Literature screening

### 1. PubMed MeSH search

In [93]:
# Read the PubMed MeSH export and keep only records published in `year_min` or later.
df_Pubmed_MeSH = pd.read_csv(os.path.join(litsearch_folder, '20240812_Pubmed MeSH search.csv'))
df_Pubmed_MeSH = df_Pubmed_MeSH[df_Pubmed_MeSH['Publication Year'] >= year_min]
print(colored(f"Number of references: {df_Pubmed_MeSH.shape[0]}", "red"))

# Turn the pre-filter row index into a 1-based, source-prefixed label
# ("PubMed_MeSH:<n>") so every record can be traced back to its row in the export.
df_Pubmed_MeSH = (
    df_Pubmed_MeSH
    .reset_index()
    .rename(columns={'index': 'database_index'})
    .assign(database_index=lambda d: 'PubMed_MeSH:' + (d['database_index'] + 1).astype(str))
)

# Show the first two records, one column per record.
df_Pubmed_MeSH.head(2).T

[31mNumber of references: 378[0m


Unnamed: 0,0,1
database_index,PubMed_MeSH:1,PubMed_MeSH:2
PMID,28052619,28052621
Title,The impact of direct antiviral agents on the d...,Hepatitis B virus long-term impact of antivira...
Authors,"Reig M, Boix L, Bruix J.","Grossi G, Viganò M, Loglio A, Lampertico P."
Citation,Liver Int. 2017 Jan;37 Suppl 1:136-139. doi: 1...,Liver Int. 2017 Jan;37 Suppl 1:45-51. doi: 10....
First Author,Reig M,Grossi G
Journal/Book,Liver Int,Liver Int
Publication Year,2017,2017
Create Date,2017/01/05,2017/01/05
PMCID,,


In [94]:
# Keep only the columns needed downstream, then add 'Abstract' and
# 'Clinical Trial Numbers' as all-missing placeholders so this frame has the same
# columns as the Embase frame it is later concatenated with.
df_Pubmed_MeSH = (
    df_Pubmed_MeSH
    .loc[:, ['database_index', 'Title', 'Authors', 'Citation', 'Publication Year', 'Create Date', 'PMID', 'DOI']]
    .assign(**{'Abstract': pd.NA, 'Clinical Trial Numbers': pd.NA})
)

In [95]:
# Records without a DOI: they still carry a PMID, so they remain identifiable.
df_Pubmed_MeSH[df_Pubmed_MeSH['DOI'].isna()]

Unnamed: 0,database_index,Title,Authors,Citation,Publication Year,Create Date,PMID,DOI,Abstract,Clinical Trial Numbers
19,PubMed_MeSH:20,Transarterial Infusion Chemotherapy With and W...,"Zhao J, Li D, Shi Y, Shi F, Feng C, Li W, Tao ...",Ann Acad Med Singap. 2017 May;46(5):174-184.,2017,2017/06/11,28600578,,,
20,PubMed_MeSH:21,Excellent response to Anti-PD-1 therapy in a p...,"Mamdani H, Wu H, O'Neil BH, Sehdev A.",Discov Med. 2017 May;23(128):331-336.,2017,2017/07/18,28715649,,,
33,PubMed_MeSH:34,The treatment path in hepatocellular carcinoma,"El-Serag HB, Zhu AX, Johnson MS.",Clin Adv Hematol Oncol. 2017 Aug;15 Suppl 9(8)...,2017,2017/10/17,29036035,,,
50,PubMed_MeSH:51,Immunostimulatory monoclonal antibodies for he...,"Mazzolini GD, Malvicini M.",Medicina (B Aires). 2018;78(1):29-32.,2018,2018/01/24,29360073,,,
159,PubMed_MeSH:160,[Recent advances in managing hepatitis D],"Bonnemain CL, Cochand L, Portmann A, Béguelin C.",Rev Med Suisse. 2019 Oct 9;15(666):1802-1806.,2019,2019/10/11,31599521,,,
178,PubMed_MeSH:179,Patient Reported Outcomes Show Newer Drug Comb...,Wright KM.,Oncology (Williston Park). 2020 Feb 20;34(2):6...,2020,2020/07/10,32645197,,,
179,PubMed_MeSH:180,"Clinical significance of changes in AFP, HTATI...","Zhang P, Chen Z, Chen L, Zang H, Zhu B, Shao W...",J BUON. 2020 Mar-Apr;25(2):1206-1211.,2020,2020/06/12,32521927,,,
249,PubMed_MeSH:250,Which is the best combination of surgery for h...,"Wu QQ, Chen YX, Zheng TT, Dai MT, Ye T, Xu YH,...",J BUON. 2021 May-Jun;26(3):889-896.,2021,2021/07/16,34268950,,,
270,PubMed_MeSH:271,Comparing the efficacy and safety of local-reg...,"Wu QQ, Gao H, Du SS, Chen YX, Hu Y, Yang P, Ho...",J BUON. 2021 Sep-Oct;26(5):1950-1957.,2021,2021/11/11,34761604,,,


In [96]:
# De-duplication key "<PMID>(<DOI>)"; the literal "na" stands in for a missing DOI.
df_Pubmed_MeSH['PMID_DOI'] = [
    f'{pmid}(na)' if pd.isnull(doi) else f'{pmid}({doi})'
    for pmid, doi in zip(df_Pubmed_MeSH['PMID'], df_Pubmed_MeSH['DOI'])
]
df_Pubmed_MeSH.head(2).T

Unnamed: 0,0,1
database_index,PubMed_MeSH:1,PubMed_MeSH:2
Title,The impact of direct antiviral agents on the d...,Hepatitis B virus long-term impact of antivira...
Authors,"Reig M, Boix L, Bruix J.","Grossi G, Viganò M, Loglio A, Lampertico P."
Citation,Liver Int. 2017 Jan;37 Suppl 1:136-139. doi: 1...,Liver Int. 2017 Jan;37 Suppl 1:45-51. doi: 10....
Publication Year,2017,2017
Create Date,2017/01/05,2017/01/05
PMID,28052619,28052621
DOI,10.1111/liv.13321,10.1111/liv.13291
Abstract,,
Clinical Trial Numbers,,


### 2. PubMed direct search

In [97]:
# Read the PubMed direct-search export and keep only records published in `year_min` or later.
df_Pubmed_direct = pd.read_csv(os.path.join(litsearch_folder, '20240812_Pubmed direct search.csv'))
df_Pubmed_direct = df_Pubmed_direct[df_Pubmed_direct['Publication Year'] >= year_min]
print(colored(f"Number of references: {df_Pubmed_direct.shape[0]}", "red"))

# Turn the pre-filter row index into a 1-based, source-prefixed label
# ("PubMed_direct:<n>"); numbering follows the original export, so it can start
# above 1 when earlier rows were removed by the year filter.
df_Pubmed_direct = (
    df_Pubmed_direct
    .reset_index()
    .rename(columns={'index': 'database_index'})
    .assign(database_index=lambda d: 'PubMed_direct:' + (d['database_index'] + 1).astype(str))
)

# Show the first two records, one column per record.
df_Pubmed_direct.head(2).T

[31mNumber of references: 1584[0m


Unnamed: 0,0,1
database_index,PubMed_direct:5,PubMed_direct:6
PMID,28072706,28072765
Title,"A randomized, multicenter, phase III study of ...",Phase II study of the PI3K inhibitor BKM120 in...
Authors,"Lee HS, Chung MJ, Park JY, Bang S, Park SW, Ki...","Heudel PE, Fabbro M, Roemer-Becuwe C, Kaminsky..."
Citation,Medicine (Baltimore). 2017 Jan;96(1):e5702. do...,Br J Cancer. 2017 Jan;116(3):303-309. doi: 10....
First Author,Lee HS,Heudel PE
Journal/Book,Medicine (Baltimore),Br J Cancer
Publication Year,2017,2017
Create Date,2017/01/11,2017/01/11
PMCID,PMC5228666,PMC5294485


In [98]:
# Keep only the columns needed downstream, then add 'Abstract' and
# 'Clinical Trial Numbers' as all-missing placeholders so this frame has the same
# columns as the Embase frame it is later concatenated with.
df_Pubmed_direct = (
    df_Pubmed_direct
    .loc[:, ['database_index', 'Title', 'Authors', 'Citation', 'Publication Year', 'Create Date', 'PMID', 'DOI']]
    .assign(**{'Abstract': pd.NA, 'Clinical Trial Numbers': pd.NA})
)

In [99]:
# Records without a DOI: they still carry a PMID, so they remain identifiable.
df_Pubmed_direct[df_Pubmed_direct['DOI'].isna()]

Unnamed: 0,database_index,Title,Authors,Citation,Publication Year,Create Date,PMID,DOI,Abstract,Clinical Trial Numbers
41,PubMed_direct:46,Excellent response to Anti-PD-1 therapy in a p...,"Mamdani H, Wu H, O'Neil BH, Sehdev A.",Discov Med. 2017 May;23(128):331-336.,2017,2017/07/18,28715649,,,
80,PubMed_direct:85,The treatment path in hepatocellular carcinoma,"El-Serag HB, Zhu AX, Johnson MS.",Clin Adv Hematol Oncol. 2017 Aug;15 Suppl 9(8)...,2017,2017/10/17,29036035,,,
528,PubMed_direct:533,[Recent advances in managing hepatitis D],"Bonnemain CL, Cochand L, Portmann A, Béguelin C.",Rev Med Suisse. 2019 Oct 9;15(666):1802-1806.,2019,2019/10/11,31599521,,,
611,PubMed_direct:616,Patient Reported Outcomes Show Newer Drug Comb...,Wright KM.,Oncology (Williston Park). 2020 Feb 20;34(2):6...,2020,2020/07/10,32645197,,,
1346,PubMed_direct:1351,[Ramucirumab Treatment for Unresectable Advanc...,"Jin L, Tanizawa Y, Kuzuya T.",Gan To Kagaku Ryoho. 2023 Jul;50(7):799-807.,2023,2023/07/27,37496225,,,
1414,PubMed_direct:1419,Tremelimumab (Imjudo) in Combination With Durv...,,Ottawa (ON): Canadian Agency for Drugs and Tec...,2023,2023/11/01,38320074,,,
1544,PubMed_direct:1549,Epcoritamab (Epkinly): CADTH Reimbursement Rec...,,Ottawa (ON): Canadian Agency for Drugs and Tec...,2024,2024/06/01,39088680,,,


In [100]:
# De-duplication key "<PMID>(<DOI>)"; the literal "na" stands in for a missing DOI.
df_Pubmed_direct['PMID_DOI'] = [
    f'{pmid}(na)' if pd.isnull(doi) else f'{pmid}({doi})'
    for pmid, doi in zip(df_Pubmed_direct['PMID'], df_Pubmed_direct['DOI'])
]
df_Pubmed_direct.head(2).T

Unnamed: 0,0,1
database_index,PubMed_direct:5,PubMed_direct:6
Title,"A randomized, multicenter, phase III study of ...",Phase II study of the PI3K inhibitor BKM120 in...
Authors,"Lee HS, Chung MJ, Park JY, Bang S, Park SW, Ki...","Heudel PE, Fabbro M, Roemer-Becuwe C, Kaminsky..."
Citation,Medicine (Baltimore). 2017 Jan;96(1):e5702. do...,Br J Cancer. 2017 Jan;116(3):303-309. doi: 10....
Publication Year,2017,2017
Create Date,2017/01/11,2017/01/11
PMID,28072706,28072765
DOI,10.1097/MD.0000000000005702,10.1038/bjc.2016.430
Abstract,,
Clinical Trial Numbers,,


### 3. Embase direct search

In [101]:
# Read the Embase export and keep only records published in `year_min` or later.
df_Embase = pd.read_csv(os.path.join(litsearch_folder, '20240812_EMBASE direct search.csv'), na_values = "")
df_Embase = df_Embase[df_Embase['Publication Year'] >= year_min]
print(colored(f"Number of references: {df_Embase.shape[0]}", "red"))

# Expose the pre-filter row index as `database_index` and rename the Embase columns
# to the names used by the PubMed frames, all in a single rename.
df_Embase = df_Embase.reset_index().rename(columns={
    'index': 'database_index',
    'Author Names': 'Authors',
    'Source': 'Citation',
    'Date of Publication': 'Create Date',
    'Medline PMID': 'PMID',
})

# 1-based, source-prefixed record label ("Embase:<n>").
df_Embase['database_index'] = 'Embase:' + (df_Embase['database_index'] + 1).astype(str)

# Show the first two records, one column per record.
df_Embase.head(2).T

[31mNumber of references: 1584[0m


Unnamed: 0,0,1
database_index,Embase:1,Embase:2
Title,Entecavir versus tenofovir for prevention of h...,Macro CD5L(+) deteriorates CD8(+)T cells exhau...
Authors,"Pan L.-X., Wang Y.-Y., Li Z.-H., Luo J.-X., Wu...","Lu J.-C., Wu L.-L., Sun Y.-N., Huang X.-Y., Ga..."
Citation,Trials (2024) 25:1 Article Number: 25. Date of...,Nature Communications (2024) 15:1 Article Numb...
Publication Year,2024,2024
Volume,25,15
Issue,1,1
First Page,,
Last Page,,
Create Date,1 Dec 2024,1 Dec 2024


In [102]:
# Keep only the columns needed downstream. Unlike the PubMed frames, the Embase
# frame takes 'Abstract' and 'Clinical Trial Numbers' from its own data.
df_Embase = df_Embase.loc[:, [
    'database_index',
    'Title', 'Authors', 'Citation',
    'Publication Year', 'Create Date',
    'Abstract', 'Clinical Trial Numbers',
    'PMID', 'DOI',
]]

In [103]:
# Build the de-duplication key for every Embase record, using the same
# "<PMID>(<DOI>)" / "<PMID>(na)" format as the PubMed frames.
#
# The Embase PMID column contains missing values, so pandas holds it as float. The
# PMID must therefore be cast to int in BOTH branches: without the cast a PMID-only
# record would get the key '28715649.0(na)', which never equals the PubMed key
# '28715649(na)', and the duplicate would survive.
#
# Records without a PMID fall back to their (unique) Embase `database_index`.
# NOTE(review): this fallback also applies when a DOI is present, so such records
# can never match a PubMed record by DOI — confirm this is intended.
def _embase_pmid_doi_key(pmid, doi, fallback):
    """Return the de-duplication key for one Embase record.

    pmid     -- PMID value, possibly missing (NaN/None) and possibly a float.
    doi      -- DOI string, possibly missing.
    fallback -- value returned when `pmid` is missing (the Embase database_index).
    """
    if pd.isnull(pmid):
        return fallback
    doi_part = 'na' if pd.isnull(doi) else doi
    return f'{int(pmid)}({doi_part})'


df_Embase['PMID_DOI'] = [
    _embase_pmid_doi_key(pmid, doi, embase_index)
    for pmid, doi, embase_index in zip(df_Embase['PMID'], df_Embase['DOI'], df_Embase['database_index'])
]

In [104]:
# First three Embase records that have neither a DOI nor a PMID.
df_Embase[df_Embase[['DOI', 'PMID']].isna().all(axis=1)].head(3).T

Unnamed: 0,27,28,29
database_index,Embase:28,Embase:29,Embase:30
Title,"A phase 3, randomized study of adjuvant rilveg...",Phase 2/3 study of livmoniplimab in combinatio...,Phase 2 study of livmoniplimab in combination ...
Authors,"Fan J., Bekaii-Saab T.S., Aldrighetti L.A., Br...","Abou-Alfa G.K., Bouattour M., Cheng A.-L., Day...","Abou-Alfa G.K., Chiu C.-F., Piscaglia F., Sang..."
Citation,Journal of Clinical Oncology (2024) 42:16 Supp...,Journal of Clinical Oncology (2024) 42:16 Supp...,Journal of Clinical Oncology (2024) 42:16 Supp...
Publication Year,2024,2024,2024
Create Date,1 Jun 2024,1 Jun 2024,1 Jun 2024
Abstract,Background: Biliary tract cancer (BTC) is a ra...,Background: Most patients with hepatocellular ...,Background: Patients (pts) with hepatocellular...
Clinical Trial Numbers,,,
PMID,,,
DOI,,,


In [105]:
# First two PubMed MeSH records without a DOI, for comparison with the Embase case.
df_Pubmed_MeSH[df_Pubmed_MeSH['DOI'].isna()].head(2).T

Unnamed: 0,19,20
database_index,PubMed_MeSH:20,PubMed_MeSH:21
Title,Transarterial Infusion Chemotherapy With and W...,Excellent response to Anti-PD-1 therapy in a p...
Authors,"Zhao J, Li D, Shi Y, Shi F, Feng C, Li W, Tao ...","Mamdani H, Wu H, O'Neil BH, Sehdev A."
Citation,Ann Acad Med Singap. 2017 May;46(5):174-184.,Discov Med. 2017 May;23(128):331-336.
Publication Year,2017,2017
Create Date,2017/06/11,2017/07/18
PMID,28600578,28715649
DOI,,
Abstract,,
Clinical Trial Numbers,,


# B. Removing duplicates

In [106]:
# Stack the three searches into one frame with a fresh 0..n-1 index.
# Order matters: Embase rows come first, so the first occurrence of each key
# (the one `drop_duplicates` keeps by default) is the Embase record when there is one.
frames_in_priority_order = [df_Embase, df_Pubmed_MeSH, df_Pubmed_direct]
df_all_idxs = pd.concat(frames_in_priority_order, ignore_index=True)
df_all_idxs

Unnamed: 0,database_index,Title,Authors,Citation,Publication Year,Create Date,Abstract,Clinical Trial Numbers,PMID,DOI,PMID_DOI
0,Embase:1,Entecavir versus tenofovir for prevention of h...,"Pan L.-X., Wang Y.-Y., Li Z.-H., Luo J.-X., Wu...",Trials (2024) 25:1 Article Number: 25. Date of...,2024,1 Dec 2024,Background: Entecavir and tenofovir disoproxil...,ClinicalTrials.gov (NCT02650271),38183137.0,10.1186/s13063-023-07742-x,38183137(10.1186/s13063-023-07742-x)
1,Embase:2,Macro CD5L(+) deteriorates CD8(+)T cells exhau...,"Lu J.-C., Wu L.-L., Sun Y.-N., Huang X.-Y., Ga...",Nature Communications (2024) 15:1 Article Numb...,2024,1 Dec 2024,Intratumoral immune status influences tumor th...,"ClinicalTrials.gov (NCT03951597, NCT05342194)",38245530.0,10.1038/s41467-024-44795-1,38245530(10.1038/s41467-024-44795-1)
2,Embase:3,Surrogacy of one-year survival for overall sur...,"Jin Y., Ren H., Yue Q., Wu W., Liu C., Guo Y.,...",BMC Cancer (2024) 24:1 Article Number: 258. Da...,2024,1 Dec 2024,Background: The increasing number of sequentia...,"ClinicalTrials.gov (NCT00105443, NCT00108953, ...",38395854.0,10.1186/s12885-024-12000-7,38395854(10.1186/s12885-024-12000-7)
3,Embase:4,S100A9(+)CD14(+) monocytes contribute to anti-...,"Tu X., Chen L., Zheng Y., Mu C., Zhang Z., Wan...",Journal of Experimental and Clinical Cancer Re...,2024,1 Dec 2024,Background: The paucity of reliable biomarkers...,"ClinicalTrials.gov (NCT02576509, NCT02989922, ...",38454445.0,10.1186/s13046-024-02985-1,38454445(10.1186/s13046-024-02985-1)
4,Embase:5,"Neoadjuvant therapy of sequential TACE, camrel...","Hao Y., Xie F., Zhou Y., Li C., Zhang X., Shen...",Trials (2024) 25:1 Article Number: 490. Date o...,2024,1 Dec 2024,Background: The high recurrence rate after liv...,,39030637.0,10.1186/s13063-024-08340-1,39030637(10.1186/s13063-024-08340-1)
...,...,...,...,...,...,...,...,...,...,...,...
3541,PubMed_direct:1584,Efficacy and safety of combining short-course ...,"Wang F, Lai C, Lv Y, Zhang F, Shi L, Wang Y, S...",Int J Surg. 2024 Aug 2. doi: 10.1097/JS9.00000...,2024,2024/08/02,,,39093871.0,10.1097/JS9.0000000000001960,39093871(10.1097/JS9.0000000000001960)
3542,PubMed_direct:1585,"Phase I studies of davoceticept (ALPN-202), a ...","Davar D, Cavalcante L, Lakhani N, Moser J, Mil...",J Immunother Cancer. 2024 Aug 3;12(8):e009474....,2024,2024/08/03,,,39097413.0,10.1136/jitc-2024-009474,39097413(10.1136/jitc-2024-009474)
3543,PubMed_direct:1586,Sunitinib for the treatment of patients with a...,"Nasca V, Prinzi N, Coppa J, Prisciandaro M, Ol...",Eur J Cancer. 2024 Aug 9;209:114276. doi: 10.1...,2024,2024/08/11,,,39128186.0,10.1016/j.ejca.2024.114276,39128186(10.1016/j.ejca.2024.114276)
3544,PubMed_direct:1587,Patient-Reported Outcomes From the Phase III H...,"Sangro B, Galle PR, Kelley RK, Charoentum C, D...",J Clin Oncol. 2024 Aug 10;42(23):2790-2799. do...,2024,2024/05/28,,,38805668.0,10.1200/JCO.23.01462,38805668(10.1200/JCO.23.01462)


In [107]:
# Drop records that share a PMID_DOI key, keeping the first occurrence of each.
df_all_unique = df_all_idxs.drop_duplicates(subset=['PMID_DOI'], keep='first').copy()
print(colored(f"Number of references: {df_all_unique.shape[0]}", "red"))

# Source database = the part of `database_index` before the colon (e.g. "Embase").
df_all_unique['database'] = df_all_unique['database_index'].str.split(':').str[0]
df_all_unique.head(2).T

[31mNumber of references: 3193[0m


Unnamed: 0,0,1
database_index,Embase:1,Embase:2
Title,Entecavir versus tenofovir for prevention of h...,Macro CD5L(+) deteriorates CD8(+)T cells exhau...
Authors,"Pan L.-X., Wang Y.-Y., Li Z.-H., Luo J.-X., Wu...","Lu J.-C., Wu L.-L., Sun Y.-N., Huang X.-Y., Ga..."
Citation,Trials (2024) 25:1 Article Number: 25. Date of...,Nature Communications (2024) 15:1 Article Numb...
Publication Year,2024,2024
Create Date,1 Dec 2024,1 Dec 2024
Abstract,Background: Entecavir and tenofovir disoproxil...,Intratumoral immune status influences tumor th...
Clinical Trial Numbers,ClinicalTrials.gov (NCT02650271),"ClinicalTrials.gov (NCT03951597, NCT05342194)"
PMID,38183137.0,38245530.0
DOI,10.1186/s13063-023-07742-x,10.1038/s41467-024-44795-1


In [109]:
# Write the de-duplicated reference list, ordered by publication year, without the index column.
(
    df_all_unique
    .sort_values(by='Publication Year')
    .to_csv(os.path.join(litsearch_folder, '20240812_litsearch_final.csv'), index=False)
)

In [110]:
# Distinct publication years, in order of first appearance (sanity check on the year filter).
df_all_unique['Publication Year'].unique()

array([2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017])

In [28]:
# Spot check: look up one known DOI in the de-duplicated list.
df_all_unique[df_all_unique['DOI'] == '10.1159/000481243'].T

Unnamed: 0,2120
database_index,PubMed_MeSH:375
Title,Transarterial Chemoembolization in Combination...
Authors,"Kudo M, Arizumi T."
Citation,Oncology. 2017;93 Suppl 1:127-134. doi: 10.115...
Publication Year,2017
Create Date,2017/12/20
Abstract,
Clinical Trial Numbers,
PMID,29258086.0
DOI,10.1159/000481243


In [119]:
# Spot check: look up the same DOI in the Embase frame.
df_Embase[df_Embase['DOI'] == '10.1159/000481243']

Unnamed: 0,Embase_index,Title,Author Names,AiP/IP Entry Date,Source,Publication Year,Volume,Issue,Date of Publication,Abstract,Clinical Trial Numbers,Embase Accession ID,Medline PMID,PUI,DOI,Full Text Link,PMID_DOI


In [120]:
# Embase records whose title contains 'Chemoembolization' (case-sensitive);
# records with a missing title are treated as non-matching.
df_Embase[df_Embase['Title'].str.contains('Chemoembolization', na=False)]

Unnamed: 0,Embase_index,Title,Author Names,AiP/IP Entry Date,Source,Publication Year,Volume,Issue,Date of Publication,Abstract,Clinical Trial Numbers,Embase Accession ID,Medline PMID,PUI,DOI,Full Text Link,PMID_DOI
15,15,Adjuvant Transarterial Chemoembolization with ...,"Peng Z., Fan W., Liu Z., Xiao H., Wu J., Tang ...",2024-04-17,JAMA Surgery (2024) 159:6 (616-624). Date of P...,2024,159,6.0,12 Jun 2024,Importance: Certain patients with hepatocellul...,ClinicalTrials.gov (NCT04143191),,38568599.0,L2031605048,10.1001/jamasurg.2024.0506,http://dx.doi.org/10.1001/jamasurg.2024.0506,38568599(10.1001/jamasurg.2024.0506)
121,121,Stereotactic Body Radiation Therapy With or Wi...,"Dumago M.P., Agas R.A.F., Jainar C.J.E., Yap E...",2023-06-19,Journal of Gastrointestinal Cancer (2023) 54:4...,2023,54,4.0,1 Dec 2023,Purpose: This study aims to review the current...,"ClinicalTrials.gov (NCT01730937, NCT02323360, ...",,37306936.0,L2023788578,10.1007/s12029-023-00940-5,http://dx.doi.org/10.1007/s12029-023-00940-5,37306936(10.1007/s12029-023-00940-5)
256,256,Preliminary Report: International Randomized S...,"Hui C., Hall J., Kothary N., Sze D., Wang D., ...",,American Journal of Clinical Oncology: Cancer ...,2023,46,6.0,1 Jun 2023,Background: TACE is a widely used local therap...,,,,L641866294,10.1097/COC.0000000000001009,http://dx.doi.org/10.1097/COC.0000000000001009,256
343,343,Lenvatinib Combined with Transarterial Chemoem...,"Peng Z., Fan W., Zhu B., Wang G., Sun J., Xiao...",2023-01-11,Journal of Clinical Oncology (2023) 41:1 (117-...,2023,41,1.0,1 Jan 2023,PURPOSE Lenvatinib (LEN) is a first-line thera...,ClinicalTrials.gov (NCT03905967),,35921605.0,L2022080921,10.1200/JCO.22.00392,http://dx.doi.org/10.1200/JCO.22.00392,35921605(10.1200/JCO.22.00392)
366,366,Implications of the TACTICS Trial: Establishin...,Kudo M.,2022-12-20,Liver Cancer (2022) 11:6 (487-496). Date of Pu...,2022,11,6.0,6 Dec 2022,,ClinicalTrials.gov (NCT04712643),,,L2021762833,10.1159/000527404,http://dx.doi.org/10.1159/000527404,366
507,507,Randomized Phase 3 LEAP-012 Study: Transarteri...,"Llovet J.M., Vogel A., Madoff D.C., Finn R.S.,...",2022-02-15,CardioVascular and Interventional Radiology (2...,2022,45,4.0,1 Apr 2022,Purpose: Transarterial chemoembolization (TACE...,ClinicalTrials.gov (NCT04246177),,35119481.0,L2014944576,10.1007/s00270-021-03031-9,http://dx.doi.org/10.1007/s00270-021-03031-9,35119481(10.1007/s00270-021-03031-9)
525,525,Transarterial Chemoembolization for Hepatocell...,"Kotsifa E., Vergadis C., Vailas M., Machairas ...",2022-03-28,Journal of Personalized Medicine (2022) 12:3 A...,2022,12,3.0,1 Mar 2022,Hepatocellular carcinoma (HCC) is the most com...,,,,L2015947550,10.3390/jpm12030436,http://dx.doi.org/10.3390/jpm12030436,525
562,562,"Hepatic Arterial Infusion of Oxaliplatin, Fluo...","Li Q.-J., He M.-K., Chen H.-W., Fang W.-Q., Zh...",2022-11-03,Journal of Clinical Oncology (2022) 40:2 (159-...,2022,40,2.0,10 Jan 2022,"PURPOSE In a previous phase II trial, hepatic ...",ClinicalTrials.gov (NCT02973685),,34648352.0,L2020873817,10.1200/JCO.21.00608,http://dx.doi.org/10.1200/JCO.21.00608,34648352(10.1200/JCO.21.00608)
564,564,Stereotactic Body Radiation Therapy (SBRT) Ver...,"Verbus E.A., Rossi A.J., Teke M., Nugent F.W.,...",2021-06-17,Annals of Surgical Oncology (2022) 29:1 (33-34...,2022,29,1.0,1 Jan 2022,,ClinicalTrials.gov (NCT03960008),,34117572.0,L2012386769,10.1245/s10434-021-10278-6,http://dx.doi.org/10.1245/s10434-021-10278-6,34117572(10.1245/s10434-021-10278-6)
651,651,Long-term Outcomes of Transcatheter Arterial C...,"Zhang Y.J., Chen M.S., Chen Y., Yee Lau W., Pe...",2021-10-01,JAMA Network Open (2021) 4:9 (E2126992). Date ...,2021,4,9.0,27 Sep 2021,IMPORTANCE The long-term outcomes of transcath...,,,34570206.0,L636088806,10.1001/jamanetworkopen.2021.26992,http://dx.doi.org/10.1001/jamanetworkopen.2021...,34570206(10.1001/jamanetworkopen.2021.26992)


In [121]:
# Embase records with a missing title.
df_Embase[df_Embase['Title'].isna()]

Unnamed: 0,Embase_index,Title,Author Names,AiP/IP Entry Date,Source,Publication Year,Volume,Issue,Date of Publication,Abstract,Clinical Trial Numbers,Embase Accession ID,Medline PMID,PUI,DOI,Full Text Link,PMID_DOI
441,441,,"Koch B.A., Tannapfel A., Uflacker L.",,Oncology Research and Treatment (2022) 45 Supp...,2022,45,,1 Sep 2022,Introduction: An assembly of inflammatory cell...,,,,L640261926,10.1159/000526456,http://dx.doi.org/10.1159/000526456,441
442,442,,"Trojan J., Waldschmidt D., Ehmer U., Goetze T....",,Oncology Research and Treatment (2022) 45 Supp...,2022,45,,1 Sep 2022,Background: The multi-targeted tyrosine kinase...,,,,L640262586,10.1159/000526456,http://dx.doi.org/10.1159/000526456,442


In [124]:
# removing duplicates
# Keeps the rows of `df_Pubmed` whose PMID_DOI appears in
# `df_Pubmed_notduplicated_IDs` and writes them to '20240812_Pubmed search revised.csv'.
# NOTE(review): neither `df_Pubmed` nor `df_Pubmed_notduplicated_IDs` is defined in any
# cell visible in this notebook, so this cell raises NameError on a fresh kernel unless
# they are created elsewhere — looks like a leftover from an earlier version; TODO confirm or remove.
df_Pubmed.loc[lambda d: d.PMID_DOI.isin(df_Pubmed_notduplicated_IDs.PMID_DOI.values)].to_csv(os.path.join(litsearch_folder, '20240812_Pubmed search revised.csv'), index=False)

In [125]:
# Number of `df_Pubmed` rows retained by the PMID_DOI filter plus the number of Embase rows.
# NOTE(review): `df_Pubmed` and `df_Pubmed_notduplicated_IDs` are not defined in any
# visible cell — this fails on a fresh kernel unless they are created elsewhere; TODO confirm.
df_Pubmed.loc[lambda d: d.PMID_DOI.isin(df_Pubmed_notduplicated_IDs.PMID_DOI.values)].shape[0] + df_Embase.shape[0]

2029

In [126]:
# Same count as the previous cell, plus two hard-coded amounts.
# NOTE(review): the constants 223 and 3 are not derived anywhere in the visible
# notebook — document where they come from or compute them from the data.
# NOTE(review): `df_Pubmed` and `df_Pubmed_notduplicated_IDs` are not defined in any
# visible cell — this fails on a fresh kernel unless they are created elsewhere; TODO confirm.
df_Pubmed.loc[lambda d: d.PMID_DOI.isin(df_Pubmed_notduplicated_IDs.PMID_DOI.values)].shape[0] + df_Embase.shape[0] + 223 + 3

2255

In [127]:
# Number of `df_Pubmed` rows removed by the PMID_DOI filter (total rows minus retained rows).
# NOTE(review): `df_Pubmed` and `df_Pubmed_notduplicated_IDs` are not defined in any
# visible cell — this fails on a fresh kernel unless they are created elsewhere; TODO confirm.
df_Pubmed.shape[0] - df_Pubmed.loc[lambda d: d.PMID_DOI.isin(df_Pubmed_notduplicated_IDs.PMID_DOI.values)].shape[0]

63