# Read libraries

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from copy import copy, deepcopy
from pathlib import Path
from sys import path

# Make the parent of the current working directory importable
# (presumably where the Project_libraries package lives — TODO confirm).
path.append( str(Path.cwd().parent) )

In [2]:
import json
import re

import matplotlib.pyplot as plt
import pandas as pd

from collections import Counter
from string import punctuation, whitespace

from Project_libraries.pubmed import ( concatenate_lines, 
                                       extract_publication_date, 
                                       get_article_data )

In [3]:
# Both input files are looked up in the directory the notebook is run from
working_dir = Path.cwd()
csv_file = working_dir.joinpath('csv-skinwoundh-set.csv')
abstracts_file = working_dir.joinpath('abstract-skinwoundh-set.txt')

# Import CSV file

In [4]:
# Load the CSV listing of the publications and display the resulting table
df = pd.read_csv(filepath_or_buffer=csv_file)
df

Unnamed: 0,PMID,Title,Authors,Citation,First Author,Journal/Book,Publication Year,Create Date,PMCID,NIHMS ID,DOI
0,35187180,Autophagy and skin wound healing,"Ren H, Zhao F, Zhang Q, Huang X, Wang Z.",Burns Trauma. 2022 Feb 16;10:tkac003. doi: 10....,Ren H,Burns Trauma,2022,2022/02/21,PMC8847901,,10.1093/burnst/tkac003
1,30938269,Regenerative Scar-Free Skin Wound Healing,"Monavarian M, Kader S, Moeinzadeh S, Jabbari E.",Tissue Eng Part B Rev. 2019 Aug;25(4):294-311....,Monavarian M,Tissue Eng Part B Rev,2019,2019/04/03,PMC6686695,,10.1089/ten.TEB.2018.0350
2,36230913,Macrophage-Mediated Inflammation in Skin Wound...,"Hassanshahi A, Moradzad M, Ghalamkari S, Fadae...",Cells. 2022 Sep 21;11(19):2953. doi: 10.3390/c...,Hassanshahi A,Cells,2022,2022/10/14,PMC9564023,,10.3390/cells11192953
3,33945225,Accelerated Skin Wound Healing by Electrical S...,"Luo R, Dai J, Zhang J, Li Z.",Adv Healthc Mater. 2021 Aug;10(16):e2100557. d...,Luo R,Adv Healthc Mater,2021,2021/05/04,,,10.1002/adhm.202100557
4,27974711,Skin Wound Healing: An Update on the Current K...,"Sorg H, Tilkorn DJ, Hager S, Hauser J, Mirasts...",Eur Surg Res. 2017;58(1-2):81-94. doi: 10.1159...,Sorg H,Eur Surg Res,2017,2016/12/16,,,10.1159/000454919
...,...,...,...,...,...,...,...,...,...,...,...
2308,19143471,Stress-mediated increases in systemic and loca...,"Sivamani RK, Pullar CE, Manabat-Hidalgo CG, Ro...",PLoS Med. 2009 Jan 13;6(1):e12. doi: 10.1371/j...,Sivamani RK,PLoS Med,2009,2009/01/16,PMC2621262,,10.1371/journal.pmed.1000012
2309,31654655,Orphan nuclear receptor TR3/Nur77 biologics in...,"Chen C, Li Y, Hou S, Bourbon PM, Qin L, Zhao K...",Microvasc Res. 2020 Mar;128:103934. doi: 10.10...,Chen C,Microvasc Res,2020,2019/10/27,,,10.1016/j.mvr.2019.103934
2310,30930165,DLL4 and Jagged1 are angiogenic targets of orp...,"Peng J, Zhao S, Li Y, Niu G, Chen C, Ye T, Zha...",Microvasc Res. 2019 Jul;124:67-75. doi: 10.101...,Peng J,Microvasc Res,2019,2019/04/02,PMC6474785,NIHMS1526146,10.1016/j.mvr.2019.03.006
2311,35581508,Milk fat-globule epidermal growth factor 8: A ...,"Xiao Y, Tang Y, Shu X, Zhu J, Wu X, Zhang Y.",Mol Biol Rep. 2022 Sep;49(9):8883-8893. doi: 1...,Xiao Y,Mol Biol Rep,2022,2022/05/17,,,10.1007/s11033-022-07365-6


In [5]:
# One row of the table per publication
n_papers = len(df)

# nunique() leaves out missing values, so a record without a journal name
# is not counted as an additional journal
n_journals = df['Journal/Book'].nunique()

print(f"There are {n_papers} papers published in {n_journals} unique journals.")

There are 2313 published in 716 unique journals.


In [6]:
# Number of publications per journal name
journals = Counter(journal for journal in df['Journal/Book'])

In [7]:
# The 20 journals with the most publications, most frequent first
journals.most_common(n=20)

[('Int J Mol Sci', 63),
 ('J Invest Dermatol', 62),
 ('Wound Repair Regen', 60),
 ('PLoS One', 43),
 ('Int J Biol Macromol', 37),
 ('Sci Rep', 33),
 ('Stem Cell Res Ther', 31),
 ('Exp Dermatol', 30),
 ('Adv Wound Care (New Rochelle)', 27),
 ('Front Bioeng Biotechnol', 25),
 ('ACS Appl Mater Interfaces', 24),
 ('Mater Sci Eng C Mater Biol Appl', 20),
 ('Molecules', 19),
 ('Am J Pathol', 19),
 ('Burns Trauma', 18),
 ('FASEB J', 18),
 ('Acta Biomater', 18),
 ('J Dermatol Sci', 17),
 ('Biomed Mater', 17),
 ('Carbohydr Polym', 17)]

# Import abstract file

In [8]:
with open(abstracts_file, 'r', encoding = 'utf-8') as f_abs:
    data = f_abs.readlines()

# Identify starting line of each article
#
# A record starts with its running number followed by ". " at the very
# beginning of a line. The pattern is a raw string so that "\d" reaches the
# regex engine untouched, and the number is captured as group 1.
pattern = r'(\d{1,4})[.] '

articles_start = []
for k, line in enumerate(data):

    # Start of record
    #
    potential_match = re.match(pattern, line)
    if potential_match is None:
        continue

    potential_index = int( potential_match.group(1) )

    # Other lines may also begin with "<number>. "; only accept the number
    # that continues the sequence 1, 2, 3, ... found so far.
    if potential_index - 1 == len(articles_start):
        articles_start.append( k )

print(f"There are {len(articles_start)} articles in file." )

# Need to add last line of file so all articles are checked later
articles_start.append( len(data) )

There are 2313 articles in file.


In [9]:
articles, retracted_articles = [], []

# Consecutive entries of articles_start delimit the lines of one record
for first_line, next_first_line in zip(articles_start, articles_start[1:]):
    record_lines = copy( data[first_line:next_first_line] )
    is_flagged, article = get_article_data(record_lines)

    articles.append( article )

    # Flagged records are additionally kept in their own list
    if is_flagged:
        retracted_articles.append( article )
    

-----> 49. RETRACTED ARTICLE

-----> 563. RETRACTED ARTICLE

-----> 794. RETRACTED ARTICLE

-----> 1548. RETRACTED ARTICLE

-----> 1723. RETRACTED ARTICLE

-----> 2016. RETRACTED ARTICLE

-----> 2041. RETRACTED ARTICLE



## Manual corrections

In [None]:
i = 183 # type value
info = copy( data[articles_start[i]:articles_start[i+1]] )

# Raw lines of the selected record, without surrounding whitespace
for raw_line in info:
    print(raw_line.strip())
print()

# Result of parsing the same lines, to compare against the raw text
print(get_article_data(info))

In [10]:
# Manual correction: overwrite the abstract and identifier fields of one record
i = 402

# The position is hard-coded; make sure it still points at the intended
# article before overwriting anything.
assert articles[i]['doi'] == '10.1038/s41598-018-33558-w', (
    f"Record {i} is not the expected article; update the index before patching." )

print(articles[i], '\n')
articles[i]['abstract'] = 'A correction to this article has been published and is linked from the HTML and PDF versions of this paper. The error has not been fixed in the paper.'
articles[i]['other_ids'] = 'DOI: 10.1038/s41598-018-33558-w PMCID: PMC6198006 PMID: 30349056'

print(articles[i], '\n------')

{'retraction': None, 'journal': 'Sci Rep', 'year': 2018, 'date': ' Oct 23', 'volume': '8(1)', 'pages': '15906', 'doi': '10.1038/s41598-018-33558-w', 'title': 'Author Correction: Cathelicidin-OA1, a novel antioxidant peptide identified from an amphibian, accelerates skin wound healing.', 'authors': 'Cao X(1), Wang Y(2), Wu C(3), Li X(4), Fu Z(3), Yang M(3), Bian W(3), Wang S(2), Song Y(3), Tang J(5), Yang X(6)', 'affiliations': 'Author information: (1)Department of Pathology, Faculty of Basic Medical Science, Kunming Medical University, Kunming, 650500, Yunnan, China. (2)Key Laboratory of Chemistry in Ethnic Medicine Resource, State Ethnic Affairs Commission & Ministry of Education, School of Ethnomedicine and Ethnopharmacy, Yunnan Minzu University, Kunming, 650500, Yunnan, China. (3)Department of Anatomy and Histology & Embryology, Faculty of Basic Medical Science, Kunming Medical University, Kunming, 650500, Yunnan, China. (4)Department of Biochemistry and Molecular Biology, Faculty o

In [11]:
# Manual correction: the parser stored the abstract text under 'other_ids'
# and left 'abstract' empty for this record; set both fields by hand.
i = 599

# The position is hard-coded; make sure it still points at the intended
# article before overwriting anything.
assert articles[i]['doi'] == '10.3390/pharmaceutics14061291', (
    f"Record {i} is not the expected article; update the index before patching." )

print(articles[i], '\n')

articles[i]['abstract'] = 'Wound healing is a complex biological process [...].'
articles[i]['other_ids'] = 'DOI: 10.3390/pharmaceutics14061291 PMCID: PMC9231209 PMID: 35745862'

print(articles[i], '\n------')


{'retraction': None, 'journal': 'Pharmaceutics', 'year': 2022, 'date': ' Jun 17', 'volume': '14(6)', 'pages': '1291', 'doi': '10.3390/pharmaceutics14061291', 'title': 'Biomaterials in Skin Wound Healing and Tissue Regenerations-An Overview.', 'authors': 'Konop M(1)', 'affiliations': 'Author information: (1)Department of Experimental Physiology and Pathophysiology, Laboratory of Centre for Preclinical Research, Medical University of Warsaw, 02-091 Warszawa, Poland', 'erratum': None, 'comment': None, 'abstract': None, 'copyright': None, 'other_ids': 'Wound healing is a complex biological process [...].'} 

{'retraction': None, 'journal': 'Pharmaceutics', 'year': 2022, 'date': ' Jun 17', 'volume': '14(6)', 'pages': '1291', 'doi': '10.3390/pharmaceutics14061291', 'title': 'Biomaterials in Skin Wound Healing and Tissue Regenerations-An Overview.', 'authors': 'Konop M(1)', 'affiliations': 'Author information: (1)Department of Experimental Physiology and Pathophysiology, Laboratory of Centre 

In [12]:
# Manual correction: overwrite the abstract and identifier fields of one record
i = 606

# The position is hard-coded; make sure it still points at the intended
# article before overwriting anything.
assert articles[i]['doi'] == '10.3390/ijms20174178', (
    f"Record {i} is not the expected article; update the index before patching." )

print(articles[i], '\n')
# The abstract literal previously ended in a stray double period
articles[i]['abstract'] = 'The authors are sorry to report that some of the HPLC data reported in their recently published paper [...].'
articles[i]['other_ids'] = 'DOI: 10.3390/ijms20174178 PMCID: PMC6747146 PMID: 31454990'

print(articles[i], '\n------')

{'retraction': None, 'journal': 'Int J Mol Sci', 'year': 2019, 'date': ' Aug 26', 'volume': '20(17)', 'pages': '4178', 'doi': '10.3390/ijms20174178', 'title': 'Correction: Huang, H.-M., et al. Effects of Sapindus mukorossi Seed Oil on Skin Wound Healing: In Vivo and In Vitro Testing. Int. J. Mol. Sci. 2019, 20, 2579.', 'authors': 'Chen CC(1)(2), Nien CJ(3), Chen LG(4), Huang KY(3), Chang WJ(5), Huang HM(6)(7)', 'affiliations': 'Author information: (1)Emergency Department, Mackay Momorial Hospital, Taipei 110, Taiwan. (2)Medical School, Mackay Medical College, New Taipei City 252, Taiwan. (3)Graduate Institute of Biomedical Optomechatronics, College of Biomedical Engineering, Taipei Medical University, Taipei 110, Taiwan. (4)Department of Microbiology, Immunology and Biopharmaceuticals, College of Life Sciences, National Chiayi University, Chiayi 600, Taiwan. (5)School of Dentistry, College of Oral Medicine, Taipei Medical University, Taipei 110, Taiwan. (6)Graduate Institute of Biomedi

In [13]:
# Manual correction: for this record the text stored under 'comment' is
# moved to 'abstract'.
i = 702

# The position is hard-coded; make sure it still points at the intended
# article before changing anything.
assert articles[i]['doi'] == '10.1016/j.molmed.2020.07.008', (
    f"Record {i} is not the expected article; update the index before patching." )

print(articles[i], '\n')

# Only move the text while it is still under 'comment'; otherwise running
# this cell a second time would replace the abstract with None.
if articles[i]['comment'] is not None:
    articles[i]['abstract'] = articles[i]['comment']
    articles[i]['comment'] = None

print(articles[i], '\n------')

{'retraction': None, 'journal': 'Trends Mol Med', 'year': 2020, 'date': ' Dec', 'volume': '26(12)', 'pages': '1101-1106', 'doi': '10.1016/j.molmed.2020.07.008', 'title': 'Fibroblast Heterogeneity in Wound Healing: Hurdles to Clinical Translation.', 'authors': 'Mascharak S(1), desJardins-Park HE(1), Longaker MT(2)', 'affiliations': 'Author information: (1)Department of Surgery, Division of Plastic and Reconstructive Surgery, Stanford University School of Medicine, Stanford, CA 94305, USA; Institute for Stem Cell Biology and Regenerative Medicine, Stanford University School of Medicine, Stanford, CA 94305, USA. (2)Department of Surgery, Division of Plastic and Reconstructive Surgery, Stanford University School of Medicine, Stanford, CA 94305, USA; Institute for Stem Cell Biology and Regenerative Medicine, Stanford University School of Medicine, Stanford, CA 94305, USA. Electronic address: longaker@stanford.edu', 'erratum': None, 'comment': 'Recent work has revealed that fibroblasts are r

In [14]:
# Manual correction: overwrite the identifier field of one record
i = 2121

# The position is hard-coded; make sure it still points at the intended
# article before overwriting anything.
assert articles[i]['doi'] == '10.1631/jzus.B2200447', (
    f"Record {i} is not the expected article; update the index before patching." )

print(articles[i], '\n')
articles[i]['other_ids'] = 'DOI: 10.1631/jzus.B2200447 PMCID: PMC10264175 PMID: 37309042 [Indexed for MEDLINE]'

print(articles[i], '\n------')

{'retraction': None, 'journal': 'J Zhejiang Univ Sci B', 'year': 2023, 'date': ' Jun 15', 'volume': '24(6)', 'pages': '510-523', 'doi': '10.1631/jzus.B2200447', 'title': 'Nanosilver alleviates foreign body reaction and facilitates wound repair by regulating macrophage polarization.', 'authors': 'You C(1), Zhu Z(1), Wang S(1)(2), Wang X(1), Han C(3), Shao H(4)', 'affiliations': "Author information: (1)Department of Burns & Wound Care Center, the Second Affiliated Hospital, Zhejiang University School of Medicine, Hangzhou 310009, China. (2)Department of Burns, the First People's Hospital of Wenling, Wenling 317500, China. (3)Department of Burns & Wound Care Center, the Second Affiliated Hospital, Zhejiang University School of Medicine, Hangzhou 310009, China. 2504131@zju.edu.cn, zrssk@zju.edu.cn. (4)Department of Burns & Wound Care Center, the Second Affiliated Hospital, Zhejiang University School of Medicine, Hangzhou 310009, China. 2504131@zju.edu.cn", 'erratum': None, 'comment': None,

# Verify records

In [15]:
# A well-parsed record has an identifier line starting with one of these labels
expected_prefixes = ('DOI', 'PMID', 'PMCID')

# Print every record whose 'other_ids' field does not look like an identifier
# line, so it can be corrected manually.
for i, article in enumerate( articles ):
    other_ids = article['other_ids']

    # A missing value (None) is reported like any other malformed field
    # instead of raising a TypeError when it is sliced.
    if isinstance(other_ids, str) and other_ids.startswith(expected_prefixes):
        continue

    print(i, other_ids, '\n')

# Classify articles

In [16]:
# Positions of the records that are themselves retraction notices
to_remove = []
for i, article in enumerate( articles ):
    # Records without retraction information are of no interest here
    if not article['retraction']:
        continue

    print( f"{i} -- {article['journal']}. {article['year']}; {article['volume']} "
           f"\n\t{article['title']}\n\t{article['retraction']}\n")

    # The notice is marked for removal; the retracted paper itself is kept
    if article['retraction'].startswith('Retraction Notice: '):
        to_remove.append(i)
        print(to_remove)

print(to_remove)

48 -- Evid Based Complement Alternat Med. 2022; 2022 
	Porcine Fibrin Sealant Promotes Skin Wound Healing in Rats.
	Retraction in Evid Based Complement Alternat Med. 2023 Oct 4;2023:9757140.

71 -- Evid Based Complement Alternat Med. 2023; 2023 
	Retracted: Porcine Fibrin Sealant Promotes Skin Wound Healing in Rats.
	Retraction Notice: Retraction of Evid Based Complement Alternat Med. 2022 Jun 23;2022:5063625. [This retracts the article DOI: 10.1155/2022/5063625.].

[71]
113 -- Biomater Sci. 2023; 11(3) 
	Retraction: Sericin hydrogels promote skin wound healing with effective regeneration of hair follicles and sebaceous glands after complete loss of epidermis and dermis.
	Retraction Notice: Retraction of Biomater Sci. 2018 Nov 1;6(11):2859-2870.

[71, 113]
131 -- J Healthc Eng. 2023; 2023 
	Retracted: Efficacy of Human Adipose Derived Mesenchymal Stem Cells in Promoting Skin Wound Healing.
	Retraction Notice: Retraction of J Healthc Eng. 2022 Mar 24;2022:6590025. [This retracts the art

In [17]:
# Delete from the highest position down, so that a deletion never shifts a
# position that still has to be removed.
for i in sorted(set(to_remove), reverse=True):
    del articles[i]

# Forget the positions once used: running this cell a second time would
# otherwise delete unrelated articles that moved into those positions.
to_remove.clear()

print(len(articles))

2308


In [18]:
# Positions of the records that are themselves erratum notices
to_remove = []
for i, article in enumerate( articles ):
    # Records without erratum information are of no interest here
    if not article['erratum']:
        continue

    print( f"{i} -- {article['journal']}. {article['year']}; {article['volume']} "
           f"\n\t{article['title']}\n\t{article['erratum']}\n")

    # The notice is marked for removal; the corrected paper itself is kept
    if article['erratum'].startswith('Erratum for '):
        to_remove.append(i)
        print(to_remove)

print(to_remove)

105 -- Front Immunol. 2022; 13 
	Single-Cell RNA-seq Analysis Reveals Cellular Functional Heterogeneity in Dermis Between Fibrotic and Regenerative Wound Healing Fates.
	Erratum in Front Immunol. 2023 Mar 31;14:1175360.

119 -- Br J Pharmacol. 2020; 177(14) 
	Low-concentration DMSO accelerates skin wound healing by Akt/mTOR-mediated cell proliferation and migration in diabetic mice.
	Erratum in Br J Pharmacol. 2020 Dec;177(23):5433.

187 -- Int J Mol Sci. 2019; 20(10) 
	Effects of Sapindus mukorossi Seed Oil on Skin Wound Healing: In Vivo and in Vitro Testing.
	Erratum in Int J Mol Sci. 2019 Aug 26;20(17):

208 -- Int J Nanomedicine. 2022; 17 
	Self-Assembled Nano-Peptide Hydrogels with Human Umbilical Cord Mesenchymal Stem Cell Spheroids Accelerate Diabetic Skin Wound Healing by Inhibiting Inflammation and Promoting Angiogenesis.
	Erratum in Int J Nanomedicine. 2022 Jul 11;17:3057-3058.

293 -- Wound Repair Regen. 2006; 14(1) 
	Genetic analysis of skin wound healing and scarring in a 

In [19]:
# Delete from the highest position down, so that a deletion never shifts a
# position that still has to be removed.
for i in sorted(set(to_remove), reverse=True):
    del articles[i]

# Forget the positions once used: running this cell a second time would
# otherwise delete unrelated articles that moved into those positions.
to_remove.clear()

print(len(articles))

2300


## Dealing with *Comments* is complicated

Some *Comments* are perspectives aiming to publicize the target paper (like News & Views in Nature). These are published in the same issue by different authors.

We remove the actual *Comment* (if available) from the set of publications to analyze. We adjust `comment` key of target paper to convey this positive information. 

Some *Comments* appear to be summaries of the target papers. They are published in a different journal by a subset of the original authors.

We remove the actual *Comment* (if available) from the set of publications to analyze. We adjust `comment` key of target paper to convey this information. 

Some *Comments* appear to be actual criticisms of the target papers. They are published in the same journal as the target but later than the target and are authored by different researchers.

We remove the actual *Comment* (if available) from the set of publications to analyze. We adjust `comment` key of target paper to convey this negative information.  

In [20]:
def _order(first, second):
    """Return 1, -1 or 0 as `first` sorts after, before or level with `second`."""
    if first > second:
        return 1
    if first < second:
        return -1
    return 0


def _strip_comment_prefix(citation_head):
    """Return `citation_head` without a leading 'Comment', 'Comment on' or 'Comment in'.

    An anchored regular expression is used instead of ``str.lstrip`` because
    ``lstrip`` removes a *set of characters*, not a prefix, and would also
    eat the first letters of journal names such as 'eLife', 'iScience',
    'npj Regen Med' or 'Cell'.
    """
    return re.sub(r'^Comment(?:\s+(?:on|in))?\s+', '', citation_head.strip()).strip()


def _first_page(pages):
    """Return the first page of `pages` as an int when it is purely numeric.

    Ranges written with a hyphen or an en dash are both understood.
    Non-numeric page identifiers (e.g. 'e12') are returned as strings.
    """
    first = re.split(r'[-\u2013]', str(pages or ''))[0].strip()
    return int(first) if first.isdecimal() else first


def _volume_order(focus_volume, target_volume):
    """Order two volume/issue labels such as '26(9)' and '26(12)'.

    The numbers inside the labels are compared as integers, so that issue 9
    sorts before issue 12. When a label holds no number at all the raw
    strings are compared instead. Returns 1, -1 or 0.
    """
    focus_key = tuple(int(n) for n in re.findall(r'\d+', str(focus_volume or '')))
    target_key = tuple(int(n) for n in re.findall(r'\d+', str(target_volume or '')))
    if not focus_key or not target_key:
        focus_key, target_key = str(focus_volume or ''), str(target_volume or '')
    return _order(focus_key, target_key)


def _date_order(article, target, date_focus, date_target):
    """Order the publication dates of the focus article and of its target.

    The values returned by extract_publication_date are used rather than the
    raw ' Mon DD' strings, because month names sort alphabetically
    (' Dec' < ' Jan'). These values are assumed to be orderable, as the
    different-journal branch of the loop already compares them. If they
    cannot be compared, or tie, the raw strings decide. Returns 1, -1 or 0.
    """
    try:
        order = _order(date_focus, date_target)
    except TypeError:
        order = 0
    if order == 0:
        order = _order(article['date'], target['date'])
    return order


def _classify_within_issue(i, article, target, date_focus, date_target, to_remove):
    """Classify a focus/target pair from the same journal and year whose
    volumes are identical or cannot be ordered.

    Same issue date: the one printed on earlier pages is taken to be the
    commentary. Otherwise the later of the two is taken to be the comment.
    Either `i` is appended to `to_remove` or `article['comment']` is prefixed.
    """
    if article['date'] == target['date']:
        print('--->', article['date'])

        focus_pages = _first_page(article['pages'])
        target_pages = _first_page(target['pages'])

        # An int cannot be ordered against a str; fall back to text on both sides
        if type(target_pages) != type(focus_pages):
            target_pages = str(target_pages)
            focus_pages = str(focus_pages)

        print('--->', focus_pages, target_pages)

        # Same date, earlier pages
        if focus_pages < target_pages:
            to_remove.append(i)
            print(f"---> Article {i} is a commentary and is to be removed")

        # Same date, later pages
        else:
            article['comment'] = ('Received highlight article! '
                                  + article['comment'])
            print('---> Received highlight article\n')
        return

    date_order = _date_order(article, target, date_focus, date_target)

    # Later time
    if date_order > 0:
        to_remove.append(i)
        print(f"---> Article {i} is a comment and is to be removed")

    # Earlier time
    elif date_order < 0:
        article['comment'] = 'Received comment! ' + article['comment']
        print('---> Received comment\n')


to_remove = []
for i, article in enumerate( articles ):
    # Skip records without a comment, and records already annotated by a
    # previous run of this cell: their comment no longer starts with the
    # citation that is parsed below.
    if not article['comment'] or article['comment'].startswith('Received '):
        continue

    print('\n', i, 'Focus: ', article['journal'], article['year'],
          article['date'], article['volume'],
          article['pages'], article['doi'])
    print('--', article['comment'])

    # Extract info about paper discussed in comment.
    # The text before the first ';' ends in "<journal>. <year>[ <date>]",
    # the text after it reads "<volume>:<pages>.[ doi: <doi>.]"
    comment_parts = article['comment'].split(';')

    target = {}
    aux = comment_parts[0].split('.')
    date_string = aux[-1].strip()
    target['year'] = int( date_string[:4] )
    target['date'] = date_string[4:]
    target['journal'] = aux[-2].strip()

    aux = comment_parts[1].split(':')
    target['volume'] = aux[0]
    target['pages'] = aux[1].split('.')[0]
    target['doi'] = aux[2].split()[0].strip('.') if len(aux) > 2 else None

    # Extract dates of publication
    date_focus = extract_publication_date(article)
    date_target = extract_publication_date(target)

    # Determine type of comment
    #
    # Same journal
    if article['journal'] == _strip_comment_prefix(target['journal']):
        print('===>>', article['journal'])
        print('===>>', article['year'], target['year'])

        # Same year
        if article['year'] == target['year']:
            print('===>>', article['volume'], target['volume'])

            # Volumes are only ordered when the target's volume is known
            volume_order = ( _volume_order(article['volume'], target['volume'])
                             if target['volume'] else 0 )

            # Same volume
            if target['volume'] and article['volume'] == target['volume']:
                print('--->', article['date'], target['date'])
                _classify_within_issue(i, article, target,
                                       date_focus, date_target, to_remove)

            # Same year, volumes that exist, later volume
            elif volume_order > 0:
                to_remove.append(i)
                print(f"---> Article {i} is a comment and is to be removed")

            # Same year, volumes that exist, earlier volume
            elif volume_order < 0:
                article['comment'] = 'Received comment! ' + article['comment']
                print('---> Received comment\n')

            # Same journal, same year, missing (or unorderable) volumes
            else:
                _classify_within_issue(i, article, target,
                                       date_focus, date_target, to_remove)

        # Same journal, later year
        elif article['year'] > target['year']:
            to_remove.append(i)
            print(f"---> Article {i} is a comment and is to be removed")

        # Same journal, earlier year
        elif article['year'] < target['year']:
            article['comment'] = 'Received comment! ' + article['comment']
            print('---> Received comment\n')

    # Different journals, later year
    elif article['year'] > target['year']:
        to_remove.append(i)
        print(f"---> Article {i} is a commentary and is to be removed")

    # Different journals, earlier year
    elif article['year'] < target['year']:
        article['comment'] = 'Received commentary! ' + article['comment']
        print('---> Received commentary\n')

    # Different journals, same year
    else:
        if date_focus > date_target:
            to_remove.append(i)
            print(f"---> Article {i} is a commentary and is to be removed")

        elif date_focus < date_target:
            article['comment'] = 'Received commentary! ' + article['comment']
            print('---> Received commentary\n')

        # Identical publication dates in different journals: nothing to go on
        else:
            print(f"---> WARNING: article {i} and its target cannot be ordered; "
                  "record left unchanged")

print(to_remove)


 126 Focus:  Chimerism 2012  Apr-Jun 3(2) 45-7 10.4161/chim.20739
-- Comment on Nassar D, Droitcourt C, Mathieu-d’Argent E, Kim MJ, Khosrotehrani K, Aractingi S. Fetal progenitor cells naturally transferred through pregnancy participate in inflammation and angiogenesis during wound healing. FASEB J. 2012;26:149–57. doi: 10.1096/fj.11-180695.
---> Article 126 is a commentary and is to be removed

 257 Focus:  Blood 2012  Jul 19 120(3) 499-500 10.1182/blood-2012-05-430660
-- Comment on Blood. 2012 Jul 19;120(3):613-25.
===>> Blood
===>> 2012 2012
===>> 120(3) 120(3)
--->  Jul 19  Jul 19
--->  Jul 19
---> 499 613
---> Article 257 is a commentary and is to be removed

 287 Focus:  Mil Med Res 2023  Nov 30 10(1) 60 10.1186/s40779-023-00498-0
-- Comment on Mil Med Res. 2023 Aug 17;10(1):36.
===>> Mil Med Res
===>> 2023 2023
===>> 10(1) 10(1)
--->  Nov 30  Aug 17
---> Article 287 is a comment and is to be removed

 298 Focus:  Aesthetic Plast Surg 2023  Jun 47(Suppl 1) 146-147 10.1007/s00266

In [21]:
# Delete from the highest position down, so that a deletion never shifts a
# position that still has to be removed.
for i in sorted(set(to_remove), reverse=True):
    del articles[i]

# Forget the positions once used: running this cell a second time would
# otherwise delete unrelated articles that moved into those positions.
to_remove.clear()

print(len(articles))

2288


## Create collections

In [22]:
pattern1 = 'review'
pattern2 = 'reviewed'

erratum_articles, comment_articles = [], []
review_articles, no_abstract_articles = [], []

# Each article goes into at most one collection; the first matching test wins
for article in articles:
    # Articles carrying retraction information are left out of every collection
    if article['retraction']:
        continue

    if article['erratum']:
        erratum_articles.append(article)

    elif article['comment'] and article['comment'].startswith('Received comment! '):
        comment_articles.append(article)

    elif not article['abstract']:
        no_abstract_articles.append(article)

    # Case-sensitive substring search in the abstract
    elif any(keyword in article['abstract'] for keyword in (pattern1, pattern2)):
        review_articles.append(article)


In [23]:
# Size of every collection, one number per line, in this fixed order
for collection in ( articles, retracted_articles, erratum_articles,
                    comment_articles, no_abstract_articles, review_articles ):
    print(len(collection))

2288
12
41
5
14
279


# Save to file

In [24]:
# Write the cleaned list of article records next to the notebook
output_file = Path.cwd().joinpath('articles_clean.json')
with output_file.open(mode='w', encoding='utf-8') as f_json:
    json.dump(articles, f_json)

print('Done saving file!')

Done saving file!
