In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


In [None]:
from Bio import Entrez
import pandas as pd
import numpy as np
import random
# Seed Python's RNG so the random.sample() draw of papers made later in the
# notebook is repeatable after Restart & Run All.
random.seed(10)

In [None]:
#Using e-search API to find PubMed search results
def search(query, retmax=100):
    """Run a relevance-sorted PubMed search through the NCBI ESearch utility.

    Parameters
    ----------
    query : str
        Search term, passed to ESearch as ``term``.
    retmax : int, optional
        Maximum number of IDs to request. Defaults to 100, the value that
        was previously hard-coded.

    Returns
    -------
    The parsed ESearch response produced by ``Entrez.read``. Callers read the
    matching PubMed IDs from its ``'IdList'`` entry.
    """
    # NOTE(review): fetch_details() registers 'emilyvaitsi@gmail.com' (no
    # trailing "e") -- one of the two addresses looks like a typo; confirm.
    Entrez.email = 'emilyvaitsie@gmail.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the HTTP connection even when parsing fails; the Biopython
        # tutorial closes the handle explicitly after Entrez.read().
        handle.close()
    return results

In [None]:
#Performing the PubMed search for the term 'bioplastics' and keeping the
#list of matching PubMed IDs returned under 'IdList'
studies = search('bioplastics')
studiesIdList = studies['IdList']
len(studiesIdList)

100

In [None]:
#Reducing the number of papers to 20 for better computational performance
# Cap the sample size at the number of IDs actually available:
# random.sample() raises ValueError when asked for more items than the
# population holds (e.g. a query that matches fewer than 20 papers).
studiesIdList = random.sample(studiesIdList, min(20, len(studiesIdList)))
len(studiesIdList)

20

In [None]:
#Using e-fetch to get details from the papers obtained from e-search
def fetch_details(id_list):
    """Download the PubMed records for the given IDs through NCBI EFetch.

    Parameters
    ----------
    id_list : iterable of str
        PubMed IDs; they are joined with commas into a single request.

    Returns
    -------
    The parsed EFetch response produced by ``Entrez.read``. Callers iterate
    over its ``'PubmedArticle'`` entry.
    """
    ids = ','.join(id_list)
    # NOTE(review): search() registers 'emilyvaitsie@gmail.com' (with a
    # trailing "e") -- one of the two addresses looks like a typo; confirm.
    Entrez.email = 'emilyvaitsi@gmail.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the HTTP connection even when parsing fails; the Biopython
        # tutorial closes the handle explicitly after Entrez.read().
        handle.close()
    return results

In [None]:
#Making a DF with article information

def _lookup(node, *keys, default):
    """Walk ``node[keys[0]][keys[1]]...`` and return ``default`` if any step is missing."""
    try:
        for key in keys:
            node = node[key]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are treated as "field absent"; anything else
        # (network errors, KeyboardInterrupt, ...) is allowed to propagate.
        return default
    return node


title_list = []
abstract_list = []
journal_list = []
language_list = []
pubdate_year_list = []
pubdate_month_list = []

# NOTE(review): this result is not used in this cell -- the loop below fetches
# the same records again chunk by chunk. Kept because later cells may read
# `studies`; drop it if nothing does, to save one network round trip.
studies = fetch_details(studiesIdList)

# Fetch the records in batches of `chunk_size` IDs per EFetch request.
chunk_size = 100
for chunk_i in range(0, len(studiesIdList), chunk_size):
    chunk = studiesIdList[chunk_i:chunk_i + chunk_size]
    papers = fetch_details(chunk)
    for paper in papers['PubmedArticle']:
        article = paper['MedlineCitation']['Article']

        title_list.append(article['ArticleTitle'])
        # NOTE(review): only the first AbstractText element is kept; an
        # abstract split into several elements would be truncated -- confirm
        # this is intended.
        abstract_list.append(
            _lookup(article, 'Abstract', 'AbstractText', 0, default='No Abstract'))
        journal_list.append(article['Journal']['Title'])
        language_list.append(article['Language'][0])
        # Records without a Year/Month entry under PubDate get the
        # 'No Data' placeholder.
        pubdate_year_list.append(
            _lookup(article, 'Journal', 'JournalIssue', 'PubDate', 'Year',
                    default='No Data'))
        pubdate_month_list.append(
            _lookup(article, 'Journal', 'JournalIssue', 'PubDate', 'Month',
                    default='No Data'))

# One row per fetched article; the dict's key order fixes the column order.
df = pd.DataFrame({
    'Title': title_list,
    'Abstract': abstract_list,
    'Journal': journal_list,
    'Language': language_list,
    'Year': pubdate_year_list,
    'Month': pubdate_month_list,
})
df.shape

(20, 6)

In [None]:
# Preview the first 10 rows of the assembled article table.
df.head(10)

Unnamed: 0,Title,Abstract,Journal,Language,Year,Month
0,Treatment of food waste contaminated by biopla...,The use of Black Soldier Fly (BSF) larvae in t...,Journal of environmental management,eng,2023,Mar
1,Waste to bioplastics: How close are we to sust...,Increased awareness of environmental sustainab...,"Waste management (New York, N.Y.)",eng,2021,Jan
2,Leads and hurdles to sustainable microbial bio...,"Indiscriminate usage, disposal and recalcitran...",Chemosphere,eng,2022,Oct
3,Editorial.,No Abstract,Biomacromolecules,eng,2017,Feb
4,The challenges of bioplastics in waste managem...,No Abstract,Waste management & research : the journal of t...,eng,2023,Aug
5,Advances in microbial exoenzymes bioengineerin...,Plastic pollution has become a major global co...,Chemosphere,eng,2024,May
6,Bacterial polyhydroxyalkanoates: Still fabulous?,Bacterial polyhydroxyalkanoates (PHA) are poly...,Microbiological research,eng,2016,Nov
7,From Soy Waste to Bioplastics: Industrial Proo...,The global plastic waste problem is pushing fo...,Biomacromolecules,eng,2024,Mar
8,Biopolymers for Enhanced Health Benefits.,The definition of the term biopolymer is often...,International journal of molecular sciences,eng,2023,Nov
9,Production of volatile fatty acids (VFAs) from...,The feasibility of producing volatile fatty ac...,Bioresource technology,eng,2022,Sep


In [None]:
#Standardizing months
# Three-letter month abbreviations -> zero-padded month numbers; the
# 'No Data' placeholder becomes NaN.
MONTH_REPLACEMENTS = {
    'Jan': '01',
    'Feb': '02',
    'Mar': '03',
    'Apr': '04',
    'May': '05',
    'Jun': '06',
    'Jul': '07',
    'Aug': '08',
    'Sep': '09',
    'Oct': '10',
    'Nov': '11',
    'Dec': '12',
    'No Data': np.nan,
}

# Assign the result back instead of calling
# df['Month'].replace(..., inplace=True): that form mutates an intermediate
# Series, which pandas 2.x warns about and which leaves df unchanged under
# Copy-on-Write. The assignment is also safe to re-run, because values that
# are already mapped match none of the keys.
df['Month'] = df['Month'].replace(MONTH_REPLACEMENTS)

In [None]:
# Show the first 10 rows again to check the Month column after standardization.
df.head(10)

Unnamed: 0,Title,Abstract,Journal,Language,Year,Month
0,Treatment of food waste contaminated by biopla...,The use of Black Soldier Fly (BSF) larvae in t...,Journal of environmental management,eng,2023,3
1,Waste to bioplastics: How close are we to sust...,Increased awareness of environmental sustainab...,"Waste management (New York, N.Y.)",eng,2021,1
2,Leads and hurdles to sustainable microbial bio...,"Indiscriminate usage, disposal and recalcitran...",Chemosphere,eng,2022,10
3,Editorial.,No Abstract,Biomacromolecules,eng,2017,2
4,The challenges of bioplastics in waste managem...,No Abstract,Waste management & research : the journal of t...,eng,2023,8
5,Advances in microbial exoenzymes bioengineerin...,Plastic pollution has become a major global co...,Chemosphere,eng,2024,5
6,Bacterial polyhydroxyalkanoates: Still fabulous?,Bacterial polyhydroxyalkanoates (PHA) are poly...,Microbiological research,eng,2016,11
7,From Soy Waste to Bioplastics: Industrial Proo...,The global plastic waste problem is pushing fo...,Biomacromolecules,eng,2024,3
8,Biopolymers for Enhanced Health Benefits.,The definition of the term biopolymer is often...,International journal of molecular sciences,eng,2023,11
9,Production of volatile fatty acids (VFAs) from...,The feasibility of producing volatile fatty ac...,Bioresource technology,eng,2022,9
