In [21]:
import requests
import json

articles = []  # Accumulates the article records ("docs") returned by every page

# Query parameters are identical for every page, so they are defined once
# instead of being rebuilt on each loop iteration.
query = "neuroscience"
fields = "title,author,abstract,journal,subject_facet"
# Named `filter_query` rather than `filter` so the builtin is not shadowed for
# the rest of the notebook session.
# NOTE(review): the filter text is sent unchanged. It contains typographic
# quotes (“ ”) and a comma between the two clauses -- confirm against the PLOS
# Search API documentation that the server interprets this as intended.
filter_query = "publication_date:[2019-01-01T00:00:00Z TO 2023-12-31T23:59:59Z], subject_facet:“/Neuroscience/”"
rows = 100           # page size requested from the API
max_results = 14000  # upper bound on the number of records to fetch

for start in range(0, max_results, rows):
    # Let requests build and percent-encode the query string; the values
    # contain spaces, brackets and non-ASCII characters.
    # The timeout makes a stalled connection raise instead of hanging the cell.
    response = requests.get(
        "http://api.plos.org/search",
        params={
            "q": query,
            "fl": fields,
            "fq": filter_query,
            "start": start,
            "rows": rows,
        },
        timeout=30,
    )

    # Check if the response is successful
    if response.status_code == 200:
        # Parse the response as JSON and pull out this page's records
        docs = response.json()['response']['docs']

        # An empty page means the result set is exhausted; later offsets would
        # be empty as well, so stop issuing requests.
        if not docs:
            break

        # Append the articles from this page to the list
        articles.extend(docs)

    else:
        # Report the failed page and carry on with the next one
        print(f"Request failed with status code {response.status_code}")

# Print the total number of articles collected
print(f"Collected {len(articles)} articles.")



Collected 14000 articles.


In [22]:
import requests
import json
import csv
 
# Export the collected articles to "neuroscience_articles.csv", one row per
# article, preceded by a header row.
#
# encoding="utf-8" is set explicitly so that non-ASCII text (for example
# accented author names) is written the same way on every platform and matches
# the default decoding used by pandas.read_csv. newline="" lets the csv module
# control line endings itself.
#
# Values are passed to the writer as-is and missing keys become empty strings.
# NOTE(review): list-valued fields appear to be stored as their Python list
# text (e.g. "['a', 'b']"), judging by the preview of the loaded file --
# confirm that this is the intended on-disk format.
with open("neuroscience_articles.csv", mode="w", newline="", encoding="utf-8") as file:
    # Create a CSV writer
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(["Title", "Author", "Abstract", "Journal", "Subject"])

    # Loop over the articles and write the data to the CSV file
    for article in articles:
        writer.writerow([
            article.get("title", ""),
            article.get("author", ""),
            article.get("abstract", ""),
            article.get("journal", ""),
            article.get("subject_facet", ""),
        ])
        

In [23]:
import pandas as pd
# Load the exported articles back into a DataFrame.
# The file is read from the same relative path it was written to, so the cell
# works from whichever working directory the notebook runs in and not only
# when that directory happens to be /content.
df = pd.read_csv("neuroscience_articles.csv")

# Display (rows, columns) as the cell output
df.shape

(14000, 5)

In [24]:
# Preview the first five rows (head() defaults to n=5)
df.head()

Unnamed: 0,Title,Author,Abstract,Journal,Subject
0,Think: Theory for Africa,"['Christopher B Currin', 'Phumlani N Khoza', '...",[''],PLOS Computational Biology,['/Biology and life sciences/Computational bio...
1,Bridge to neuroscience workshop: An effective ...,"['Alexandra Colón-Rodríguez', 'Chelsea T Tiern...",['\nNeuroscience as a discipline is rarely cov...,PLOS ONE,['/Biology and life sciences/Cell biology/Cell...
2,The Human Brain Project—Synergy between neuros...,"['Katrin Amunts', 'Alois C Knoll', 'Thomas Lip...",['\nThe Human Brain Project (HBP) is a Europea...,PLOS Biology,['/Biology and life sciences/Computational bio...
3,The effect of neuroscientific evidence on sent...,"['Annalise Perricone', 'Arielle Baskin-Sommers...",['\nNeuroscientific evidence is increasingly u...,PLOS ONE,"['/Biology and life sciences/Neuroscience', '/..."
4,Identifying knowledge important to teach about...,"['Pål Kvello', 'Niklas Gericke']",['\nTeaching about the nervous system has beco...,PLOS ONE,['/Biology and life sciences/Anatomy/Nervous s...


In [25]:
# Number of missing values in each column
df.isna().sum()

Title        0
Author      31
Abstract     0
Journal      0
Subject      0
dtype: int64

In [26]:
# Count the rows whose abstract repeats one that appeared earlier in the frame
duplicates = df.duplicated(subset=['Abstract']).sum()

print(f"There are {duplicates} duplicated values in 'abstract' column.")

There are 271 duplicated values in 'abstract' column.


In [27]:
# Keep one row per distinct abstract; when an abstract repeats, the last
# occurrence is the one retained.
# NOTE(review): rows with identical placeholder abstracts (e.g. an empty one)
# also count as duplicates of each other and collapse into a single row --
# confirm that this is intended.
combined_neuro3 = df.drop_duplicates(
    subset=['Abstract'],
    keep='last',
)

In [30]:
# Re-run the duplicate count on the de-duplicated frame as a sanity check
duplicates = combined_neuro3.duplicated(subset=['Abstract']).sum()

print(f"There are {duplicates} duplicated values in 'abstract' column.")

There are 0 duplicated values in 'abstract' column.


In [29]:
# Renumber the rows 0..n-1 after de-duplication. As before, the previous row
# labels are kept in an "index" column.
# The guard makes the cell safe to re-run: without it a second run would add a
# further "level_0" column and a third run would raise. Rebinding the name
# instead of using inplace=True avoids mutating the frame in place.
# NOTE(review): if the old row labels are not needed downstream, use
# reset_index(drop=True) so no extra column is saved.
if "index" not in combined_neuro3.columns:
    combined_neuro3 = combined_neuro3.reset_index()

"['/Biology and life sciences/Cell biology/Cell physiology/Cell polarity', '/Biology and life sciences/Cell biology/Cellular types/Animal cells/Neurons', '/Biology and life sciences/Cell biology/Cellular types/Animal cells/Neurons/Interneurons', '/Biology and life sciences/Cell biology/Cellular types/Animal cells/Neurons/Nerve fibers/Axons', '/Biology and life sciences/Cell biology/Cellular types/Animal cells/Neurons/Neuronal dendrites/Neurites', '/Biology and life sciences/Cell biology/Signal transduction/Axon guidance receptors', '/Biology and life sciences/Neuroscience/Cellular neuroscience/Axon guidance', '/Biology and life sciences/Neuroscience/Cellular neuroscience/Axon guidance/Axon guidance receptors', '/Biology and life sciences/Neuroscience/Cellular neuroscience/Axon guidance/Pioneer axons', '/Biology and life sciences/Neuroscience/Cellular neuroscience/Neurons', '/Biology and life sciences/Neuroscience/Cellular neuroscience/Neurons/Interneurons', '/Biology and life sciences/

In [31]:
# Persist the de-duplicated table; index=False keeps the row labels out of the
# file. The destination is an absolute path under /content/drive, so that
# location has to exist before this cell runs.
output_csv = '/content/drive/MyDrive/saved_models/combined_neuro3.csv'
combined_neuro3.to_csv(output_csv, index=False)