## Use multiple Scopus Author IDs to retrieve lists of articles by author

In [2]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
from config import api_key
from pandas.io.json import json_normalize  
import nltk
import re
import io
from nltk.corpus import stopwords, reuters
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import panel as pn
import panel.widgets 
from pathlib import Path
from panel.interact import interact

import hvplot.pandas
import param
from IPython.display import Image
from IPython.core.display import HTML
import time
import datetime as dt

import plotly.express as px

lemmatizer = WordNetLemmatizer()
pn.extension('plotly')



In [11]:
# Search request function 

class search_scopus_dash(param.Parameterized):
    """Dashboard section that runs a Scopus search and offers the result as a CSV download.

    The search is driven by ``Query`` and ``Year_Range``.  The result is stored in
    ``sample_df`` (index ``Date``, columns ``Title`` and ``Content``), previewed in a
    table widget, plotted as a per-year article count and exposed through a
    ``FileDownload`` button.

    Note: constructing an instance immediately performs a search, because
    ``__init__`` calls ``update_sample_df``.
    """

    Query  = param.String(default="Nanosafety", doc="Insert query term(s)")

    Year_Range = param.Range((2005, 2010), bounds=(1970, 2021))

    generate_sample_df = param.Action(lambda self: self.update_sample_df(), label="Generate Data", doc="""
      An action callback which will update the sample_df.""")

    sample_df = param.DataFrame(doc="""
      The current dataframe of samples.""")

    file_name = param.String(default="data.csv", doc="""
      The filename to save to.""")

    # HTTPS so the API key in the request headers is not sent in clear text.
    _SEARCH_URL = "https://api.elsevier.com/content/search/scopus"
    _SEARCH_FIELDS = ("prism:coverDate", "dc:title", "dc:description")
    _COLUMN_LABELS = ('Date', 'Title', 'Content')

    def __init__(self, **params):
        super().__init__(**params)
        # Populate sample_df first: the table widget below is built from it.
        self.update_sample_df()
        self.download = pn.widgets.FileDownload(name="Download Data", filename=self.file_name, callback=self._download_callback, button_type="primary")
        self.table = pn.widgets.DataFrame(self.sample_df.head(10), height=360)

    @pn.depends('file_name', watch=True)
    def _update_filename(self):
        """Keep the download widget's file name in sync with ``file_name``."""
        self.download.filename = self.file_name

    def _download_callback(self):
        """
        A FileDownload callback will return a file-like object which can be serialized
        and sent to the client.

        The publication date lives in the frame's index, so the index is written
        whenever it is named; otherwise the ``Date`` column would be lost on export.
        """
        self.download.filename = self.file_name
        sio = io.StringIO()
        frame = self.sample_df
        frame.to_csv(sio, index=frame.index.name is not None)
        sio.seek(0)
        return sio

    @classmethod
    def _entries_to_frame(cls, entries):
        """Convert one page of Scopus ``entry`` records into a Date-indexed frame."""
        # reindex() tolerates pages in which a requested field is absent from every entry.
        frame = pd.json_normalize(entries).reindex(columns=list(cls._SEARCH_FIELDS))
        frame.columns = list(cls._COLUMN_LABELS)
        frame['Date'] = pd.to_datetime(frame['Date'], format="%Y-%m-%d")
        return frame.set_index('Date')

    @param.depends("Query", "Year_Range")
    def search_request_funt(self):
        """Fetch every result page for the current query and year range.

        Pages are requested with cursor-based pagination until a page comes back
        without entries (or without a new cursor).

        Returns:
            A DataFrame indexed by ``Date`` (datetime, ascending) with the columns
            ``Title`` and ``Content``.  It is empty, with the same schema, when the
            search yields nothing.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        first_year, last_year = (int(year) for year in self.Year_Range)
        date = str(first_year) if first_year == last_year else f"{first_year}-{last_year}"

        headers = {"X-ELS-APIKey": api_key, 'Accept': 'application/json'}
        parameters = {
            "query": self.Query,
            "view": "Complete",
            "date": date,
            "field": ",".join(self._SEARCH_FIELDS),
            "cursor": "*",
        }

        pages = []
        while True:
            response = requests.get(self._SEARCH_URL, headers=headers, params=parameters)
            response.raise_for_status()
            search_results = response.json().get('search-results') or {}

            # NOTE(review): Scopus appears to report an empty result set as a single
            # entry carrying an 'error' key rather than omitting 'entry' - confirm.
            entries = [entry for entry in search_results.get('entry') or [] if 'error' not in entry]
            if not entries:
                break
            pages.append(self._entries_to_frame(entries))

            next_cursor = (search_results.get('cursor') or {}).get('@next')
            # Stop if the API gives no way forward, instead of re-requesting the same page forever.
            if not next_cursor or next_cursor == parameters["cursor"]:
                break
            parameters["cursor"] = next_cursor

        if not pages:
            return pd.DataFrame(columns=list(self._COLUMN_LABELS[1:]),
                                index=pd.DatetimeIndex([], name='Date'))
        # A stable sort keeps the API's order among articles sharing a date.
        return pd.concat(pages).sort_index(kind="mergesort")

    def update_sample_df(self, event=None):
        """Run the search and store the result in ``sample_df``.

        Triggered by the "Generate Data" action and by ``__init__``.  It is
        deliberately not tied to ``file_name``: renaming the download must not
        re-query the API.
        """
        self.sample_df = self.search_request_funt()

    @pn.depends("sample_df", watch=True)
    def _update_table(self):
        """Refresh the 10-row preview table whenever ``sample_df`` changes."""
        # The watcher can fire from __init__ before the table widget exists.
        if hasattr(self, "table"):
            self.table.value = self.sample_df.head(10)

    @param.depends("sample_df")
    def freq_plot_funt(self):
        """Return a line plot of the number of articles per publication year.

        Uses the data already held in ``sample_df`` so that plotting does not
        trigger a second full search; falls back to a fresh search only when no
        data has been loaded yet.
        """
        frame = self.sample_df if self.sample_df is not None else self.search_request_funt()
        articles = frame.reset_index()
        articles['Date'] = pd.to_datetime(articles['Date'], format="%Y-%m-%d")

        publication_year = articles['Date'].dt.year
        yearly_counts = articles['Date'].groupby(publication_year).count().rename('Count').reset_index()

        plot = yearly_counts.hvplot.line(title= "Total Count per Year for the Word '" + self.Query + "' used in Academic Articles",
                                    x = "Date",
                                    y = 'Count',
                                    invert = False,
                                    height = 600,
                                    width = 800
                                    )
        return plot

    def view(self):
        """Lay out the controls, download button and preview table next to the frequency plot."""
        return pn.Row(pn.Column(
            "## Generate and Download Data from Scopus API Request",
            pn.Row(
                pn.Param(self, parameters=['Query', 'Year_Range', 'generate_sample_df'], show_name=False, widgets={"generate_sample_df": {"button_type": "primary"}}),
                pn.Column(self.param.file_name, self.download, align='end', margin=(10,5,5,5)),
            ),
            "**Scopus DataFrame (10 Rows)**",
            self.table), self.freq_plot_funt)


    
class word_dash(param.Parameterized):
    """Dashboard section that builds a word cloud from an uploaded CSV.

    The CSV is read into ``data``; the text of the column chosen with
    ``Column_Selector`` is cleaned, lemmatized, stripped of stop words and
    rendered as a word cloud in both a JPG pane and a Plotly pane.
    """

    # Y value multiselectors
    Column_Selector = param.ObjectSelector(default = 'Title', objects=['Title', 'Content'])

    # Word Count Slider
    Word_Slider = param.Integer(15, bounds=(5,50))

    # Stop word addition
    Text_Input = param.String(default='', doc= 'Type Words Here, Seperated by a Space')

    File_Input = param.Parameter()

    data = param.DataFrame()

    # The button re-renders the word cloud; this class has no sample_df to update.
    generate_sample_df = param.Action(lambda self: self.dataframe_to_string(), label="Generate Data", doc=""" An action callback which will update the sample_df.""")

    # Extra stop words applied on top of NLTK's English list.
    _SW_ADDONS = frozenset({'using', 'via', 'based', 'nan', 'date', 'used', 'b', 'tio', 'nanote'})

    def __init__(self, **params):
        # The upload widget is installed as the parameter default before param
        # initialises, so the "File_Input.value" dependency below can bind to it.
        self.param.File_Input.default = pn.widgets.FileInput()
        super().__init__(**params)
        self.image_pane = pn.pane.JPG(height= 400, width = 400)
        preview = None if self.data is None else self.data.head(10)
        self.table = pn.widgets.DataFrame(preview, height = 400, width = 400)
        self.plotly_pane = pn.pane.Plotly(height=400, width = 400)
        # Render straight away when a frame was passed to the constructor.
        if self.data is not None:
            self.dataframe_to_string()

    @pn.depends("File_Input.value", watch=True)
    def _parse_file_input(self):
        """Load the uploaded file (UTF-8 encoded CSV) into ``data``."""
        value = self.File_Input.value
        if not value:
            return
        self.data = pd.read_csv(io.StringIO(value.decode("utf8")))

    @pn.depends("data", "Column_Selector", "Word_Slider", "Text_Input", watch = True)
    def dataframe_to_string(self):
        """Rebuild the word cloud from the selected column and update both panes.

        Does nothing when no data is loaded, the selected column is missing, or
        the panes have not been created yet.  Both panes are cleared when no
        words survive cleaning and stop-word removal.
        """
        scopus = self.data
        column = self.Column_Selector
        if scopus is None or column not in scopus.columns or not hasattr(self, "plotly_pane"):
            return

        # Join the actual cell values; str(Series) would give a truncated repr
        # polluted with index numbers and the "Name: ..., dtype: ..." footer.
        big_string = ' '.join(scopus[column].dropna().astype(str))

        user_words = re.sub(r"[^\w]", " ", self.Text_Input).split()
        stop_words = set(stopwords.words('english')) | self._SW_ADDONS | {word.lower() for word in user_words}

        # Replace (rather than delete) non-letters so words separated only by
        # punctuation or line breaks are not fused together.
        re_clean = re.sub("[^a-zA-Z ]", " ", big_string)
        lemmas = (lemmatizer.lemmatize(word.lower()) for word in word_tokenize(re_clean))
        full_string = ' '.join(word for word in lemmas if word not in stop_words)

        wc_content = None
        if full_string:
            try:
                wc_content = WordCloud(width=800, height=600, background_color="white", max_words= self.Word_Slider).generate(full_string)
            except ValueError:
                # WordCloud refuses to draw when its own filtering leaves no words.
                wc_content = None
        if wc_content is None:
            self.image_pane.object = None
            self.plotly_pane.object = None
            return

        # WordCloud.to_image() takes no arguments and returns a PIL image, so the
        # JPEG bytes are produced explicitly and handed over as a file-like object.
        jpeg_buffer = io.BytesIO()
        wc_content.to_image().save(jpeg_buffer, format='JPEG')
        jpeg_buffer.seek(0)
        self.image_pane.object = jpeg_buffer
        # The Plotly pane needs a figure, not an image object.
        self.plotly_pane.object = px.imshow(wc_content.to_array())

    @pn.depends("data", watch=True)
    def _update_table(self):
        """Show the first 10 rows of ``data`` in the preview table."""
        if hasattr(self, "table") and self.data is not None:
            self.table.value = self.data.head(10)

    def view(self):
        """Lay out the upload widget, controls, word cloud panes and preview table."""
        # NOTE(review): widget *instances* are passed under "type" here; confirm the
        # installed Panel version accepts instances as well as widget classes.
        return pn.Column(self.File_Input,
                         pn.Param(self, parameters = ['Column_Selector', 'Word_Slider', 'Text_Input', 'generate_sample_df'], show_name = False, widgets = {
                             "Column_Selector": {
                                 "type": pn.widgets.Select(name = 'Title', options=['Title', 'Content'])
                             },
                             "Word_Slider": {
                                 "type": pn.widgets.IntSlider(name = 'Total Words', start=0, end= 50, step = 1, value = 10, value_throttled= 10),
                                 "throttled": True,
                             },
                         }),
                         "**Wordcloud**",
                         self.image_pane, self.table, self.plotly_pane
                        )
        
# Build the two dashboard sections. Creating search_scopus_dash runs a Scopus
# search right away, because its __init__ calls update_sample_df().
search = search_scopus_dash(name='Query Search Request Below')
word = word_dash()

# Each section exposes its Panel layout through view().
search_view = search.view()
word_view = word.view()

# Combine both layouts in a single Material template page.
search_view_tab = pn.template.MaterialTemplate(
    site="Panel",
    title="Download and Upload CSV File",
    main=[search_view, word_view],
)

# Earlier tab-based layout, kept for reference (not executed):
#search_dash_tab = pn.Column('# Download Dataframe and Excel Files Here','### This can take take between a few minutes to several minutes depending on the data size requested', pn.Row(pn.Column(search.param)), background='#f0f0f0')
#freq_plot_tab = pn.Column('# Frequency Plot', pn.Row(search.freq_plot_funt), background='#f0f0f0')
#word_cloud_tab = pn.Column('# Word Cloud', pn.Column(word.controls, word.dataframe_to_string), background='#f0f0f0')
#all_tabs = pn.Tabs(('Data Selection', search_dash_tab), ('Frequency Plot', freq_plot_tab), ('Word Cloud', word_cloud_tab))

# Serve the template from a local server.
search_view_tab.show()

Launching server at http://localhost:53275


<bokeh.server.server.Server at 0x1d7207cf648>

In [None]:
# Ad-hoc check: build a dashboard with the default Query / Year_Range and call the
# search method directly. The constructor already performs one search through
# update_sample_df(), so this line queries the API twice.
search_scopus_dash().search_request_funt()