## Use multiple Scopus Author IDs to retrieve lists of articles by author

In [90]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
from config import api_key
from pandas.io.json import json_normalize  
import nltk
import re
import io
from nltk.corpus import stopwords, reuters
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from wordcloud import WordCloud
import panel as pn
import panel.widgets 
from pathlib import Path
from panel.interact import interact
import hvplot.pandas
import param
from IPython.display import Image
from IPython.core.display import HTML
import time
import datetime as dt
lemmatizer = WordNetLemmatizer()
pn.extension()

In [107]:
# Search request function 
class search_scopus_dash(param.Parameterized):

    Query  = param.String(default="Nanosafety", doc="Insert query term(s)")
    
    Year_Range = param.Range((2005, 2010), bounds=(1970, 2021))

    @param.depends('Query', 'Year_Range')
    def search_request_funt(self):
    
        scopus_search_appended_df = pd.DataFrame()
    
        if self.Year_Range[0] == self.Year_Range[1]:
            date = str(self.Year_Range[0])
        else: 
            date = str(self.Year_Range[0]) + "-" + str(self.Year_Range[1])
        # Declare necessary parameters for Scopus request API search tool
        cursor = "*"
        field = "prism:coverDate,dc:title,dc:description"
        url = "http://api.elsevier.com/content/search/scopus?"
        headers = {"X-ELS-APIKey": api_key, 'Accept':'application/json'}
        parameters = {"query": self.Query, "view": "Complete", "date": date, "field": field, "cursor": cursor}
        article_response = requests.get(url, headers=headers, params=parameters)
        article_response_json = article_response.json()
        total_results = int(article_response_json['search-results']['opensearch:totalResults'])
        while article_response_json['search-results'].get('entry') is not None:
            url = "http://api.elsevier.com/content/search/scopus?"
            parameters = {"query": self.Query, "view": "Complete", "date": date, "field": field, "cursor": cursor}
            article_response = requests.get(url, headers=headers, params= parameters)
            article_response_json = article_response.json()
            if article_response_json['search-results'].get('entry') is not None:
                scopus_articles_df = pd.DataFrame.from_dict(pd.json_normalize(article_response_json, meta=["search-results"], record_path=["search-results", "entry"]), orient="columns")
                date_title_description_df = scopus_articles_df[["prism:coverDate", "dc:title", "dc:description"]]
                date_title_description_df.columns = ['Date','Title','Content']
                pd.to_datetime(date_title_description_df['Date'], format = "%Y-%m-%d")
                date_title_description_df.sort_values(by='Date')
                date_title_description_df = date_title_description_df.set_index('Date')
                scopus_search_appended_df = scopus_search_appended_df.append(date_title_description_df)
                cursor = article_response_json['search-results']['cursor']['@next']
        return scopus_search_appended_df



    def make_csv(self):
        csv = self.search_request_funt().to_csv("Scopus_Search_" + "_" + self.Query + "_" + str(self.Year_Range[0]) + "_" + str(self.Year_Range[1]) + ".csv")
        return csv
    
    
    def make_panel_df(self):
        df_panel = pn.widgets.DataFrame(self.search_request_funt(), name = 'Scopus_Request_Dataframe')
        return df_panel
    
    def freq_plot_funt(self):
        scopus_search_appended_df = self.search_request_funt()
        scopus_search_appended_df = scopus_search_appended_df.reset_index()
        scopus_search_appended_df['Date'] = pd.to_datetime(scopus_search_appended_df['Date'], format = "%Y-%m-%d")
        scopus_search_appended_year = scopus_search_appended_df.Date.dt.year.unique()
        scopus_search_appended_count = scopus_search_appended_df['Date'].groupby(scopus_search_appended_df.Date.dt.year).agg('count')
        scopus_search_appended_count_df = pd.DataFrame(scopus_search_appended_count)
        scopus_search_appended_count_df.columns = ['Count']
        scopus_search_appended_count_df = scopus_search_appended_count_df.reset_index() 
        plot = scopus_search_appended_count_df.hvplot.line(title= "Total Count per Year for the Word '" + query + "' used in Academic Articles", 
                                    x = "Date", 
                                    y = 'Count',
                                    invert = False, 
                                    height = 400,
                                    width = 800
                                    )
        return plot

scopus_search_appended_df = search_scopus_dash().search_request_funt
    
class word_cloud_dash(param.Parameterized):
    
    scopus_df = scopus_search_appended_df
    
    # Y value multiselectors
    Column_Selector = param.ObjectSelector(default = 'Title', objects=['Title', 'Content'])

    # Word Count Slider
    Word_Slider = param.Integer(15, bounds=(5,50))

    # Stop word addition
    Text_Input = param.String(default='', doc= 'Type Words Here, Seperated by a Space')
    
    @param.depends('Column_Selector', 'Word_Slider', 'Text_Input')
    def dataframe_to_string(self):
        if self.Column_Selector == 'Content':
            for row in scopus_df:
                big_string = ''.join(str(scopus_df['Content']))
            sw = set(stopwords.words('english'))
            regex = re.compile("[^a-zA-Z ]")
            wordlist = re.sub("[^\w]", " ",  self.Text_Input).split()
            sw_addons = {'using', 'via', 'based', 'nan', 'date', 'used', 'b', 'tio', 'nanote'}
            sw_addons.update(wordlist)
            re_clean = regex.sub('', big_string)
            words = word_tokenize(re_clean)
            lem = [lemmatizer.lemmatize(word) for word in words]
            output = [word.lower() for word in lem if word.lower() not in sw.union(sw_addons)]
            full_string = ' '.join(output)
            wc_content = WordCloud(width=800, height=600, background_color="white", max_words= self.Word_Slider).generate(full_string)
            image_1 = wc_content.to_image()
            return image_1
        else:
            for row in scopus_df:
                big_string = ''.join(str(scopus_df['Title']))
            sw = set(stopwords.words('english'))
            regex = re.compile("[^a-zA-Z ]")
            wordlist = re.sub("[^\w]", " ",  self.Text_Input).split()
            sw_addons = {'using', 'via', 'based', 'nan', 'date', 'used', 'b', 'tio', 'nanote'}
            sw_addons.update(wordlist)
            re_clean = regex.sub('', big_string)
            words = word_tokenize(re_clean)
            lem = [lemmatizer.lemmatize(word) for word in words]
            output = [word.lower() for word in lem if word.lower() not in sw.union(sw_addons)]
            full_string = ' '.join(output)
            wc_content = WordCloud(width=800, height=600, background_color="white", max_words= self.Word_Slider).generate(full_string)
            image_2 = wc_content.to_image()
            return image_2
    
    
search = search_scopus_dash(name='Query Search Request Below')

word = word_cloud_dash(name='Word Cloud')

fd = pn.widgets.FileDownload(callback=search.make_csv, filename="Scopus Search Excel File")

search_dash_tab = pn.Column('# Download Dataframe and Excel Files Here',
                            '### This can take take between a few minutes to several minutes depending on the data size requested',
                            pn.Row(pn.Column(search.param, fd), search.make_panel_df), background='#f0f0f0')

freq_plot_tab = pn.Column('# Frequency Plot', pn.Row(search.freq_plot_funt), background='#f0f0f0')

word_cloud_tab = pn.Column('# Word Cloud', pn.Row(pn.Column(word.param, word.dataframe_to_string)), background='#f0f0f0')

all_tabs = pn.Tabs(('Data Selection', search_dash_tab), ( 'Frequency Plot', freq_plot_tab), ('Word Cloud', word_cloud_tab))
all_tabs.show()

NameError: name 'scopus_df' is not defined