# Imports

In [None]:
from scholarly import scholarly
import pandas as pd

# Function to retrieve scholarly publications data

In [None]:
# Column order of the DataFrame returned by search_google_scholar.
_PUBLICATION_COLUMNS = [
    'Title',
    'Authors',
    'Year',
    'Citation Count',
    'Citations Per Year',
    'Venue',
    'Abstract',
    'Eprint',
    'URL',
    'Google Scholar Link',
]


def _build_publication_row(result, from_year, min_citations, current_year):
    """
    Convert one search result into a row dict, or return None to skip it.

    A result is skipped when its title, authors, or publication year is
    missing, when the year is not numeric, when it is older than `from_year`,
    or when it has fewer than `min_citations` citations.
    """
    bib = result.get('bib', {})
    title = bib.get('title')
    authors = bib.get('author')
    year = bib.get('pub_year')
    # Treat a missing or None citation count as zero.
    citation_count = result.get('num_citations') or 0

    if not title or not authors or year is None:
        return None
    if citation_count < min_citations:
        return None

    # The year is not guaranteed to be numeric; skip the entry instead of
    # failing half-way through building the row.
    try:
        pub_year = int(year)
    except (TypeError, ValueError):
        return None
    if from_year is not None and pub_year < from_year:
        return None

    # Authors are expected as a list of names; a plain string is kept as is
    # so it does not get split into single characters by join().
    author_text = authors if isinstance(authors, str) else ', '.join(authors)

    # A paper published this year (or with a future year) has an age of zero
    # or less; clamp to one year to avoid dividing by zero.
    years_since_publication = max(current_year - pub_year, 1)

    return {
        'Title': title,
        'Authors': author_text,
        'Year': year,
        'Citation Count': citation_count,
        'Citations Per Year': round(citation_count / years_since_publication, 1),
        'Venue': bib.get('venue', 'N/A'),
        'Abstract': bib.get('abstract', 'N/A'),
        'Eprint': result.get('eprint_url', 'N/A'),
        'URL': result.get('pub_url', 'N/A'),
        'Google Scholar Link': result.get('url_scholarbib', 'N/A'),
    }


def search_google_scholar(keyword, from_year, num_results=20, min_citations=10):
    """
    Search Google Scholar for scholarly publications based on a keyword and time range.

    Parameters:
    - keyword (str): The keyword to search for in Google Scholar.
    - from_year (int or None): Earliest publication year to keep. Publications
      older than this are dropped; pass None to disable the year filter.
    - num_results (int, optional): The maximum number of publications to
      return. Defaults to 20. A value of zero or less returns an empty
      DataFrame without querying Google Scholar.
    - min_citations (int, optional): Minimum citation count a publication
      needs to be kept. Defaults to 10.

    Returns:
    pandas.DataFrame: One row per matching publication with the columns title,
                      authors, year, citation count, citations per year, venue,
                      abstract, eprint, url, and Google Scholar link, sorted by
                      citation count in descending order.
    """
    # Nothing requested: avoid walking the whole (potentially huge) result
    # stream, which the equality-based stop condition would never end.
    if num_results <= 0:
        return pd.DataFrame(columns=_PUBLICATION_COLUMNS)

    current_year = pd.Timestamp.now().year

    # Rows are collected as complete dicts so a failure on one result can
    # never leave the columns with different lengths.
    rows = []
    for result in scholarly.search_pubs(keyword):
        try:
            row = _build_publication_row(result, from_year, min_citations, current_year)
        except (AttributeError, TypeError, ValueError) as e:
            # Malformed result: report it and move on to the next one.
            print(f"An error occurred: {e}")
            continue

        if row is None:
            continue

        rows.append(row)
        if len(rows) >= num_results:
            break

    df = pd.DataFrame(rows, columns=_PUBLICATION_COLUMNS)

    # Most cited publications first.
    return df.sort_values(by='Citation Count', ascending=False)

# Run the function and display the data

In [None]:
# Query Google Scholar for 'feature selection', passing 2000 as from_year and
# keeping the default result limit.
scholar_publications = search_google_scholar(
    keyword='feature selection',
    from_year=2000,
)
# Bare expression so the notebook renders the resulting DataFrame.
scholar_publications