In [37]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
import csv
from selenium.common.exceptions import NoSuchElementException

In [None]:
def scrape_researchgate(query):
  """Scrapes ResearchGate search results for the given query with Selenium.

  Opens a Chrome session, types the query into the site search bar, clicks the
  search button and reads every ``.result`` element that is present once the
  first one has appeared. The browser is closed before returning, also when
  an error occurs.

  Args:
    query: The query to search for.

  Returns:
    A list of dictionaries with the keys "title", "author",
    "publication_date", "citations" and "link". A field whose element is
    missing from a result is stored as an empty string, so one incomplete
    result does not abort the whole scrape.

  Raises:
    selenium.common.exceptions.TimeoutException: If the search bar, the
      search button or the first result does not show up within the waits.
  """

  def read_text(parent, selector):
    # Text of the first element matching selector inside parent, "" if absent.
    try:
      return parent.find_element(By.CSS_SELECTOR, selector).text
    except NoSuchElementException:
      return ""

  def read_href(parent, selector):
    # href of the first element matching selector inside parent, "" if absent.
    try:
      anchor = parent.find_element(By.CSS_SELECTOR, selector)
    except NoSuchElementException:
      return ""
    return anchor.get_attribute("href") or ""

  driver = webdriver.Chrome()

  try:
    driver.get("https://www.researchgate.net/")

    # The search bar can take a while to render; allow up to a minute.
    search_bar = WebDriverWait(driver, 60).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "input[aria-label='Search ResearchGate']"))
    )
    search_bar.send_keys(query)

    # Wait for the button instead of looking it up immediately, so a slow
    # render does not raise before the button exists.
    search_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button#searchButton"))
    )
    search_button.click()

    # Block until at least one result has been added to the page.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, ".result"))
    )

    # NOTE(review): the selectors below are taken as-is; verify them against
    # the markup the live site actually serves.
    return [
        {
            "title": read_text(result, "h2.title"),
            "author": read_text(result, ".author"),
            "publication_date": read_text(result, ".publication-date"),
            "citations": read_text(result, ".citations"),
            "link": read_href(result, ".link"),
        }
        for result in driver.find_elements(By.CSS_SELECTOR, ".result")
    ]

  finally:
    # Always release the browser, even when a wait above timed out.
    driver.quit()

# Run the Selenium scraper for the phrase 'heat wave'.
scraped_data = scrape_researchgate("heat wave")

# Write a header row followed by one CSV row per scraped record.
with open("researchgate_results.csv", "w", newline="", encoding="utf-8") as csvfile:
  writer = csv.DictWriter(
      csvfile,
      fieldnames=["title", "author", "publication_date", "citations", "link"],
  )
  writer.writeheader()
  for row in scraped_data:
    writer.writerow(row)


In [None]:
# Run once if bs4 is missing: %pip install beautifulsoup4

In [22]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_researchgate(query):
  """Scrapes the ResearchGate search page for the given query.

  Args:
    query: The query to search for. It is sent as the ``q`` URL parameter.

  Returns:
    A list of dictionaries, one per ``div.result`` element, with the keys
    "title", "author", "publication_date", "citations" and "link". A field
    whose element is not found is stored as an empty string.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within 30 seconds.
  """

  # Send the query with the request; requests percent-encodes it. The timeout
  # keeps a stalled connection from hanging the notebook.
  response = requests.get(
      "https://www.researchgate.net/search",
      params={"q": query},
      timeout=30,
  )
  # Surface an HTTP error status rather than parsing an error page.
  response.raise_for_status()

  soup = BeautifulSoup(response.content, "html.parser")

  def text_of(node, tag, css_class):
    # Text of the first matching descendant, or "" when there is none.
    match = node.find(tag, class_=css_class)
    return match.text if match is not None else ""

  data = []
  for search_result in soup.find_all("div", class_="result"):
    anchor = search_result.find("a", class_="link")
    data.append({
      "title": text_of(search_result, "h2", "title"),
      "author": text_of(search_result, "a", "author"),
      "publication_date": text_of(search_result, "span", "publication-date"),
      "citations": text_of(search_result, "span", "citations"),
      "link": anchor.get("href") if anchor is not None else "",
    })

  return data

# Scrape before opening the file: opening in "w" mode truncates it, so a
# failed scrape would otherwise wipe any results saved earlier.
researchgate_rows = scrape_researchgate("heat wave")

# Save the scraped data to a CSV file.
with open("researchgate_results.csv", "w", newline="", encoding="utf-8") as csvfile:
  writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "publication_date", "citations", "link"])
  writer.writeheader()
  writer.writerows(researchgate_rows)


In [19]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(query):
  """Scrapes one Google Scholar results page for the given query.

  Args:
    query: The query to search for. It is sent as the ``q`` URL parameter, so
      spaces and reserved characters are percent-encoded by requests.

  Returns:
    A list of dictionaries, one per ``div.gs_r`` element, with the keys
    "title", "author", "publication_date", "citations" and "link". A field
    whose element is not found is stored as an empty string.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within 30 seconds.
  """

  # Let requests build the query string instead of concatenating the raw
  # query; the timeout keeps a stalled connection from hanging the notebook.
  response = requests.get(
      "https://scholar.google.com/scholar",
      params={"hl": "en", "q": query},
      timeout=30,
  )
  # Surface an HTTP error status rather than parsing an error page.
  response.raise_for_status()

  soup = BeautifulSoup(response.content, "html.parser")

  def text_of(node, tag, css_class):
    # Text of the first matching descendant, or "" when there is none.
    match = node.find(tag, class_=css_class)
    return match.text if match is not None else ""

  data = []
  for search_result in soup.find_all("div", class_="gs_r"):
    anchor = search_result.find("a", class_="gs_ai_u")
    data.append({
      "title": text_of(search_result, "h3", "gs_rt"),
      "author": text_of(search_result, "span", "gs_ai_name"),
      # NOTE(review): gs_a presumably holds the whole byline, not only the
      # date - confirm against the live markup.
      "publication_date": text_of(search_result, "div", "gs_a"),
      "citations": text_of(search_result, "div", "gs_ai_c"),
      "link": anchor.get("href") if anchor is not None else "",
    })

  return data

# Scrape before opening the file: opening in "w" mode truncates it, so a
# failed scrape would otherwise wipe any results saved earlier.
scholar_rows = scrape_google_scholar("heatwave")

# Save the scraped data to a CSV file.
with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
  writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "publication_date", "citations", "link"])
  writer.writeheader()
  writer.writerows(scholar_rows)


In [23]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(query):
  """Scrapes one Google Scholar results page for the given query.

  Args:
    query: The query to search for. It is sent as the ``q`` URL parameter, so
      spaces and reserved characters are percent-encoded by requests.

  Returns:
    A list of dictionaries, one per ``div.gs_r`` element, with the keys
    "title", "author", "publication_date", "citations" and "link". A field
    whose element is not found is stored as an empty string.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within 30 seconds.
  """

  # (output key, tag name, CSS class) for every field read as element text.
  text_fields = (
      ("title", "h3", "gs_rt"),
      ("author", "span", "gs_ai_name"),
      ("publication_date", "div", "gs_a"),
      ("citations", "div", "gs_ai_c"),
  )

  # Let requests encode the query; the timeout keeps a stalled connection
  # from hanging the notebook.
  response = requests.get(
      "https://scholar.google.com/scholar",
      params={"hl": "en", "q": query},
      timeout=30,
  )
  # Surface an HTTP error status rather than parsing an error page.
  response.raise_for_status()

  soup = BeautifulSoup(response.content, "html.parser")

  data = []
  for search_result in soup.find_all("div", class_="gs_r"):
    record = {}
    for key, tag, css_class in text_fields:
      match = search_result.find(tag, class_=css_class)
      record[key] = match.text if match is not None else ""

    # The link is read from the href attribute rather than from the text.
    anchor = search_result.find("a", class_="gs_ai_u")
    record["link"] = anchor.get("href") if anchor is not None else ""

    data.append(record)

  return data

# Scrape before opening the file: opening in "w" mode truncates it, so a
# failed scrape would otherwise wipe any results saved earlier.
scholar_rows = scrape_google_scholar("heatwave")

# Replace blank author, citations and link values with the text "None" so
# missing fields are visible in the CSV.
for row in scholar_rows:
  for field in ("author", "citations", "link"):
    if row[field] == "":
      row[field] = "None"

# Save the scraped data to a CSV file.
with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
  writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "publication_date", "citations", "link"])
  writer.writeheader()
  writer.writerows(scholar_rows)


In [25]:
def scrape_google_scholar(query):
  """Scrapes one Google Scholar results page for the given query.

  Args:
    query: The query to search for. It is sent as the ``q`` URL parameter, so
      spaces and reserved characters are percent-encoded by requests.

  Returns:
    A list of dictionaries, one per ``div.gs_r`` element, with the keys
    "title", "author", "publication_date", "citations" and "link". A field
    whose element is not found is stored as an empty string.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If no response arrives within 30 seconds.
  """

  def first_text(node, tag, css_class):
    # Text of the first matching descendant, or "" when there is none.
    found = node.find(tag, class_=css_class)
    if found is None:
      return ""
    return found.text

  def first_href(node, css_class):
    # href of the first matching anchor, or "" when there is no such anchor.
    found = node.find("a", class_=css_class)
    if found is None:
      return ""
    return found.get("href")

  # Pass the query through params so requests encodes it; bound the wait with
  # a timeout and surface HTTP error statuses.
  response = requests.get(
      "https://scholar.google.com/scholar",
      params={"hl": "en", "q": query},
      timeout=30,
  )
  response.raise_for_status()

  soup = BeautifulSoup(response.content, "html.parser")

  return [
      {
          "title": first_text(hit, "h3", "gs_rt"),
          "author": first_text(hit, "span", "gs_ai_name"),
          "publication_date": first_text(hit, "div", "gs_a"),
          "citations": first_text(hit, "div", "gs_ai_c"),
          # Get the URL of the paper.
          "link": first_href(hit, "gs_ai_u"),
      }
      for hit in soup.find_all("div", class_="gs_r")
  ]

# Scrape first; opening the CSV in "w" mode truncates it, so scraping inside
# the ``with`` block would destroy earlier results whenever the request fails.
scholar_rows = scrape_google_scholar("heatwave")

with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
  writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "publication_date", "citations", "link"])
  writer.writeheader()

  for row in scholar_rows:
    # Write the text "None" in place of a blank author, citations or link.
    for field in ("author", "citations", "link"):
      if row[field] == "":
        row[field] = "None"

    writer.writerow(row)


In [None]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(query):
    """Scrapes one Google Scholar results page for the given query.

    Args:
        query: The query to search for. It is sent as the ``q`` URL parameter,
            so spaces and reserved characters are percent-encoded by requests.

    Returns:
        A list of dictionaries, one per ``div.gs_r`` element, with the keys
        "title", "author", "citations" and "link". A missing title is stored
        as an empty string; a missing author, citations or link element is
        stored as the text "None".

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # Let requests build the query string instead of concatenating the raw
    # query; the timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"hl": "en", "q": query},
        timeout=30,
    )
    # Surface an HTTP error status rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(node, tag, css_class, default):
        # Text of the first matching descendant, or default when none exists.
        match = node.find(tag, class_=css_class)
        return match.text if match is not None else default

    data = []
    for search_result in soup.find_all("div", class_="gs_r"):
        # Get the URL of the paper.
        anchor = search_result.find("a", class_="gs_or_ggsm")

        data.append({
            "title": text_of(search_result, "h3", "gs_rt", ""),
            "author": text_of(search_result, "div", "gs_a", "None"),
            # NOTE(review): gs_fl is read as the citation text; confirm that
            # this element holds what the "citations" column should contain.
            "citations": text_of(search_result, "div", "gs_fl", "None"),
            "link": anchor.get("href") if anchor is not None else "None",
        })

    return data

# Scrape before opening the file: opening in "w" mode truncates it, so a
# failed scrape would otherwise wipe any results saved earlier.
scholar_rows = scrape_google_scholar("heatwave")

# Save the scraped data to a CSV file.
with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "citations", "link"])
    writer.writeheader()
    writer.writerows(scholar_rows)


In [26]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(query):
    """Scrapes one Google Scholar results page for the given query.

    Args:
        query: The query to search for. It is sent as the ``q`` URL parameter,
            so spaces and reserved characters are percent-encoded by requests.

    Returns:
        A list of dictionaries, one per ``div.gs_r`` element, with the keys
        "title", "author", "citations" and "link". A missing title is stored
        as an empty string; a missing author, citations or link element is
        stored as the text "None".

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # (output key, tag name, CSS class, fallback) for each text field.
    text_fields = (
        ("title", "h3", "gs_rt", ""),
        ("author", "div", "gs_a", "None"),
        ("citations", "div", "gs_fl", "None"),
    )

    # Pass the query through params so requests encodes it; bound the wait
    # with a timeout and surface HTTP error statuses.
    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"hl": "en", "q": query},
        timeout=30,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    data = []
    for search_result in soup.find_all("div", class_="gs_r"):
        record = {}
        for key, tag, css_class, fallback in text_fields:
            match = search_result.find(tag, class_=css_class)
            record[key] = match.text if match is not None else fallback

        # Get the URL of the paper.
        anchor = search_result.find("a", class_="gs_or_ggsm")
        record["link"] = anchor.get("href") if anchor is not None else "None"

        data.append(record)

    return data

# Fetch the rows first; the "w" open below truncates the CSV, so it must only
# happen once the scrape has succeeded.
scholar_rows = scrape_google_scholar("heatwave")

with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["title", "author", "citations", "link"])
    writer.writeheader()
    writer.writerows(scholar_rows)


In [27]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(query):
    """Scrapes one Google Scholar results page for the given query.

    Args:
        query: The query to search for. It is sent as the ``q`` URL parameter,
            so spaces and reserved characters are percent-encoded by requests.

    Returns:
        A list of dictionaries, one per ``div.gs_r`` element, with the keys
        "Author(s)", "Title", "Publication Date", "Source", "Page Numbers"
        and "Link". A field whose element is not found is stored as the text
        "None". "Author(s)", "Publication Date" and "Source" all carry the
        text of the same ``div.gs_a`` element.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # Let requests build the query string instead of concatenating the raw
    # query; the timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"hl": "en", "q": query},
        timeout=30,
    )
    # Surface an HTTP error status rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(node, tag, css_class):
        # Text of the first matching descendant, or "None" when none exists.
        match = node.find(tag, class_=css_class)
        return match.text if match is not None else "None"

    data = []
    for search_result in soup.find_all("div", class_="gs_r"):
        # One lookup feeds three columns.
        # NOTE(review): splitting this text into author / source / date would
        # need the exact gs_a format - confirm before parsing it.
        byline = text_of(search_result, "div", "gs_a")

        # Get the URL of the paper.
        anchor = search_result.find("a", class_="gs_or_ggsm")

        data.append({
            "Author(s)": byline,
            "Title": text_of(search_result, "h3", "gs_rt"),
            "Publication Date": byline,
            "Source": byline,
            # NOTE(review): gs_fl is stored under "Page Numbers"; confirm it
            # really contains page numbers.
            "Page Numbers": text_of(search_result, "div", "gs_fl"),
            "Link": anchor.get("href") if anchor is not None else "None",
        })

    return data

# Scrape before opening the file: opening in "w" mode truncates it, so a
# failed scrape would otherwise wipe any results saved earlier.
scholar_rows = scrape_google_scholar("heat waves")

# Save the scraped data to a CSV file.
with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["Author(s)", "Title", "Publication Date", "Source", "Page Numbers", "Link"])
    writer.writeheader()
    writer.writerows(scholar_rows)


In [28]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(keywords):
    """Scrapes one Google Scholar results page for the given keywords.

    Args:
        keywords: A list of keywords to search for. They are combined into a
            single space-separated query. A bare string is treated as one
            keyword instead of being split into characters.

    Returns:
        A list of dictionaries, one per ``div.gs_r`` element, with the keys
        "Author(s)", "Title", "Publication Date", "Source", "Page Numbers"
        and "Link". A field whose element is not found is stored as the text
        "None". "Author(s)", "Publication Date" and "Source" all carry the
        text of the same ``div.gs_a`` element.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # Joining a plain string would interleave the separator between its
    # characters, so wrap it in a list first.
    if isinstance(keywords, str):
        keywords = [keywords]

    # Combine the keywords into a single search query; requests encodes the
    # spaces (and any reserved characters) when it builds the URL.
    query = " ".join(keywords)

    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"hl": "en", "q": query},
        timeout=30,  # keep a stalled connection from hanging the notebook
    )
    # Surface an HTTP error status rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(node, tag, css_class):
        # Text of the first matching descendant, or "None" when none exists.
        match = node.find(tag, class_=css_class)
        return match.text if match is not None else "None"

    data = []
    for search_result in soup.find_all("div", class_="gs_r"):
        # One lookup feeds the author, source and publication date columns.
        byline = text_of(search_result, "div", "gs_a")

        # Get the URL of the paper.
        anchor = search_result.find("a", class_="gs_or_ggsm")

        data.append({
            "Author(s)": byline,
            "Title": text_of(search_result, "h3", "gs_rt"),
            "Publication Date": byline,
            "Source": byline,
            # NOTE(review): gs_fl is stored under "Page Numbers"; confirm it
            # really contains page numbers.
            "Page Numbers": text_of(search_result, "div", "gs_fl"),
            "Link": anchor.get("href") if anchor is not None else "None",
        })

    return data

# List of keywords to search for.
keywords = ["heatwave", "heat wave"]

# Run every search before opening the file: opening in "w" mode truncates it,
# so a failed request would otherwise wipe any results saved earlier.
scholar_rows = []
for keyword in keywords:
    scholar_rows.extend(scrape_google_scholar([keyword]))

# Save the scraped data to a CSV file, in keyword order.
with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["Author(s)", "Title", "Publication Date", "Source", "Page Numbers", "Link"])
    writer.writeheader()
    writer.writerows(scholar_rows)


In [29]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_google_scholar(keywords):
    """Scrapes one Google Scholar results page for the given keywords.

    Args:
        keywords: A list of keywords to search for. They are combined into a
            single space-separated query. A bare string is treated as one
            keyword instead of being split into characters.

    Returns:
        A list of dictionaries, one per ``div.gs_r`` element, with the keys
        "Author(s)", "Title", "Publication Date", "Source", "Page Numbers"
        and "Link". "Title" and "Link" come from the anchor inside the
        ``h3.gs_rt`` heading; when the heading or its anchor is missing both
        are the text "None". Any other field whose element is not found is
        also "None". "Author(s)", "Publication Date" and "Source" all carry
        the text of the same ``div.gs_a`` element.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # Joining a plain string would interleave the separator between its
    # characters, so wrap it in a list first.
    if isinstance(keywords, str):
        keywords = [keywords]

    # Combine the keywords into a single search query; requests encodes the
    # spaces (and any reserved characters) when it builds the URL.
    query = " ".join(keywords)

    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"hl": "en", "q": query},
        timeout=30,  # keep a stalled connection from hanging the notebook
    )
    # Surface an HTTP error status rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(node, tag, css_class):
        # Text of the first matching descendant, or "None" when none exists.
        match = node.find(tag, class_=css_class)
        return match.text if match is not None else "None"

    data = []
    for search_result in soup.find_all("div", class_="gs_r"):
        # One lookup feeds the author, source and publication date columns.
        byline = text_of(search_result, "div", "gs_a")

        # The title anchor supplies both the title text and the paper URL.
        heading = search_result.find("h3", class_="gs_rt")
        title_link = heading.find("a") if heading is not None else None
        if title_link is not None:
            title = title_link.text
            link = title_link.get("href")
        else:
            title = "None"
            link = "None"

        data.append({
            "Author(s)": byline,
            "Title": title,
            "Publication Date": byline,
            "Source": byline,
            # NOTE(review): gs_fl is stored under "Page Numbers"; confirm it
            # really contains page numbers.
            "Page Numbers": text_of(search_result, "div", "gs_fl"),
            "Link": link,
        })

    return data

# List of keywords to search for.
keywords = ["heatwave", "heat wave"]

# Collect the rows for every keyword up front; the "w" open below truncates
# the CSV, so it must only happen once all requests have succeeded.
scholar_rows = [
    row
    for keyword in keywords
    for row in scrape_google_scholar([keyword])
]

with open("google_scholar_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["Author(s)", "Title", "Publication Date", "Source", "Page Numbers", "Link"])
    writer.writeheader()
    writer.writerows(scholar_rows)


# This is the code I eventually used.

In [30]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_researchgate(query):
    """Scrapes the ResearchGate search page for the given query.

    Args:
        query: The query to search for. It is sent as the ``q`` URL parameter,
            so spaces and reserved characters are percent-encoded by requests.

    Returns:
        A list of dictionaries, one per ``div.nova-o-stack__item`` element,
        with the keys "Author(s)", "Title", "Publication Date", "Source" and
        "URL". A field whose element is not found is stored as the text
        "None".

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 30 seconds.
    """

    # Let requests build the query string instead of formatting the raw query
    # into the URL; the timeout keeps a stalled connection from hanging.
    response = requests.get(
        "https://www.researchgate.net/search",
        params={"q": query},
        timeout=30,
    )
    # Surface an HTTP error status rather than parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    def text_of(node, tag, css_class):
        # Text of the first matching descendant, or "None" when none exists.
        match = node.find(tag, class_=css_class)
        return match.text if match is not None else "None"

    # NOTE(review): a multi-class string passed as class_ is compared with the
    # element's full class attribute, so these two selectors presumably stop
    # matching if the site adds, drops or reorders a class - confirm.
    title_class = "nova-e-text nova-e-text--size-l nova-e-text--family-sans-serif nova-e-text--spacing-none nova-e-text--color-inherit nova-v-publication-item__title"
    link_class = "nova-e-link nova-e-link--color-inherit nova-e-link--theme-bare"

    data = []
    for search_result in soup.find_all("div", class_="nova-o-stack__item"):
        # Get the URL of the paper.
        anchor = search_result.find("a", class_=link_class)

        data.append({
            "Author(s)": text_of(search_result, "div", "nova-v-publication-item__person-list"),
            "Title": text_of(search_result, "h4", title_class),
            "Publication Date": text_of(search_result, "span", "nova-v-publication-item__meta-data-item"),
            "Source": text_of(search_result, "div", "nova-v-publication-item__meta-right"),
            "URL": anchor.get("href") if anchor is not None else "None",
        })

    return data

# Run the scraper for a sample query.
query = "heatwave"
results = scrape_researchgate(query)

# Write a header row followed by one CSV row per scraped record.
with open("researchgate_results.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=["Author(s)", "Title", "Publication Date", "Source", "URL"],
    )
    writer.writeheader()
    writer.writerows(results)
