In [2]:
import requests
import pandas as pd
import numpy as np
from ratelimit import limits, RateLimitException, sleep_and_retry
from backoff import on_exception, expo

In [73]:
@sleep_and_retry
@limits(calls=100, period=300)
def query_api(query, scroll=0):
    
    base = "https://api.semanticscholar.org/graph/v1"
    obj = "paper"
    limit = 100
    fields = "abstract,title,year,externalIds"
    
    response = requests.get(f"{base}/{obj}/search?query={query}&limit={limit}&offset={scroll}&fields={fields}").json()

    return response

def scroll(query, scroll=0):
    
    allResults = []
    
    result = query_api(query, scroll=scroll)
    totalResults = result["total"]
    allResults.append(result["data"])
    
    while scroll < totalResults:
        print(scroll)
        scroll = result["next"]
        result = query_api(query, scroll=scroll)
        allResults.append(result["data"])
        
    return allResults

#result = scroll("%22emission%20reduction%22")
result = query_api("emission reduction")
result_ccs = query_api("capture and storage")

In [78]:
df1 = pd.DataFrame(result["data"])
df2 = pd.DataFrame(result_ccs["data"])

In [79]:
def clean_results(df):
    
    # Transform External IDs Array Into Columns
    df["values"] = df["externalIds"].apply(lambda x: x.values())
    df["keys"] = df["externalIds"].apply(lambda x: x.keys())
    df = df.explode(['keys','values'])
    df = df.pivot(index=["paperId","title","abstract","year"], columns="keys", values="values").reset_index()#.set_index("paperId")
    
    # Clean Columns
    df["title"] = df["title"].str.lower()
    df["abstract"] = df["abstract"].str.lower()
    df["year"] = df["year"].fillna(0).astype(int)
    
    df = df.drop_duplicates()
    
    return df

df1 = clean_results(df1)
df2 = clean_results(df2)

In [101]:
# Regex is producing inconsistent results, need to look into

df_results = df2.loc[(df["abstract"].fillna("").str.match(r"^(?=.*[cC](02|arbon))(?=.*[uU](tili.ation|sage)).*$")),["title","abstract"]]

In [104]:
df_results

keys,title,abstract
33,carbon capture and storage: lessons from a sto...,
96,crafting inorganic materials for use in energy...,harnessing solar energy effectively by the jud...
