In [1]:
import pandas as pd
from pybliometrics.scopus import AbstractRetrieval
from tqdm import tqdm
import time

In [2]:
# Smoke test: retrieve a single paper by DOI using the FULL view and display
# its subject areas as the cell output.
doc = AbstractRetrieval("10.3982/ECTA16566", view='FULL')
doc.subject_areas

[Area(area='Economics and Econometrics', abbreviation='ECON', code=2002)]

<pybliometrics.scopus.abstract_retrieval.AbstractRetrieval at 0x12fc12bc0>

In [81]:
def _extract_paper_info(doc):
    """Flatten the attributes of a retrieved document into a plain dict.

    Every attribute is read defensively with ``getattr`` so that a missing
    attribute yields a default instead of raising.
    """
    # Scalar fields: None when the attribute is absent.
    paper = {
        field: getattr(doc, field, None)
        for field in ('doi', 'eid', 'title', 'abstract', 'description')
    }

    # List-valued attributes may be absent or None; both are treated as empty.
    authors = getattr(doc, 'authors', None) or []
    paper['author_names'] = [getattr(author, 'indexed_name', None) for author in authors]
    paper['author_ids'] = [getattr(author, 'auid', None) for author in authors]

    # A reference without an id would otherwise become the literal string
    # "2-s2.0-None" and show up downstream as one bogus shared node, so such
    # references are dropped instead of being formatted.
    references = getattr(doc, 'references', None) or []
    ref_ids = (getattr(ref, 'id', None) for ref in references)
    paper['references'] = [
        f"2-s2.0-{ref_id}" for ref_id in ref_ids if ref_id not in (None, '')
    ]

    paper['cited_by'] = getattr(doc, 'citedby_count', 0)

    areas = getattr(doc, 'subject_areas', None) or []
    paper['areas'] = [getattr(area, 'area', None) for area in areas]

    return paper


def get_paper_info(identifier):
    """Retrieve one paper via ``AbstractRetrieval`` and return its key fields.

    Parameters
    ----------
    identifier
        Identifier passed straight to ``AbstractRetrieval`` (the notebook uses
        both DOIs and EIDs).

    Returns
    -------
    dict
        On success, a dict with the keys ``doi``, ``eid``, ``title``,
        ``abstract``, ``description``, ``author_names``, ``author_ids``,
        ``references`` (each prefixed with ``"2-s2.0-"``), ``cited_by`` and
        ``areas``.
        On failure, ``{'error': <message>}`` — retrieval errors are reported
        in-band rather than raised, so a long batch run is not aborted by a
        single bad identifier.
    """
    try:
        doc = AbstractRetrieval(identifier, view='FULL')
    except Exception as e:
        # Deliberate best-effort: the caller inspects the 'error' key.
        return {'error': str(e)}

    return _extract_paper_info(doc)



def build_edge_list(df):
    """Turn a table of papers into a directed citation edge list.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``eid`` column (the citing paper) and a ``references``
        column holding a list of cited ids per row.

    Returns
    -------
    pandas.DataFrame
        One row per (citing, cited) pair with columns ``Source`` and
        ``Target``. Empty (but with both columns) when there are no edges.

    Notes
    -----
    Rows whose ``references`` value is not a list/tuple, or whose ``eid`` is
    missing, are skipped. Such rows arise when a failed lookup (a dict holding
    only an ``error`` key) is mixed into the same DataFrame: pandas fills the
    other columns with NaN, which cannot be iterated.
    """
    edges = []

    # Walk the two columns in lockstep; cheaper than iterrows(), which builds
    # a Series per row.
    for source, targets in zip(df['eid'], df['references']):
        if not isinstance(targets, (list, tuple)):
            continue  # NaN / None placeholder from a failed lookup
        if pd.isna(source):
            continue  # an edge without a source node is meaningless
        edges.extend((source, target) for target in targets)

    return pd.DataFrame(edges, columns=['Source', 'Target'])

In [82]:
# Smoke test on a single paper: fetch its record, wrap it in a one-row frame,
# derive its outgoing citation edges, and display them.
seed_record = get_paper_info("10.3982/ECTA16566")
df = pd.DataFrame([seed_record])

edge_list_df = build_edge_list(df)
edge_list_df

Unnamed: 0,Source,Target
0,2-s2.0-85069045573,2-s2.0-84884417725
1,2-s2.0-85069045573,2-s2.0-84926199736
2,2-s2.0-85069045573,2-s2.0-85069750760
3,2-s2.0-85069045573,2-s2.0-85069675535
4,2-s2.0-85069045573,2-s2.0-2942746458
...,...,...
100,2-s2.0-85069045573,2-s2.0-0003732362
101,2-s2.0-85069045573,2-s2.0-0022726760
102,2-s2.0-85069045573,2-s2.0-67650844521
103,2-s2.0-85069045573,2-s2.0-85069690296


In [83]:
# Load the two CSV exports and label each row with the seed it belongs to.
nature_of_belief = pd.read_csv('data/nature_of_belief.csv').assign(seed_paper = 'nature_of_belief')
structure_of_decision = pd.read_csv('data/structure_of_decision.csv').assign(seed_paper = 'structure_of_decision')

papers = pd.concat([nature_of_belief, structure_of_decision], ignore_index=True)

# The same EID may appear in both exports. Fetching it twice wastes API calls
# and would create duplicate node rows, so keep the first occurrence only
# (order preserved) and drop missing EIDs.
eid_list = papers["EID"].dropna().drop_duplicates().tolist()

paper_data = []

for eid in tqdm(eid_list):
    info = get_paper_info(eid)
    if 'error' in info:
        # A failed lookup returns only the error message; record which EID
        # was requested so failures can be traced and retried later.
        info['eid'] = eid
    paper_data.append(info)
    # Short pause between requests (presumably to stay under API rate limits).
    time.sleep(0.1)

100%|██████████| 6359/6359 [41:54<00:00,  2.53it/s]  


In [84]:
# Assemble the per-paper dicts into one table and persist it as CSV
# (without the positional index).
paper_details = pd.DataFrame(data=paper_data)

paper_details.to_csv(path_or_buf='data/paper_details.csv', index=False)

In [87]:
# Node table: one row per retrieved paper.
paper_details.to_csv('data/node_covars.csv', index=False)

# Build the edges from the full set of retrieved papers. The previous version
# passed `df`, which is the one-row smoke-test frame from an earlier cell, so
# the saved edge list did not correspond to the saved node table.
# Rows from failed lookups have no eid/references (NaN) and are excluded.
edge_source = paper_details.dropna(subset=['eid', 'references'])
edge_list_df = build_edge_list(edge_source)
edge_list_df.to_csv('data/edge_list.csv', index=False)

In [18]:
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
papers = pd.read_pickle('data/by category/unified.pkl')

# Case-insensitive check: does any ';'-separated reference of the first record
# contain this title?
needle = 'Spatial Cognition and Consumer Behavior: A Panel Data Approach'.lower()
first_record_refs = papers.iloc[0]['References'].lower().split(';')
any(needle in ref for ref in first_record_refs)

True

In [21]:
# Show the first ';'-separated reference entry of the first record.
papers.iloc[0]['References'].partition(';')[0]

'Adler T., Ben-Akiva M., Joint-Choice Model for Frequency, Destination and Travel Mode for Shopping Trips, Transportation Research Record, 569, pp. 136-150, (1976)'

In [6]:
def check_reference(title, papers):
    """Return the rows of ``papers`` whose ``References`` text mentions ``title``.

    NOTE(review): the original definition had no body (a SyntaxError). This
    implementation generalizes the case-insensitive substring check used in
    the neighbouring cell to every row — confirm this matches the intended
    behaviour.

    Parameters
    ----------
    title : str
        Title to look for; matched case-insensitively as a literal substring.
    papers : pandas.DataFrame
        Table with a ``References`` column of text.

    Returns
    -------
    pandas.DataFrame
        The subset of ``papers`` (original index kept) whose ``References``
        contain ``title``. Empty when ``title`` is empty or nothing matches.
    """
    if not title:
        # An empty needle is a substring of everything; treat it as no match.
        return papers.iloc[0:0]

    # regex=False: titles often contain characters such as '(' or '?' that
    # must be matched literally. na=False: rows without references never match.
    cites_title = papers['References'].str.contains(
        title, case=False, regex=False, na=False
    )
    return papers[cites_title]

EID                                                  2-s2.0-0000037645
Authors                                         Kwan M.-P.; Hong X.-D.
Author full names    Kwan, Mei-Po (7005364475); Hong, Xiao-Dong (14...
Author(s) ID                                   7005364475; 14823585600
Title                Network-based constraints-oriented choice set ...
Year                                                              1998
Source title                                      Geographical Systems
Volume                                                               5
Issue                                                              1-2
Art. No.                                                          None
Page start                                                         139
Page end                                                           162
Page count                                                        23.0
Cited by                                                           124
DOI   

In [16]:
# One dict per row (column name -> value), for plain-Python iteration below.
papers_dict_list = papers.to_dict('records')

# Function to find citations
def find_citations(papers_dict_list):
    """Map each paper title to the titles of the papers that reference it.

    A paper counts as citing another when the cited paper's lower-cased title
    occurs as a substring of the citing paper's lower-cased ``References``
    text.

    Parameters
    ----------
    papers_dict_list : list of dict
        Records with (optionally) ``'Title'`` and ``'References'`` entries.
        Values that are not non-empty strings (missing keys, None, NaN) are
        ignored rather than raising.

    Returns
    -------
    dict
        ``{lower-cased cited title: [titles of citing papers, in input order]}``.
        Every record with a usable title gets a key, even if nobody cites it.
    """
    # Lower-case each usable reference string once up front; the original
    # redid this inside the nested loop, i.e. once per (cited, citing) pair.
    citing = []
    for paper in papers_dict_list:
        references = paper.get('References')
        # NaN from pandas is truthy but not a string, so check the type.
        if isinstance(references, str) and references:
            citing.append((paper.get('Title'), references.lower()))

    citation_graph = {}
    for cited_paper in papers_dict_list:
        cited_title = cited_paper.get('Title')
        if not isinstance(cited_title, str) or not cited_title:
            continue
        needle = cited_title.lower()
        if needle in citation_graph:
            # Duplicate title: the scan would produce the same list again.
            continue
        citation_graph[needle] = [
            title for title, references in citing if needle in references
        ]

    return citation_graph

# Build the citation graph over all records and print the resulting dict
# (lower-cased cited title -> titles of records whose references mention it).
# NOTE(review): find_citations compares every record against every other one;
# the recorded run of this cell ended in a KeyboardInterrupt.
citation_graph = find_citations(papers_dict_list)
print(citation_graph)

KeyboardInterrupt: 