In [2]:
import pandas as pd
# Load the review-article records; lines=True reads one JSON object per line
reviews = pd.read_json("data/preprocessed/reviews.jsonl", lines=True)

# Take the first row whose DOI is "10.1016/j.newar.2024.101694"
record = reviews.loc[reviews["doi"].eq("10.1016/j.newar.2024.101694")].iloc[0]
print(type(record), record, sep="\n")

<class 'pandas.core.series.Series'>
bibcode                                         2024NewAR..9801694E
abstract          Stellar multiplicity is among the oldest and r...
aff               [California Institute of Technology, 1216 E Ca...
author                                           [El-Badry, Kareem]
bibstem                                          [NewAR, NewAR..98]
doctype                                                     article
doi                                     10.1016/j.newar.2024.101694
id                                                         28998371
pubdate                                                  2024-06-01
title                                Gaia's binary star renaissance
read_count                                                      359
reference         [1803RSPT...93..339H, 1844MNRAS...6R.136B, 197...
citation_count                                                   25
citation          [2024A&A...688A...1C, 2024AJ....168..156C, 202...
body        

In [3]:
# Pull the record's body sentences out as a list and preview the first three
sentences = record["body_sentences"]
print(type(sentences), sentences[:3], sep="\n")

<class 'list'>
['1 Introduction Binary stars have long played a foundational role in astrophysics.', 'They underpin precision measurements of stellar physical parameters, enable robust tests of general relativity, and give rise to an extraordinary zoo of observational phenomenology.', 'Millennia after their discovery (e.g. Jetsu and Porceddu, 2015 ), binaries remain at the heart of many of the interesting open questions in astrophysics: binary evolution modeling is key for understanding the origin of gravitational wave events, the spectral energy distributions of high redshift galaxies, and the demographics of exoplanets in the solar neighborhood.']


In [4]:
# Print every bibcode cited by the review, one per line
for ref in record["reference"]:
    print(ref)

1803RSPT...93..339H
1844MNRAS...6R.136B
1975ARA&A..13..295V
1983ARA&A..21..343A
1983ApJ...270..365M
1985ApJ...290...15B
1987MNRAS.227...23W
1989ApJ...347..998E
1991A&A...248..485D
1995MNRAS.273..731R
1997A&A...323L..49P
1997A&A...323L..53L
1997ESASP1200.....E
1997MNRAS.285...33B
1998AstL...24..178T
1999A&A...341..121S
2000A&A...360..997T
2001A&A...369..339P
2001AJ....122.3472H
2002MNRAS.331..649B
2004A&A...424..727P
2004ASPC..318..413S
2004ApJ...601..311Y
2005ESASP.576..573F
2006A&A...450..681T
2009MNRAS.392..590B
2010AJ....139.2566D
2010ApJ...711L.138R
2010ApJS..190....1R
2010MNRAS.404.1835K
2011ApJS..192....2S
2011MNRAS.411.2695P
2012A&A...543A...8M
2012LRR....15...10F
2013ARA&A..51..269D
2014ApJ...797...14P
2015A&A...574A.115M
2015ApJ...799....4R
2015MNRAS.454...53B
2015PLoSO..1044140J
2016A&A...595A...1G
2016AcA....66..405S
2016ApJ...823..102C
2016MNRAS.456.2070T
2017A&A...602A..16T
2017AJ....153..257O
2017AJ....153..259O
2017ApJ...850L..13B
2017ApJS..230...15M
2017MNRAS.470.2611M


In [5]:
# Walk the body sentence by sentence, labelling each with its zero-based position
for i, sentence in enumerate(sentences, start=0):
    print(f"Sentence {i}: {sentence}")

Sentence 0: 1 Introduction Binary stars have long played a foundational role in astrophysics.
Sentence 1: They underpin precision measurements of stellar physical parameters, enable robust tests of general relativity, and give rise to an extraordinary zoo of observational phenomenology.
Sentence 2: Millennia after their discovery (e.g. Jetsu and Porceddu, 2015 ), binaries remain at the heart of many of the interesting open questions in astrophysics: binary evolution modeling is key for understanding the origin of gravitational wave events, the spectral energy distributions of high redshift galaxies, and the demographics of exoplanets in the solar neighborhood.
Sentence 3: Astrometry has played a particularly important role for binary star astronomy.
Sentence 4: Painstaking measurements of the relative positions of two stars in resolved optical pairs over the course of decades allowed Herschel (1803) to infer that most of the pairs he studied were orbiting one another.
Sentence 5: By mo

In [6]:
# Load the research-paper records; lines=True reads one JSON object per line
research = pd.read_json(
    "data/preprocessed/research.jsonl",
    lines=True,
)


In [9]:
# Resolve an inline citation of the form "<initial> <year>" to a single bibcode
# from the review's reference list, then check whether that bibcode is present
# in the research dataframe.
inline_citation = "S 2021"
print("Looking for a unique bibcode in record refs for inline citation:", inline_citation)
year = inline_citation[-4:]  # Last four characters of the inline citation: the year
initial = inline_citation[0]  # First character of the inline citation: the author initial
print(f"Looking for year {year} and initial {initial} in record refs")

# Keep the references that start with the year and end with the initial.
# The iteration variable is deliberately not named `bibcode`: a plain
# `for bibcode in ...` loop would leave `bibcode` bound to the LAST reference
# after the loop, so the dataframe lookup below would run on an unrelated
# bibcode whenever zero or several references matched.
matches = [
    candidate
    for candidate in record["reference"]
    if candidate.startswith(year) and candidate.endswith(initial)
]

# `bibcode` stays None unless exactly one reference matched.
bibcode = None
if len(matches) == 1:
    bibcode = matches[0]
    print("Found bibcode:", bibcode)
    print("Bibcode:", bibcode)
else:
    print("No unique bibcode found for inline citation:", inline_citation)
    print("Matches found:", matches)


if bibcode:
    # Determine if there is a record in the dataframe with this bibcode
    if bibcode in research["bibcode"].values:
        print(f"Record with bibcode {bibcode} exists in the research dataframe.")
    else:
        print(f"Record with bibcode {bibcode} does not exist in the research dataframe.")
else:
    print("Unique bibcode not resolved")

Looking for a unique bibcode in record refs for inline citation: S 2021
Looking for year 2021 and initial S in record refs
Found bibcode: 2021ApJ...907L..33S
Bibcode: 2021ApJ...907L..33S
Record with bibcode 2021ApJ...907L..33S exists in the research dataframe.


No record with bibcode 2022MNRAS.513.5270P found in the research dataframe.


In [None]:
# Display the record's list of body sentences as the cell output
record["body_sentences"]

bibcode                                         2024NewAR..9801694E
abstract          Stellar multiplicity is among the oldest and r...
aff               [California Institute of Technology, 1216 E Ca...
author                                           [El-Badry, Kareem]
bibstem                                          [NewAR, NewAR..98]
doctype                                                     article
doi                                     10.1016/j.newar.2024.101694
id                                                         28998371
pubdate                                                  2024-06-01
title                                Gaia's binary star renaissance
read_count                                                      359
reference         [1803RSPT...93..339H, 1844MNRAS...6R.136B, 197...
citation_count                                                   25
citation          [2024A&A...688A...1C, 2024AJ....168..156C, 202...
body              1 Introduction Binary stars ha