In [119]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [136]:
# Load the arXiv table (1000 papers sorted by the published date).
#
# ArxivID is read as text on purpose. If pandas infers the column as a float,
# identifiers lose information before any later `str()` / `astype(str)` can help:
# trailing zeros vanish ('2301.00200' -> 2301.002) and so would a leading zero
# ('0704.0001' -> 704.0001), which produces wrong arxiv.org URLs.
# NOTE(review): the previously rendered table showed IDs padded to five decimals
# (e.g. 1209.32730 for a paper whose text starts with arXiv:1209.3273), which
# suggests the column was being inferred as float — confirm against the CSV.
df_arxiv = pd.read_csv('papers/df_arxiv_1000_vfinal.csv', dtype={'ArxivID': str})
df_arxiv

Unnamed: 0,ArxivID,Published,Authors,Title,Abstract,Content
0,2207.04542,2023-04-24,Keith Hawkins,Chemical Cartography with LAMOST and Gaia Reve...,"Chemical Cartography, or mapping, of our Galax...","\nChemical Cartography, or mapping, of our Gal..."
1,2303.01637,2023-04-24,"Nolan Dickson, Vincent Hénault-Brunet, Holger ...",Multimass modelling of Milky Way globular clus...,The distribution of stars and stellar remnants...,\nThe distribution of stars and stellar remnan...
2,2302.06644,2023-04-14,"Stephanie Monty, David Yong, Davide Massari, M...",Peeking beneath the precision floor -- II. Pro...,The assembly history of the Milky Way (MW) is ...,\nThe assembly history of the Milky Way (MW) i...
3,2304.00865,2023-04-03,"Ranjan Kumar, Ananta C. Pradhan, Snehalata Sah...",Globular Cluster UVIT Legacy Survey (GlobULeS)...,We present a far-ultraviolet (FUV) study of ho...,\nWe present a far-ultraviolet (FUV) study of ...
4,2303.15253,2023-03-27,"M. Scalco, A. Livernois, E. Vesperini, M. Libr...",First observational evidence of a relation bet...,Several observational studies have shown that ...,\nSeveral observational studies have shown tha...
...,...,...,...,...,...,...
995,1209.32730,2012-09-14,"Gabriel Perren, Rubén Vázquez, Giovanni Carraro",Photometric distances to young stars in the in...,We present results of the first extensive and ...,arXiv:1209.3273v1 [astro-ph.GA] 14 Sep 2012\...
996,1209.30210,2012-09-13,"Alessia Gualandris, Michela Mapelli, Hagai B. ...",Eccentric disc instability in stellar discs fo...,The inspiral of a turbulent molecular cloud in...,arXiv:1209.3021v1 [astro-ph.GA] 13 Sep 2012\...
997,1209.27080,2012-09-12,"Sarah R. Loebman, Zeljko Ivezic, Thomas R. Qui...",Constraints on the Shape of the Milky Way Dark...,We search for evidence of dark matter in the M...,arXiv:1209.2708v1 [astro-ph.GA] 12 Sep 2012\...
998,1111.66090,2012-09-11,"Ismael Ferrero, Mario G. Abadi, Julio F. Navar...",The dark matter halos of dwarf galaxies: a cha...,The cold dark matter halo mass function is muc...,arXiv:1111.6609v3 [astro-ph.CO] 11 Sep 2012\...


In [137]:

# Parse the 'Published' column into datetime values (the helpers below read `.year` from it)
df_arxiv['Published'] = df_arxiv['Published'].pipe(pd.to_datetime)

# Registry used by create_unique_key: proposed key -> how many rows have requested it so far
unique_keys = dict()

def create_unique_key(row):
    """Create a unique key from the first author's last name and the publication year.

    The proposed key is ``<name>_<year>`` with spaces replaced by underscores.
    ``<name>`` is the last whitespace-separated token of the first author, or
    the whole first-author entry when it contains "Collaboration".

    The first row that proposes a key keeps it unchanged. Every later row with
    the same proposed key gets a letter suffix: 'a', 'b', ..., 'z', then 'aa',
    'ab', ... so the suffix stays alphabetic however many collisions occur.

    Side effect: reads and updates the module-level ``unique_keys`` dict, so the
    result depends on the order in which rows are processed and on the dict
    being reset before a fresh pass.

    Args:
        row: Mapping-like row with 'Authors' (comma-separated author names) and
            'Published' (an object exposing a ``.year`` attribute).

    Returns:
        str: The (possibly suffixed) unique key.
    """
    authors = row['Authors'].split(',')
    first_author = authors[0].split()  # tokens of the first author's name
    first_author_lastname = first_author[-1] if "Collaboration" not in authors[0] else authors[0]
    key = f"{first_author_lastname}_{row['Published'].year}".replace(" ", "_")  # replace spaces with underscores

    # Number of earlier rows that proposed the same key
    count = unique_keys.get(key, 0)
    unique_keys[key] = count + 1
    if count > 0:
        # Spreadsheet-column style numbering: 1 -> 'a', 26 -> 'z', 27 -> 'aa'.
        # A plain chr(ord('a') + count - 1) would run past 'z' into '{', '|', ...
        suffix = ""
        remaining = count
        while remaining > 0:
            remaining, letter_index = divmod(remaining - 1, 26)
            suffix = chr(ord('a') + letter_index) + suffix
        key = key + suffix

    return key

def create_citation(row):
    """Create a human-readable citation string for one paper.

    Formats:
        one author     -> "<First> <year><suffix>"
        two authors    -> "<First> and <Second> <year><suffix>"
        three or more  -> "<First> et al. <year><suffix>"

    An author is shown by the last whitespace-separated token of the name,
    unless the name contains "Collaboration", in which case the whole
    (stripped) name is shown. The same rule is applied to the first and the
    second author.

    Args:
        row: Mapping-like row with 'Authors' (comma-separated author names),
            'Published' (an object exposing a ``.year`` attribute) and 'key'.
            Any letters at the very end of 'key' are taken as the
            disambiguation suffix and appended to the year.

    Returns:
        str: The citation string.

    Raises:
        IndexError: If 'Authors' contains no author name at all.
    """
    def _display_name(author):
        # One rule for every author position: collaborations keep their full name,
        # people are reduced to the last token of their name.
        author = author.strip()
        return author if "Collaboration" in author else author.split()[-1]

    # Ignore empty fragments (e.g. produced by a trailing comma); an empty second
    # entry would otherwise raise IndexError when its last token is taken.
    authors = [name for name in row['Authors'].split(',') if name.strip()]
    year = row['Published'].year

    # Collect the run of letters at the end of the key (empty when the key ends
    # with the year's digits). Reading the whole run keeps multi-letter suffixes intact.
    suffix = ""
    for char in reversed(row['key']):
        if not char.isalpha():
            break
        suffix = char + suffix

    first_author_name = _display_name(authors[0])
    if len(authors) > 2:
        return f"{first_author_name} et al. {year}{suffix}"
    if len(authors) == 2:
        return f"{first_author_name} and {_display_name(authors[1])} {year}{suffix}"
    return f"{first_author_name} {year}{suffix}"


def create_url(row):
    """Return the arXiv abstract-page URL for the paper in `row`.

    Args:
        row: Mapping-like row with an 'ArxivID' entry; the value is converted
            with ``str()`` before being appended to the base address.

    Returns:
        str: 'https://arxiv.org/abs/' followed by the identifier.
    """
    arxiv_id = str(row['ArxivID'])
    return f"https://arxiv.org/abs/{arxiv_id}"

# Order the papers by 'Published' (ascending), then derive the three columns.
# assign() evaluates its keyword arguments in order, so 'citation' can read the
# freshly created 'key' column (it needs the key's suffix, if any).
df_arxiv = (
    df_arxiv
    .sort_values(by='Published')
    .assign(
        key=lambda frame: frame.apply(create_unique_key, axis=1),
        citation=lambda frame: frame.apply(create_citation, axis=1),
        url=lambda frame: frame.apply(create_url, axis=1),
    )
)


In [138]:
# 1. store ArxivID as a string
# 2. meta_key = '<key>_<ArxivID>'
# 3. order by 'Published', newest first
# 4. replace the index with a fresh 0..n-1 range
# NOTE(review): casting to str here cannot restore digits that were already lost
# if ArxivID was parsed as a number when the CSV was read — confirm the dtype at load time.
df_arxiv = (
    df_arxiv
    .assign(ArxivID=lambda frame: frame['ArxivID'].astype(str))
    .assign(meta_key=lambda frame: frame['key'] + '_' + frame['ArxivID'])
    .sort_values(by=['Published'], ascending=False)
    .reset_index(drop=True)
)

df_arxiv.head(10)


Unnamed: 0,ArxivID,Published,Authors,Title,Abstract,Content,key,citation,url,meta_key
0,2207.04542,2023-04-24,Keith Hawkins,Chemical Cartography with LAMOST and Gaia Reve...,"Chemical Cartography, or mapping, of our Galax...","\nChemical Cartography, or mapping, of our Gal...",Hawkins_2023,Hawkins 2023,https://arxiv.org/abs/2207.04542,Hawkins_2023_2207.04542
1,2303.01637,2023-04-24,"Nolan Dickson, Vincent Hénault-Brunet, Holger ...",Multimass modelling of Milky Way globular clus...,The distribution of stars and stellar remnants...,\nThe distribution of stars and stellar remnan...,Dickson_2023,Dickson et al. 2023,https://arxiv.org/abs/2303.01637,Dickson_2023_2303.01637
2,2302.06644,2023-04-14,"Stephanie Monty, David Yong, Davide Massari, M...",Peeking beneath the precision floor -- II. Pro...,The assembly history of the Milky Way (MW) is ...,\nThe assembly history of the Milky Way (MW) i...,Monty_2023,Monty et al. 2023,https://arxiv.org/abs/2302.06644,Monty_2023_2302.06644
3,2304.00865,2023-04-03,"Ranjan Kumar, Ananta C. Pradhan, Snehalata Sah...",Globular Cluster UVIT Legacy Survey (GlobULeS)...,We present a far-ultraviolet (FUV) study of ho...,\nWe present a far-ultraviolet (FUV) study of ...,Kumar_2023,Kumar et al. 2023,https://arxiv.org/abs/2304.00865,Kumar_2023_2304.00865
4,2303.15253,2023-03-27,"M. Scalco, A. Livernois, E. Vesperini, M. Libr...",First observational evidence of a relation bet...,Several observational studies have shown that ...,\nSeveral observational studies have shown tha...,Scalco_2023,Scalco et al. 2023,https://arxiv.org/abs/2303.15253,Scalco_2023_2303.15253
5,2303.08344,2023-03-15,"Dan Qiu, Hao Tian, Jing Li, Chao Liu, Lin Long...",Atmospheric parameters and kinematic informati...,"A catalog of more than 43,000 M giant stars ha...",Research in Astronomy and Astrophysics manuscr...,Qiu_2023,Qiu et al. 2023,https://arxiv.org/abs/2303.08344,Qiu_2023_2303.08344
6,2211.15689,2023-03-15,"Martin P. Rey, Oscar Agertz, Tjitske K. Starke...",VINTERGATAN-GM: The cosmological imprints of e...,We present a new suite of cosmological zoom-in...,\nWe present a new suite of cosmological zoom-...,Rey_2023,Rey et al. 2023,https://arxiv.org/abs/2211.15689,Rey_2023_2211.15689
7,2301.00203,2023-03-14,"A. M. Dmytrenko, P. N. Fedorov, V. S. Akhmetov...",The vertex coordinates of the Galaxy's stellar...,We present the results of determining the coor...,\nWe present the results of determining the co...,Dmytrenko_2023,Dmytrenko et al. 2023,https://arxiv.org/abs/2301.00203,Dmytrenko_2023_2301.00203
8,2211.01006,2023-03-10,"Ioana Ciucă, Daisuke Kawata, Yuan-Sen Ting, Ro...",Chasing the impact of the Gaia-Sausage-Encelad...,We employ our Bayesian Machine Learning framew...,\nWe employ our Bayesian Machine Learning fram...,Ciucă_2023,Ciucă et al. 2023,https://arxiv.org/abs/2211.01006,Ciucă_2023_2211.01006
9,2301.04154,2023-03-03,"Elliot Y. Davies, Adam M. Dillamore, Eugene Va...",Accelerated phase-mixing in the stellar halo d...,"In a galaxy merger, the stars tidally stripped...","\nIn a galaxy merger, the stars tidally stripp...",Davies_2023,Davies et al. 2023,https://arxiv.org/abs/2301.04154,Davies_2023_2301.04154


In [None]:
#find pancino and coll

In [139]:
# How many distinct ArxivID values are there? (this counts the 'ArxivID' column, not 'key')
df_arxiv['ArxivID'].nunique(dropna=False)

1000

In [144]:
# Write the enriched table to a new CSV file; the index is not written
df_arxiv.to_csv(path_or_buf='papers/df_arxiv_1000_vfinal_2.csv', index=False)

In [140]:
# Inspect the row labelled 700 (in the recorded output: a Kawata 2018 paper that kept the unsuffixed key)
df_arxiv.loc[700]

ArxivID                                             1804.10175
Published                                  2018-06-11 00:00:00
Authors      Daisuke Kawata, Junichi Baba, Ioana Ciucă, Mar...
Title        Radial Distribution of Stellar Motions in Gaia...
Abstract     By taking advantage of the superb measurements...
Content      \nBy taking advantage of the superb measuremen...
key                                                Kawata_2018
citation                                    Kawata et al. 2018
url                           https://arxiv.org/abs/1804.10175
meta_key                                Kawata_2018_1804.10175
Name: 700, dtype: object

In [141]:
# Inspect the row labelled 649 (in the recorded output: a second Kawata 2018 paper, which received the 'a' suffix)
df_arxiv.loc[649]

ArxivID                                             1803.05927
Published                                  2018-09-25 00:00:00
Authors      Daisuke Kawata, Jo Bovy, Noriyuki Matsunaga, J...
Title        Galactic Rotation from Cepheids with Gaia DR2 ...
Abstract     We apply a simple axisymmetric disc model to 2...
Content      \nWe apply a simple axisymmetric disc model to...
key                                               Kawata_2018a
citation                                   Kawata et al. 2018a
url                           https://arxiv.org/abs/1803.05927
meta_key                               Kawata_2018a_1803.05927
Name: 649, dtype: object

In [143]:
# Print ArxivID, key and citation for every entry.
# The three columns are iterated directly: this avoids chained indexing
# (df.loc[i][col] builds a full row Series three times per iteration) and does
# not require the index labels to be exactly 0..n-1.
for row_arxiv_id, row_key, row_citation in zip(
    df_arxiv['ArxivID'], df_arxiv['key'], df_arxiv['citation']
):
    print(row_arxiv_id, row_key, row_citation)
    

2207.04542 Hawkins_2023 Hawkins 2023
2303.01637 Dickson_2023 Dickson et al. 2023
2302.06644 Monty_2023 Monty et al. 2023
2304.00865 Kumar_2023 Kumar et al. 2023
2303.15253 Scalco_2023 Scalco et al. 2023
2303.08344 Qiu_2023 Qiu et al. 2023
2211.15689 Rey_2023 Rey et al. 2023
2301.00203 Dmytrenko_2023 Dmytrenko et al. 2023
2211.01006 Ciucă_2023 Ciucă et al. 2023
2301.04154 Davies_2023 Davies et al. 2023
2210.04245 Pagnini_2023 Pagnini et al. 2023
2302.14524 Widrow_2023 Widrow 2023
2205.01735 Briceño-Morales_2023 Briceño-Morales and Chanamé 2023
2302.12036 Galli_2023 Galli et al. 2023
2302.09851 Karataş_2023 Karataş et al. 2023
2302.10024 Zhang_2023 Zhang and Sanders 2023
2211.04495 Koposov_2023 Koposov et al. 2023
2206.07419 Barmentloo_2023 Barmentloo and Cautun 2023
2209.11331 Medan_2023 Medan and Lépine 2023
2206.01798 Lucey_2023 Lucey et al. 2023
2302.01618 Bobylev_2023 Bobylev and Bajkova 2023
2302.01379 Labini_2023 Labini et al. 2023
2302.00053 Viswanathan_2023 Viswanathan et al. 20

In [145]:
# Frame used for the HTML export: df_arxiv without the 'Content' and 'meta_key' columns
df_html = df_arxiv.drop(['Content', 'meta_key'], axis=1)
df_html.head(10)

Unnamed: 0,ArxivID,Published,Authors,Title,Abstract,key,citation,url
0,2207.04542,2023-04-24,Keith Hawkins,Chemical Cartography with LAMOST and Gaia Reve...,"Chemical Cartography, or mapping, of our Galax...",Hawkins_2023,Hawkins 2023,https://arxiv.org/abs/2207.04542
1,2303.01637,2023-04-24,"Nolan Dickson, Vincent Hénault-Brunet, Holger ...",Multimass modelling of Milky Way globular clus...,The distribution of stars and stellar remnants...,Dickson_2023,Dickson et al. 2023,https://arxiv.org/abs/2303.01637
2,2302.06644,2023-04-14,"Stephanie Monty, David Yong, Davide Massari, M...",Peeking beneath the precision floor -- II. Pro...,The assembly history of the Milky Way (MW) is ...,Monty_2023,Monty et al. 2023,https://arxiv.org/abs/2302.06644
3,2304.00865,2023-04-03,"Ranjan Kumar, Ananta C. Pradhan, Snehalata Sah...",Globular Cluster UVIT Legacy Survey (GlobULeS)...,We present a far-ultraviolet (FUV) study of ho...,Kumar_2023,Kumar et al. 2023,https://arxiv.org/abs/2304.00865
4,2303.15253,2023-03-27,"M. Scalco, A. Livernois, E. Vesperini, M. Libr...",First observational evidence of a relation bet...,Several observational studies have shown that ...,Scalco_2023,Scalco et al. 2023,https://arxiv.org/abs/2303.15253
5,2303.08344,2023-03-15,"Dan Qiu, Hao Tian, Jing Li, Chao Liu, Lin Long...",Atmospheric parameters and kinematic informati...,"A catalog of more than 43,000 M giant stars ha...",Qiu_2023,Qiu et al. 2023,https://arxiv.org/abs/2303.08344
6,2211.15689,2023-03-15,"Martin P. Rey, Oscar Agertz, Tjitske K. Starke...",VINTERGATAN-GM: The cosmological imprints of e...,We present a new suite of cosmological zoom-in...,Rey_2023,Rey et al. 2023,https://arxiv.org/abs/2211.15689
7,2301.00203,2023-03-14,"A. M. Dmytrenko, P. N. Fedorov, V. S. Akhmetov...",The vertex coordinates of the Galaxy's stellar...,We present the results of determining the coor...,Dmytrenko_2023,Dmytrenko et al. 2023,https://arxiv.org/abs/2301.00203
8,2211.01006,2023-03-10,"Ioana Ciucă, Daisuke Kawata, Yuan-Sen Ting, Ro...",Chasing the impact of the Gaia-Sausage-Encelad...,We employ our Bayesian Machine Learning framew...,Ciucă_2023,Ciucă et al. 2023,https://arxiv.org/abs/2211.01006
9,2301.04154,2023-03-03,"Elliot Y. Davies, Adam M. Dillamore, Eugene Va...",Accelerated phase-mixing in the stellar halo d...,"In a galaxy merger, the stars tidally stripped...",Davies_2023,Davies et al. 2023,https://arxiv.org/abs/2301.04154


In [146]:
def create_hyperlink(row):
    """Wrap the row's 'url' value in an HTML anchor.

    Args:
        row: Mapping-like row with a 'url' entry.

    Returns:
        str: ``<a href="URL">URL</a>`` where the address is used both as the
        link target and as the visible link text.
    """
    target = row['url']
    return f'<a href="{target}">{target}</a>'

# Build the anchors from df_arxiv's plain 'url' column rather than from
# df_html['url'] itself: re-running this cell then yields the same result
# instead of wrapping already-wrapped anchors a second time.
# df_html was derived from df_arxiv by dropping columns only, so both frames
# share the same index and the assignment aligns row by row.
df_html['url'] = df_arxiv.apply(create_hyperlink, axis=1)


In [147]:
# Imported under an alias because a later cell binds the name `html` to the page string.
from html import escape as _html_escape


def _escape_cell(value):
    """Return `value` as text with HTML special characters (&, <, >, quotes) escaped."""
    return _html_escape(str(value))


# escape=False is required so the <a> markup in 'url' is emitted verbatim, but it
# switches escaping off for every column. Text columns (titles, abstracts, ...)
# are therefore escaped individually, so a literal '<' or '&' cannot corrupt the page.
_text_columns = [
    column for column in df_html.columns
    if column != 'url' and (
        pd.api.types.is_object_dtype(df_html[column])
        or pd.api.types.is_string_dtype(df_html[column])
    )
]

# to_html() honours display.max_colwidth (50 characters by default) and cuts longer
# cell text with '...'. That truncates abstracts and, worse, the anchor markup
# itself. None lifts the limit for this call only (requires pandas >= 1.0).
with pd.option_context('display.max_colwidth', None):
    html_table = df_html.to_html(
        escape=False,
        formatters={column: _escape_cell for column in _text_columns},
    )


In [148]:
# Page template. Doubled braces are literal CSS braces; the single `{}` is the
# slot that str.format() fills with the rendered table.
# The charset declaration matches the encoding used when writing the file, so
# non-ASCII author names are displayed correctly by the browser.
html = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Arxiv Astronomy Papers</title>
    <style>
        table {{
            width: 100%;
            border-collapse: collapse;
        }}
        th, td {{
            border: 1px solid black;
            padding: 10px;
            text-align: left;
        }}
    </style>
</head>
<body>
    <h1>Arxiv Astronomy Papers</h1>
    {}
</body>
</html>
""".format(html_table)

# Write the HTML string to a file.
# The encoding is explicit: without it open() uses the platform default, which
# can fail or garble the non-ASCII characters present in author names.
with open('arxiv_papers.html', 'w', encoding='utf-8') as f:
    f.write(html)
