In [1]:
import os
import time
import string
import requests
import pandas as pd
from tqdm import tqdm

In [19]:
file_name = "./Notion.csv"

df = pd.read_csv(file_name)

In [20]:
df = df[df['Type'].isna()]
df

Unnamed: 0,Title,Authors,Publication,Year,Type,Key Findings,Relevance,Status,URL,Notes,Tags,Citation,Source,Title_l
0,Anime Sketch Coloring with Swish-gated Residua...,,,,,,,To Assess,,,,,AwesomeAnimeResearch,anime sketch coloring with swish gated residua...
1,Anita Dataset: An Industrial Animation Dataset,,,,,,,To Assess,,,,,Awesome-Animation-Research,anita dataset an industrial animation dataset
2,Attentioned Deep Paint,,,,,,,To Assess,,,,,AwesomeAnimeResearch,attentioned deep paint
3,Automatic Animation Inbetweening,,,,,,,To Assess,,,,,AwesomeAnimeResearch,automatic animation inbetweening
4,Automatic Illumination Effects for 2D Characters,,,,,,,To Assess,,,,,AwesomeAnimeResearch,automatic illumination effects for 2d characters
5,Breaking the cycle—Colleagues are all you need,,,,,,,To Assess,,,,,AwesomeAnimeResearch,breaking the cycle—colleagues are all you need
6,Bridging the Gap: Sketch-Aware Interpolation N...,,,,,,,To Assess,,,,,Awesome-Animation-Research,bridging the gap sketch aware interpolation n...
7,Building a Manga Dataset ”Manga109” with Annot...,,,,,,,To Assess,,,,,AwesomeAnimeResearch,building a manga dataset ”manga109” with annot...
8,CAST: CHARACTER LABELING IN ANIMATION USING SE...,,,,,,,To Assess,,,,,AwesomeAnimeResearch,cast character labeling in animation using se...
9,Competition on Multimodal Emotion Recognition ...,,,,,,,To Assess,,,,,awesome-comics-understanding,competition on multimodal emotion recognition ...


In [21]:
translator = str.maketrans(string.punctuation, " "*len(string.punctuation))

# Retrieve Results

## CrossRef

In [14]:
results = []

for title in tqdm(df['Title']):
    url = f"https://api.crossref.org/works?query.title={title}&rows=1"
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        if data["message"]["items"]:
            item = data["message"]["items"][0]
            results.append({
                "Title": item.get("title", [""])[0],
                "Authors": ", ".join(
                    f"{a.get('given', '')} {a.get('family', '')}"
                    for a in item.get("author", [])
                ),
                "Year": item.get("issued", {}).get("date-parts", [[None]])[0][0],
                "Publication": item.get("publisher", ""),
                "URL": item.get("URL", ""),
            })
        else:
            results.append({
                "Title": title,
                "Authors": "",
                "Year": "",
                "Publication": "",
                "URL": ""
            })
    else:
        results.append({
            "Title": title,
            "Authors": "",
            "Year": "",
            "Publication": "",
            "URL": ""
        })
    
    time.sleep(1)


100%|██████████| 311/311 [10:27<00:00,  2.02s/it]


## Semantic Scholar

In [23]:
results = []

for title in tqdm(df['Title']):
    search_query = "+".join([i for i in title.split(" ")])
    url = f"https://api.semanticscholar.org/graph/v1/paper/search/bulk?query={search_query}&limit=1&fields=title,authors,year,venue,url"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data.get("data"):
            paper = data["data"][0]
            results.append({
                "Title": paper.get("title"),
                "Authors": ", ".join(a['name'] for a in paper.get("authors", [])),
                "Year": paper.get("year"),
                "Publication": paper.get("venue"),
                "URL": paper.get("url")
            })
    else:
        results.append({
            "Title": title,
            "Authors": "",
            "Year": "",
            "Publication": "",
            "URL": ""
        })
    
    time.sleep(0.5)  # Avoid rate limits

100%|██████████| 311/311 [04:13<00:00,  1.22it/s]


## Semantic Scholar (no limit)

In [5]:
results = []

for title in tqdm(df['Title']):
    search_query = "+".join([i for i in title.split(" ")])
    url = f"https://api.semanticscholar.org/graph/v1/paper/search/bulk?query={search_query}&fields=title,authors,year,venue,url"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data.get("data"):
            for paper in data["data"]:
                results.append({
                    "Title": paper.get("title").lower().translate(translator),
                    "Authors": ", ".join(a['name'] for a in paper.get("authors", [])),
                    "Year": paper.get("year"),
                    "Publication": paper.get("venue"),
                    "URL": paper.get("url")
                })
    else:
        results.append({
            "Title": title,
            "Authors": "",
            "Year": "",
            "Publication": "",
            "URL": ""
        })
    
    time.sleep(0.5)  # Avoid rate limits

  0%|          | 0/59 [00:00<?, ?it/s]

100%|██████████| 59/59 [00:46<00:00,  1.28it/s]


## CrossRef (no limit)

In [22]:
results = []

for title in tqdm(df['Title']):
    url = f"https://api.crossref.org/works?query.title={title}"
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        if data["message"]["items"]:
            for item in data["message"]["items"]:
                results.append({
                    "Title": item.get("title", [""])[0],
                    "Authors": ", ".join(
                        f"{a.get('given', '')} {a.get('family', '')}"
                        for a in item.get("author", [])
                    ),
                    "Year": item.get("issued", {}).get("date-parts", [[None]])[0][0],
                    "Publication": item.get("publisher", ""),
                    "URL": item.get("URL", ""),
                })
        else:
            results.append({
                "Title": title,
                "Authors": "",
                "Year": "",
                "Publication": "",
                "URL": ""
            })
    else:
        results.append({
            "Title": title,
            "Authors": "",
            "Year": "",
            "Publication": "",
            "URL": ""
        })
    
    time.sleep(1)


100%|██████████| 51/51 [01:26<00:00,  1.69s/it]


# Cleaning up the results

In [23]:
results = pd.DataFrame(results)
results['Title'] = results['Title'].str.translate(translator).str.lower()
results

Unnamed: 0,Title,Authors,Year,Publication,URL
0,anime sketch coloring with swish gated residua...,"Gang Liu, Xin Chen, Yanzhong Hu",2019,Springer Singapore,https://doi.org/10.1007/978-981-13-6473-0_17
1,anime sketch colourization using enhanced pix2...,"Nikhil Prashant Mudhalwadkar, Hamam Mokayed, L...",2023,Springer Nature Switzerland,https://doi.org/10.1007/978-3-031-47634-1_12
2,anime sketch coloring based on self attention ...,"Hang Li, Nianyi Wang, Jie Fang, Ying Jia, Liqi...",2023,Springer Nature Singapore,https://doi.org/10.1007/978-981-99-8552-4_19
3,style transfer for anime sketches with enhance...,"Lvmin Zhang, Yi Ji, Xin Lin, Chunping Liu",2017,IEEE,https://doi.org/10.1109/acpr.2017.61
4,interactive anime sketch colorization with sty...,"Ru-Ting Ye, Wei-Li Wang, Ju-Chin Chen, Kawuu W...",2019,IEEE,https://doi.org/10.1109/taai48200.2019.8959911
...,...,...,...,...,...
977,controllable neural style transfer for dynamic...,"Guilherme Gomes Haetinger, Jingwei Tang, Rapha...",2024,ACM,https://doi.org/10.1145/3641519.3657474
978,image purification through controllable neural...,"Tongtong Zhao, Yuxiao Yan, Ibrahim Shehi Shehu...",2018,IEEE,https://doi.org/10.1109/ictc.2018.8539637
979,computational decomposition of style for contr...,"Minchao Li, Shikui Tu, Lei Xu",2019,Springer International Publishing,https://doi.org/10.1007/978-3-030-36204-1_2
980,unsupervised stereoscopic video style transfer,"Hassan Imani, Md Baharul Islam, Md Atiqur Rahm...",2023,IEEE,https://doi.org/10.1109/asyu58738.2023.10296716


In [24]:
results = results[results['Authors'] != ""].drop_duplicates()
results

Unnamed: 0,Title,Authors,Year,Publication,URL
0,anime sketch coloring with swish gated residua...,"Gang Liu, Xin Chen, Yanzhong Hu",2019,Springer Singapore,https://doi.org/10.1007/978-981-13-6473-0_17
1,anime sketch colourization using enhanced pix2...,"Nikhil Prashant Mudhalwadkar, Hamam Mokayed, L...",2023,Springer Nature Switzerland,https://doi.org/10.1007/978-3-031-47634-1_12
2,anime sketch coloring based on self attention ...,"Hang Li, Nianyi Wang, Jie Fang, Ying Jia, Liqi...",2023,Springer Nature Singapore,https://doi.org/10.1007/978-981-99-8552-4_19
3,style transfer for anime sketches with enhance...,"Lvmin Zhang, Yi Ji, Xin Lin, Chunping Liu",2017,IEEE,https://doi.org/10.1109/acpr.2017.61
4,interactive anime sketch colorization with sty...,"Ru-Ting Ye, Wei-Li Wang, Ju-Chin Chen, Kawuu W...",2019,IEEE,https://doi.org/10.1109/taai48200.2019.8959911
...,...,...,...,...,...
977,controllable neural style transfer for dynamic...,"Guilherme Gomes Haetinger, Jingwei Tang, Rapha...",2024,ACM,https://doi.org/10.1145/3641519.3657474
978,image purification through controllable neural...,"Tongtong Zhao, Yuxiao Yan, Ibrahim Shehi Shehu...",2018,IEEE,https://doi.org/10.1109/ictc.2018.8539637
979,computational decomposition of style for contr...,"Minchao Li, Shikui Tu, Lei Xu",2019,Springer International Publishing,https://doi.org/10.1007/978-3-030-36204-1_2
980,unsupervised stereoscopic video style transfer,"Hassan Imani, Md Baharul Islam, Md Atiqur Rahm...",2023,IEEE,https://doi.org/10.1109/asyu58738.2023.10296716


In [30]:
df['Title_l'] = df['Title'].str.translate(translator).str.lower()
merged = df.merge(results, left_on="Title_l", right_on="Title", how="left", suffixes=("", "_df"))

for col in merged.columns[1:]:
    if not col.endswith("_df") and (f'{col}_df' in merged.columns):
        merged[col] = merged[f'{col}_df']
        merged = merged.drop(columns=[f'{col}_df'])

merged = merged.drop(columns=['Title_l', 'Title_df'])
merged[merged['Authors'].isna() == False]

Unnamed: 0,Title,Authors,Publication,Year,Type,Key Findings,Relevance,Status,URL,Notes,Tags,Citation,Source


In [31]:
if os.path.exists("./successfully_retrieved.csv"):
    prev_df = pd.read_csv("./successfully_retrieved.csv")
    new_df = merged[merged['Authors'].isna() == False]
    combined_df = pd.concat([prev_df, new_df], ignore_index=True).drop_duplicates(subset=['Title'])
else:
    combined_df = merged[merged['Authors'].isna() == False].drop_duplicates()
combined_df.to_csv("successfully_retrieved.csv", index=False)

  combined_df = pd.concat([prev_df, new_df], ignore_index=True).drop_duplicates(subset=['Title'])


In [32]:
df[df['Title'].str.lower().isin(combined_df['Title'].str.lower().tolist()) == False].to_csv("Notion.csv", index=False)