# 1. database names

https://arxiv.org/pdf/1804.08348v2

### Search for papers mentioning the dataset names.

CK+

MMI

JAFFE

TFD

FER-2013

AFEW7.0

SFEW2.0

Multi-PIE

BU-3DFE

Oulu-CASIA

RaFD

KDEF

EmotioNet

RAF-DB

AffectNet

ExpW

### Filter by citation count > 100 (if possible).
### Filter by publication date from 2020 to 2024.
### Count and display results for each dataset.
 



.

In [25]:
import warnings
warnings.filterwarnings('ignore')

import feedparser
import requests
import pandas as pd
from urllib.parse import quote

# Dataset names, used one at a time as the search term for each source
datasets = [
    "CK+",
    "MMI",
    "JAFFE",
    "TFD",
    "FER-2013",
    "AFEW7.0",
    "SFEW2.0",
    "Multi-PIE",
    "BU-3DFE",
    "Oulu-CASIA",
    "RaFD",
    "KDEF",
    "EmotioNet",
    "RAF-DB",
    "AffectNet",
    "ExpW",
]

# Function to search arXiv for dataset mentions with year filter (2020-2024)
def search_arxiv_with_date(dataset_name, max_results=100):
    """Count arXiv entries from 2020-2024 that match ``dataset_name``.

    Queries the arXiv export API across all fields, restricted to a
    ``submittedDate`` window, and counts the returned entries whose
    ``published`` year falls in 2020-2024.

    Args:
        dataset_name: Search term; it is percent-encoded before being placed
            in the query string.
        max_results: Number of entries requested from the API. The returned
            count can never exceed this value, so a result equal to
            ``max_results`` should be read as "at least that many".

    Returns:
        The number of matching entries in the fetched page, or 0 when the
        request or the feed parsing failed (an error line is printed then).
    """
    encoded_query = quote(dataset_name)
    # The arXiv API documents submittedDate bounds as YYYYMMDDHHMM (GMT), so
    # the time component is spelled out instead of relying on bare dates.
    date_range = "submittedDate:[202001010000+TO+202412312359]"
    url = (
        "http://export.arxiv.org/api/query"
        f"?search_query=all:{encoded_query}+AND+{date_range}"
        f"&start=0&max_results={max_results}"
    )
    response = feedparser.parse(url)

    # feedparser records fetch/parse problems on the result instead of
    # raising; without this check a failed request looks like a real zero.
    status = response.get("status", 200)
    if status >= 400 or (response.get("bozo") and not response.entries):
        reason = response.get("bozo_exception", status)
        print(f"arXiv API Error: {reason}")
        return 0

    # Keep the year check as a safeguard on top of the server-side filter.
    return sum(
        1
        for entry in response.entries
        if 2020 <= int(entry.published[:4]) <= 2024
    )

# Function to search CrossRef for dataset mentions with filters
def search_crossref_with_filters(dataset_name):
    """Count highly cited CrossRef works (2020-2024) matching ``dataset_name``.

    Sends a free-text query to the CrossRef ``/works`` endpoint, limited to
    publication dates 2020-2024, and counts how many of the returned items
    have an ``is-referenced-by-count`` greater than 100.

    Args:
        dataset_name: Free-text query string.

    Returns:
        The number of items (out of at most 100 fetched) cited more than 100
        times, or 0 when the API answers with a non-200 status (an error line
        is printed in that case).
    """
    base_url = 'https://api.crossref.org/works'
    params = {
        'query': dataset_name,
        'filter': 'from-pub-date:2020,until-pub-date:2024',
        'rows': 100,  # Only the first 100 results are inspected
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(base_url, params=params, timeout=30)
    if response.status_code != 200:
        print(f"CrossRef API Error: {response.status_code}")
        return 0

    # Tolerate a payload without 'message'/'items' instead of raising KeyError.
    items = response.json().get('message', {}).get('items', [])
    return sum(
        1 for item in items if item.get('is-referenced-by-count', 0) > 100
    )

# Column headers of the results table
result_columns = [
    "Dataset",
    "arXiv Count (2020-2024)",
    "CrossRef Count (2020-2024, cited > 100)",
]

# Collect the counts for each dataset. Rows are gathered in a plain list and
# the DataFrame is built once: DataFrame.append is not available in current
# pandas, and appending inside the loop copied the whole frame every time.
result_rows = []
for dataset in datasets:
    result_rows.append({
        "Dataset": dataset,
        "arXiv Count (2020-2024)": search_arxiv_with_date(dataset),
        "CrossRef Count (2020-2024, cited > 100)": search_crossref_with_filters(dataset),
    })

# Passing the columns keeps the header order (and the headers themselves even
# if no rows were collected)
results = pd.DataFrame(result_rows, columns=result_columns)

# Display the results as a table
results


Unnamed: 0,Dataset,arXiv Count (2020-2024),"CrossRef Count (2020-2024, cited > 100)"
0,CK+,28,0
1,MMI,96,0
2,JAFFE,100,0
3,TFD,76,0
4,FER-2013,0,0
5,AFEW7.0,0,0
6,SFEW2.0,0,0
7,Multi-PIE,0,0
8,BU-3DFE,0,0
9,Oulu-CASIA,0,1


In [None]:
import feedparser
import requests
import pandas as pd
from urllib.parse import quote
from scholarly import scholarly

# Dataset names, used one at a time as the search term for each source
datasets = [
    "CK+",
    "MMI",
    "JAFFE",
    "TFD",
    "FER-2013",
    "AFEW7.0",
    "SFEW2.0",
    "Multi-PIE",
    "BU-3DFE",
    "Oulu-CASIA",
    "RaFD",
    "KDEF",
    "EmotioNet",
    "RAF-DB",
    "AffectNet",
    "ExpW",
]

# Function to search arXiv for dataset mentions with year filter (2020-2024)
def search_arxiv_with_date(dataset_name, max_results=100):
    """Count arXiv entries from 2020-2024 that match ``dataset_name``.

    Queries the arXiv export API across all fields, restricted to a
    ``submittedDate`` window, and counts the returned entries whose
    ``published`` year falls in 2020-2024.

    Args:
        dataset_name: Search term; it is percent-encoded before being placed
            in the query string.
        max_results: Number of entries requested from the API. The returned
            count can never exceed this value, so a result equal to
            ``max_results`` should be read as "at least that many".

    Returns:
        The number of matching entries in the fetched page, or 0 when the
        request or the feed parsing failed (an error line is printed then).
    """
    encoded_query = quote(dataset_name)
    # The arXiv API documents submittedDate bounds as YYYYMMDDHHMM (GMT), so
    # the time component is spelled out instead of relying on bare dates.
    date_range = "submittedDate:[202001010000+TO+202412312359]"
    url = (
        "http://export.arxiv.org/api/query"
        f"?search_query=all:{encoded_query}+AND+{date_range}"
        f"&start=0&max_results={max_results}"
    )
    response = feedparser.parse(url)

    # feedparser records fetch/parse problems on the result instead of
    # raising; without this check a failed request looks like a real zero.
    status = response.get("status", 200)
    if status >= 400 or (response.get("bozo") and not response.entries):
        reason = response.get("bozo_exception", status)
        print(f"arXiv API Error: {reason}")
        return 0

    # Keep the year check as a safeguard on top of the server-side filter.
    return sum(
        1
        for entry in response.entries
        if 2020 <= int(entry.published[:4]) <= 2024
    )

# Function to search CrossRef for dataset mentions with filters
def search_crossref_with_filters(dataset_name):
    """Count highly cited CrossRef works (2020-2024) matching ``dataset_name``.

    Sends a free-text query to the CrossRef ``/works`` endpoint, limited to
    publication dates 2020-2024, and counts how many of the returned items
    have an ``is-referenced-by-count`` greater than 100.

    Args:
        dataset_name: Free-text query string.

    Returns:
        The number of items (out of at most 100 fetched) cited more than 100
        times, or 0 when the API answers with a non-200 status (an error line
        is printed in that case).
    """
    base_url = 'https://api.crossref.org/works'
    params = {
        'query': dataset_name,
        'filter': 'from-pub-date:2020,until-pub-date:2024',
        'rows': 100,  # Only the first 100 results are inspected
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(base_url, params=params, timeout=30)
    if response.status_code != 200:
        print(f"CrossRef API Error: {response.status_code}")
        return 0

    # Tolerate a payload without 'message'/'items' instead of raising KeyError.
    items = response.json().get('message', {}).get('items', [])
    return sum(
        1 for item in items if item.get('is-referenced-by-count', 0) > 100
    )

# Function to search PubMed for dataset mentions
def search_pubmed(dataset_name):
    """Count PubMed records published 2020-2024 that match ``dataset_name``.

    Uses the NCBI E-utilities ESearch endpoint, which is the documented
    programmatic interface to PubMed, with a publication-date window of
    2020 through 2024, and returns the total hit count it reports.

    Args:
        dataset_name: Search term passed to PubMed as ``term``.

    Returns:
        The number of matching records as an int, or 0 when the API answers
        with a non-200 status (an error line is printed in that case).
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": dataset_name,  # requests percent-encodes the value
        "datetype": "pdat",    # filter on publication date
        "mindate": "2020",
        "maxdate": "2024",
        "retmode": "json",
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, params=params, timeout=30)

    if response.status_code == 200:
        data = response.json()
        # ESearch reports the total as a string under esearchresult.count
        return int(data.get("esearchresult", {}).get("count", 0))
    else:
        print(f"PubMed API Error: {response.status_code}")
        return 0

# Function to search Scholarly for dataset mentions
def search_scholarly(dataset_name):
    """Count Google Scholar hits for ``dataset_name`` cited more than 100 times.

    Walks at most the first 100 publications yielded by
    ``scholarly.search_pubs`` and counts those whose citation count exceeds
    100. No publication-date filter is applied here.

    Args:
        dataset_name: Query string passed to ``scholarly.search_pubs``.

    Returns:
        The number of inspected publications with more than 100 citations.
    """
    search_query = scholarly.search_pubs(dataset_name)
    count = 0
    # zip() against range(100) stops after 100 publications, or earlier when
    # the search generator runs out, without fetching a 101st result.
    for _, pub in zip(range(100), search_query):
        # Publication dicts expose the citation count as 'num_citations';
        # 'citedby' is kept only as a fallback for differently shaped results.
        citations = pub.get('num_citations')
        if citations is None:
            citations = pub.get('citedby', 0)
        if citations and citations > 100:
            count += 1
    return count

# Column headers of the results table
result_columns = [
    "Dataset",
    "arXiv Count (2020-2024)",
    "CrossRef Count (2020-2024, cited > 100)",
    "PubMed Count (2020-2024)",
    "Scholarly Count (cited > 100)",
]

# Collect the counts for each dataset. Rows are gathered in a plain list and
# the DataFrame is built once: DataFrame.append is not available in current
# pandas, and appending inside the loop copied the whole frame every time.
result_rows = []
for dataset in datasets:
    result_rows.append({
        "Dataset": dataset,
        "arXiv Count (2020-2024)": search_arxiv_with_date(dataset),
        "CrossRef Count (2020-2024, cited > 100)": search_crossref_with_filters(dataset),
        "PubMed Count (2020-2024)": search_pubmed(dataset),
        "Scholarly Count (cited > 100)": search_scholarly(dataset),
    })

# Passing the columns keeps the header order (and the headers themselves even
# if no rows were collected)
results = pd.DataFrame(result_rows, columns=result_columns)

# Display the results as a table
results

In [None]:
# Save the table without the positional row index, which would otherwise be
# written as an extra, unnamed first column in the CSV.
results.to_csv('FERcount.csv', index=False)