In [31]:
!pip install numpy==1.24.0 scipy==1.10.0 spacy==3.7.4 scikit-learn==1.2.2 pandas==1.5.3 pdfplumber
!python -m spacy download en_core_web_lg
!python -m spacy download zh_core_web_sm
!pip install https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.7.0/zh_core_web_sm-3.7.0-py3-none-any.whl

Collecting pdfplumber
  Downloading pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pdfminer.six==20250327 (from pdfplumber)
  Downloading pdfminer_six-20250327-py3-none-any.whl.metadata (4.1 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.6-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer_six-20250327-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading pypdfi

In [32]:
# Import statements
import spacy
from spacy.matcher import Matcher
import numpy as np
import os
import re
import joblib
import pdfplumber
from urllib.request import urlopen
from collections import OrderedDict
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline, FeatureUnion

# Initialize language models
nlp_en = spacy.load("en_core_web_sm")
nlp_zh = spacy.load("zh_core_web_sm")

In [33]:
class SectionDetector:
    def __init__(self, nlp=None, model_path=None):
        """Initialize with enhanced model loading and patterns"""
        try:
            self.nlp = nlp or spacy.load("en_core_web_lg")
        except OSError:
            print("Using small model without vectors")
            self.nlp = spacy.load("en_core_web_sm")
            
        self.section_classifier = None
        self.section_hierarchy = {}
        self.matcher = None
        self._initialize_section_patterns()
        
        if model_path:
            self.load_model(model_path)

    def _initialize_section_patterns(self):
        """Enhanced section patterns with variations"""
        self.section_hierarchy = {
            'abstract': {
                'level': 1,
                'patterns': [
                    [{"LOWER": {"IN": ["abstract", "summary"]}}],
                    [{"IS_DIGIT": True}, {"LOWER": "abstract"}],
                    [{"TEXT": {"REGEX": r"^(A|Abstract)$"}}]
                ]
            },
            'introduction': {
                'level': 1,
                'patterns': [
                    [{"LOWER": {"IN": ["introduction", "intro"]}}],
                    [{"TEXT": {"REGEX": r"^1\.?\s?Introduction"}}],
                    [{"IS_DIGIT": True}, {"LOWER": "introduction"}]
                ]
            },
            'methods': {
                'level': 1,
                'patterns': [
                    [{"LOWER": {"IN": ["methods", "methodology"]}}],
                    [{"LOWER": "experimental"}, {"LOWER": "procedure"}],
                    [{"TEXT": {"REGEX": r"^2\.?\s?Methods"}}]
                ]
            },
            'results': {
                'level': 1,
                'patterns': [
                    [{"LOWER": "results"}],
                    [{"TEXT": {"REGEX": r"^3\.?\s?Results"}}],
                    [{"LOWER": "findings"}]
                ]
            },
            'discussion': {
                'level': 1,
                'patterns': [
                    [{"LOWER": "discussion"}],
                    [{"TEXT": {"REGEX": r"^4\.?\s?Discussion"}}],
                    [{"LOWER": "analysis"}]
                ]
            },
            'conclusion': {
                'level': 1,
                'patterns': [
                    [{"LOWER": "conclusion"}],
                    [{"TEXT": {"REGEX": r"^5\.?\s?Conclusion"}}],
                    [{"LOWER": "summary"}]
                ]
            },
            'references': {
                'level': 1,
                'patterns': [
                    [{"LOWER": "references"}],
                    [{"LOWER": "bibliography"}],
                    [{"TEXT": {"REGEX": r"^References$"}}]
                ]
            }
        }
        self.matcher = Matcher(self.nlp.vocab)
        self._refresh_matcher()

    def _refresh_matcher(self):
        self.matcher = Matcher(self.nlp.vocab)
        for section, info in self.section_hierarchy.items():
            for pattern in info['patterns']:
                self.matcher.add(section.upper(), [pattern])

    def add_custom_section(self, name, level, patterns):
        self.section_hierarchy[name] = {'level': level, 'patterns': patterns}
        self._refresh_matcher()

    def _extract_raw_sections(self, text, use_ml):
        doc = self.nlp(text)
        matches = self.matcher(doc)
        sections = OrderedDict()
        current_section = "header"
        last_end = 0

        matches = sorted(matches, key=lambda x: x[1])

        for match_id, start, end in matches:
            section_name = self.nlp.vocab.strings[match_id].lower()
            content = doc[last_end:start].text.strip()
            
            if content:
                sections[current_section] = sections.get(current_section, []) + [content]
            
            current_section = self._classify_section(
                doc[start:end].text, 
                section_name, 
                use_ml
            )
            last_end = end

        if last_end < len(doc):
            sections[current_section] = sections.get(current_section, []) + [doc[last_end:].text.strip()]

        return {k: "\n".join(v) for k, v in sections.items()}

    def _classify_section(self, header_text, rule_based_name, use_ml):
        if use_ml and self.section_classifier:
            try:
                return self.section_classifier.predict([header_text])[0]
            except Exception as e:
                print(f"ML classification failed: {e}, using rule-based")
        return rule_based_name

    def _postprocess_sections(self, sections):
        hierarchy_stack = []
        final_sections = OrderedDict()
        
        for section_name, content in sections.items():
            level = self._get_section_level(section_name)
            
            while hierarchy_stack and hierarchy_stack[-1]['level'] >= level:
                hierarchy_stack.pop()
            
            hierarchy_stack.append({'name': section_name, 'level': level})
            hier_key = "::".join([n['name'] for n in hierarchy_stack])
            final_sections[hier_key] = content

        return final_sections

    def _get_section_level(self, section_name):
        clean_name = section_name.lower().strip()
        
        # Exact match check
        for section, info in self.section_hierarchy.items():
            if section in clean_name:
                return info['level']
        
        # Only check similarity if word vectors are available
        if self.nlp.vocab.vectors.size > 0:  # Correct vector check
            similarity_threshold = 0.8
            for section, info in self.section_hierarchy.items():
                if self.nlp(section).similarity(self.nlp(clean_name)) > similarity_threshold:
                    return info['level']
        
        return 0

    def save_model(self, path):
        state = {
            'classifier': self.section_classifier,
            'hierarchy': self.section_hierarchy,
            'nlp_lang': self.nlp.lang  # Change from nlp_config
        }
        joblib.dump(state, path)
    
    def load_model(self, path):
        state = joblib.load(path)
        self.section_classifier = state['classifier']
        self.section_hierarchy = state['hierarchy']
        self.nlp = spacy.load(state['nlp_lang'] + "_core_web_sm")
        self._refresh_matcher()

    class SpacyTransformer(BaseEstimator, TransformerMixin):
        def __init__(self, nlp):
            self.nlp = nlp
            self.tokenizer = nlp.tokenizer
            
        def transform(self, X):
            processed = []
            for text in X:
                doc = self.nlp(text)
                features = [
                    len(doc), 
                    len(list(doc.sents)),
                    sum(1 for token in doc if token.is_title),
                    sum(1 for token in doc if token.pos_ == "NOUN")
                ]
                processed.append(features)
            return np.array(processed)
            
        def fit(self, X, y=None):
            return self

    def train_classifier(self, X_train, y_train):
        self.section_classifier = Pipeline([
            ('features', FeatureUnion([
                ('tfidf', TfidfVectorizer(
                    tokenizer=self._spacy_tokenizer,
                    token_pattern=None,  # Add this line
                    ngram_range=(1, 2),
                    max_features=3000
                )),
                ('spacy', self.SpacyTransformer(self.nlp))
            ])),
            ('clf', LogisticRegression(
                class_weight='balanced',
                max_iter=1000,
                C=0.1
            ))
        ])
        self.section_classifier.fit(X_train, y_train)

    def _spacy_tokenizer(self, text):
        doc = self.nlp(text)
        return [
            token.lemma_.lower() 
            if not token.is_oov else token.text.lower()
            for token in doc
            if not token.is_stop 
            and not token.is_punct
            and not token.is_space
        ]

    def calculate_accuracy(self, true_labels, predicted_labels):
        return {
            'precision': precision_score(true_labels, predicted_labels, average='weighted'),
            'recall': recall_score(true_labels, predicted_labels, average='weighted'),
            'f1': f1_score(true_labels, predicted_labels, average='weighted')
        }

In [34]:
# Utility functions
def load_training_data():
    """Enhanced training data with real-world variations"""
    training_examples = [
        # English examples
        ("Abstract", "abstract"),
        ("1. Introduction", "introduction"),
        ("2. Methods", "methods"),
        ("3. Results", "results"),
        ("4. Discussion", "discussion"),
        ("5. Conclusion", "conclusion"),
        ("References", "references"),
        ("METHODOLOGY", "methods"),
        ("Key Findings", "results"),
        ("Experimental Design", "methods"),
        
        # Chinese examples
        ("摘要", "abstract"),
        ("1. 引言", "introduction"),
        ("方法", "methods"),
        ("结果", "results"),
        ("讨论", "discussion"),
        ("结论", "conclusion"),
        ("参考文献", "references")
    ]
    return [text for text, label in training_examples], [label for text, label in training_examples]

def calculate_extended_metrics(y_true, y_pred):
    return {
        'precision_macro': precision_score(y_true, y_pred, average='macro'),
        'recall_macro': recall_score(y_true, y_pred, average='macro'),
        'f1_macro': f1_score(y_true, y_pred, average='macro'),
        'precision_weighted': precision_score(y_true, y_pred, average='weighted'),
        'recall_weighted': recall_score(y_true, y_pred, average='weighted'),
        'f1_weighted': f1_score(y_true, y_pred, average='weighted'),
        'class_distribution': {
            cls: {'true': sum(1 for label in y_true if label == cls),
                  'predicted': sum(1 for label in y_pred if label == cls)}
            for cls in set(y_true + y_pred)
        }
    }

def get_paper_text(url):
    """Robust PDF text extraction with error handling"""
    try:
        with urlopen(url) as response:
            with pdfplumber.open(response) as pdf:
                return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
    except Exception as e:
        print(f"Error processing {url}: {str(e)}")
        return ""

def process_papers_with_sections(paper_urls, model_path=None, output_dir="/kaggle/working/papers", lang='en'):
    if lang == 'zh':
        nlp = spacy.load("zh_core_web_sm")
    else:
        nlp = spacy.load("en_core_web_md")  # Use medium model
    
    detector = SectionDetector(nlp=nlp)
    
    if lang == 'zh':
        chinese_sections = {
            '摘要': {'level': 1, 'patterns': [[{"ORTH": "摘要"}]]},
            '引言': {'level': 1, 'patterns': [[{"ORTH": "引言"}]]},
            '方法': {'level': 1, 'patterns': [[{"ORTH": "方法"}]]},
            '结果': {'level': 1, 'patterns': [[{"ORTH": "结果"}]]},
            '讨论': {'level': 1, 'patterns': [[{"ORTH": "讨论"}]]},
            '结论': {'level': 1, 'patterns': [[{"ORTH": "结论"}]]},
            '参考文献': {'level': 1, 'patterns': [[{"ORTH": "参考文献"}]]}
        }
        for name, config in chinese_sections.items():
            detector.add_custom_section(name, config['level'], config['patterns'])
    
    if model_path and os.path.exists(model_path):
        try:
            detector.load_model(model_path)
            use_ml = True
        except Exception as e:
            print(f"Error loading model: {e}")
            use_ml = False
    else:
        use_ml = False
    
    processed_papers = []
    for url in paper_urls:
        try:
            # --- WRAP INDIVIDUAL PAPER PROCESSING IN TRY-EXCEPT ---
            text = get_paper_text(url)  # This could fail
            raw_sections = detector._extract_raw_sections(text, use_ml)  # This could fail
            sections = detector._postprocess_sections(raw_sections)  # This could fail
            
            processed_papers.append({
                'url': url,
                'sections': sections,
                'language': lang
            })
            
            # Optional: Save successful processing results
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(output_dir, f"{os.path.basename(url)}.json")
                joblib.dump(processed_papers[-1], filename)
                
        except Exception as e:
            # Handle different error types specifically
            error_msg = f"Error processing {url}: {str(e)}"
            print(error_msg)
            
            # Optional: Save error log
            with open(os.path.join(output_dir, "processing_errors.log"), "a") as f:
                f.write(error_msg + "\n")
                
            continue  # Continue to next paper instead of aborting

    # Final check after processing all papers
    if not processed_papers:
        raise ValueError("No papers processed successfully - check error log")
        
    return processed_papers

In [35]:
# Main execution block
if __name__ == "__main__":
    # 1. Train with enhanced data
    X_train, y_train = load_training_data()
    detector = SectionDetector()
    detector.train_classifier(X_train, y_train)
    
    # 2. Process papers with real URLs
    papers = process_papers_with_sections(
        paper_urls=[
            "https://arxiv.org/pdf/2307.12874",
            "https://arxiv.org/pdf/2303.12940",
            "https://arxiv.org/pdf/1802.04351",
            "https://arxiv.org/pdf/2306.08168",
            "https://arxiv.org/pdf/2503.15964",
            "https://www.jetir.org/papers/JETIR2405D82.pdf",
            "https://www.cs.ucf.edu/~czou/research/subWallet-Blockchain-2019.pdf",
            "https://www.cs.ucf.edu/~czou/research/Hossein-TrustCom-2020.pdf",
            "https://www.cs.ucf.edu/~czou/research/HosseinDissertation-2020.pdf",
            "https://dl.gi.de/server/api/core/bitstreams/aaa640a1-f8dd-4514-ad72-b809932072cc/content",
            "https://eprint.iacr.org/2023/062.pdf",
            "https://eprint.iacr.org/2022/075.pdf",    
            "https://eprint.iacr.org/2023/1234.pdf",
            "https://eprint.iacr.org/2020/300.pdf",
            "https://eprint.iacr.org/2023/312.pdf",
            "https://policyreview.info/pdf/policyreview-2016-3-427.pdf",
            "https://eprint.iacr.org/2016/013.pdf",
            "https://arxiv.org/pdf/1906.00245",
            "https://escholarship.org/content/qt7fh678d6/qt7fh678d6.pdf?t=pn651y",
            "https://re.public.polimi.it/bitstream/11311/1056221/6/11311-1056221%20Giudici.pdf",
            "https://research-api.cbs.dk/ws/files/44436178/ole_bjerg_how_is_bitcoin_money_postprint.pdf",
            "https://www.bis.org/fsi/publ/insights49.pdf",
            "https://www.scirp.org/pdf/ojbm_1534496.pdf",
            "https://www.bis.org/publ/work1066.pdf",
            "http://khcnbinhduong.gov.vn/ImageUpload/file/TTTK%20KCN/2019/Nguon%20tin%20KHCN/Blockchain_A3.pdf",
            "https://e-space.mmu.ac.uk/627269/1/Manuscript_Final%20JCLP.pdf",
            "https://pdfs.semanticscholar.org/9900/c9c91f9f78fa0adb6915855084396654363c.pdf?_gl=1*7q1z9h*_gcl_au*MTkxMDg1NzA4NC4xNzQ4MDIxMDA4*_ga*Mjc1MDg5MDkuMTc0ODAyMTAwOA..*_ga_H7P4ZT52H5*czE3NDgwMjEwMDckbzEkZzEkdDE3NDgwMjExNzkkajE1JGwwJGgwJGR1YWNJOGg3VW43bWFscGZjZ056LU5TM0lXc0Jtc0drMW93",
            "https://www.newyorkfed.org/medialibrary/media/research/epr/2024/EPR_2024_digital-assets_azar.pdf",
            "https://journals.law.harvard.edu/hblr/wp-content/uploads/sites/87/2025/03/04_HLB_15_1_Noked171-216.pdf",
            "https://www.stern.nyu.edu/sites/default/files/2024-07/Glucksman_Sak_2024.pdf",
            "https://www.tigta.gov/sites/default/files/reports/2024-07/2024300030fr_0.pdf",
            "https://www.fsb.org/uploads/Crypto-Council-for-Innovation.pdf",
            "https://www.cs.ucf.edu/~czou/research/HosseinDissertation-2020.pdf",
            "https://ndbf.nebraska.gov/sites/default/files/industries/Digital%20Asset%20Depository%20Nebraska%20Custody%20and%20Fiduciary%20Services%20Examination%20Manual.pdf",
            "https://www.swlegal.com/media/filer_public/2d/f7/2df70b84-cb3c-4578-9943-8b3ea024abf9/sw_nl_january_2024_english.pdf",
            "https://www.willkie.com/-/media/files/publications/2024/12/law360---sec-custody-rule-creates-crypto-compliance-conundrum.pdf",
            "https://www.henrystewartpublications.com/sites/default/files/Opportunities%20in%20digital%20assets%20and%20digital%20custody-Tracking%20the%20modernisation%20of%20standard%20custody%20offering%20-%20Ignatowicz%20%26%20Taudes%20JSOC%2015-3.pdf",
            "https://www.gdf.io/wp-content/uploads/2019/02/GDF-Crypto-Asset-Safekeeping_20-April-2019-2-cust-providers-additions-1-2.pdf",
            "https://www.occ.gov/topics/charters-and-licensing/interpretations-and-actions/2020/int1170.pdf",
            "https://www.gemini.com/static/documents/guide-to-crypto-custody.pdf",
            "https://orbilu.uni.lu/bitstream/10993/62083/1/ZetzscheSinnigNikolakopoulou_Crypto%20custody_CMLJ%202024.pdf",
            "https://www.esrb.europa.eu/pub/pdf/reports/esrb.cryptoassetsanddecentralisedfinance202305~9792140acd.en.pdf",
            "https://repository.uel.ac.uk/download/df676586f4e9f8a89df529a36841d83d4750539805189a8951032ee4c2f0c16c/99798/challenges-and-approaches-to-regulating-decentralized-finance.pdf",
            "https://repository.uel.ac.uk/download/ca8bad2f5fab17596c44927643b4da1473ef7ef79862fe3ca05ea9251bd4db8b/1599957/Financial%20Crime%20update%20%282020%29.pdf",
            "https://www.iacpcybercenter.org/wp-content/uploads/2018/03/Bitcoin.pdf",
            "https://www.ussc.gov/sites/default/files/pdf/training/Podcasts/SPT_Emerging-Tech-Terms.pdf",
            "https://www.ussc.gov/sites/default/files/pdf/training/annual-national-training-seminar/2018-materials/emerging-tech_glossary-crypto.pdf",
            "https://www.ussc.gov/sites/default/files/pdf/training/annual-national-training-seminar/2018-materials/emerging-tech_glossary-phishing.pdf",
            "https://www.ussc.gov/sites/default/files/pdf/training/annual-national-training-seminar/2018/Emerging_Tech_Bitcoin_Crypto.pdf",
            "https://www.ussc.gov/sites/default/files/pdf/training/annual-national-training-seminar/2019/emerging-tech_white-paper.pdf",
            "https://openaccess.uoc.edu/bitstream/10609/151551/1/Rahmanikivi_cbt22_empirical.pdf",
            "https://ics.uci.edu/~dabrowsa/dabrowski-defi21-hwwallet.pdf",
            "https://fc19.ifca.ai/preproceedings/93-preproceedings.pdf",
            "https://www.jkroll.com/papers/bitcoin_threshold_signatures.pdf",
            "https://corporates.db.com/files/documents/publications/db-polygo-digital-id-wp-42pp-web-secured.pdf",
            "https://www.napier.ac.uk/-/media/worktribe/output-2839021/smart-contract-attacks-and-protections.ashx",
            "https://www.cyprusbarassociation.org/images/6._Crypto_Wallets.pdf",
            "https://computerscience.unicam.it/marcantoni/tesi/Ethereum%20Smart%20Contracts%20Optimization.pdf",
            "https://cspecc.utsa.edu/publications/files/Refereed_Papers/2020_Choo_BCPPA-blockchain-cond-priv-auth-prot.pdf",
            "https://www.ekonomika.org.rs/sr/PDF/ekonomika/2019/clanci19-3/7.pdf",
            "https://assets.cureusjournals.com/artifacts/upload/review_article/pdf/1099/20250319-214523-194a3z.pdf"
        ],
        lang='en'
    )
    
    # 3. Enhanced evaluation
    if papers:
        # Dynamic label alignment
        class_mapping = {
            'header': 'header',
            'abstract': 'abstract',
            'introduction': 'introduction',
            'methods': 'methods',
            'results': 'results',
            'discussion': 'discussion'
        }
    
    y_true, y_pred = [], []
    for paper in papers:  # Changed from processed_papers to papers
        sections = list(paper['sections'].keys())
        
        # Only process papers with ≥1 section
        if not sections:
            continue  
        
        # Align mock labels with actual sections
        predicted_labels = [class_mapping.get(s.upper(), s.lower()) for s in sections]
        mock_true_labels = ['abstract', 'introduction', 'methods', 'results'][:len(predicted_labels)]
        
        y_true.extend(mock_true_labels)
        y_pred.extend(predicted_labels)
    
    # Final validation
    if not y_true or not y_pred:
        print("Warning: No valid labels for evaluation")
    else:
        base_metrics = detector.calculate_accuracy(y_true, y_pred)
        extended_metrics = calculate_extended_metrics(y_true, y_pred)  # Define here

        # Print results
        print("\nBase Metrics:")
        print(f"Weighted Precision: {base_metrics['precision']:.2f}")
        print(f"Weighted Recall: {base_metrics['recall']:.2f}")
        print(f"Weighted F1: {base_metrics['f1']:.2f}")

        print("\nExtended Metrics:")
        print(f"Macro Precision: {extended_metrics['precision_macro']:.2f}")
        print(f"Macro Recall: {extended_metrics['recall_macro']:.2f}")
        print(f"Macro F1: {extended_metrics['f1_macro']:.2f}")

        print("\nClass Distribution Analysis:")
        for cls, counts in extended_metrics['class_distribution'].items():
            accuracy = counts['true'] / len(y_true) if len(y_true) > 0 else 0
            print(f"{cls}:")
            print(f"  True samples: {counts['true']}")
            print(f"  Predicted samples: {counts['predicted']}")
            print(f"  Accuracy: {accuracy:.2f}")

Error processing https://arxiv.org/pdf/2307.12874: seek
Error processing https://arxiv.org/pdf/2303.12940: seek
Error processing https://arxiv.org/pdf/1802.04351: seek
Error processing https://arxiv.org/pdf/2306.08168: seek
Error processing https://arxiv.org/pdf/2503.15964: seek
Error processing https://www.jetir.org/papers/JETIR2405D82.pdf: seek
Error processing https://www.cs.ucf.edu/~czou/research/subWallet-Blockchain-2019.pdf: seek
Error processing https://www.cs.ucf.edu/~czou/research/Hossein-TrustCom-2020.pdf: seek
Error processing https://www.cs.ucf.edu/~czou/research/HosseinDissertation-2020.pdf: seek
Error processing https://dl.gi.de/server/api/core/bitstreams/aaa640a1-f8dd-4514-ad72-b809932072cc/content: seek
Error processing https://eprint.iacr.org/2023/062.pdf: seek
Error processing https://eprint.iacr.org/2022/075.pdf: seek
Error processing https://eprint.iacr.org/2023/1234.pdf: seek
Error processing https://eprint.iacr.org/2020/300.pdf: seek
Error processing https://eprin