#### Import Dependencies

In [1]:
import requests
from bs4 import BeautifulSoup
import json, os
import pandas as pd
from tqdm import tqdm
import traceback
import PyPDF2

# Notebook-wide pandas settings: lift every display truncation limit so that
# frames shown below are rendered in full (rows, columns and cell contents).
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
# Turn off the chained-assignment check for the remainder of the session.
pd.set_option('mode.chained_assignment', None)

#### Collect Crypto Data
- Get data for the top 5k cryptocurrencies by market cap from coinmarketcap.com
- Save the data to a file so that later runs do not consume the API quota

In [2]:
def get_top_5k_cryptos(api_key=None) -> pd.DataFrame:
    """Fetch the latest listings of the top 5,000 cryptocurrencies from CoinMarketCap.

    Calls the ``/v1/cryptocurrency/listings/latest`` endpoint, sorted by
    market cap and quoted in USD.

    Parameters
    ----------
    api_key : str, optional
        CoinMarketCap Pro API key. When omitted, the key is read from the
        ``CMC_PRO_API_KEY`` environment variable.

    Returns
    -------
    pd.DataFrame or None
        One row per entry of the response's ``data`` list on HTTP 200.
        On any other status code the error is printed and ``None`` is returned.

    Raises
    ------
    ValueError
        If no API key is supplied and ``CMC_PRO_API_KEY`` is not set.

    Notes
    -----
    SECURITY: credentials must not be hard-coded in the notebook. Any key that
    was previously committed here should be revoked and replaced.
    """
    api_key = api_key or os.environ.get('CMC_PRO_API_KEY')
    if not api_key:
        raise ValueError(
            'CoinMarketCap API key missing: pass api_key=... or set the '
            'CMC_PRO_API_KEY environment variable.'
        )

    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'

    headers = {
        'Accept': 'application/json',  # standard HTTP header name (was misspelled "Accepts")
        'X-CMC_PRO_API_KEY': api_key,
    }

    params = {
        'start': '1',
        'limit': '5000',  # You can adjust this to get more or fewer listings
        'convert': 'USD',
        'sort': 'market_cap'
    }

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=30)

    if response.status_code == 200:
        return pd.DataFrame(response.json()['data'])

    # The error body is expected to carry status.error_message, but fall back to
    # the raw text when the body is not JSON or lacks that field.
    try:
        error_message = response.json()['status']['error_message']
    except (ValueError, KeyError, TypeError):
        error_message = response.text
    print(f"Error {response.status_code}: {error_message}")
    return None
        
# df = get_top_5k_cryptos()
# df.to_csv('../data/topk_crypto.csv', index=False)
# print('Collected top 5k crypto.')

#### Load the dataset

In [3]:
# Number of leading rows of the saved listing to work with in this notebook.
# Presumably the CSV keeps the market-cap ordering requested from the API — verify.
k = 1000
df = pd.read_csv('../data/topk_crypto.csv').head(k)
print(f'Dimensions of the dataset: {df.shape}')
# Show one random row as a quick look at the available columns.
display(df.sample(1))


Dimensions of the sataset: (1000, 18)


Unnamed: 0,id,name,symbol,slug,num_market_pairs,date_added,tags,max_supply,circulating_supply,total_supply,infinite_supply,platform,cmc_rank,self_reported_circulating_supply,self_reported_market_cap,tvl_ratio,last_updated,quote
420,22461,Hashflow,HFT,hashflow,110,2022-10-31T07:02:20.000Z,"['collectibles-nfts', 'decentralized-exchange-dex-token', 'defi', 'gaming', 'interoperability', 'dao', 'ethereum-ecosystem', 'binance-smart-chain', 'dex', 'binance-launchpool', 'coinbase-ventures-portfolio', 'avalanche-ecosystem', 'solana-ecosystem', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'galaxy-digital-portfolio', 'polygon-ecosystem', 'arbitrum-ecosytem', 'cross-chain', 'optimism-ecosystem', 'jump-crypto', 'dwf-labs-portfolio']",,406745800.0,1000000000.0,False,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ETH', 'slug': 'ethereum', 'token_address': '0xb3999F658C0391d94A37f7FF328F3feC942BcADC'}",421,,,,2024-06-06T04:12:00.000Z,"{'USD': {'price': 0.3134934681213295, 'volume_24h': 6482437.38145102, 'volume_change_24h': -41.6085, 'percent_change_1h': -0.51438079, 'percent_change_24h': -0.35324347, 'percent_change_7d': 5.31047042, 'percent_change_30d': -1.58063837, 'percent_change_60d': -23.96552503, 'percent_change_90d': -35.48488846, 'market_cap': 127512156.10134901, 'market_cap_dominance': 0.0048, 'fully_diluted_market_cap': 313493468.12, 'tvl': None, 'last_updated': '2024-06-06T04:12:00.000Z'}}"


#### Get whitepaper links for each of the top-k cryptos

In [4]:
def get_whitepaper_link(slug):
    """Scrape the whitepaper URL from a coin's CoinMarketCap page.

    Parameters
    ----------
    slug : str
        CoinMarketCap slug, inserted into
        ``https://coinmarketcap.com/currencies/{slug}/``.

    Returns
    -------
    str or None
        The ``href`` of the first link found inside a matching ``div`` whose
        text mentions "whitepaper" (case-insensitive). ``None`` when the
        request fails, the page does not answer with HTTP 200, or no such
        link is present.
    """
    url = f'https://coinmarketcap.com/currencies/{slug}/'
    try:
        # Bounded wait; without it one stalled connection blocks the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        # A single unreachable page must not abort a long apply() over all
        # slugs; treat it like any other "no link found" outcome.
        print(f'Request failed for {url}. Error: {e}')
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): these class names look auto-generated by the site's build;
    # if the page markup changes this selector will silently match nothing.
    divs = soup.find_all('div', class_='sc-d1ede7e3-0 sc-7f0f401-0 gRSwoF gQoblf')

    for div in divs:
        if 'whitepaper' not in div.get_text().lower():
            continue

        # First anchor carrying an href inside this div, if any; otherwise
        # keep scanning the remaining divs.
        link = div.find('a', href=True)
        if link is not None:
            return link['href']

    return None

# tqdm.pandas()
# df['whitepaper_link'] = df['slug'].progress_apply(get_whitepaper_link)
# df.to_csv(f'../data/top{k}_crypto_w_whitepaper_link.csv', index=False)


#### Loading dataset with whitepaper links

In [5]:
# Scraped links for the top-1000 listing, plus a separate file of link corrections.
df = pd.read_csv('../data/top1000_crypto_w_whitepaper_link.csv')
extend_df = pd.read_csv('../data/whitepaper_link_corrections.csv')

# Left-join the corrections on slug; a clashing column coming from the
# corrections file is suffixed with "_extend".
df = df.merge(extend_df, how='left', on='slug', suffixes=('', '_extend'))

# Prefer the corrected link whenever one exists, otherwise keep the scraped one.
df = df.assign(
    whitepaper_link=df['whitepaper_link_extend'].combine_first(df['whitepaper_link'])
)

# The helper column and the two bulky columns are not needed any further.
df = df.drop(columns=['whitepaper_link_extend', 'tags', 'quote'])

# Keep only coins for which a whitepaper link is known.
has_link = df['whitepaper_link'].notna()
df = df.loc[has_link]

print('Shape of dataframe: ', df.shape)
print('Number of whitepaper links: ', df['whitepaper_link'].nunique())
display(df.head(5))


Shape of dataframe:  (423, 17)
Number of whitepaper links:  413


Unnamed: 0,id,name,symbol,slug,num_market_pairs,date_added,max_supply,circulating_supply,total_supply,infinite_supply,platform,cmc_rank,self_reported_circulating_supply,self_reported_market_cap,tvl_ratio,last_updated,whitepaper_link
0,1,Bitcoin,BTC,bitcoin,11101,2010-07-13T00:00:00.000Z,21000000.0,19708080.0,19708080.0,False,,1,,,,2024-06-06T04:12:00.000Z,https://bitcoin.org/bitcoin.pdf
1,1027,Ethereum,ETH,ethereum,9038,2015-08-07T00:00:00.000Z,,120149100.0,120149100.0,True,,2,,,,2024-06-06T04:12:00.000Z,https://ethereum.org/en/whitepaper/
2,825,Tether USDt,USDT,tether,87800,2015-02-25T00:00:00.000Z,,112392100000.0,115086100000.0,True,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ETH', 'slug': 'ethereum', 'token_address': '0xdac17f958d2ee523a2206206994597c13d831ec7'}",3,,,,2024-06-06T04:12:00.000Z,https://assets.ctfassets.net/vyse88cgwfbl/5UWgHMvz071t2Cq5yTw5vi/c9798ea8db99311bf90ebe0810938b01/TetherWhitePaper.pdf
4,5426,Solana,SOL,solana,674,2020-04-10T00:00:00.000Z,,459920200.0,577565700.0,True,,5,,,,2024-06-06T04:12:00.000Z,https://solana.com/solana-whitepaper.pdf
5,3408,USDC,USDC,usd-coin,19594,2018-10-08T00:00:00.000Z,,32601130000.0,32601130000.0,False,"{'id': 1027, 'name': 'Ethereum', 'symbol': 'ETH', 'slug': 'ethereum', 'token_address': '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48'}",6,,,,2024-06-06T04:12:00.000Z,https://f.hubspotusercontent30.net/hubfs/9304636/PDF/centre-whitepaper.pdf


#### Download Content

In [6]:
# Structural sanity check for a PDF file on disk
def verify_pdf(file_name):
    """Tell whether ``file_name`` can be opened as a PDF with at least one page.

    The file is opened in binary mode and handed to ``PyPDF2.PdfReader``.
    Any exception raised while opening or reading it is printed and reported
    as ``False`` rather than propagated.

    Returns
    -------
    bool
        ``True`` if the reader exposes one or more pages, ``False`` otherwise.
    """
    try:
        with open(file_name, 'rb') as pdf_handle:
            page_count = len(PyPDF2.PdfReader(pdf_handle).pages)
    except Exception as e:
        print(f'An error occurred while verifying {file_name}. Error: {e}')
        return False
    return page_count > 0
    
# Function to download a whitepaper (PDF file or HTML page) and store it on disk
def fetch_webpage_content(url, file_name):
    """Download ``url`` and save it to ``file_name``.

    The target's extension decides how the response is stored:

    * ``.pdf`` -- the raw bytes are written, then checked with ``verify_pdf``.
      A file that fails the check is renamed to ``<name>_invalid.pdf``.
    * anything else -- the body is parsed as HTML and only its text (one
      fragment per line) is written, encoded as UTF-8.

    A non-200 response or any exception is printed; nothing is raised and
    nothing is returned.

    Parameters
    ----------
    url : str
        Address to download.
    file_name : str
        Destination path; its directory must already exist.
    """
    try:
        # Decide by the extension only: a plain substring test would also match
        # "pdf" inside the slug or a directory name.
        is_pdf = file_name.lower().endswith('.pdf')

        # Bounded wait so one stalled server cannot hang the whole download loop.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            print(f'Failed to download {file_name}, with url: {url}')
            return

        content = response.content

        if is_pdf:
            with open(file_name, 'wb') as file:
                file.write(content)

            if not verify_pdf(file_name):
                # splitext touches only the final extension, and os.replace
                # overwrites a stale "_invalid" file left by an earlier run on
                # every platform (os.rename fails on Windows in that case).
                root, ext = os.path.splitext(file_name)
                os.replace(file_name, f'{root}_invalid{ext}')

        else:
            # NOTE(review): decoding is forced to ISO-8859-1 for every page;
            # UTF-8 pages will come out garbled. Kept as-is because it looks
            # deliberate -- confirm before changing.
            soup = BeautifulSoup(content, 'html.parser', from_encoding="iso-8859-1")
            text = soup.get_text(separator='\n')

            # Explicit encoding: the platform default may be unable to encode
            # characters present in the page.
            with open(file_name, 'w', encoding='utf-8') as f:
                f.write(text)

    except Exception as e:
        # Deliberately best-effort: report and carry on with the next download.
        print(f'An error occurred while downloading {file_name}, with url: {url}, Error is: {e}')
        
        

# Download every whitepaper listed in df into the whitepapers directory.
output_dir = '../data/whitepapers'
# Create the target directory up front; otherwise every write fails on a fresh checkout.
os.makedirs(output_dir, exist_ok=True)

for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    url = row['whitepaper_link']
    # Named file_ext rather than `type` so the builtin type() is not shadowed
    # for every cell that runs after this one.
    file_ext = 'pdf' if 'pdf' in url else 'txt'
    file_name = f"{output_dir}/{index + 1}_{row['slug']}.{file_ext}"

    # Text extractions are always refreshed: drop the previous copy first.
    if file_ext == 'txt' and os.path.exists(file_name):
        os.remove(file_name)

    # PDFs already on disk are kept and not downloaded again.
    if not os.path.exists(file_name):
        fetch_webpage_content(url, file_name)
        
        

    

  5%|▌         | 23/423 [00:06<01:47,  3.74it/s]

Failed to download ../data/whitepapers/29_hedera.txt, with url: https://www.hedera.com/papers


  6%|▌         | 26/423 [00:07<01:40,  3.95it/s]

Failed to download ../data/whitepapers/33_immutable-x.txt, with url: https://support.immutable.com/hc/en-us/articles/4405227590799


  7%|▋         | 28/423 [00:07<01:35,  4.13it/s]

Failed to download ../data/whitepapers/35_cosmos.txt, with url: https://cosmos.network/resources/whitepaper


  7%|▋         | 31/423 [00:08<02:08,  3.04it/s]

An error occurred while verifying ../data/whitepapers/38_stellar.pdf. Error: EOF marker not found


  8%|▊         | 32/423 [00:08<02:08,  3.03it/s]

An error occurred while verifying ../data/whitepapers/39_monero.pdf. Error: EOF marker not found


 10%|▉         | 41/423 [00:14<03:27,  1.84it/s]

Failed to download ../data/whitepapers/51_fantom.txt, with url: https://fantom.foundation/fantom-research-papers/


 11%|█         | 45/423 [00:15<01:50,  3.42it/s]

Failed to download ../data/whitepapers/55_thorchain.txt, with url: https://github.com/thorchain/Resources/tree/master/Whitepapers/THORChain/whitepaper-en.md


 12%|█▏        | 50/423 [00:17<02:38,  2.36it/s]

An error occurred while verifying ../data/whitepapers/61_fetch.pdf. Error: EOF marker not found


 13%|█▎        | 54/423 [00:20<03:16,  1.88it/s]

An error occurred while verifying ../data/whitepapers/66_aave.pdf. Error: EOF marker not found


 13%|█▎        | 55/423 [00:20<03:13,  1.90it/s]

An error occurred while verifying ../data/whitepapers/67_sei.pdf. Error: EOF marker not found


 16%|█▋        | 69/423 [00:29<04:39,  1.26it/s]

An error occurred while verifying ../data/whitepapers/87_the-sandbox.pdf. Error: EOF marker not found


 17%|█▋        | 72/423 [00:29<02:39,  2.21it/s]

Failed to download ../data/whitepapers/90_mina.txt, with url: https://minaprotocol.com/docs
Failed to download ../data/whitepapers/92_pendle.txt, with url: https://docs.pendle.finance/resources/lite-paper


 18%|█▊        | 78/423 [00:31<01:36,  3.57it/s]

An error occurred while verifying ../data/whitepapers/100_gnosis-gno.pdf. Error: EOF marker not found


 19%|█▉        | 81/423 [00:32<02:22,  2.40it/s]

An error occurred while verifying ../data/whitepapers/104_nexo.pdf. Error: EOF marker not found


 21%|██        | 87/423 [00:36<02:37,  2.14it/s]

An error occurred while verifying ../data/whitepapers/112_dexe.pdf. Error: EOF marker not found


 21%|██        | 88/423 [00:37<02:53,  1.94it/s]

An error occurred while verifying ../data/whitepapers/113_iota.pdf. Error: EOF marker not found


 22%|██▏       | 91/423 [00:38<02:56,  1.88it/s]

Failed to download ../data/whitepapers/116_usdd.pdf, with url: https://usdd.network/USDD-en.pdf


 22%|██▏       | 93/423 [00:40<03:11,  1.72it/s]

Failed to download ../data/whitepapers/118_axelar.pdf, with url: https://axelar.network/wp-content/uploads/2021/07/axelar_whitepaper.pdf


 26%|██▌       | 108/423 [00:48<03:59,  1.31it/s]

Failed to download ../data/whitepapers/141_ftx-token.txt, with url: https://docs.google.com/document/d/1u5MOkENoWP8PGcjuoKqRkNP5Gl1LLRB9JvAHwffQ7ec/view


 27%|██▋       | 116/423 [00:51<02:22,  2.16it/s]

Failed to download ../data/whitepapers/152_raydium.pdf, with url: https://raydium.io/Raydium-Litepaper.pdf


 28%|██▊       | 117/423 [00:52<02:37,  1.94it/s]

Failed to download ../data/whitepapers/153_0x.pdf, with url: https://0xproject.com/pdfs/0x_white_paper.pdf


 30%|██▉       | 126/423 [00:56<02:40,  1.85it/s]

Failed to download ../data/whitepapers/163_pax-gold.txt, with url: https://www.paxos.com/pax-gold-whitepaper


 30%|███       | 128/423 [00:56<02:10,  2.27it/s]

Failed to download ../data/whitepapers/165_reserve-rights.txt, with url: https://reserve.org/protocol/2021_version/#main-content


 30%|███       | 129/423 [00:57<02:25,  2.02it/s]

An error occurred while verifying ../data/whitepapers/166_zcash.pdf. Error: EOF marker not found


 32%|███▏      | 134/423 [01:02<04:06,  1.17it/s]

Failed to download ../data/whitepapers/173_binaryx-new.txt, with url: https://binaryx.pro/whitepaper


 32%|███▏      | 135/423 [01:02<03:19,  1.44it/s]

Failed to download ../data/whitepapers/174_biconomy.txt, with url: https://www.biconomy.io/litepaper


 33%|███▎      | 140/423 [01:05<02:49,  1.67it/s]

An error occurred while verifying ../data/whitepapers/181_arcblock.pdf. Error: EOF marker not found


 37%|███▋      | 157/423 [01:10<01:52,  2.36it/s]

Failed to download ../data/whitepapers/200_centrifuge.pdf, with url: https://centrifuge.io/cfg_token_summary.pdf


 40%|████      | 171/423 [01:13<00:41,  6.02it/s]

Failed to download ../data/whitepapers/223_coinbase-wrapped-staked-eth.txt, with url: https://www.coinbase.com/cbeth/whitepaper


 41%|████      | 174/423 [01:14<00:36,  6.74it/s]

Failed to download ../data/whitepapers/232_restaked-swell-ethereum.txt, with url: https://docs.swellnetwork.io/swell/what-is-swell


 44%|████▍     | 188/423 [01:21<01:44,  2.24it/s]

Failed to download ../data/whitepapers/253_hex.txt, with url: https://hex.win/techspecs.html


 45%|████▌     | 191/423 [01:21<01:12,  3.18it/s]

Failed to download ../data/whitepapers/257_telcoin.pdf, with url: https://www.telco.in/docs/whitepaper.pdf


 48%|████▊     | 202/423 [01:27<01:54,  1.93it/s]

Failed to download ../data/whitepapers/273_band-protocol.pdf, with url: https://bandprotocol.com/whitepaper-3.0.1.pdf


 49%|████▉     | 208/423 [01:29<01:42,  2.10it/s]

Failed to download ../data/whitepapers/279_rlc.pdf, with url: https://iex.ec/app/uploads/2017/04/iExec-WPv2.0-English.pdf


 50%|█████     | 213/423 [01:31<01:29,  2.36it/s]

An error occurred while downloading ../data/whitepapers/286_finschia.txt, with url: https://www.finschia.network/whitepaper/, Error is: HTTPSConnectionPool(host='www.finschia.network', port=443): Max retries exceeded with url: /whitepaper/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x13faee020>: Failed to resolve 'www.finschia.network' ([Errno 8] nodename nor servname provided, or not known)"))


 51%|█████▏    | 217/423 [01:33<01:15,  2.72it/s]

Failed to download ../data/whitepapers/290_wax.txt, with url: https://developer.wax.io/api-docs-tools/


 52%|█████▏    | 219/423 [01:34<01:38,  2.06it/s]

Failed to download ../data/whitepapers/294_beldex.pdf, with url: https://beldex.io/whitepaper.pdf


 55%|█████▍    | 231/423 [01:44<03:19,  1.04s/it]

Failed to download ../data/whitepapers/308_prom.pdf, with url: https://prom.io/whitepaper.pdf


 55%|█████▌    | 233/423 [01:44<02:10,  1.45it/s]

An error occurred while downloading ../data/whitepapers/310_marlin.txt, with url: https://www.marlin.pro/whitepaper, Error is: HTTPSConnectionPool(host='www.marlin.pro', port=443): Max retries exceeded with url: /whitepaper (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x13ffb4190>: Failed to resolve 'www.marlin.pro' ([Errno 8] nodename nor servname provided, or not known)"))


 57%|█████▋    | 239/423 [01:46<01:15,  2.44it/s]

An error occurred while verifying ../data/whitepapers/317_digibyte.pdf. Error: EOF marker not found


 57%|█████▋    | 240/423 [01:46<01:04,  2.86it/s]

Failed to download ../data/whitepapers/318_propy.pdf, with url: https://tokensale.propy.com/Propy-White-Paper-17-Jul-2017.pdf


 57%|█████▋    | 242/423 [01:48<01:40,  1.80it/s]

Failed to download ../data/whitepapers/320_lisk.txt, with url: https://lisk.com/documentation/lisk-sdk/index.html


 58%|█████▊    | 246/423 [01:50<01:10,  2.51it/s]

An error occurred while downloading ../data/whitepapers/326_wavax.pdf, with url: https://files.avalabs.org/papers/consensus.pdf, Error is: HTTPSConnectionPool(host='files.avalabs.org', port=443): Max retries exceeded with url: /papers/consensus.pdf (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x13ff3c850>: Failed to resolve 'files.avalabs.org' ([Errno 8] nodename nor servname provided, or not known)"))


 60%|█████▉    | 252/423 [01:52<01:09,  2.45it/s]

Failed to download ../data/whitepapers/336_venus.pdf, with url: https://venus.io/Whitepaper.pdf


 61%|██████    | 256/423 [01:53<00:42,  3.96it/s]

An error occurred while downloading ../data/whitepapers/342_flex.pdf, with url: https://coinflex.com/documents/CoinFLEX-Whitepaper.pdf, Error is: HTTPSConnectionPool(host='coinflex.com', port=443): Max retries exceeded with url: /documents/CoinFLEX-Whitepaper.pdf (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x15c1a3100>: Failed to resolve 'coinflex.com' ([Errno 8] nodename nor servname provided, or not known)"))


 63%|██████▎   | 265/423 [01:57<00:56,  2.81it/s]

Failed to download ../data/whitepapers/359_keep-network.txt, with url: https://keep.network/whitepaper


 64%|██████▎   | 269/423 [01:58<00:37,  4.11it/s]

Failed to download ../data/whitepapers/363_world-mobile-token.pdf, with url: https://worldmobiletoken.com/WhitePaper.pdf


 64%|██████▍   | 271/423 [02:00<01:27,  1.73it/s]

An error occurred while verifying ../data/whitepapers/365_vulcan-forged-pyr.pdf. Error: EOF marker not found


 68%|██████▊   | 286/423 [02:07<01:29,  1.53it/s]

An error occurred while verifying ../data/whitepapers/385_paxos-standard.pdf. Error: EOF marker not found


 69%|██████▊   | 290/423 [02:09<01:01,  2.16it/s]

Failed to download ../data/whitepapers/389_stratis-new.pdf, with url: https://www.stratisplatform.com/files/Stratis_Whitepaper.pdf


 69%|██████▉   | 291/423 [02:09<01:06,  1.97it/s]

Failed to download ../data/whitepapers/390_ark.pdf, with url: https://arkscic.com/Whitepaper.pdf


 70%|██████▉   | 294/423 [02:12<01:25,  1.51it/s]

An error occurred while verifying ../data/whitepapers/396_pundix-new.pdf. Error: EOF marker not found


 70%|███████   | 298/423 [02:13<01:02,  1.99it/s]

An error occurred while verifying ../data/whitepapers/403_status.pdf. Error: EOF marker not found


 71%|███████   | 301/423 [02:15<01:07,  1.80it/s]

Failed to download ../data/whitepapers/407_velo.pdf, with url: https://velo.org/doc/Velo_Whitepaper_EN.pdf


 72%|███████▏  | 304/423 [02:17<01:20,  1.47it/s]

An error occurred while verifying ../data/whitepapers/411_stargate-finance.pdf. Error: EOF marker not found


 76%|███████▌  | 321/423 [02:23<00:46,  2.20it/s]

An error occurred while verifying ../data/whitepapers/433_wink.pdf. Error: EOF marker not found


 78%|███████▊  | 328/423 [02:26<00:38,  2.46it/s]

An error occurred while verifying ../data/whitepapers/442_bazaars.pdf. Error: EOF marker not found


 79%|███████▊  | 333/423 [02:29<00:40,  2.21it/s]

Failed to download ../data/whitepapers/451_gains-network.txt, with url: https://gainsnetwork.gitbook.io/docs-home/


 79%|███████▉  | 335/423 [02:34<02:01,  1.39s/it]

Failed to download ../data/whitepapers/454_dkargo.pdf, with url: https://dkargo.io/resources/dkargo_wp_en_new.pdf


 80%|███████▉  | 338/423 [02:35<01:03,  1.33it/s]

Failed to download ../data/whitepapers/460_mines-of-dalarnia.pdf, with url: https://www.minesofdalarnia.com/assets/MoD-Litepaper-updated-27-Oct.pdf


 80%|████████  | 339/423 [02:35<00:55,  1.53it/s]

An error occurred while verifying ../data/whitepapers/461_orchid.pdf. Error: EOF marker not found


 81%|████████  | 341/423 [02:36<00:37,  2.20it/s]

An error occurred while verifying ../data/whitepapers/463_bluzelle.pdf. Error: EOF marker not found


 82%|████████▏ | 347/423 [02:44<01:13,  1.04it/s]

Failed to download ../data/whitepapers/469_radio-caca.txt, with url: https://www.raca3.com/whitePaper


 84%|████████▍ | 356/423 [02:48<00:43,  1.56it/s]

Failed to download ../data/whitepapers/484_medibloc.pdf, with url: https://medibloc-homepage.oss-us-west-1.aliyuncs.com/whitepaper/medibloc_whitepaper_en.pdf


 87%|████████▋ | 366/423 [02:50<00:13,  4.16it/s]

An error occurred while downloading ../data/whitepapers/500_node-ai.txt, with url: https://docs.nodeai.app/, Error is: HTTPSConnectionPool(host='docs.nodeai.app', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x15c263880>: Failed to resolve 'docs.nodeai.app' ([Errno 8] nodename nor servname provided, or not known)"))


 87%|████████▋ | 367/423 [02:50<00:13,  4.09it/s]

Failed to download ../data/whitepapers/502_energy-web-token.pdf, with url: https://www.energyweb.org/wp-content/uploads/2019/12/EnergyWeb-EWDOS-VisionPurpose-vFinal-20191211.pdf


 87%|████████▋ | 368/423 [02:51<00:20,  2.64it/s]

Failed to download ../data/whitepapers/503_origin-protocol.txt, with url: https://www.originprotocol.com/litepaper


 89%|████████▉ | 376/423 [02:55<00:22,  2.10it/s]

Failed to download ../data/whitepapers/512_entangle.txt, with url: https://entangle.gitbook.io/entangle/


 91%|█████████ | 385/423 [02:59<00:13,  2.90it/s]

Failed to download ../data/whitepapers/520_veruscoin.pdf, with url: https://veruscoin.io/downloads/papers/VerusVision.pdf


 92%|█████████▏| 390/423 [03:01<00:12,  2.60it/s]

Failed to download ../data/whitepapers/530_ren.pdf, with url: https://renproject.io/litepaper.pdf


 94%|█████████▍| 398/423 [03:04<00:07,  3.20it/s]

An error occurred while verifying ../data/whitepapers/543_memetoon.pdf. Error: EOF marker not found


 96%|█████████▌| 405/423 [03:07<00:07,  2.32it/s]

Failed to download ../data/whitepapers/554_cortex.txt, with url: https://cortexlabs.ai/cortex_2_0_whitepaper_en


100%|██████████| 423/423 [03:17<00:00,  2.15it/s]
