In [1]:
import pandas as pd
import arxiv2text
import pandas as pd
from datetime import datetime

In [2]:
df = pd.read_csv("cs_ai_24400.csv")

In [3]:
df

Unnamed: 0,title,abstract,arxiv_url,pdf_url,published_date,doi
0,Explainable Spatio-Temporal Graph Neural Networks,Spatio-temporal graph neural networks (STGNNs)...,http://arxiv.org/abs/2310.17149v1,http://arxiv.org/pdf/2310.17149v1,2023-10-26,
1,Meaning and understanding in large language mo...,Can a machine understand the meanings of natur...,http://arxiv.org/abs/2310.17407v1,http://arxiv.org/pdf/2310.17407v1,2023-10-26,
2,Foundation Model Based Native AI Framework in ...,Future wireless communication networks are in ...,http://arxiv.org/abs/2310.17471v1,http://arxiv.org/pdf/2310.17471v1,2023-10-26,
3,Handshape recognition for Argentinian Sign Lan...,Automatic sign language recognition is an impo...,http://arxiv.org/abs/2310.17427v1,http://arxiv.org/pdf/2310.17427v1,2023-10-26,
4,Efficient Data Fusion using the Tsetlin Machine,We propose a novel way of assessing and fusing...,http://arxiv.org/abs/2310.17207v1,http://arxiv.org/pdf/2310.17207v1,2023-10-26,
...,...,...,...,...,...,...
24395,The Difficulties of Learning Logic Programs wi...,As real logic programmers normally use cut (!)...,http://arxiv.org/abs/cs/9311101v1,http://arxiv.org/pdf/cs/9311101v1,1993-11-01,
24396,Software Agents: Completing Patterns and Const...,To support the goal of allowing users to recor...,http://arxiv.org/abs/cs/9311102v1,http://arxiv.org/pdf/cs/9311102v1,1993-11-01,
24397,An Empirical Analysis of Search in GSAT,We describe an extensive study of search in GS...,http://arxiv.org/abs/cs/9309101v1,http://arxiv.org/pdf/cs/9309101v1,1993-09-01,
24398,A Market-Oriented Programming Environment and ...,Market price systems constitute a well-underst...,http://arxiv.org/abs/cs/9308102v1,http://arxiv.org/pdf/cs/9308102v1,1993-08-01,


In [4]:
df['published_date'] = pd.to_datetime(df['published_date'])

df = df[df['published_date'] >= datetime(2018, 1, 1)]

In [5]:
pdf_url_list = df['pdf_url'].to_list()

In [9]:
import os
import io
import requests
from pdfminer.high_level import extract_text
from pdfminer.layout import LAParams

def arxiv_to_md(pdf_url: str, output_folder: str, proxy_url: str) -> None:
    """
    Extracts the Abstract and Introduction sections from a PDF from an arXiv URL
    and saves them as a Markdown file.

    Args:
        pdf_url (str): The URL of the PDF on arXiv.
        output_folder (str): The folder where the Markdown file will be saved.
        proxy_url (str): The URL of the proxy server.

    Returns:
        None

    Example:
    ```python
    pdf_url = "https://arxiv.org/pdf/2310.06825"
    output_folder = "output"
    proxy_url = "http://QlAtu8MRubKCSlkyDpxaMw:@smartproxy.crawlbase.com:8012"
    arxiv_to_md(pdf_url, output_folder, proxy_url)
    ```
    """
    os.makedirs(output_folder, exist_ok=True)
    filename = os.path.join(output_folder, pdf_url.split("/")[-1] + ".md")

    session = requests.Session()
    session.verify = False
    session.proxies = {"http": proxy_url, "https": proxy_url}

    extracted_text = extract_text(io.BytesIO(session.get(pdf_url).content), laparams=LAParams())

    start_abstract = extracted_text.find("Abstract")
    start_introduction = extracted_text.find("Introduction")
    end_abstract = start_introduction if start_introduction != -1 else None

    abstract = extracted_text[start_abstract:end_abstract].strip()
    introduction = extracted_text[start_introduction:].strip() if start_introduction != -1 else ""

    markdown_text = f"# {abstract}\n\n# {introduction}"

    with open(filename, "w", encoding="utf-8") as markdown_file:
        markdown_file.write(markdown_text)

    print(f"Markdown content saved to {filename}")


In [7]:
proxy_url = "http://QlAtu8MRubKCSlkyDpxaMw:@smartproxy.crawlbase.com:8012"


In [10]:
from tqdm import tqdm

for i in tqdm(range(len(pdf_url_list))):
    if i>1000:
        arxiv_to_md(pdf_url_list[i], "cs_ai_markdown", proxy_url)
        print(f"Process on {i}: Completed")

  6%|▌         | 1002/16327 [00:09<02:29, 102.82it/s]

Markdown content saved to cs_ai_markdown\2307.15090v1.md
Process on 1001: Completed


  6%|▌         | 1003/16327 [00:24<07:53, 32.36it/s] 

Markdown content saved to cs_ai_markdown\2307.14953v2.md
Process on 1002: Completed


  6%|▌         | 1004/16327 [00:35<13:28, 18.96it/s]

Markdown content saved to cs_ai_markdown\2308.02435v1.md
Process on 1003: Completed


  6%|▌         | 1005/16327 [00:43<19:24, 13.16it/s]

Markdown content saved to cs_ai_markdown\2308.02510v2.md
Process on 1004: Completed
Markdown content saved to cs_ai_markdown\2307.14893v1.md
Process on 1005: Completed


  6%|▌         | 1007/16327 [01:12<49:27,  5.16it/s]

Markdown content saved to cs_ai_markdown\2308.02032v1.md
Process on 1006: Completed


  6%|▌         | 1008/16327 [01:20<1:01:03,  4.18it/s]

Markdown content saved to cs_ai_markdown\2307.14549v1.md
Process on 1007: Completed
Markdown content saved to cs_ai_markdown\2307.15016v2.md
Process on 1008: Completed


  6%|▌         | 1010/16327 [01:38<1:37:02,  2.63it/s]

Markdown content saved to cs_ai_markdown\2307.14634v1.md
Process on 1009: Completed


  6%|▌         | 1011/16327 [01:55<2:25:40,  1.75it/s]

Markdown content saved to cs_ai_markdown\2307.15224v1.md
Process on 1010: Completed


  6%|▌         | 1012/16327 [02:01<2:49:47,  1.50it/s]

Markdown content saved to cs_ai_markdown\2307.14740v1.md
Process on 1011: Completed


  6%|▌         | 1013/16327 [02:14<4:01:27,  1.06it/s]

Markdown content saved to cs_ai_markdown\2310.02375v1.md
Process on 1012: Completed


  6%|▌         | 1014/16327 [02:20<4:45:44,  1.12s/it]

Markdown content saved to cs_ai_markdown\2308.02443v1.md
Process on 1013: Completed


  6%|▌         | 1015/16327 [02:50<10:01:34,  2.36s/it]

Markdown content saved to cs_ai_markdown\2307.14660v1.md
Process on 1014: Completed


  6%|▌         | 1016/16327 [02:58<11:19:04,  2.66s/it]

Markdown content saved to cs_ai_markdown\2307.14799v3.md
Process on 1015: Completed


  6%|▌         | 1017/16327 [03:32<21:27:19,  5.05s/it]

Markdown content saved to cs_ai_markdown\2307.14783v1.md
Process on 1016: Completed


  6%|▌         | 1018/16327 [03:39<22:04:06,  5.19s/it]

Markdown content saved to cs_ai_markdown\2307.14556v1.md
Process on 1017: Completed


  6%|▌         | 1019/16327 [04:02<31:35:34,  7.43s/it]

Markdown content saved to cs_ai_markdown\2307.14568v1.md
Process on 1018: Completed


  6%|▌         | 1020/16327 [04:49<56:32:04, 13.30s/it]

Markdown content saved to cs_ai_markdown\2307.16770v1.md
Process on 1019: Completed


  6%|▋         | 1021/16327 [04:55<51:37:56, 12.14s/it]

Markdown content saved to cs_ai_markdown\2307.14464v1.md
Process on 1020: Completed


  6%|▋         | 1022/16327 [05:33<72:58:24, 17.16s/it]

Markdown content saved to cs_ai_markdown\2308.03770v1.md
Process on 1021: Completed


  6%|▋         | 1023/16327 [05:43<66:26:07, 15.63s/it]

Markdown content saved to cs_ai_markdown\2307.14335v1.md
Process on 1022: Completed


  6%|▋         | 1024/16327 [05:51<58:30:21, 13.76s/it]

Markdown content saved to cs_ai_markdown\2307.14283v1.md
Process on 1023: Completed


  6%|▋         | 1025/16327 [06:09<63:29:58, 14.94s/it]

Markdown content saved to cs_ai_markdown\2307.14057v1.md
Process on 1024: Completed


  6%|▋         | 1026/16327 [06:35<76:22:09, 17.97s/it]

Markdown content saved to cs_ai_markdown\2307.14119v1.md
Process on 1025: Completed


  6%|▋         | 1027/16327 [06:42<63:28:37, 14.94s/it]

Markdown content saved to cs_ai_markdown\2307.14517v1.md
Process on 1026: Completed


  6%|▋         | 1028/16327 [06:50<54:55:50, 12.93s/it]

Markdown content saved to cs_ai_markdown\2307.14544v1.md
Process on 1027: Completed


  6%|▋         | 1029/16327 [06:57<47:37:05, 11.21s/it]

Markdown content saved to cs_ai_markdown\2307.14109v1.md
Process on 1028: Completed


  6%|▋         | 1030/16327 [07:03<40:36:01,  9.55s/it]

Markdown content saved to cs_ai_markdown\2307.14246v1.md
Process on 1029: Completed


  6%|▋         | 1031/16327 [07:08<35:02:28,  8.25s/it]

Markdown content saved to cs_ai_markdown\2307.14134v1.md
Process on 1030: Completed


  6%|▋         | 1032/16327 [07:12<30:02:08,  7.07s/it]

Markdown content saved to cs_ai_markdown\2307.14226v1.md
Process on 1031: Completed


  6%|▋         | 1033/16327 [07:17<27:22:20,  6.44s/it]

Markdown content saved to cs_ai_markdown\2307.14294v1.md
Process on 1032: Completed


  6%|▋         | 1034/16327 [07:24<27:43:42,  6.53s/it]

Markdown content saved to cs_ai_markdown\2307.14239v1.md
Process on 1033: Completed


  6%|▋         | 1035/16327 [07:29<26:30:46,  6.24s/it]

Markdown content saved to cs_ai_markdown\2307.15717v1.md
Process on 1034: Completed


  6%|▋         | 1036/16327 [07:39<30:47:46,  7.25s/it]

Markdown content saved to cs_ai_markdown\2307.13582v3.md
Process on 1035: Completed


  6%|▋         | 1037/16327 [08:08<58:38:43, 13.81s/it]

Markdown content saved to cs_ai_markdown\2307.13463v1.md
Process on 1036: Completed


  6%|▋         | 1038/16327 [08:21<57:41:04, 13.58s/it]

Markdown content saved to cs_ai_markdown\2307.13345v2.md
Process on 1037: Completed


  6%|▋         | 1039/16327 [08:30<51:31:48, 12.13s/it]

Markdown content saved to cs_ai_markdown\2307.13432v1.md
Process on 1038: Completed


  6%|▋         | 1040/16327 [08:37<45:31:55, 10.72s/it]

Markdown content saved to cs_ai_markdown\2308.02042v1.md
Process on 1039: Completed


  6%|▋         | 1041/16327 [08:43<38:30:39,  9.07s/it]

Markdown content saved to cs_ai_markdown\2308.02031v1.md
Process on 1040: Completed


  6%|▋         | 1042/16327 [09:16<69:01:01, 16.26s/it]

Markdown content saved to cs_ai_markdown\2307.13777v2.md
Process on 1041: Completed


  6%|▋         | 1043/16327 [09:23<58:00:26, 13.66s/it]

Markdown content saved to cs_ai_markdown\2307.13776v1.md
Process on 1042: Completed


  6%|▋         | 1044/16327 [09:32<51:21:38, 12.10s/it]

Markdown content saved to cs_ai_markdown\2307.13715v1.md
Process on 1043: Completed


  6%|▋         | 1045/16327 [09:40<46:59:13, 11.07s/it]

Markdown content saved to cs_ai_markdown\2307.13815v2.md
Process on 1044: Completed


  6%|▋         | 1046/16327 [09:49<43:26:36, 10.23s/it]

Markdown content saved to cs_ai_markdown\2307.13642v1.md
Process on 1045: Completed


  6%|▋         | 1047/16327 [09:58<41:50:15,  9.86s/it]

Markdown content saved to cs_ai_markdown\2309.12320v1.md
Process on 1046: Completed


  6%|▋         | 1048/16327 [10:07<40:52:55,  9.63s/it]

Markdown content saved to cs_ai_markdown\2307.13658v1.md
Process on 1047: Completed


  6%|▋         | 1049/16327 [10:14<38:17:19,  9.02s/it]

Markdown content saved to cs_ai_markdown\2307.13269v1.md
Process on 1048: Completed


  6%|▋         | 1050/16327 [10:22<36:09:22,  8.52s/it]

Markdown content saved to cs_ai_markdown\2307.13275v1.md
Process on 1049: Completed


  6%|▋         | 1051/16327 [10:30<36:29:25,  8.60s/it]

Markdown content saved to cs_ai_markdown\2307.13270v1.md
Process on 1050: Completed


  6%|▋         | 1052/16327 [10:44<42:33:29, 10.03s/it]

Markdown content saved to cs_ai_markdown\2307.14381v1.md
Process on 1051: Completed


  6%|▋         | 1053/16327 [10:51<38:22:22,  9.04s/it]

Markdown content saved to cs_ai_markdown\2308.03769v1.md
Process on 1052: Completed


  6%|▋         | 1054/16327 [11:39<88:40:23, 20.90s/it]

Markdown content saved to cs_ai_markdown\2307.15715v1.md
Process on 1053: Completed


  6%|▋         | 1055/16327 [11:50<75:57:27, 17.91s/it]

Markdown content saved to cs_ai_markdown\2307.13173v1.md
Process on 1054: Completed


  6%|▋         | 1056/16327 [12:05<72:52:15, 17.18s/it]

Markdown content saved to cs_ai_markdown\2307.12626v2.md
Process on 1055: Completed


  6%|▋         | 1057/16327 [12:19<68:21:51, 16.12s/it]

Markdown content saved to cs_ai_markdown\2307.13014v2.md
Process on 1056: Completed


  6%|▋         | 1058/16327 [12:28<59:05:24, 13.93s/it]

Markdown content saved to cs_ai_markdown\2307.12906v2.md
Process on 1057: Completed


  6%|▋         | 1059/16327 [12:39<55:34:04, 13.10s/it]

Markdown content saved to cs_ai_markdown\2307.12775v1.md
Process on 1058: Completed


  6%|▋         | 1060/16327 [12:57<61:56:32, 14.61s/it]

Markdown content saved to cs_ai_markdown\2307.12620v1.md
Process on 1059: Completed


  6%|▋         | 1061/16327 [13:03<51:18:09, 12.10s/it]

Markdown content saved to cs_ai_markdown\2307.13705v1.md
Process on 1060: Completed


  7%|▋         | 1062/16327 [13:12<46:52:59, 11.06s/it]

Markdown content saved to cs_ai_markdown\2307.12970v1.md
Process on 1061: Completed


  7%|▋         | 1063/16327 [13:24<47:31:53, 11.21s/it]

Markdown content saved to cs_ai_markdown\2307.12344v2.md
Process on 1062: Completed


  7%|▋         | 1064/16327 [13:31<42:04:24,  9.92s/it]

Markdown content saved to cs_ai_markdown\2307.12218v1.md
Process on 1063: Completed


  7%|▋         | 1065/16327 [14:06<73:58:31, 17.45s/it]

Markdown content saved to cs_ai_markdown\2307.14355v1.md
Process on 1064: Completed


  7%|▋         | 1066/16327 [14:16<65:32:24, 15.46s/it]

Markdown content saved to cs_ai_markdown\2308.04440v2.md
Process on 1065: Completed


  7%|▋         | 1067/16327 [14:22<53:31:05, 12.63s/it]

Markdown content saved to cs_ai_markdown\2307.16762v1.md
Process on 1066: Completed


  7%|▋         | 1068/16327 [14:30<47:36:28, 11.23s/it]

Markdown content saved to cs_ai_markdown\2307.12445v1.md
Process on 1067: Completed


  7%|▋         | 1069/16327 [14:48<55:08:08, 13.01s/it]

Markdown content saved to cs_ai_markdown\2307.12115v1.md
Process on 1068: Completed


  7%|▋         | 1070/16327 [14:56<49:49:31, 11.76s/it]

Markdown content saved to cs_ai_markdown\2307.12185v1.md
Process on 1069: Completed


  7%|▋         | 1071/16327 [15:01<40:53:24,  9.65s/it]

Markdown content saved to cs_ai_markdown\2307.12184v1.md
Process on 1070: Completed


  7%|▋         | 1072/16327 [15:33<69:38:20, 16.43s/it]

Markdown content saved to cs_ai_markdown\2307.12087v1.md
Process on 1071: Completed


  7%|▋         | 1073/16327 [15:42<59:11:42, 13.97s/it]

Markdown content saved to cs_ai_markdown\2308.04439v2.md
Process on 1072: Completed


  7%|▋         | 1074/16327 [15:52<55:08:42, 13.02s/it]

Markdown content saved to cs_ai_markdown\2307.12166v1.md
Process on 1073: Completed


  7%|▋         | 1075/16327 [16:00<48:07:42, 11.36s/it]

Markdown content saved to cs_ai_markdown\2307.12133v1.md
Process on 1074: Completed


  7%|▋         | 1076/16327 [16:15<52:33:59, 12.41s/it]

Markdown content saved to cs_ai_markdown\2307.12143v1.md
Process on 1075: Completed


  7%|▋         | 1077/16327 [16:21<45:05:05, 10.64s/it]

Markdown content saved to cs_ai_markdown\2307.12128v1.md
Process on 1076: Completed


  7%|▋         | 1078/16327 [16:39<53:53:36, 12.72s/it]

Markdown content saved to cs_ai_markdown\2307.11794v1.md
Process on 1077: Completed


  7%|▋         | 1079/16327 [16:51<53:03:49, 12.53s/it]

Markdown content saved to cs_ai_markdown\2307.11516v1.md
Process on 1078: Completed


  7%|▋         | 1080/16327 [17:03<52:48:27, 12.47s/it]

Markdown content saved to cs_ai_markdown\2307.13704v3.md
Process on 1079: Completed


  7%|▋         | 1081/16327 [17:15<51:39:19, 12.20s/it]

Markdown content saved to cs_ai_markdown\2307.12776v1.md
Process on 1080: Completed


  7%|▋         | 1082/16327 [17:21<43:48:27, 10.34s/it]

Markdown content saved to cs_ai_markdown\2308.04436v1.md
Process on 1081: Completed


  7%|▋         | 1083/16327 [17:29<41:05:33,  9.70s/it]

Markdown content saved to cs_ai_markdown\2307.11286v1.md
Process on 1082: Completed


  7%|▋         | 1084/16327 [17:34<35:34:43,  8.40s/it]

Markdown content saved to cs_ai_markdown\2307.11452v1.md
Process on 1083: Completed


  7%|▋         | 1085/16327 [17:44<37:08:28,  8.77s/it]

Markdown content saved to cs_ai_markdown\2307.11643v3.md
Process on 1084: Completed


  7%|▋         | 1086/16327 [18:07<54:42:47, 12.92s/it]

Markdown content saved to cs_ai_markdown\2307.11554v1.md
Process on 1085: Completed


  7%|▋         | 1087/16327 [18:12<45:40:34, 10.79s/it]

Markdown content saved to cs_ai_markdown\2307.10549v1.md
Process on 1086: Completed


  7%|▋         | 1088/16327 [18:20<42:00:18,  9.92s/it]

Markdown content saved to cs_ai_markdown\2307.11128v1.md
Process on 1087: Completed


  7%|▋         | 1089/16327 [18:54<72:10:26, 17.05s/it]

Markdown content saved to cs_ai_markdown\2307.10600v1.md
Process on 1088: Completed


  7%|▋         | 1090/16327 [19:02<61:07:50, 14.44s/it]

Markdown content saved to cs_ai_markdown\2307.10751v1.md
Process on 1089: Completed


  7%|▋         | 1091/16327 [19:26<72:59:08, 17.25s/it]

Markdown content saved to cs_ai_markdown\2307.11206v1.md
Process on 1090: Completed


  7%|▋         | 1092/16327 [19:32<58:28:20, 13.82s/it]

Markdown content saved to cs_ai_markdown\2307.10680v1.md
Process on 1091: Completed


  7%|▋         | 1093/16327 [19:37<47:07:25, 11.14s/it]

Markdown content saved to cs_ai_markdown\2307.10563v1.md
Process on 1092: Completed


  7%|▋         | 1094/16327 [19:43<40:55:40,  9.67s/it]

Markdown content saved to cs_ai_markdown\2307.10932v2.md
Process on 1093: Completed


  7%|▋         | 1095/16327 [19:50<36:42:46,  8.68s/it]

Markdown content saved to cs_ai_markdown\2307.11784v1.md
Process on 1094: Completed


  7%|▋         | 1096/16327 [19:59<37:12:12,  8.79s/it]

Markdown content saved to cs_ai_markdown\2307.11787v2.md
Process on 1095: Completed


  7%|▋         | 1097/16327 [20:09<39:40:23,  9.38s/it]

Markdown content saved to cs_ai_markdown\2307.11046v1.md
Process on 1096: Completed


  7%|▋         | 1098/16327 [20:20<41:17:32,  9.76s/it]

Markdown content saved to cs_ai_markdown\2307.10693v1.md
Process on 1097: Completed


  7%|▋         | 1099/16327 [20:30<41:46:09,  9.87s/it]

Markdown content saved to cs_ai_markdown\2307.11114v1.md
Process on 1098: Completed


  7%|▋         | 1100/16327 [20:41<43:17:08, 10.23s/it]

Markdown content saved to cs_ai_markdown\2307.10991v2.md
Process on 1099: Completed


  7%|▋         | 1101/16327 [20:56<49:28:47, 11.70s/it]

Markdown content saved to cs_ai_markdown\2307.10690v1.md
Process on 1100: Completed


  7%|▋         | 1102/16327 [21:05<45:23:42, 10.73s/it]

Markdown content saved to cs_ai_markdown\2307.10802v1.md
Process on 1101: Completed


  7%|▋         | 1103/16327 [21:13<41:58:20,  9.93s/it]

Markdown content saved to cs_ai_markdown\2307.11779v1.md
Process on 1102: Completed


  7%|▋         | 1104/16327 [22:10<102:07:49, 24.15s/it]

Markdown content saved to cs_ai_markdown\2307.10004v1.md
Process on 1103: Completed


  7%|▋         | 1105/16327 [22:22<86:40:48, 20.50s/it] 

Markdown content saved to cs_ai_markdown\2307.09711v1.md
Process on 1104: Completed


  7%|▋         | 1106/16327 [22:49<94:12:44, 22.28s/it]

Markdown content saved to cs_ai_markdown\2307.10169v1.md
Process on 1105: Completed


  7%|▋         | 1107/16327 [22:57<76:30:42, 18.10s/it]

Markdown content saved to cs_ai_markdown\2307.09767v1.md
Process on 1106: Completed


  7%|▋         | 1108/16327 [23:04<62:19:49, 14.74s/it]

Markdown content saved to cs_ai_markdown\2307.09706v1.md
Process on 1107: Completed


  7%|▋         | 1109/16327 [23:12<53:20:38, 12.62s/it]

Markdown content saved to cs_ai_markdown\2307.09891v1.md
Process on 1108: Completed


  7%|▋         | 1110/16327 [23:22<51:09:49, 12.10s/it]

Markdown content saved to cs_ai_markdown\2307.10408v1.md
Process on 1109: Completed


  7%|▋         | 1111/16327 [23:33<49:34:10, 11.73s/it]

Markdown content saved to cs_ai_markdown\2307.10003v1.md
Process on 1110: Completed


  7%|▋         | 1112/16327 [23:41<44:18:13, 10.48s/it]

Markdown content saved to cs_ai_markdown\2307.09885v1.md
Process on 1111: Completed


  7%|▋         | 1113/16327 [23:50<43:04:25, 10.19s/it]

Markdown content saved to cs_ai_markdown\2307.10171v1.md
Process on 1112: Completed


  7%|▋         | 1114/16327 [24:04<47:09:07, 11.16s/it]

Markdown content saved to cs_ai_markdown\2307.10491v1.md
Process on 1113: Completed


  7%|▋         | 1115/16327 [24:12<43:12:39, 10.23s/it]

Markdown content saved to cs_ai_markdown\2307.09770v1.md
Process on 1114: Completed


  7%|▋         | 1116/16327 [24:23<43:57:50, 10.40s/it]

Markdown content saved to cs_ai_markdown\2307.10032v1.md
Process on 1115: Completed


  7%|▋         | 1117/16327 [24:30<40:11:04,  9.51s/it]

Markdown content saved to cs_ai_markdown\2307.10085v3.md
Process on 1116: Completed


  7%|▋         | 1118/16327 [24:44<46:17:46, 10.96s/it]

Markdown content saved to cs_ai_markdown\2307.09494v1.md
Process on 1117: Completed


  7%|▋         | 1119/16327 [24:52<41:57:02,  9.93s/it]

Markdown content saved to cs_ai_markdown\2307.08974v1.md
Process on 1118: Completed


  7%|▋         | 1120/16327 [25:18<62:56:45, 14.90s/it]

Markdown content saved to cs_ai_markdown\2308.00001v1.md
Process on 1119: Completed


  7%|▋         | 1121/16327 [25:38<68:33:57, 16.23s/it]

Markdown content saved to cs_ai_markdown\2307.09321v1.md
Process on 1120: Completed


  7%|▋         | 1122/16327 [25:45<57:44:01, 13.67s/it]

Markdown content saved to cs_ai_markdown\2307.09636v1.md
Process on 1121: Completed


  7%|▋         | 1123/16327 [25:54<51:21:41, 12.16s/it]

Markdown content saved to cs_ai_markdown\2307.08987v1.md
Process on 1122: Completed


  7%|▋         | 1124/16327 [26:24<73:20:47, 17.37s/it]

Markdown content saved to cs_ai_markdown\2307.09683v3.md
Process on 1123: Completed


  7%|▋         | 1125/16327 [26:36<66:55:23, 15.85s/it]

Markdown content saved to cs_ai_markdown\2307.09042v2.md
Process on 1124: Completed


  7%|▋         | 1126/16327 [27:04<82:28:40, 19.53s/it]

Markdown content saved to cs_ai_markdown\2307.09426v2.md
Process on 1125: Completed


  7%|▋         | 1127/16327 [27:18<75:18:52, 17.84s/it]

Markdown content saved to cs_ai_markdown\2307.09665v1.md
Process on 1126: Completed


  7%|▋         | 1128/16327 [27:27<64:11:24, 15.20s/it]

Markdown content saved to cs_ai_markdown\2307.09447v3.md
Process on 1127: Completed


  7%|▋         | 1129/16327 [27:35<54:27:12, 12.90s/it]

Markdown content saved to cs_ai_markdown\2307.09673v3.md
Process on 1128: Completed


  7%|▋         | 1130/16327 [27:43<49:16:01, 11.67s/it]

Markdown content saved to cs_ai_markdown\2307.08933v1.md
Process on 1129: Completed


  7%|▋         | 1131/16327 [27:53<46:39:01, 11.05s/it]

Markdown content saved to cs_ai_markdown\2307.09072v1.md
Process on 1130: Completed


  7%|▋         | 1132/16327 [27:59<40:46:24,  9.66s/it]

Markdown content saved to cs_ai_markdown\2307.10292v1.md
Process on 1131: Completed


  7%|▋         | 1133/16327 [28:09<40:22:54,  9.57s/it]

Markdown content saved to cs_ai_markdown\2307.09343v3.md
Process on 1132: Completed


  7%|▋         | 1134/16327 [28:19<41:46:37,  9.90s/it]

Markdown content saved to cs_ai_markdown\2307.08481v2.md
Process on 1133: Completed


  7%|▋         | 1135/16327 [28:24<35:12:11,  8.34s/it]

Markdown content saved to cs_ai_markdown\2307.08461v3.md
Process on 1134: Completed


  7%|▋         | 1136/16327 [28:49<56:47:49, 13.46s/it]

Markdown content saved to cs_ai_markdown\2307.08189v1.md
Process on 1135: Completed


  7%|▋         | 1137/16327 [29:04<58:26:57, 13.85s/it]

Markdown content saved to cs_ai_markdown\2307.08233v1.md
Process on 1136: Completed


  7%|▋         | 1138/16327 [29:15<54:58:33, 13.03s/it]

Markdown content saved to cs_ai_markdown\2307.08526v1.md
Process on 1137: Completed


  7%|▋         | 1139/16327 [29:26<52:15:32, 12.39s/it]

Markdown content saved to cs_ai_markdown\2307.08225v1.md
Process on 1138: Completed


  7%|▋         | 1140/16327 [29:36<49:28:02, 11.73s/it]

Markdown content saved to cs_ai_markdown\2307.08401v1.md
Process on 1139: Completed


  7%|▋         | 1141/16327 [29:42<41:35:27,  9.86s/it]

Markdown content saved to cs_ai_markdown\2307.08564v1.md
Process on 1140: Completed


  7%|▋         | 1142/16327 [30:10<64:50:36, 15.37s/it]

Markdown content saved to cs_ai_markdown\2307.08262v1.md
Process on 1141: Completed


  7%|▋         | 1143/16327 [30:16<52:58:03, 12.56s/it]

Markdown content saved to cs_ai_markdown\2307.08700v1.md
Process on 1142: Completed


  7%|▋         | 1144/16327 [30:28<51:48:54, 12.29s/it]

Markdown content saved to cs_ai_markdown\2307.08197v1.md
Process on 1143: Completed


  7%|▋         | 1145/16327 [30:33<43:16:30, 10.26s/it]

Markdown content saved to cs_ai_markdown\2307.08532v1.md
Process on 1144: Completed


  7%|▋         | 1146/16327 [30:54<56:44:46, 13.46s/it]

Markdown content saved to cs_ai_markdown\2307.08421v2.md
Process on 1145: Completed


  7%|▋         | 1147/16327 [31:02<49:14:58, 11.68s/it]

Markdown content saved to cs_ai_markdown\2307.08411v1.md
Process on 1146: Completed


  7%|▋         | 1148/16327 [31:10<45:04:34, 10.69s/it]

Markdown content saved to cs_ai_markdown\2307.10267v1.md
Process on 1147: Completed


  7%|▋         | 1149/16327 [31:17<40:17:38,  9.56s/it]

Markdown content saved to cs_ai_markdown\2307.08598v1.md
Process on 1148: Completed


  7%|▋         | 1150/16327 [31:28<41:36:02,  9.87s/it]

Markdown content saved to cs_ai_markdown\2307.08171v1.md
Process on 1149: Completed


  7%|▋         | 1151/16327 [31:36<39:10:20,  9.29s/it]

Markdown content saved to cs_ai_markdown\2307.07691v1.md
Process on 1150: Completed


  7%|▋         | 1152/16327 [31:42<36:04:01,  8.56s/it]

Markdown content saved to cs_ai_markdown\2307.10223v2.md
Process on 1151: Completed


  7%|▋         | 1153/16327 [31:48<32:07:29,  7.62s/it]

Markdown content saved to cs_ai_markdown\2307.09166v1.md
Process on 1152: Completed


  7%|▋         | 1154/16327 [31:54<30:02:09,  7.13s/it]

Markdown content saved to cs_ai_markdown\2307.11765v1.md
Process on 1153: Completed


  7%|▋         | 1155/16327 [32:01<30:21:23,  7.20s/it]

Markdown content saved to cs_ai_markdown\2307.07872v1.md
Process on 1154: Completed


  7%|▋         | 1156/16327 [32:24<49:43:41, 11.80s/it]

Markdown content saved to cs_ai_markdown\2307.07734v1.md
Process on 1155: Completed


  7%|▋         | 1157/16327 [32:39<53:29:16, 12.69s/it]

Markdown content saved to cs_ai_markdown\2307.07871v1.md
Process on 1156: Completed


  7%|▋         | 1158/16327 [32:57<60:52:52, 14.45s/it]

Markdown content saved to cs_ai_markdown\2307.07717v1.md
Process on 1157: Completed


  7%|▋         | 1159/16327 [33:09<57:29:13, 13.64s/it]

Markdown content saved to cs_ai_markdown\2307.10225v1.md
Process on 1158: Completed


  7%|▋         | 1160/16327 [33:44<85:01:37, 20.18s/it]

Markdown content saved to cs_ai_markdown\2307.07700v1.md
Process on 1159: Completed


  7%|▋         | 1161/16327 [34:10<91:37:09, 21.75s/it]

Markdown content saved to cs_ai_markdown\2307.07146v1.md
Process on 1160: Completed


  7%|▋         | 1162/16327 [34:30<89:56:05, 21.35s/it]

Markdown content saved to cs_ai_markdown\2307.08494v1.md
Process on 1161: Completed


  7%|▋         | 1163/16327 [34:35<69:35:02, 16.52s/it]

Markdown content saved to cs_ai_markdown\2307.11761v1.md
Process on 1162: Completed


  7%|▋         | 1164/16327 [34:44<59:43:16, 14.18s/it]

Markdown content saved to cs_ai_markdown\2307.07286v1.md
Process on 1163: Completed


  7%|▋         | 1165/16327 [35:37<108:42:17, 25.81s/it]

Markdown content saved to cs_ai_markdown\2307.11760v5.md
Process on 1164: Completed


  7%|▋         | 1166/16327 [35:43<83:58:11, 19.94s/it] 

Markdown content saved to cs_ai_markdown\2307.06932v2.md
Process on 1165: Completed


  7%|▋         | 1167/16327 [35:52<69:57:29, 16.61s/it]

Markdown content saved to cs_ai_markdown\2307.06822v3.md
Process on 1166: Completed


  7%|▋         | 1168/16327 [35:59<57:21:21, 13.62s/it]

Markdown content saved to cs_ai_markdown\2307.06616v1.md
Process on 1167: Completed


  7%|▋         | 1169/16327 [36:07<50:02:48, 11.89s/it]

Markdown content saved to cs_ai_markdown\2307.06963v1.md
Process on 1168: Completed


  7%|▋         | 1170/16327 [36:15<45:28:20, 10.80s/it]

Markdown content saved to cs_ai_markdown\2307.13699v1.md
Process on 1169: Completed


  7%|▋         | 1171/16327 [36:26<45:24:36, 10.79s/it]

Markdown content saved to cs_ai_markdown\2307.06501v2.md
Process on 1170: Completed


  7%|▋         | 1172/16327 [36:51<64:16:22, 15.27s/it]

Markdown content saved to cs_ai_markdown\2307.06941v1.md
Process on 1171: Completed


  7%|▋         | 1173/16327 [37:06<62:51:05, 14.93s/it]

Markdown content saved to cs_ai_markdown\2307.06682v1.md
Process on 1172: Completed


  7%|▋         | 1174/16327 [37:13<53:13:40, 12.65s/it]

Markdown content saved to cs_ai_markdown\2307.07066v1.md
Process on 1173: Completed


  7%|▋         | 1175/16327 [37:22<48:17:11, 11.47s/it]

Markdown content saved to cs_ai_markdown\2307.07059v1.md
Process on 1174: Completed


  7%|▋         | 1176/16327 [37:48<67:37:34, 16.07s/it]

Markdown content saved to cs_ai_markdown\2307.06701v1.md
Process on 1175: Completed


  7%|▋         | 1177/16327 [37:57<57:59:53, 13.78s/it]

Markdown content saved to cs_ai_markdown\2307.06794v1.md
Process on 1176: Completed


  7%|▋         | 1178/16327 [38:20<70:28:50, 16.75s/it]

Markdown content saved to cs_ai_markdown\2307.06687v2.md
Process on 1177: Completed


  7%|▋         | 1179/16327 [38:34<66:49:28, 15.88s/it]

Markdown content saved to cs_ai_markdown\2307.06521v1.md
Process on 1178: Completed


  7%|▋         | 1180/16327 [38:41<55:39:19, 13.23s/it]

Markdown content saved to cs_ai_markdown\2307.06148v2.md
Process on 1179: Completed


  7%|▋         | 1181/16327 [39:12<77:23:36, 18.40s/it]

Markdown content saved to cs_ai_markdown\2307.06159v1.md
Process on 1180: Completed


  7%|▋         | 1182/16327 [39:20<65:04:23, 15.47s/it]

Markdown content saved to cs_ai_markdown\2307.06162v1.md
Process on 1181: Completed


  7%|▋         | 1183/16327 [39:37<66:10:13, 15.73s/it]

Markdown content saved to cs_ai_markdown\2308.01919v1.md
Process on 1182: Completed


  7%|▋         | 1184/16327 [39:53<66:25:19, 15.79s/it]

Markdown content saved to cs_ai_markdown\2307.06082v1.md
Process on 1183: Completed


  7%|▋         | 1185/16327 [40:01<57:09:31, 13.59s/it]

Markdown content saved to cs_ai_markdown\2307.06177v1.md
Process on 1184: Completed


  7%|▋         | 1186/16327 [40:11<52:43:53, 12.54s/it]

Markdown content saved to cs_ai_markdown\2307.05782v2.md
Process on 1185: Completed


  7%|▋         | 1187/16327 [40:22<50:20:41, 11.97s/it]

Markdown content saved to cs_ai_markdown\2307.10198v1.md
Process on 1186: Completed


  7%|▋         | 1188/16327 [40:47<66:27:17, 15.80s/it]

Markdown content saved to cs_ai_markdown\2307.05832v2.md
Process on 1187: Completed


  7%|▋         | 1189/16327 [40:53<54:34:12, 12.98s/it]

Markdown content saved to cs_ai_markdown\2307.05156v1.md
Process on 1188: Completed


  7%|▋         | 1190/16327 [41:02<50:02:48, 11.90s/it]

Markdown content saved to cs_ai_markdown\2307.05104v1.md
Process on 1189: Completed


  7%|▋         | 1191/16327 [41:11<46:16:33, 11.01s/it]

Markdown content saved to cs_ai_markdown\2307.04986v1.md
Process on 1190: Completed


  7%|▋         | 1192/16327 [41:44<74:09:58, 17.64s/it]

Markdown content saved to cs_ai_markdown\2307.04941v1.md
Process on 1191: Completed


  7%|▋         | 1193/16327 [42:13<87:20:54, 20.78s/it]

Markdown content saved to cs_ai_markdown\2307.05194v1.md
Process on 1192: Completed


  7%|▋         | 1194/16327 [42:19<69:41:35, 16.58s/it]

Markdown content saved to cs_ai_markdown\2307.05150v1.md
Process on 1193: Completed


  7%|▋         | 1195/16327 [42:28<59:16:44, 14.10s/it]

Markdown content saved to cs_ai_markdown\2307.05066v1.md
Process on 1194: Completed


  7%|▋         | 1196/16327 [42:40<56:54:27, 13.54s/it]

Markdown content saved to cs_ai_markdown\2307.05069v1.md
Process on 1195: Completed


  7%|▋         | 1197/16327 [43:06<73:20:00, 17.45s/it]

Markdown content saved to cs_ai_markdown\2307.05396v1.md
Process on 1196: Completed


  7%|▋         | 1198/16327 [43:36<88:08:56, 20.98s/it]

Markdown content saved to cs_ai_markdown\2307.05071v1.md
Process on 1197: Completed


  7%|▋         | 1199/16327 [44:05<98:13:28, 23.37s/it]

Markdown content saved to cs_ai_markdown\2307.05631v1.md
Process on 1198: Completed


  7%|▋         | 1200/16327 [44:13<79:31:32, 18.93s/it]

Markdown content saved to cs_ai_markdown\2307.07524v1.md
Process on 1199: Completed


  7%|▋         | 1201/16327 [44:51<102:53:24, 24.49s/it]

Markdown content saved to cs_ai_markdown\2307.10993v1.md
Process on 1200: Completed


  7%|▋         | 1202/16327 [44:58<80:45:12, 19.22s/it] 

Markdown content saved to cs_ai_markdown\2307.05068v1.md
Process on 1201: Completed


  7%|▋         | 1203/16327 [45:03<63:35:37, 15.14s/it]

Markdown content saved to cs_ai_markdown\2307.07526v1.md
Process on 1202: Completed


  7%|▋         | 1204/16327 [45:19<64:33:09, 15.37s/it]

Markdown content saved to cs_ai_markdown\2307.05447v1.md
Process on 1203: Completed


  7%|▋         | 1205/16327 [46:02<98:43:48, 23.50s/it]

Markdown content saved to cs_ai_markdown\2307.05300v2.md
Process on 1204: Completed


  7%|▋         | 1206/16327 [46:27<100:45:10, 23.99s/it]

Markdown content saved to cs_ai_markdown\2307.05629v1.md
Process on 1205: Completed


  7%|▋         | 1207/16327 [46:38<84:12:54, 20.05s/it] 

Markdown content saved to cs_ai_markdown\2307.05614v1.md
Process on 1206: Completed


  7%|▋         | 1208/16327 [46:57<83:51:14, 19.97s/it]

Markdown content saved to cs_ai_markdown\2307.06950v1.md
Process on 1207: Completed


  7%|▋         | 1209/16327 [47:12<77:09:29, 18.37s/it]

Markdown content saved to cs_ai_markdown\2307.04608v1.md
Process on 1208: Completed


  7%|▋         | 1210/16327 [47:18<61:42:56, 14.70s/it]

Markdown content saved to cs_ai_markdown\2307.04507v1.md
Process on 1209: Completed


  7%|▋         | 1211/16327 [47:27<53:52:53, 12.83s/it]

Markdown content saved to cs_ai_markdown\2307.04468v1.md
Process on 1210: Completed


  7%|▋         | 1212/16327 [47:33<45:41:22, 10.88s/it]

Markdown content saved to cs_ai_markdown\2307.04292v1.md
Process on 1211: Completed


  7%|▋         | 1213/16327 [47:43<44:33:54, 10.61s/it]

Markdown content saved to cs_ai_markdown\2307.04368v2.md
Process on 1212: Completed


  7%|▋         | 1214/16327 [47:56<47:02:31, 11.21s/it]

Markdown content saved to cs_ai_markdown\2307.04607v1.md
Process on 1213: Completed


  7%|▋         | 1215/16327 [48:03<42:41:35, 10.17s/it]

Markdown content saved to cs_ai_markdown\2307.04893v2.md
Process on 1214: Completed


  7%|▋         | 1216/16327 [48:16<45:46:17, 10.90s/it]

Markdown content saved to cs_ai_markdown\2307.04533v1.md
Process on 1215: Completed


  7%|▋         | 1217/16327 [48:28<47:22:26, 11.29s/it]

Markdown content saved to cs_ai_markdown\2307.04429v1.md
Process on 1216: Completed


  7%|▋         | 1218/16327 [48:51<62:17:29, 14.84s/it]

Markdown content saved to cs_ai_markdown\2307.04345v2.md
Process on 1217: Completed


  7%|▋         | 1219/16327 [49:03<58:44:55, 14.00s/it]

Markdown content saved to cs_ai_markdown\2307.05582v1.md
Process on 1218: Completed


  7%|▋         | 1220/16327 [49:12<51:58:30, 12.39s/it]

Markdown content saved to cs_ai_markdown\2307.04333v2.md
Process on 1219: Completed


  7%|▋         | 1221/16327 [49:40<71:46:18, 17.10s/it]

Markdown content saved to cs_ai_markdown\2307.05360v2.md
Process on 1220: Completed


  7%|▋         | 1222/16327 [49:49<61:23:53, 14.63s/it]

Markdown content saved to cs_ai_markdown\2307.04701v1.md
Process on 1221: Completed


  7%|▋         | 1223/16327 [50:00<56:36:59, 13.49s/it]

Markdown content saved to cs_ai_markdown\2307.04849v1.md
Process on 1222: Completed


  7%|▋         | 1224/16327 [50:40<90:11:55, 21.50s/it]

Markdown content saved to cs_ai_markdown\2307.04515v1.md
Process on 1223: Completed


  8%|▊         | 1225/16327 [50:48<73:00:34, 17.40s/it]

Markdown content saved to cs_ai_markdown\2307.04599v1.md
Process on 1224: Completed


  8%|▊         | 1226/16327 [51:04<71:11:36, 16.97s/it]

Markdown content saved to cs_ai_markdown\2307.04341v1.md
Process on 1225: Completed


  8%|▊         | 1227/16327 [51:16<65:33:22, 15.63s/it]

Markdown content saved to cs_ai_markdown\2307.04336v1.md
Process on 1226: Completed


  8%|▊         | 1228/16327 [51:25<57:01:30, 13.60s/it]

Markdown content saved to cs_ai_markdown\2307.04137v1.md
Process on 1227: Completed


  8%|▊         | 1229/16327 [51:32<48:51:32, 11.65s/it]

Markdown content saved to cs_ai_markdown\2307.04217v1.md
Process on 1228: Completed


  8%|▊         | 1230/16327 [52:25<100:25:46, 23.95s/it]

Markdown content saved to cs_ai_markdown\2307.04131v1.md
Process on 1229: Completed


  8%|▊         | 1230/16327 [52:29<10:44:13,  2.56s/it] 


PDFSyntaxError: No /Root object! - Is this really a PDF?