In [8]:
# Summary: Text Extraction from PDF files (Resume Example)

import re
from pdfminer.high_level import extract_text
import os

# Path to the provided PDF file 
pdf_path = r"C:\Documents\AkcoraCV.pdf"

# Check if the file exists
if not os.path.exists(pdf_path):
    print(f"Error: File {pdf_path} does not exist.")
    exit()

# Extract text from the PDF
print("Starting text extraction...")
all_text = extract_text(pdf_path)
print(f"Text extraction complete. First 500 characters: {all_text[:500]}")

# Define regex pattern to locate "Peer Reviewed Conference Papers" section
conference_pattern = r"Peer Reviewed Conference Papers(.*?)Journal Publications"
match = re.search(conference_pattern, all_text, re.DOTALL)

if match:
    conference_text = match.group(1).strip()
    print("Conference Publications section found.")
else:
    print("Conference Publications section not found.")
    exit()

# Split the text by common paper separators (cid:5 or bullet points)
papers = re.split(r'(?:\(cid:5\)|cid:5|•|\n\*)', conference_text)

# Process each paper to extract only the title
titles = []
for paper in papers:
    if not paper.strip():
        continue
        
    # Clean up the paper text - remove line breaks that might split titles
    paper = re.sub(r'\n(?=[a-z])', ' ', paper.strip())
    
    # Extract the title (everything up to the first author or year indicator)
    # Titles typically end when we see patterns like initials or years
    title_match = re.match(r'([^,]+?)(?=\s+[A-Z]\.\s*[A-Z]\.|\s+[A-Z]\.|\s+\d{4}|\s+\(\d{4}\))', paper)
    
    if title_match:
        title = title_match.group(1).strip()
        # Remove any trailing periods that aren't part of abbreviations
        title = re.sub(r'\.$', '', title)
        titles.append(title)

# Print titles in the required format
for title in titles:
    if title:  # Skip empty titles
        print(f"* {title}")

Starting text extraction...
Text extraction complete. First 500 characters: Cuneyt Gurcan Akcora
University of Central Florida

BA1-419, 12744 Pegasus Drive, Orlando, Florida, 32816
Web: cakcora.github.io

E-mail: cuneyt.akcora@ucf.edu

Research Interests

• Explainable artiﬁcial intelligence

• Data Science on complex networks, large scale graph analysis

• Nonparametric statistics, bootstrap on graphs

• Deep learning and graph mining on Blockchain networks

• Machine learning for privacy and security research on online social networks

• Topological data analysis

Wo
Conference Publications section found.
* GOttack: Universal Adversarial Attacks on Graph Neural Networks via Graph Orbits

Learning
* Chainlet Orbits: Topological Address Embedding for the Bitcoin Blockchain
* On the Impact of the Lightning Network on Bitcoin Transaction Fees and Network Value
* GraphPulse: Topological representations for temporal graph property prediction
* Smart Vectorizations for Single and Multipara