In [1]:
import re
import time
import nltk
import spacy
from nltk.tokenize import word_tokenize

nltk.download('punkt')
nlp = spacy.load("en_core_web_sm")
sentence = "The quick brown fox jumps over the lazy dog. Python programming is both fun and educational."

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/amenhasfaw/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
def manual_tokenize(sentence):
    return re.findall(r"\b\w+\b", sentence)

def nltk_tokenize(sentence):
    return word_tokenize(sentence)

def spacy_tokenize(sentence):
    doc = nlp(sentence)
    return [token.text for token in doc]

def measure_time(func, sentence):
    start_time = time.time()
    func(sentence)
    end_time = time.time()
    return end_time - start_time

In [3]:
time_manual = measure_time(manual_tokenize, sentence)
time_nltk = measure_time(nltk_tokenize, sentence)
time_spacy = measure_time(spacy_tokenize, sentence)

print(f"Manual Tokenization Time: {time_manual:.6f} seconds")
print(f"NLTK Tokenization Time: {time_nltk:.6f} seconds")
print(f"SpaCy Tokenization Time: {time_spacy:.6f} seconds")


Manual Tokenization Time: 0.000415 seconds
NLTK Tokenization Time: 0.014466 seconds
SpaCy Tokenization Time: 0.021717 seconds
