To compare the **execution timing and performance** of **NLTK and spaCy,** you can use the *built-in* **time**  module in Python.

In [1]:
import time
import nltk
import spacy

# NLTK example
nltk_start = time.time()
nltk.download('punkt')
nltk_end = time.time()
nltk_time = nltk_end - nltk_start
print(f"{nltk_time:.6f}")

# spaCy example
spacy_start = time.time()
spacy.load('en_core_web_sm')
spacy_end = time.time()
spacy_time = spacy_end - spacy_start
print(f"{spacy_time:.6f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


0.155001
2.093861


In [2]:
# NLTK Example
nltk_start = time.time()
sentence = "The quick brown fox jumps over the lazy dog."
tokens = nltk.word_tokenize(sentence)
nltk_end = time.time()
nltk_time = nltk_end - nltk_start
print(f"NLTK Tokenization Time : {nltk_time:.6f} seconds.")

# spaCy Example
spacy_start = time.time()
spacy_nlp = spacy.load('en_core_web_sm')
doc = spacy_nlp(sentence)
spacy_end = time.time()
spacy_time = spacy_end - spacy_start
print(f"spaCy Tokenization Time : {nltk_time:.6f} seconds.")

# Compare the execution times
print(f"\nNLTK is {nltk_time/spacy_time:.6f} times slower than spaCy.")

NLTK Tokenization Time : 0.020876 seconds.
spaCy Tokenization Time : 0.020876 seconds.

NLTK is 0.024739 times slower than spaCy.


To compare the timing of execution for performance metrics between **NLTK** and **spaCy,** you can use Python's **`time`** module or **`timeit`** module.
<br>Below are examples for both methods: using time for a simple timing comparison and timeit for more precise measurements.

# Method 1: Using `time` Module
The **`time`** module provides a simple way to measure the elapsed time for small code snippets.

## Tokenization

In [3]:
import time
import nltk
from nltk.tokenize import word_tokenize
import spacy

# Text for processing
text = "SpaCy is an amazing NLP library."

# NLTK Timing
start_time = time.time()
nltk.download('punkt')
nltk_tokens = word_tokenize(text)
nltk_time = time.time() - start_time
print(f"NLTK Tokenization Time: {nltk_time:.6f} seconds")

# spaCy Timing
nlp = spacy.load("en_core_web_sm")
start_time = time.time()
doc = nlp(text)
spacy_tokens = [token.text for token in doc]
spacy_time = time.time() - start_time
print(f"spaCy Tokenization Time: {spacy_time:.6f} seconds")

# Compare the execution times
print(f"\nNLTK is {nltk_time/spacy_time:.6f} times slower than spaCy.")

NLTK Tokenization Time: 0.000662 seconds


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


spaCy Tokenization Time: 0.008684 seconds

NLTK is 0.076245 times slower than spaCy.


# Method 2: Using timeit Module
The **`timeit`** module is more accurate for measuring the execution time of small code snippets.

## Tokenization

In [4]:
import timeit
import nltk
from nltk.tokenize import word_tokenize
import spacy

# Text for processing
text = "SpaCy is an amazing NLP library."

# NLTK Timing
nltk_setup = """
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')
text = "SpaCy is an amazing NLP library."
"""
nltk_code = "word_tokenize(text)"
nltk_time = timeit.timeit(stmt=nltk_code, setup=nltk_setup, number=1000)
print(f"NLTK Tokenization Time (1000 runs): {nltk_time:.6f} seconds")

# spaCy Timing
spacy_setup = """
import spacy
nlp = spacy.load("en_core_web_sm")
text = "SpaCy is an amazing NLP library."
"""
spacy_code = "nlp(text)"
spacy_time = timeit.timeit(stmt=spacy_code, setup=spacy_setup, number=1000)
print(f"spaCy Tokenization Time (1000 runs): {spacy_time:.6f} seconds")

# Compare the execution times
print(f"\nNLTK is {nltk_time/spacy_time:.6f} times slower than spaCy.")

NLTK Tokenization Time (1000 runs): 0.101097 seconds


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


spaCy Tokenization Time (1000 runs): 8.741541 seconds

NLTK is 0.011565 times slower than spaCy.


## Part-Of_Speech Tagging

In [5]:
import timeit
import nltk
from nltk.tokenize import word_tokenize
import spacy

# Text for processing
text = "SpaCy is an amazing NLP library."

# NLTK Timing
nltk_setup = """
import nltk
from nltk.tokenize import word_tokenize
nltk.download('averaged_perceptron_tagger')
text = "SpaCy is an amazing NLP library."
tokens = word_tokenize(text)
"""
nltk_code = "nltk.pos_tag(tokens)"
nltk_time = timeit.timeit(stmt=nltk_code, setup=nltk_setup, number=1000)
print(f"NLTK POS Tagging Time (1000 runs): {nltk_time:.6f} seconds")

# spaCy Timing
spacy_setup = """
import spacy
nlp = spacy.load("en_core_web_sm")
text = "SpaCy is an amazing NLP library."
"""
spacy_code = """
doc = nlp(text)
[(token.text, token.pos_) for token in doc]
"""
spacy_time = timeit.timeit(stmt=spacy_code, setup=spacy_setup, number=1000)
print(f"spaCy POS Tagging Time (1000 runs): {spacy_time:.6f} seconds")

# Compare the execution times
print(f"\nNLTK is {nltk_time/spacy_time:.6f} times slower than spaCy.")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


NLTK POS Tagging Time (1000 runs): 0.617091 seconds
spaCy POS Tagging Time (1000 runs): 7.435949 seconds

NLTK is 0.082988 times slower than spaCy.
