In [1]:
import pandas as pd
import math
from collections import Counter

In [2]:
def compute_tf(document):
    """Return the term frequency of every distinct token in ``document``.

    Args:
        document: Sized iterable of hashable tokens (e.g. a list of words).

    Returns:
        dict mapping each distinct token to its number of occurrences divided
        by the total number of tokens in ``document``. An empty document
        yields an empty dict.
    """
    frequencies = {}
    for token, occurrences in Counter(document).items():
        # Relative frequency: share of the document made up of this token.
        frequencies[token] = occurrences / len(document)
    return frequencies

def compute_idf(documents):
    """Compute the inverse document frequency of every token in a corpus.

    The IDF of a token is ``log(N / df)``, where ``N`` is the number of
    documents and ``df`` is the number of documents containing the token at
    least once. A token present in every document therefore scores 0.0.

    Args:
        documents: Sized collection of documents, each an iterable of
            hashable tokens.

    Returns:
        dict mapping each token to its IDF. Keys appear in the order the
        tokens are first encountered in the corpus, so the result (and any
        DataFrame built from it) is reproducible across kernel restarts.
        An empty corpus yields an empty dict.
    """
    corpus_size = len(documents)

    # Document frequency: in how many documents does each token occur?
    doc_freq = Counter()
    for doc in documents:
        # dict.fromkeys de-duplicates the tokens of one document while
        # keeping their first-seen order; iterating a set here would make
        # the key order depend on string-hash randomisation.
        for word in dict.fromkeys(doc):
            doc_freq[word] += 1

    return {word: math.log(corpus_size / count) for word, count in doc_freq.items()}

def compute_tfidf(document, idf):
    """Weight each token's term frequency in ``document`` by its IDF.

    Args:
        document: Sized iterable of hashable tokens; passed to ``compute_tf``.
        idf: Mapping from token to inverse document frequency.

    Returns:
        dict mapping each distinct token of ``document`` to ``tf * idf``.

    Raises:
        KeyError: If ``document`` contains a token that is not a key of
            ``idf``.
    """
    term_freqs = compute_tf(document)
    return {token: weight * idf[token] for token, weight in term_freqs.items()}

In [3]:
# Toy corpus: two sentences that are identical except for the token after
# "lazy" ("dog" in the first, "fox" in the second).
data = [
    "A quick brown fox jumps over the lazy dog What a fox",
    "A quick brown fox jumps over the lazy fox What a fox"
]

In [4]:
# Tokenise each sentence on whitespace. No case folding is applied, so
# "A" and "a" stay distinct tokens.
documents = [sentence.split() for sentence in data]

In [5]:
# Term frequencies, one dict per tokenised document
tf_data = list(map(compute_tf, documents))

In [6]:
# Tabulate TF: one row per document, one column per token.
# A token absent from a document has no entry in its dict; show it as 0.
tf_df = pd.DataFrame(data=tf_data).fillna(value=0)
print("TF Scores:", tf_df, sep="\n")

TF Scores:
          A     quick     brown       fox     jumps      over       the  \
0  0.083333  0.083333  0.083333  0.166667  0.083333  0.083333  0.083333   
1  0.083333  0.083333  0.083333  0.250000  0.083333  0.083333  0.083333   

       lazy       dog      What         a  
0  0.083333  0.083333  0.083333  0.083333  
1  0.083333  0.000000  0.083333  0.083333  


In [7]:
# Corpus-wide IDF, shown as a single-row table (one column per token)
idf = compute_idf(documents)
idf_df = pd.DataFrame(data=[idf]).fillna(value=0)
print("\nIDF Scores:", idf_df, sep="\n")


IDF Scores:
   quick    A       dog    a  lazy  jumps  What  the  fox  brown  over
0    0.0  0.0  0.693147  0.0   0.0    0.0   0.0  0.0  0.0    0.0   0.0


In [8]:
# TF-IDF weights, one dict per tokenised document, using the shared IDF table
tfidf_data = [compute_tfidf(tokens, idf) for tokens in documents]

In [9]:
# Tabulate TF-IDF: one row per document, one column per token.
# A token absent from a document has no entry in its dict; show it as 0.
tfidf_df = pd.DataFrame(data=tfidf_data).fillna(value=0)
print("\nTF-IDF Scores:", tfidf_df, sep="\n")


TF-IDF Scores:
     A  quick  brown  fox  jumps  over  the  lazy       dog  What    a
0  0.0    0.0    0.0  0.0    0.0   0.0  0.0   0.0  0.057762   0.0  0.0
1  0.0    0.0    0.0  0.0    0.0   0.0  0.0   0.0  0.000000   0.0  0.0
