Question:

Write a function word_frequency(file_path) that reads a text file, counts the frequency of each word (case-insensitive), and returns a dictionary where keys are words and values are their frequencies. Punctuation should be ignored.

Example: If a text file named my_text.txt contains "Hello world! This is a test. Hello again.", the function call word_frequency('my_text.txt') should return {'hello': 2, 'world': 1, 'this': 1, 'is': 1, 'a': 1, 'test': 1, 'again': 1}.

In [11]:
import string
def word_frequency(file_path, encoding="utf-8"):
    """
    Count how often each word occurs in a text file (case-insensitive).

    The text is lower-cased, every character in ``string.punctuation`` is
    deleted, and the result is split on whitespace. Punctuation is deleted
    rather than replaced by a space, so "don't" is counted as "dont".

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A dict mapping each word to its number of occurrences. If the file
        cannot be read, a message is printed and an empty dict is returned.
    """
    # Deletion-only table: the two empty strings mean no character is
    # remapped; the third argument lists the characters to strip.
    translator = str.maketrans("", "", string.punctuation)

    # Only the file read is guarded; the counting below is plain string work.
    try:
        with open(file_path, "r", encoding=encoding) as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found")
        return {}
    except Exception as e:
        # Deliberately best-effort: any other read failure (permissions,
        # decoding, bad encoding name, ...) is reported and yields {}.
        print(f"An error occured, {e}")
        return {}

    word_counts = {}
    for word in text.lower().translate(translator).split():
        word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts

In [12]:
# Demo: count the words in the sample file; the cell output is shown below.
word_frequency("my_text.txt")

{'hello': 2, 'world': 1, 'this': 1, 'is': 1, 'a': 1, 'test': 1, 'again': 1}

# Solution 2

In [4]:
import string
from collections import Counter

def word_frequency_by_counter(file_path, encoding="utf-8"):
    """
    Count how often each word occurs in a text file, using ``Counter``.

    The text is lower-cased, every character in ``string.punctuation`` is
    deleted, and the result is split on whitespace. Punctuation is deleted
    rather than replaced by a space, so "don't" is counted as "dont".

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences. If the file does not exist, a message is printed and an
        empty ``Counter`` is returned, so the return type is the same on
        both paths (an empty ``Counter`` still compares equal to ``{}``).
    """
    # Deletion-only table: the two empty strings mean no character is
    # remapped; the third argument lists the characters to strip.
    translator = str.maketrans("", "", string.punctuation)

    # 1. open and read the file; only a missing file is handled here
    try:
        with open(file_path, "r", encoding=encoding) as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found")
        return Counter()

    # 2. normalise case, strip punctuation, and split into separate words
    words = content.lower().translate(translator).split()

    # 3. let Counter tally the words
    return Counter(words)
            

In [5]:
# Demo: same sample file as before; the Counter result is shown below.
word_frequency_by_counter("my_text.txt")

Counter({'hello': 2,
         'world': 1,
         'this': 1,
         'is': 1,
         'a': 1,
         'test': 1,
         'again': 1})