In [38]:
import re
from datetime import datetime
import os

In [39]:
def process(row : str, products : dict[str, int], ratings : dict[str, int]) -> None :
    """
    Validate a single review row and update product counts and ratings.

    The quoted review text is removed first, and the remainder is split on
    whitespace into exactly four fields: a 6-character alphanumeric ID, a
    10-character alphanumeric product ID, a date in YYYY-MM-DD form and an
    integer rating between 1 and 5. The dictionaries are only modified once
    every field has passed validation.

    Args:
        row (str): A single line of review data.
        products (dict[str, int]): Dictionary mapping product IDs to review counts.
        ratings (dict[str, int]): Dictionary mapping product IDs to total ratings.

    Raises:
        ValueError: If the row has too many or too few fields, or if any
            field fails validation.
    """

    # Drop the quoted review text so spaces inside it do not create extra tokens.
    fields = re.sub(r'"[^"]*"', "", row).split()

    if len(fields) > 4 :
        raise ValueError("Extra Values")
    if len(fields) < 4 :
        raise ValueError("Missing Values")

    # NOTE(review): the first field is presumably a customer/reviewer ID - confirm.
    reviewer_id, product_id, date_text, rating_text = fields

    # Both the length AND the character class must match for an ID to be valid.
    if len(reviewer_id) != 6 or not reviewer_id.isalnum() :
        raise ValueError("Invalid reviewer ID")
    if len(product_id) != 10 or not product_id.isalnum() :
        raise ValueError("Invalid product ID")

    # strptime raises ValueError itself when the date does not match the format.
    datetime.strptime(date_text, "%Y-%m-%d")

    # int() raises ValueError for non-numeric ratings.
    rating = int(rating_text)
    if not 1 <= rating <= 5 :
        raise ValueError("Rating out of range")

    products[product_id] = products.get(product_id, 0) + 1
    ratings[product_id] = ratings.get(product_id, 0) + rating

In [40]:
def read(file : str, products : dict[str, int], ratings : dict[str, int]) -> tuple[int, int] :
    """
    Read a file and process each review row.

    Every line of the file is passed to process(); a line counts as valid
    when process() returns normally and as invalid when it raises. If the
    file cannot be opened or read, the error is printed and (0, 0) is
    returned without touching the dictionaries.

    Args:
        file (str): Path to the file. Backslashes are converted to forward
            slashes before opening.
        products (dict[str, int]): Dictionary storing product counts.
        ratings (dict[str, int]): Dictionary storing total ratings.

    Returns:
        tuple[int, int]: (valid_count, invalid_count)
    """

    # Keep the try body limited to opening and reading, so the message below
    # only ever reports genuine file-access problems.
    try :
        with open(file.replace("\\", "/"), "r") as f :
            rows = f.readlines()
    except Exception as e :
        print(f"Cannot open file {file}")
        print(f"Error : {e}")
        return (0, 0)

    valid = 0
    invalid = 0
    for row in rows :
        try :
            process(row, products, ratings)
        except Exception :
            # Any failure while processing marks the row as invalid.
            invalid += 1
        else :
            valid += 1
    return (valid, invalid)

In [41]:
def start(dir : str, products : dict[str, int], ratings : dict[str, int]) -> tuple[int, int] :
    """
    Walk a directory tree and process every file found in it.

    Regular files are handed to read(); sub-directories are walked by
    calling this function again. Entries that are neither are skipped.

    Args:
        dir (str): Directory path to walk.
        products (dict[str, int]): Dictionary for product counts, updated in place.
        ratings (dict[str, int]): Dictionary for total ratings, updated in place.

    Returns:
        tuple[int, int]: (total_valid, total_invalid) summed over the whole tree.
    """

    total_valid = 0
    total_invalid = 0
    for entry in os.listdir(dir) :
        path = os.path.join(dir, entry)

        # Pick the handler for this entry: files are read, directories recursed into.
        if os.path.isfile(path) :
            handler = read
        elif os.path.isdir(path) :
            handler = start
        else :
            continue

        ok, bad = handler(path, products, ratings)
        total_valid += ok
        total_invalid += bad
    return (total_valid, total_invalid)

In [42]:
def main() :
    """
    Run the full review-processing pipeline.

    - Walks the 'files' directory via start() to collect review counts
      and rating totals per product.
    - Derives the average rating for each product.
    - Picks the three products with the highest average.
    - Writes the totals and the top three to 'summary.txt'.
    """

    root_dir = "files"

    review_counts = {} # product ID -> number of reviews
    rating_totals = {} # product ID -> sum of ratings

    valid, invalid = start(root_dir, review_counts, rating_totals)

    # Average rating per product
    average = {}
    for product, count in review_counts.items() :
        average[product] = rating_totals[product] / count

    # Highest averages first; the sort is stable, so ties keep insertion order
    ranked = sorted(average.items(), key = lambda item : item[1], reverse = True)

    # Build the whole report, then write it in one go
    report = [
        f"1) The total number of reviews processed -> {valid + invalid}\n",
        f"2) The total number of valid reviews -> {valid}\n",
        f"3) The total number of invalid reviews -> {invalid}\n",
        "4) Top 3 products with the highest average ratings\n",
    ]
    for position, (product, score) in enumerate(ranked[:3], start = 1) :
        report.append(f"\t{position}) {product} -> {score}\n")

    with open("summary.txt", "w+") as f :
        f.writelines(report)

In [43]:
# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__" :
    main()

 ## OUTPUT

```text
1) The total number of reviews processed -> 100
2) The total number of valid reviews -> 94
3) The total number of invalid reviews -> 6
4) Top 3 products with the highest average ratings
	1) NSVSSEHGIJ -> 4.0
	2) GQVGTRCM4M -> 4.0
	3) X71RH0U2ZS -> 3.625
```

 ## CONCLUSION

The Python script recursively reads every review file under the `files/` directory, removes the quoted review text from each row with a regular expression, splits the remaining fields on whitespace, and validates them to separate valid from invalid reviews. It then computes per-product average ratings, identifies the top three products, and writes a clear, auditable summary to `summary.txt`. Rows that fail validation are counted as invalid instead of stopping the run, and files that cannot be read are reported and skipped. Each row is handled in a single pass, so the approach extends to larger datasets, turning unstructured feedback into actionable product insights.