
## Question 1 — Python Basics (Word Frequency)

**Task**  
Write a Python function to count the frequency of each word in a given text.

**Requirements**  
1. Input is an English text string.  
2. Normalize the text (ignore case, strip punctuation; whether to keep contractions like `don't` is up to you, but be consistent).  
3. Return a mapping of `word -> count`.  
4. Provide the result sorted by frequency (descending), then alphabetically for ties.


In [17]:

# Part 1 — Code Skeleton
from typing import Dict, Iterable, Tuple
import re
from collections import Counter

def word_frequency(text: str) -> Dict[str, int]:
    """Return a frequency dictionary mapping word -> count.

    Normalization rules (applied consistently to the whole text):
      * case is ignored: the text is lower-cased before counting;
      * punctuation is stripped, so "dog." and "dog" are the same word;
      * contractions are kept whole ("wasn't" counts as one word); the
        typographic apostrophe (U+2019) is treated like the ASCII one;
      * quotes around a word are dropped ("'hello'" counts as "hello").

    Args:
        text: English text to analyse.

    Returns:
        A dict mapping each normalized word to the number of times it
        occurs. The dict is empty when the text contains no words.
    """
    # Fold the curly apostrophe first so both spellings of a contraction
    # end up under the same key.
    normalized = text.lower().replace("\u2019", "'")
    # A word is a run of letters/digits, optionally followed by
    # apostrophe-joined runs; a leading or trailing apostrophe never matches.
    tokens = re.findall(r"[a-z0-9]+(?:'[a-z0-9]+)*", normalized)
    return dict(Counter(tokens))


def sorted_frequencies(freqs: Dict[str, int]) -> Iterable[Tuple[str, int]]:
    """Return (word, count) pairs sorted by count desc then word asc.

    Args:
        freqs: Mapping of word -> count.

    Returns:
        A new list of (word, count) tuples. The highest count comes first;
        words with equal counts are ordered alphabetically. The input
        mapping is not modified.
    """
    # Negating the count gives descending frequency while the word itself
    # stays in ascending order as the tie-breaker, all in one sort pass.
    return sorted(freqs.items(), key=lambda pair: (-pair[1], pair[0]))


In [None]:

# Part 1 — Sample Data & Validation
# Sample input for the Part 1 checks: "The"/"the" occurs in both
# capitalizations, several words are directly followed by punctuation, and
# "wasn't" is a contraction that appears twice.
sample_text = """The quick brown fox jumps over the lazy dog.
The dog wasn't amused; the fox wasn't either!"""

def _run_part1_checks():
    """Smoke-test word_frequency and sorted_frequencies against sample_text.

    Exceptions raised by either function, and return values of the wrong
    shape, are reported with print and end the check early. The final
    sanity checks on the counts use assert.
    """
    try:
        counts = word_frequency(sample_text)
    except Exception as exc:
        print("❌ word_frequency raised an exception:", repr(exc))
        return

    # Anything other than a non-empty dict is reported as unimplemented.
    if not (isinstance(counts, dict) and counts):
        print("⚠️ word_frequency seems unimplemented or returned an empty/invalid result.")
        print("Returned:", counts)
        return

    try:
        ranking = list(sorted_frequencies(counts))
    except Exception as exc:
        print("❌ sorted_frequencies raised an exception:", repr(exc))
        return

    # Only the first element is inspected to decide whether pairs came back.
    looks_like_pairs = bool(ranking) and isinstance(ranking[0], tuple)
    if not looks_like_pairs:
        print("⚠️ sorted_frequencies returned an unexpected value:", ranking[:5])
        return

    print("Top 10:", ranking[:10])

    # Light sanity checks
    top_count = ranking[0][1]
    assert top_count >= 2, "Expected highest frequency >= 2 for sample text"

    by_word = dict(ranking)
    minimum_counts = (
        ('fox', 1, "Expected 'fox' to appear"),
        ('the', 3, "Expected 'the' to appear >= 3 times (case-insensitive)"),
    )
    for word, minimum, message in minimum_counts:
        assert word in by_word and by_word[word] >= minimum, message

    print("✅ Part 1 validation passed.")

# Run the Part 1 checks as soon as this cell executes.
_run_part1_checks()



## Question 2 — Pandas Aggregation (from CSV)

**Task**  
Using Pandas, read the provided CSV file **`products.csv`** and compute per-product rating statistics.

**Input**  
`products.csv` (placed in a `data/` folder next to this notebook, i.e. `./data/products.csv`) with columns:  
- `product` — product name (string)  
- `rating` — numeric rating (float)

**Requirements**  
1. Read `products.csv` into a DataFrame.  
2. Group by `product` and compute:  
   - average rating (`Average_Rating`)  
   - number of ratings (`Num_Ratings`)  
3. Return a DataFrame with columns `['product', 'Average_Rating', 'Num_Ratings']` sorted by:  
   - `Average_Rating` descending, then  
   - `Num_Ratings` descending, then  
   - `product` ascending.  
4. Do not format numbers as strings in the returned DataFrame.


In [16]:

# Part 2 — Code Skeleton
import pandas as pd

def load_products_csv(path: str = "./data/products.csv") -> pd.DataFrame:
    """Read the products CSV at *path* into a DataFrame.

    Args:
        path: Location of the CSV file; defaults to "./data/products.csv".

    Returns:
        The DataFrame produced by pandas.read_csv with default options.
    """
    products = pd.read_csv(path)
    return products


def aggregate_product_ratings(df: pd.DataFrame) -> pd.DataFrame:
    """Return a DataFrame with columns ['product', 'Average_Rating', 'Num_Ratings']
    aggregated and sorted per the requirements.

    Args:
        df: Frame with a 'product' column and a numeric 'rating' column.

    Returns:
        A new DataFrame with one row per product and a fresh 0..n-1 index:
          * 'Average_Rating' is the mean rating (kept numeric, not formatted);
          * 'Num_Ratings' is the number of non-missing ratings.
        Rows are ordered by 'Average_Rating' descending, then 'Num_Ratings'
        descending, then 'product' ascending. The input frame is not modified.

    Raises:
        KeyError: if 'product' or 'rating' is missing from *df*.
    """
    # as_index=False keeps 'product' as a regular column in the result.
    summary = df.groupby("product", as_index=False).agg(
        Average_Rating=("rating", "mean"),
        Num_Ratings=("rating", "count"),
    )
    summary = summary.sort_values(
        by=["Average_Rating", "Num_Ratings", "product"],
        ascending=[False, False, True],
    ).reset_index(drop=True)
    # Select explicitly so the column order is part of the contract.
    return summary[["product", "Average_Rating", "Num_Ratings"]]

In [None]:

# Part 2 — Validation
import os

def _find_csv():
    """Return the first existing location of products.csv, or None.

    Candidates are tried in this order:
      1. './data/products.csv'    (preferred local layout);
      2. './products.csv'         (file placed next to the notebook);
      3. '/mnt/data/products.csv' (e.g. a hosted environment).

    Returns:
        The matching candidate path as a string, or None when none of the
        candidates is an existing file.
    """
    candidates = (
        "./data/products.csv",
        "./products.csv",
        "/mnt/data/products.csv",
    )
    # isfile (rather than exists) so a directory with that name is not
    # handed to the CSV reader.
    return next((path for path in candidates if os.path.isfile(path)), None)

# Part 2 driver: locate the CSV, load it, run the aggregation and report on
# the shape of the result. Every failure is reported with print so that the
# cell never aborts.
csv_path = _find_csv()
if csv_path is None:
    # Name a location that the search actually checks.
    print("❌ Could not find 'products.csv'. Place it at './data/products.csv' and rerun.")
else:
    try:
        products_df = load_products_csv(csv_path)
        print("Loaded:", csv_path, "shape=", products_df.shape)
    except Exception as e:
        print("❌ Failed to load CSV:", repr(e))
        products_df = None

    if products_df is not None:
        expected_cols = {"product", "Average_Rating", "Num_Ratings"}
        try:
            out = aggregate_product_ratings(products_df)
        except Exception as e:
            # The exception is the whole story here; do not additionally
            # claim that the function "returned None".
            print("❌ aggregate_product_ratings raised an exception:", repr(e))
            out = None
        else:
            # Basic contract checks
            if out is None:
                print("⚠️ Function returned None.")
            elif not isinstance(out, pd.DataFrame):
                print("⚠️ Expected a DataFrame, got:", type(out))
            elif not expected_cols.issubset(out.columns):
                print("⚠️ Missing expected columns. Got:", list(out.columns))
            else:
                # Light sanity prints
                print(out.head(10))
                # If the dataset matches the provided sample (6 products, 10 ratings each),
                # some light sanity checks can be useful. A failure is only
                # reported, never raised, because other datasets may be smaller.
                try:
                    assert out['Num_Ratings'].max() >= 10, "Expected at least 10 ratings for some product"
                except Exception as e:
                    print("ℹ️ Skipping strict assertions:", e)
                print("✅ Part 2 validation executed.")
