# Ed Helper

This notebook is meant to assist with [Between-Class Participation grading](assistant_guide.md#between-class-participation-tracking). To use:

1. Download the Discussion data
   1. [Go to Ed]({{discussions_url}})
   1. Open analytics
   1. Download the Threads JSON
1. [Make a copy of this notebook](../assignments.md#getting-started)
1. [Upload the data](../assignments/open_ended.md#storing-data)
1. Adjust the filename and dates below
1. Run all cells in the notebook
1. Review the student contributions at the bottom


In [None]:
import pandas as pd

# Path to the Threads JSON export that was uploaded alongside this notebook.
FILENAME = "FILEPATH.json"
# Grading window. Both dates are inclusive; month and day numbers are 1-based
# (January is month=1).
START = pd.Timestamp(year=2026, month=1, day=28, tz="US/Eastern")
# A Timestamp built from a date alone falls at 00:00:00, so END is moved to the
# start of the following day and used as an exclusive upper bound.
# DateOffset advances by one calendar day, which stays at local midnight even
# when the last day contains a daylight-saving change; a fixed 24-hour
# Timedelta would land at 01:00 or 23:00 local time in that case.
END = pd.Timestamp(year=2026, month=2, day=3, tz="US/Eastern") + pd.DateOffset(days=1)

## Load data


In [None]:
import json
import pandas as pd

# Read the export inside a context manager so the file handle is closed as soon
# as parsing finishes, and decode it explicitly as UTF-8 instead of relying on
# the platform's default encoding.
with open(FILENAME, encoding="utf-8") as export_file:
    data = json.load(export_file)
# Flatten nested objects into dotted column names (e.g. "user.name").
threads = pd.json_normalize(data)
# threads

In [None]:
# threads.info()

## Include replies

The JSON data includes replies (comments and answers) nested under each post.


In [None]:
def flatten_nested_items(items_df):
    """
    Return the given items together with every reply nested beneath them.

    Replies live in the "comments" and "answers" columns as lists of dicts,
    and each reply may carry further replies of its own. The nesting is
    unrolled one level at a time until no reply has children left.

    Args:
        items_df: DataFrame whose rows may hold nested comments or answers

    Returns:
        DataFrame with the original rows first, followed by the replies of
        each deeper level. The input itself is returned when it is empty or
        has no nested items.
    """
    reply_fields = ("comments", "answers")

    def direct_replies(parents):
        # One normalized frame per reply field that actually has entries.
        frames = []
        for name in reply_fields:
            if name not in parents.columns:
                continue
            children = pd.json_normalize(parents[name].explode().dropna())
            if not children.empty:
                frames.append(children)
        return frames

    if items_df.empty:
        return items_df

    # levels[0] is the input; levels[k] holds the replies found k levels down.
    levels = [items_df]
    while True:
        found = direct_replies(levels[-1])
        if not found:
            break
        levels.append(pd.concat(found).reset_index(drop=True))

    # Stitch the levels back together starting from the deepest one, so the
    # frames are combined in the same pairwise order as a recursive descent.
    flattened = levels.pop()
    while levels:
        shallower = levels.pop()
        flattened = pd.concat([shallower, flattened]).reset_index(drop=True)
    return flattened


# Flatten at all levels: `posts` holds the top-level threads followed by every
# comment and answer nested beneath them.
posts = flatten_nested_items(threads)
# Show the combined table as the cell output.
posts

In [None]:
# Parse the timestamps as timezone-aware values. `utc=True` normalizes every
# entry to UTC first, so an export whose entries carry different UTC offsets
# (for example before and after a daylight-saving change) still produces one
# datetime column that can be compared against START and END.
# Converting to START's timezone afterwards keeps the printed times local.
posts["created_at"] = pd.to_datetime(posts["created_at"], utc=True).dt.tz_convert(
    START.tz
)
# posts["created_at"]

## Filter


In [None]:
# Keep only the posts created inside the grading window: START is inclusive,
# END is exclusive.
created = posts["created_at"]
in_window = (created >= START) & (created < END)
output = posts[in_window]

# Print the earliest and latest timestamps that were kept, as a sanity check.
for boundary in (output["created_at"].min(), output["created_at"].max()):
    print(boundary)

## Prep output


In [None]:
import html

# exclude the instructors
output = output[output["user.role"] != "admin"]

# sort by name, then chronologically within each name
output = output.sort_values(["user.name", "created_at"])

# only include a subset of the columns; take a copy so the assignment below
# writes to an independent frame rather than a slice (avoids pandas'
# SettingWithCopyWarning)
output = output[
    [
        "user.name",
        "url",
        # "created_at",
        # "title",
        "text",
    ]
].copy()

# make links clickable
# https://stackoverflow.com/a/20043785/358804
# The URL is escaped because it is interpolated into an HTML attribute.
output["url"] = output["url"].apply(
    lambda url: f'<a href="{html.escape(str(url))}">Open</a>'
)

# render newlines
# https://stackoverflow.com/a/56881411/358804
styled = output.style.set_properties(
    **{
        "text-align": "left",
        "white-space": "pre-wrap",
    }
)

# The Styler emits cell values as raw HTML. Names and post text come from the
# export, so escape them to display as literal text; the generated link column
# is deliberately left unescaped so it stays clickable.
styled = styled.format(escape="html", subset=["user.name", "text"])

## Output


In [None]:
from IPython.display import HTML

# Convert the styled table to an HTML string and display it as the cell output.
# NOTE(review): `escape` is not a documented parameter of Styler.to_html; extra
# keyword arguments are forwarded to the template renderer, so this flag likely
# has no effect here — confirm against the installed pandas version.
HTML(styled.to_html(escape=False))