In [3]:
import requests
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()
APIFY_TOKEN = os.getenv("APIFY_API_KEY")

# Ensure the token is present
if not APIFY_TOKEN:
    raise OSError("APIFY_API_KEY not found in environment variables.")

# TikTok video URL to scrape
tiktok_url = "https://www.tiktok.com/@cioccatalin1/video/7456347160892706070"

# API endpoint
run_url = f"https://api.apify.com/v2/acts/clockworks~tiktok-comments-scraper/run-sync-get-dataset-items?token={APIFY_TOKEN}"

# Input payload for the actor
payload = {
    "postURLs": [tiktok_url],
    "commentsPerPost": 50,
    "maxRepliesPerComment": 0,
    "resultsPerPage": 50,
}

# Make the POST request
response = requests.post(run_url, json=payload)
response.raise_for_status()
comments = response.json()

# Print a few comments
for i, comment in enumerate(comments[:5], start=1):
    print(f"{i}. {comment.get('text')} (by {comment.get('username')})")

1. La Ferrari più bella che ci sia!! (by None)
2. ma quanto è bella sempre intramontabile 💪 (by None)
3. ma davvero c'e' chi preferisce la countach? (by None)
4. Le supersportive Italiane,sono le migliori in assoluto....... (by None)
5. Bellezza e fascino senza tempo😍😍😍, la Ferrari testa rossa ha linee belle. che sembra uscita ora dalla fabbrica (by None)


In [None]:
"""
Fetch TikTok search results + comments in one go.

Requires:
    pip install requests python-dotenv
"""

from __future__ import annotations
import os
import requests
from dotenv import load_dotenv
from typing import List, Dict, Any

# ──────────────────────────────────────────────────────────────────────────────
#  Environment & constants
# ──────────────────────────────────────────────────────────────────────────────
load_dotenv()
APIFY_TOKEN = os.getenv("APIFY_API_KEY")
if not APIFY_TOKEN:
    raise OSError("APIFY_API_KEY not found in environment variables (.env)")

SEARCH_ACTOR = "epctex~tiktok-search-scraper"
COMMENT_ACTOR = "clockworks~tiktok-comments-scraper"

SEARCH_URL = f"https://api.apify.com/v2/acts/{SEARCH_ACTOR}/run-sync-get-dataset-items?token={APIFY_TOKEN}"
COMMENT_URL = f"https://api.apify.com/v2/acts/{COMMENT_ACTOR}/run-sync-get-dataset-items?token={APIFY_TOKEN}"


# ──────────────────────────────────────────────────────────────────────────────
#  Helpers
# ──────────────────────────────────────────────────────────────────────────────
def _search_tiktok(keyword: str, max_items: int = 10) -> list[dict[str, Any]]:
    """Run the search actor and return raw dataset items."""
    payload = {
        "search": [keyword],
        "maxItems": max_items,
        # The actor’s input schema requires `proxy`
        "proxy": {"useApifyProxy": True},
    }
    resp = requests.post(SEARCH_URL, json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()  # list of video objects


def _fetch_comments(video_url: str, limit: int = 20) -> list[dict[str, Any]]:
    """Run the comment actor for a single TikTok video URL."""
    payload = {
        "postURLs": [video_url],
        "commentsPerPost": limit,
        "maxRepliesPerComment": 0,
        "resultsPerPage": limit,
    }
    resp = requests.post(COMMENT_URL, json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()  # list of comment dicts


# ──────────────────────────────────────────────────────────────────────────────
#  Public function
# ──────────────────────────────────────────────────────────────────────────────
def fetch_tiktok_search_with_comments(
    keyword: str,
    num_videos: int = 10,
    comments_per_video: int = 20,
) -> list[dict[str, Any]]:
    """
    Search TikTok and enrich each result with up to `comments_per_video` comments.

    Args:
        keyword (str): Search term.
        num_videos (int): How many videos to return (≤ 100, actor limit).
        comments_per_video (int): Comment count for each video (≤ 500).

    Returns:
        List[Dict]: Each dict is the original search item plus "comments".
    """
    results: list[dict[str, Any]] = []

    # 1️⃣ Search TikTok
    video_items = _search_tiktok(keyword, max_items=num_videos)

    # 2️⃣ For each video, grab comments
    for item in video_items[:num_videos]:
        # Different actors use slightly different field names; try common ones.
        video_url = (
            item.get("url")
            or item.get("videoUrl")
            or item.get("itemUrl")
            or item.get("shareUrl")
        )
        if not video_url:
            # Skip if we cannot find a usable URL.
            continue

        try:
            comments = _fetch_comments(video_url, limit=comments_per_video)
        except Exception as err:
            # Keep the video record even if comments fail, but mark the error.
            comments = []
            item["comment_error"] = str(err)

        # Attach comments to the original item
        item["comments"] = comments
        results.append(item)

    return results


# ──────────────────────────────────────────────────────────────────────────────
#  Example usage
# ──────────────────────────────────────────────────────────────────────────────

data = fetch_tiktok_search_with_comments(
    "ferrari testarossa", num_videos=10, comments_per_video=10
)
# Pretty-print first record
import json
import pprint

pprint.pprint(json.dumps(data[0], indent=2, ensure_ascii=False) if data else "No data")

# save the results to a file
with open("tiktok_search_results.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

('{\n'
 '  "url": "https://www.tiktok.com/@therealvarryx/video/7488448515195096342",\n'
 '  "id": "7488448515195096342",\n'
 '  "desc": "Gotta love the \\"fake F40\\" 😅 #ferrari #testarossa '
 '#koenigspecials ",\n'
 '  "createTime": "1743540293",\n'
 '  "scheduleTime": 0,\n'
 '  "video": {\n'
 '    "id": "7488448515195096342",\n'
 '    "height": 1024,\n'
 '    "width": 576,\n'
 '    "duration": 27,\n'
 '    "ratio": "540p",\n'
 '    "cover": '
 '"https://p16-common-sign-no.tiktokcdn-us.com/tos-no1a-p-0037-no/oo4xUzADPABzAEBQCE1sQyIw9iIBxeDAkSiECF~tplv-tiktokx-origin.image?dr=9636&x-expires=1749510000&x-signature=uMJCBvueX4OmFI1DbfImlLU9X7A%3D&t=4d5b0474&ps=13740610&shp=81f88b70&shcp=43f4a2f9&idc=useast8",\n'
 '    "originCover": '
 '"https://p16-pu-sign-no.tiktokcdn-eu.com/obj/tos-no1a-p-0037-no/oQBLfAP0GUjfQnNIXKAfnQQMsALI2hsDYZ8oET?lk3s=81f88b70&x-expires=1749510000&x-signature=8fnuY6wn%2BQ42cJklJM5RZwltkKI%3D&shp=81f88b70&shcp=-",\n'
 '    "dynamicCover": '
 '"https://p16-common-si

In [10]:
data[0]["comments"]

[{'videoWebUrl': 'https://www.tiktok.com/@therealvarryx/video/7488448515195096342',
  'submittedVideoUrl': 'https://www.tiktok.com/@therealvarryx/video/7488448515195096342',
  'input': 'https://www.tiktok.com/@therealvarryx/video/7488448515195096342',
  'cid': '7488954223842706198',
  'createTime': 1743658048,
  'createTimeISO': '2025-04-03T05:27:28.000Z',
  'text': 'the venturi 400gt is better.',
  'diggCount': 862,
  'likedByAuthor': False,
  'pinnedByAuthor': False,
  'repliesToId': None,
  'replyCommentTotal': 32,
  'uid': '7334804139882497056',
  'uniqueId': '2jz_pro',
  'avatarThumbnail': 'https://p16-pu-sign-no.tiktokcdn-eu.com/tos-no1a-avt-0068c001-no/7483ac5c52dcc9a957e09f5795ba18c4~tplv-tiktokx-cropcenter:100:100.jpg?dr=10399&refresh_token=274a8713&x-expires=1749423600&x-signature=21o2bSQEkou7aaXShi%2FXK7vbbGQ%3D&t=4d5b0474&ps=13740610&shp=30310797&shcp=ff37627b&idc=no1a'},
 {'videoWebUrl': 'https://www.tiktok.com/@therealvarryx/video/7488448515195096342',
  'submittedVideoUr

In [None]:
import json
import pandas as pd
from tqdm import tqdm
from pathlib import Path

# Load enriched sentiment JSON data
file_path = Path("/mnt/data/tiktok_enriched_with_sentiment.json")
with file_path.open(encoding="utf-8") as f:
    enriched_data = json.load(f)


def aggregate_sentiment_scores(data: list[dict]) -> pd.DataFrame:
    """
    Aggregates sentiment scores from enriched TikTok data.

    Args:
        data (list of dict): Enriched TikTok posts with sentiment-tagged comments.

    Returns:
        pd.DataFrame: Aggregated metrics per video.
    """
    rows = []

    for video in tqdm(data, desc="Processing sentiment aggregation"):
        video_id = video.get("id")
        likes = video.get("diggCount", 0)
        shares = video.get("shareCount", 0)
        plays = video.get("playCount", 0)
        collections = video.get("collectCount", 0)
        comments = video.get("comments", [])

        sentiments = [
            c for c in comments if isinstance(c, dict) and c.get("sentiment") != "error"
        ]

        num_comments = len(sentiments)
        num_positive = sum(1 for c in sentiments if c["sentiment"] == "positive")
        num_negative = sum(1 for c in sentiments if c["sentiment"] == "negative")
        num_neutral = sum(1 for c in sentiments if c["sentiment"] == "neutral")
        avg_score = (
            sum(c["score"] for c in sentiments) / num_comments if num_comments else 0
        )

        rows.append(
            {
                "video_id": video_id,
                "likes": likes,
                "shares": shares,
                "plays": plays,
                "collections": collections,
                "num_comments": num_comments,
                "num_positive": num_positive,
                "num_negative": num_negative,
                "num_neutral": num_neutral,
                "avg_sentiment_score": avg_score,
                "positive_ratio": num_positive / num_comments if num_comments else 0,
                "negative_ratio": num_negative / num_comments if num_comments else 0,
            }
        )

    return pd.DataFrame(rows)


# Run aggregation on loaded data
summary_df = aggregate_sentiment_scores(enriched_data)

# Show the DataFrame to the user
display_dataframe_to_user("TikTok Video Sentiment Summary", summary_df)

In [None]:
import json
import pandas as pd
from pathlib import Path

# ──────────────────────────────────────────────────────────────────────────────
#  Load sample search‑results file produced earlier
# ──────────────────────────────────────────────────────────────────────────────
file_path = Path(
    "/home/ettore/projects/hackathons/MVA_hackathon_2025/vintage_ai/data/raw/tiktok_search_results.json"
)
if not file_path.exists():
    raise FileNotFoundError(f"Sample dataset not found at {file_path}")

with file_path.open(encoding="utf-8") as f:
    raw_data = json.load(f)

# ──────────────────────────────────────────────────────────────────────────────
#  Cleaning helper
# ──────────────────────────────────────────────────────────────────────────────


def _as_int(val):
    """Convert `val` to int safely (handles str / None)."""
    try:
        return int(val)
    except (TypeError, ValueError):
        return 0


def clean_tiktok_data(records):
    """
    Keep only engagement counters + plain comment texts.

    Returns a list like:
    {
        "id": "7488448515195096342",
        "diggCount": 48500,
        "shareCount": 1275,
        "commentCount": 450,
        "playCount": 398300,
        "collectCount": 5813,
        "comments": ["text1", "text2", ...]
    }
    """
    cleaned = []
    for item in records:
        stats = item.get("stats", {}) or item.get("statsV2", {})
        cleaned.append(
            {
                "id": item.get("id") or item.get("video", {}).get("id"),
                "diggCount": _as_int(stats.get("diggCount")),
                "shareCount": _as_int(stats.get("shareCount")),
                "commentCount": _as_int(stats.get("commentCount")),
                "playCount": _as_int(stats.get("playCount")),
                "collectCount": _as_int(stats.get("collectCount")),
                "comments": [
                    c.get("text")
                    for c in item.get("comments", [])
                    if isinstance(c, dict) and c.get("text")
                ],
            }
        )
    return cleaned


cleaned = clean_tiktok_data(raw_data)

# Show a preview of the cleaned structure
print("First cleaned record:")
print(json.dumps(cleaned[0], indent=2, ensure_ascii=False))

# ──────────────────────────────────────────────────────────────────────────────
#  Aggregate table for quick inspection
# ──────────────────────────────────────────────────────────────────────────────
df = pd.DataFrame(
    [{k: v for k, v in item.items() if k != "comments"} for item in cleaned]
)
df.sort_values("diggCount", ascending=False, inplace=True, ignore_index=True)

# Save cleaned output for downstream use
output_path = Path(
    "/home/ettore/projects/hackathons/MVA_hackathon_2025/vintage_ai/data/processed/tiktok_cleaned_results.json"
)
with output_path.open("w", encoding="utf-8") as f:
    json.dump(cleaned, f, indent=2, ensure_ascii=False)

print(f"\n✅ Cleaned data written to: {output_path}")

First cleaned record:
{
  "id": "7488448515195096342",
  "diggCount": 48500,
  "shareCount": 1275,
  "commentCount": 450,
  "playCount": 398300,
  "collectCount": 5813,
  "comments": [
    "the venturi 400gt is better.",
    "koenig specials r better than any ferrari🥀",
    "А вы тоже у мамы просите Феррари ф40?",
    "infernus GTA sa",
    "i want this problem",
    "это даже как будто лучше, не так заезженно",
    "ente yakin itu F40 😅😅",
    "that a testarossa koenig ?",
    "I saw a red one in person and it is beautiful.",
    "es un venturi 400 GT"
  ]
}

✅ Cleaned data written to: /home/ettore/projects/hackathons/MVA_hackathon_2025/vintage_ai/data/processed/tiktok_cleaned_results.json


In [None]:
import os

print("\nCurrent working directory:", os.getcwd())


Current working directory: /home/ettore/projects/hackathons/MVA_hackathon_2025/vintage_ai


In [None]:
import json
import pandas as pd
from transformers import pipeline
from collections import Counter
from pathlib import Path

# Load cleaned data with comment texts only
with open("data/processed/tiktok_cleaned_results.json", encoding="utf-8") as f:
    data = json.load(f)

# Load local model from disk
pipe = pipeline(
    "text-classification", model="data/models/sentiment_analysis/tabularisai"
)

# Prepare a list of rows for aggregation
agg_results = []

for video in data:
    video_id = video.get("id")
    comments = video.get("comments", [])

    if not comments:
        continue

    # Run sentiment classification in batch
    predictions = pipe(comments, batch_size=8, truncation=True)

    # Extract labels and scores
    scores = [pred["score"] for pred in predictions]
    labels = [pred["label"] for pred in predictions]

    # Aggregate
    avg_score = sum(scores) / len(scores)
    label_counts = Counter(labels)
    most_common_label = label_counts.most_common(1)[0][0]

    agg_results.append(
        {
            "num_comments": len(comments),
            "avg_sentiment_score": avg_score,
            "most_common_sentiment": most_common_label,
            "likes": video.get("diggCount", 0),
            "shares": video.get("shareCount", 0),
            "plays": video.get("playCount", 0),
            "collections": video.get("collectCount", 0),
        }
    )

# Convert to DataFrame for easy inspection
df = pd.DataFrame(agg_results)
df.to_csv("data/processed/tiktok_sentiment_aggregated_by_video.csv", index=False)
df.to_json(
    "data/processed/tiktok_sentiment_aggregated_by_video.json",
    orient="records",
    indent=2,
    force_ascii=False,
)

print("✅ Aggregated sentiment written to:")
print("- tiktok_sentiment_aggregated_by_video.csv")
print("- tiktok_sentiment_aggregated_by_video.json")

Device set to use cuda:0


✅ Aggregated sentiment written to:
- tiktok_sentiment_aggregated_by_video.csv
- tiktok_sentiment_aggregated_by_video.json


In [None]:
df.head(10)  # Display first 10 rows for quick inspection

Unnamed: 0,num_comments,avg_sentiment_score,most_common_sentiment,likes,shares,plays,collections
0,10,0.393252,Neutral,48500,1275,398300,5813
1,10,0.375641,Neutral,3547,146,49900,324
2,7,0.397792,Very Positive,794,31,12400,60
3,10,0.37545,Neutral,4439,82,100600,558
4,1,0.24696,Neutral,49,3,612,3
5,10,0.50113,Very Positive,15300,2618,257400,1960
6,10,0.426919,Very Positive,177200,7357,741800,11919
7,10,0.419819,Very Negative,10200,428,66900,1588
8,10,0.446324,Neutral,4495,246,52400,270
9,10,0.524026,Very Positive,13200,1516,106300,1583


In [None]:
# compute the engagement score as the rounded int sum of (Total Engagements / Total Views) * 100
def compute_engagement_score(row):
    total_engagements = (
        row["likes"] + row["shares"] + row["collections"] + row["num_comments"]
    )
    if row["plays"] > 0:
        return round((total_engagements / row["plays"]) * 100)
    return 0


# Apply the engagement score computation
df["engagement_score"] = df.apply(compute_engagement_score, axis=1)
# Save the updated DataFrame with engagement scores
df.to_json(
    "data/processed/tiktok_sentiment_aggregated_by_video.json",
    orient="records",
    indent=2,
    force_ascii=False,
)

In [None]:
df.head(10)  # Display first 10 rows for quick inspection

Unnamed: 0,num_comments,avg_sentiment_score,most_common_sentiment,likes,shares,plays,collections,engagement_score,overall_sentiment_score
0,10,0.393252,Neutral,48500,1275,398300,5813,14,5
1,10,0.375641,Neutral,3547,146,49900,324,8,5
2,7,0.397792,Very Positive,794,31,12400,60,7,5
3,10,0.37545,Neutral,4439,82,100600,558,5,5
4,1,0.24696,Neutral,49,3,612,3,9,5
5,10,0.50113,Very Positive,15300,2618,257400,1960,8,5
6,10,0.426919,Very Positive,177200,7357,741800,11919,26,5
7,10,0.419819,Very Negative,10200,428,66900,1588,18,5
8,10,0.446324,Neutral,4495,246,52400,270,10,5
9,10,0.524026,Very Positive,13200,1516,106300,1583,15,5


In [None]:
aggregated_result = df.median(numeric_only=True).to_dict()
aggregated_result

{'num_comments': 10.0,
 'avg_sentiment_score': 0.40880534989493233,
 'likes': 7347.5,
 'shares': 337.0,
 'plays': 83750.0,
 'collections': 1070.5,
 'engagement_score': 9.5,
 'overall_sentiment_score': 5.0}

In [18]:
def _load_pipeline(model_path: str) -> Any:

    try:
        pipe = pipeline(
            "text-classification", model="tabularisai/multilingual-sentiment-analysis"
        )

        # Save the model and tokenizer to a local directory
        pipe.save_pretrained(model_path)
        # Load the classification pipeline with the specified model
        pipe = pipeline("text-classification", model=model_path, tokenizer=model_path)
    except Exception as e:
        logging.error("Failed to load sentiment pipeline: %s", e)
        raise
    return pipe


pipe = _load_pipeline(MODEL_PATH)

Device set to use cuda:0
Device set to use cuda:0


In [None]:
# tiktok_sentiment.py
from __future__ import annotations
import os
import json
import logging
import requests
from typing import Any, Dict, List
from collections import Counter
from dotenv import load_dotenv
from statistics import median
from pydantic import BaseModel, Field, ValidationError
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

load_dotenv()
APIFY_TOKEN = os.getenv("APIFY_API_KEY")
print("APIFY_API_KEY:", APIFY_TOKEN)
MODEL_PATH = os.getenv(
    "SENTIMENT_MODEL_PATH", "data/models/sentiment_analysis/tabularisai"
)
TIKTOK_RAW_DATA_PATH = os.getenv(
    "TIKTOK_RAW_DATA_PATH", "data/raw/tiktok_search_results.json"
)


class Metrics(BaseModel):
    num_comments: int
    avg_sentiment_score: float
    most_common_sentiment: str
    likes: int
    shares: int
    plays: int
    collections: int
    engagement_score: int
    overall_sentiment_score: int


SEARCH_ACTOR = "epctex~tiktok-search-scraper"
COMMENT_ACTOR = "clockworks~tiktok-comments-scraper"
SEARCH_URL = f"https://api.apify.com/v2/acts/{SEARCH_ACTOR}/run-sync-get-dataset-items?token={APIFY_TOKEN}"
COMMENT_URL = f"https://api.apify.com/v2/acts/{COMMENT_ACTOR}/run-sync-get-dataset-items?token={APIFY_TOKEN}"


logging.basicConfig(
    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)


def aggregate_tiktok_sentiment(
    keyword: str, num_videos: int = 10, comments_per_video: int = 20, save_raw=False
) -> dict[str, Any]:
    """
    One-shot helper – search TikTok, grab comments, run sentiment and
    return **median** engagement / sentiment metrics as a plain dict.
    On any fatal error we log and return {} (never crash the caller).
    """

    def _load_pipeline(model_path: str) -> Any:

        try:
            pipe = pipeline(
                "text-classification",
                model="tabularisai/multilingual-sentiment-analysis",
            )

            # Save the model and tokenizer to a local directory
            pipe.save_pretrained(model_path)
            # Load the classification pipeline with the specified model
            pipe = pipeline(
                "text-classification", model=model_path, tokenizer=model_path
            )
        except Exception as e:
            logging.error("Failed to load sentiment pipeline: %s", e)
            raise
        return pipe

    def _post(url: str, payload: dict) -> list[dict]:
        r = requests.post(url, json=payload, timeout=300)
        r.raise_for_status()
        return r.json()

    def _search(term: str, limit: int) -> list[dict]:
        return _post(
            SEARCH_URL,
            {"search": [term], "maxItems": limit, "proxy": {"useApifyProxy": True}},
        )

    def _comments(video_url: str, limit: int) -> list[dict]:
        return _post(
            COMMENT_URL,
            {
                "postURLs": [video_url],
                "commentsPerPost": limit,
                "maxRepliesPerComment": 0,
                "resultsPerPage": limit,
            },
        )

    def _clean(item: dict) -> dict:
        s = item.get("stats", {}) or item.get("statsV2", {})

        def _i(x):
            try:
                return int(x)
            except:
                return 0

        return dict(
            likes=_i(s.get("diggCount")),
            shares=_i(s.get("shareCount")),
            plays=_i(s.get("playCount")),
            collections=_i(s.get("collectCount")),
            comments=[
                c.get("text") for c in item.get("comments", []) if isinstance(c, dict)
            ],
        )

    try:
        if not APIFY_TOKEN:
            raise OSError("APIFY_API_KEY missing")
        videos = [
            v
            for v in tqdm(
                _search(keyword, num_videos), desc="Fetching tiktok videos..."
            )
            if v
        ]
        for v in tqdm(videos[:num_videos], desc="Fetching comments..."):
            url = v.get("url") or v.get("videoUrl") or v.get("shareUrl")
            try:
                v["comments"] = _comments(url, comments_per_video) if url else []
            except Exception as e:
                logging.warning("comment fetch failed for %s: %s", url, e)
                v["comments"] = []

        cleaned = [_clean(v) for v in videos if v.get("comments")]
        if not cleaned:
            raise ValueError("no data")

        if save_raw:
            output_path = Path(TIKTOK_RAW_DATA_PATH)
            with output_path.open("w", encoding="utf-8") as f:
                json.dump(videos, f, indent=2, ensure_ascii=False)
            logging.info("Raw search results saved to %s", output_path)

        pipe = _load_pipeline(MODEL_PATH)
        rows = []
        for rec in tqdm(cleaned, desc="Processing comments for sentiment"):
            if not rec["comments"]:
                continue
            preds = pipe(rec["comments"], batch_size=8, truncation=True)
            scores = [p["score"] for p in preds]
            labels = [p["label"].lower() for p in preds]
            total = len(preds)
            total_eng = rec["likes"] + rec["shares"] + rec["collections"] + total
            rows.append(
                dict(
                    num_comments=total,
                    avg_sentiment_score=sum(scores) / total,
                    most_common_sentiment=Counter(labels).most_common(1)[0][0],
                    likes=rec["likes"],
                    shares=rec["shares"],
                    plays=rec["plays"],
                    collections=rec["collections"],
                    engagement_score=(
                        round((total_eng / rec["plays"]) * 100) if rec["plays"] else 0
                    ),
                    overall_sentiment_score=5,  # placeholder
                )
            )

        df = pd.DataFrame(rows)
        med = df.median(numeric_only=True).to_dict()
        med["most_common_sentiment"] = df["most_common_sentiment"].mode().iat[0]
        return Metrics(**med).dict()

    except Exception as err:
        logging.error("aggregate_tiktok_sentiment failed: %s", err)
        return {}

APIFY_API_KEY: apify_api_jCo7dkRZE40PO4ILEAmtScQqld1b3N46q9Be


In [13]:
print(
    json.dumps(
        aggregate_tiktok_sentiment("ferrari testarossa", 5, 10),
        indent=2,
        ensure_ascii=False,
    )
)

Fetching videos...: 100%|██████████| 5/5 [00:00<00:00, 31775.03it/s]
Fetching comments...: 100%|██████████| 5/5 [01:28<00:00, 17.74s/it]
2025-06-08 03:22:34,767 [ERROR] aggregate_tiktok_sentiment failed: Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'data/models/sentiment_analysis/tabularisai'. Use `repo_type` argument if needed.


{}
