In [1]:
!pip install beautifulsoup4



In [2]:
import requests
from bs4 import BeautifulSoup

In [4]:
url = "https://letterboxd.com/film/jujutsu-kaisen/reviews/by/activity/page/1"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
html = soup.find_all('div', class_='listitem')

In [5]:
print(html)

[<div class="listitem"> <article class="production-viewing -viewing" data-js-treasure-hunt="index-target"> <a class="avatar -a40" href="/romanticize/"> <img alt="sidra" height="40" src="https://a.ltrbxd.com/resized/avatar/upload/3/0/8/1/8/4/9/shard/avtr-0-80-0-80-crop.jpg?v=9e78640176" width="40"/> </a> <div class="body"> <div class="content-reactions-strip -viewing"> <span class="rating -green rated-8"> ★★★★ </span> <span class="attribution-detail"> <a class="context" href="/romanticize/film/jujutsu-kaisen/"> Watched by <span class="owner"><strong class="displayname">sidra</strong></span> </a> </span> <span class="date"><time class="timestamp" datetime="2021-05-19">19 May 2021</time></span> <a class="inlineicon icon-16 -ignoreactionpseudo icon-comment" href="/romanticize/film/jujutsu-kaisen/#comments"><span class="icon" role="presentation"></span><span class="label">12</span></a> </div> <div class="js-review"> <p class="body-text -prose js-spoiler-container" data-watchable-uid="film:6

In [None]:
import re
import csv
import os

def _to_int(s, default=0):
    if s is None:
        return default
    s = re.sub(r"[^\d]", "", s)
    return int(s) if s else default

def _parse_stars_text(text: str):
    if not text:
        return None
    text = text.strip()
    full = text.count('★')
    half = 0.5 if '½' in text else 0.0
    val = full + half
    return val if val > 0 else None

def parse_article(art):
    data = {}

    # 1) username
    user = art.select_one('.attribution-detail .displayname')
    if user:
        data['username'] = user.get_text(strip=True)
    else:
        avatar = art.select_one('a.avatar img[alt]')
        data['username'] = avatar['alt'].strip() if avatar and avatar.has_attr('alt') else None

    # 2) tanggal review
    t = art.select_one('.date time.timestamp')
    data['tanggal_review'] = (t.get('datetime') or t.get_text(strip=True)) if t else None

    # 3) jumlah komentar
    c = art.select_one('a.icon-comment .label')
    data['jumlah_komentar'] = _to_int(c.get_text(strip=True) if c else None)

    # 4) text review
    body = art.select_one('.js-review-body')
    if body:
        for br in body.select('br'):
            br.replace_with(' ')
        data['text_review'] = body.get_text(" ", strip=True)
    else:
        p = art.select_one('.js-review p.body-text')
        data['text_review'] = p.get_text(" ", strip=True) if p else None

    # 5) isLike (badge "Liked" di header)
    data['isLike'] = art.select_one('.content-reactions-strip .icon-liked') is not None

    # 6) jumlah like user lain
    like_block = art.select_one('p.like-link-target')
    if like_block and like_block.has_attr('data-count'):
        data['jumlah_like'] = _to_int(like_block['data-count'])
    else:
        a_count = art.select_one('p.like-link-target a[href*="/likes/"]')
        data['jumlah_like'] = _to_int(a_count.get_text(strip=True) if a_count else None)

    # 7) jumlah bintang (0–5; bisa 0.5)
    rating_span = art.select_one('.content-reactions-strip .rating')
    stars = None
    if rating_span:
        cls = ' '.join(rating_span.get('class', []))
        m = re.search(r'rated-(\d+)', cls)
        if m:
            stars = int(m.group(1)) / 2.0
        else:
            stars = _parse_stars_text(rating_span.get_text())
    data['jumlah_bintang'] = stars

    return data

# --- main: fetch 1 halaman dan ekspor CSV ---
url = "https://letterboxd.com/film/jujutsu-kaisen/reviews/by/activity/page/1"
headers = {
    "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                   "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36")
}
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')
listitems = soup.find_all('div', class_='listitem')

results = []
for li in listitems:
    art = li.select_one('article.production-viewing')
    if not art:  # skip ikon sosial, dsb.
        continue
    results.append(parse_article(art))

# --- export to CSV ---
outfile = 'reviews_jujutsu_kaisen_p1.csv'
fieldnames = [
    'username',
    'tanggal_review',
    'jumlah_komentar',
    'text_review',
    'isLike',
    'jumlah_like',
    'jumlah_bintang',
]

with open(outfile, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
    writer.writeheader()
    for row in results:
        writer.writerow(row)

print(f"Saved: {os.path.abspath(outfile)}")


Saved: /content/reviews_jujutsu_kaisen_p1.csv
