In [36]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime

# URL template for the rating-histogram fragment of a single film.
# `{movie}` is filled in by score() via str.format with the film's slug, e.g. "the-fabelmans".
LBXD_BASEURL = "https://letterboxd.com/csi/film/{movie}/rating-histogram/"

In [173]:
def score(movie, timeout=10):
  """Fetch the current rating metrics of one film from its rating-histogram page.

  Args:
    movie: film slug substituted into ``LBXD_BASEURL`` (e.g. "the-fabelmans").
    timeout: seconds to wait for the HTTP response before giving up.

  Returns:
    dict with keys ``rating``, ``count``, ``timestamp`` (``datetime.now()`` taken
    right after the page was parsed) and ``movie``.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
  """
  url = LBXD_BASEURL.format(movie=movie)
  # Without a timeout requests can block forever on a stalled connection.
  resp = requests.get(url, timeout=timeout)
  # Fail here with a clear HTTP error rather than later on a page that lacks the histogram.
  resp.raise_for_status()
  # Name the parser explicitly so the result does not depend on which optional parsers are installed.
  soup = BeautifulSoup(resp.content, "html.parser")

  now = datetime.now()
  display = soup.select_one(".display-rating")
  if display is None:
    # No ".display-rating" element on the page: derive the average from the histogram bars instead.
    metrics = compute_score(soup)
    metrics["timestamp"] = now
  else:
    metrics = clean_str(display["title"], now)
  metrics["movie"] = movie

  return metrics

def clean_str(s, timestamp):
  """Extract the average rating and rating count from a ``.display-rating`` tooltip.

  The text is split on plain spaces; the 4th word is read as the average and the
  7th word as the count, e.g. "Weighted average of 4.08 based on 222,119 ratings".

  Args:
    s: tooltip text.
    timestamp: value stored unchanged under the ``timestamp`` key.

  Returns:
    ``{"rating": float, "count": int, "timestamp": timestamp}``
  """
  words = s.split(" ")
  average = float(words[3])
  # The count may be glued to the following word by a non-breaking space; keep only the part before it.
  digits = words[6].partition("\xa0")[0]
  total = int(digits.replace(",", ""))
  return dict(rating=average, count=total, timestamp=timestamp)

In [174]:
def guess_note(soup):
  """Debugging stub: print the elements matching ``.rating-histogram``; returns None."""
  histograms = soup.select(".rating-histogram")
  print(histograms)
  

In [175]:

# Smoke test of score() on a widely rated film (expected to take the ".display-rating" path).
movie = "the-fabelmans"

print(score(movie))
# Example of the tooltip text that clean_str() parses:
#   "Weighted average of 4.08 based on 222,119 ratings"


{'rating': 4.08, 'count': 222301, 'timestamp': datetime.datetime(2023, 2, 24, 22, 2, 32, 723213), 'movie': 'the-fabelmans'}


In [176]:
# Test with a film that has no published average rating ("note"): the saved page for this
# film says "Not enough ratings to calculate average", so score() is expected to fall back
# to compute_score().
# NOTE: compute_score is defined in a later cell and must already exist in the kernel.
name = "prison-a-domicile"

print(score(name))


{'rating': 2.25, 'count': 6, 'timestamp': datetime.datetime(2023, 2, 24, 22, 4, 9, 807691), 'movie': 'prison-a-domicile'}


Forecasting a rating: estimating the average from the histogram when Letterboxd does not display one

In [116]:
def parse_stars(stars):
  """Convert the star glyphs of a histogram tooltip into a numeric rating.

  Args:
    stars: either the literal "half-★" or a run of "★" / "½" glyphs such as "★★★½".

  Returns:
    The sum of the glyph values ("★" counts 1, "½" counts 0.5), or 0.5 for "half-★".
    An empty string yields 0.

  Raises:
    ValueError: if ``stars`` contains any other character.
  """
  # The half-star bucket is spelled out in words rather than drawn as "½".
  if stars == "half-★":
    return 0.5

  glyph_values = {"★": 1, "½": 0.5}
  total = 0
  for glyph in stars:
    if glyph not in glyph_values:
      # Fail loudly: an unknown glyph has no numeric value that could be added to the total.
      raise ValueError(f"could not parse {glyph!r} in star rating {stars!r}")
    total += glyph_values[glyph]
  return total

def text_rating_to_num_rating(s):
  """Parse one histogram-bar tooltip into its star value and number of ratings.

  Accepted shapes: "<count> <stars> rating(s) (<pct>%)" and "No <stars> ratings",
  where the words may be separated by ordinary or non-breaking spaces and
  ``<count>`` may contain thousands separators.

  Args:
    s: tooltip text, e.g. "3 ★★ ratings (50%)" or "No half-★ ratings".

  Returns:
    ``{"rating": <value from parse_stars>, "count": int}``

  Raises:
    ValueError: if the text has fewer than two words or the count is not a number.
  """
  # split() without a separator splits on any whitespace, including the non-breaking
  # space (\xa0) that the page puts between the count and the stars.
  toks = s.split()
  if len(toks) < 2:
    raise ValueError(f"unrecognised rating tooltip: {s!r}")
  count = 0 if toks[0] == "No" else int(toks[0].replace(",", ""))
  rating = parse_stars(toks[1])
  return {"rating": rating, "count": count}


# Spot checks for text_rating_to_num_rating: each tooltip text is paired with the dict it should parse to.
tests = [
  "3 ★★ ratings (50%)",
  "No half-★ ratings",
  "No ★★★★ ratings",
]
expected = [
  {"rating": 2, "count": 3},
  {"rating": 0.5, "count": 0},
  {"rating": 4, "count": 0},
]
for t, e in zip(tests, expected):
  # Echo the case before asserting so a failure is preceded by the input that caused it.
  print(t, e, sep="\n")
  assert text_rating_to_num_rating(t) == e

3 ★★ ratings (50%)
{'rating': 2, 'count': 3}
No half-★ ratings
{'rating': 0.5, 'count': 0}
No ★★★★ ratings
{'rating': 4, 'count': 0}


In [177]:
# Saved HTML of the ratings-histogram section for "prison-a-domicile", whose heading tooltip reads
# "Not enough ratings to calculate average"; used below as an offline fixture for compute_score.
r = """<section class="section ratings-histogram-chart"> <h2 class="section-heading"><a href="/film/prison-a-domicile/ratings/" class="tooltip" title="Not enough ratings to calculate average">Ratings</a></h2> <div class="rating-histogram clear rating-histogram-exploded"> <span class="rating-green rating-green-tiny rating-1"><span class="rating rated-2">★</span></span> <ul> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 0px" title="No half-★ ratings"><i style="height: 1px"></i></li> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 16px" title="No ★ ratings"><i style="height: 1px"></i></li> <li class="rating-histogram-bar" style="width: 15px; left: 32px"> <a href="/film/prison-a-domicile/ratings/rated/1%C2%BD/by/rating/" class="ir tooltip" title="1&nbsp;★½ rating (17%)">1&nbsp;★½ rating (17%)<i style="height: 15.333333333333332px;"></i></a> </li> <li class="rating-histogram-bar" style="width: 15px; left: 48px"> <a href="/film/prison-a-domicile/ratings/rated/2/by/rating/" class="ir tooltip" title="3&nbsp;★★ ratings (50%)">3&nbsp;★★ ratings (50%)<i style="height: 44.0px;"></i></a> </li> <li class="rating-histogram-bar" style="width: 15px; left: 64px"> <a href="/film/prison-a-domicile/ratings/rated/2%C2%BD/by/rating/" class="ir tooltip" title="1&nbsp;★★½ rating (17%)">1&nbsp;★★½ rating (17%)<i style="height: 15.333333333333332px;"></i></a> </li> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 80px" title="No ★★★ ratings"><i style="height: 1px"></i></li> <li class="rating-histogram-bar" style="width: 15px; left: 96px"> <a href="/film/prison-a-domicile/ratings/rated/3%C2%BD/by/rating/" class="ir tooltip" title="1&nbsp;★★★½ rating (17%)">1&nbsp;★★★½ rating (17%)<i style="height: 15.333333333333332px;"></i></a> </li> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 112px" title="No ★★★★ ratings"><i style="height: 1px"></i></li> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 128px" 
title="No ★★★★½ ratings"><i style="height: 1px"></i></li> <li class="rating-histogram-bar tooltip" style="width: 15px; left: 144px" title="No ★★★★★ ratings"><i style="height: 1px"></i></li> </ul> <span class="rating-green rating-green-tiny rating-5"><span class="rating rated-10">★★★★★</span></span> </div> </section>"""

# Name the parser explicitly so the parse tree does not depend on which optional parsers
# (lxml, html5lib) happen to be installed, and so bs4 does not warn about a guessed parser.
soup = BeautifulSoup(r, "html.parser")

def compute_score(soup):
  """Compute the mean rating and the total number of ratings from a rating histogram.

  Args:
    soup: parsed HTML containing a ``.rating-histogram`` element whose ``<ul>`` holds
      one ``<li>`` per bar. The i-th bar (0-based) is taken to represent
      ``(i + 1) / 2`` stars, i.e. 0.5, 1.0, 1.5, ...

  Returns:
    ``{"rating": mean, "count": total}``. ``rating`` is ``None`` when the histogram
    holds no ratings at all, since no mean exists in that case.
  """
  weighted_sum = 0.0
  total = 0

  bars = soup.select_one(".rating-histogram ul").find_all("li")
  for half_stars, bar in enumerate(bars, start=1):
    label = bar.text.strip()
    if not label:
      # A bar without text stands for zero ratings and contributes nothing.
      continue
    # The label starts with the count, followed by a non-breaking space and the stars;
    # drop thousands separators before converting.
    count = int(label.split("\xa0")[0].replace(",", ""))
    weighted_sum += (half_stars / 2.0) * count
    total += count

  if total == 0:
    # Avoid dividing by zero for a film nobody has rated yet.
    return {"rating": None, "count": 0}
  return {"rating": weighted_sum / total, "count": total}

# Sanity check on the saved fixture: (1*1.5 + 3*2.0 + 1*2.5 + 1*3.5) / 6 = 2.25 over 6 ratings.
print(compute_score(soup))

{'rating': 2.25, 'count': 6}


In [199]:
import time
def popular_movies(timeout=10):
  """Return the slugs of the films listed in the ``.-p150`` element of the Letterboxd home page.

  Args:
    timeout: seconds to wait for the HTTP response before giving up.

  Returns:
    list of film slugs (e.g. "bienvenue"), in page order.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    ValueError: if the page contains no ``.-p150`` element.
    KeyError: if a list item lacks the ``data-film-slug`` attribute.
  """
  # Without a timeout requests can block forever on a stalled connection.
  resp = requests.get("https://letterboxd.com", timeout=timeout)
  resp.raise_for_status()
  # Name the parser explicitly so the result does not depend on which optional parsers are installed.
  soup = BeautifulSoup(resp.text, "html.parser")

  container = soup.select_one(".-p150")
  if container is None:
    raise ValueError("could not find the '.-p150' film list on the Letterboxd home page")

  movies = []
  for item in container.find_all("li"):
    # Accept both "/film/bienvenue/" and a bare "bienvenue"; a fixed [6:-1] slice would
    # mangle any value not shaped exactly like the former.
    slug = item["data-film-slug"].strip("/")
    if slug.startswith("film/"):
      slug = slug[len("film/"):]
    movies.append(slug)
  return movies
    

# Print the current metrics of every film returned by popular_movies().
# NOTE: the loop variable rebinds the notebook-level name `movie` assigned in an earlier cell.
for movie in popular_movies():
  print(f"{movie} : {score(movie)}")
  time.sleep(1)  # wait one second between requests (presumably to go easy on the site)

the-quiet-girl : {'rating': 4.0, 'count': 17877, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 46, 254098), 'movie': 'the-quiet-girl'}
inside-2023 : {'rating': 3.32, 'count': 294, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 47, 344525), 'movie': 'inside-2023'}
of-an-age : {'rating': 3.75, 'count': 2699, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 48, 440147), 'movie': 'of-an-age'}
gods-time : {'rating': 3.47, 'count': 192, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 49, 531422), 'movie': 'gods-time'}
m3gan : {'rating': 3.12, 'count': 261843, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 50, 626994), 'movie': 'm3gan'}
infinity-pool : {'rating': 3.31, 'count': 86167, 'timestamp': datetime.datetime(2023, 2, 24, 22, 17, 51, 716937), 'movie': 'infinity-pool'}
