In [23]:
# Imports
import re
import certifi
import urllib3
import pandas as pd

In [33]:
# Web Scrape
# Downloads the Metacritic listing page and keeps its HTML in `webData`.

# Connection pool; TLS certificates are checked against certifi's CA bundle.
http = urllib3.PoolManager(ca_certs=certifi.where())

link = "https://www.metacritic.com/browse/movies/score/metascore/year/filtered?year_selected=2014"

# Browser-like User-Agent sent with the request. The original author added it
# in the hope of keeping the scrape from getting stuck.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/109.0.0.0 Safari/537.36"

# Web Request. An explicit timeout is passed so an unresponsive server raises
# an error instead of blocking the cell indefinitely.
r = http.request(
    'GET',
    link,
    headers={"User-Agent": user_agent},
    timeout=urllib3.Timeout(connect=10.0, read=30.0),
)

# Convert the response bytes to text (raises UnicodeDecodeError on invalid UTF-8).
webData = str(r.data, "utf-8")

# Verify Data
print(f"Request Successful: {r.status == 200}")
print(f"Characters Scraped: {len(webData)}")

Request Successful: True
Characters Scraped: 512496


In [61]:
# Regular Expressions. Pull each field out of the scraped HTML into its own list.
# The lists are later combined by position, so every pattern is expected to
# match once per movie, in page order.
#
# Patterns are raw strings: sequences such as \s and \S are regex escapes, not
# valid Python string escapes, and trigger a warning in ordinary string literals.
# re.findall already returns a list, so no extra list() wrapper is needed.

titles = re.findall(r'class="title"><h3>(.*)</h3></a>', webData)
dates = re.findall(r'<span>(.*,\s2014)</span>', webData)
scores = re.findall(
    r'class="clamp-score-wrap">\s*<a class="metascore_anchor" href=".*">\n'
    r'<div class="metascore_w large movie positive.*">(.*)</div>',
    webData,
)
# The summary text starts on the line after the opening tag, behind fixed indentation.
summaries = re.findall(r'class="summary">\n                        (.*)', webData)
# Capture only up to the closing quote of src; a greedy (.*) would run on to the
# last quote on the line and swallow any attributes that follow the URL.
thumbnails = re.findall(r'<a href="/movie/\S*"><img src="([^"]*)"', webData)

# Verify RegEx works. Should find 100 each.
print(f"Found {len(titles)} Titles, {len(dates)} Dates, {len(scores)} Scores, {len(summaries)} Summaries, {len(thumbnails)} Images")

Found 100 Titles, 100 Dates, 100 Scores, 100 Summaries, 100 Images


In [68]:
# Data Visualization
# Combine the scraped lists into one table (one row per movie) and print it.

# Display option: centre the column headers in the printed table.
pd.set_option('display.colheader_justify', 'center')

column_names = ['Title', 'Date', 'Score', 'Summary', 'Thumbnail URL']
column_values = [titles, dates, scores, summaries, thumbnails]

# Column name -> list of scraped values, in the order the columns should appear.
data = dict(zip(column_names, column_values))

df = pd.DataFrame(data)

# Number the rows from 1 instead of 0.
df.index = df.index + 1

print(df)

                         Title                               Date        Score                      Summary                                         Thumbnail URL                   
1                                            Boyhood       July 11, 2014   100  Filmed over 12 years with the same cast, Richa...  https://static.metacritic.com/images/products/...
2                                         Mr. Turner   December 19, 2014    94  Mr. Turner explores the last quarter century o...  https://static.metacritic.com/images/products/...
3                                          Leviathan   December 25, 2014    92  Kolia lives in a small town near the Barents S...  https://static.metacritic.com/images/products/...
4                                                Ida         May 2, 2014    91  Poland, 1962. Anna (Agata Trzebuchowska), an e...  https://static.metacritic.com/images/products/...
5                                            Big Men      March 14, 2014    90  Big Men looks a