In [6]:
# In this code block, I retrieve the text of the metacritic page
import urllib3
import certifi

# Link to the metacritic page I used (movies with release year 2008, page 1)
link = "https://www.metacritic.com/browse/movie/?releaseYearMin=2008&releaseYearMax=2008&page=1"

# Construct a connection pool configured with certifi's CA bundle
http = urllib3.PoolManager(ca_certs=certifi.where())

# Initiate a web request with a custom user-agent header
r = http.request('GET', link, headers={"user-agent": "Chrome/86.0.4240.111"})

# Stop here on a non-200 reply so that later cells never run their
# regexes against an error page instead of the movie listing
if r.status != 200:
    raise RuntimeError(f"Request to {link} failed with HTTP status {r.status}.")

# Convert the result from raw bytes to text
datastring = r.data.decode("utf-8")

# Report how much data we got from the site
print(f"Fetched {len(datastring)} characters from {link}.")


Fetched 363944 characters from https://www.metacritic.com/browse/movie/?releaseYearMin=2008&releaseYearMax=2008&page=1.


In [30]:
# Pull the movie titles out of the fetched metacritic page text with a regex
import re

# Each title is captured from the data-title attribute of a <div>
titles_expression = '<div data-title="(.*?)"'

# Compile the pattern, then collect every captured title in page order
titles = re.compile(titles_expression).findall(datastring)

# Sanity check: report the number of matches and echo the first one
print(f"Retrieved {len(titles)} movie titles. First match is '{titles[0]}'.")

Retrieved 24 movie titles. First match is 'WALL-E'.


In [36]:
# Pull the movie descriptions out of the fetched metacritic page text with a regex

# Each description is the text of the <span> directly inside the
# c-finderProductCard_description <div>, up to the next '<'
descriptions_expression = '<div class="c-finderProductCard_description"><span>([^<]*)<'

# Compile the pattern, then collect every captured description in page order
descriptions = re.compile(descriptions_expression).findall(datastring)

# Sanity check: report the number of matches and echo the first one
print(f"Retrieved {len(descriptions)} movie descriptions. First match is '{descriptions[0]}'.")

Retrieved 24 movie descriptions. First match is 'After hundreds of lonely years of doing what he was built for, Wall-E discovers a new purpose in life when he meets a sleek search robot named EVE. [Walt Disney Pictures]'.


In [35]:
# Pull the movie release dates out of the fetched metacritic page text with a regex

# The date is the rest of the line that follows the u-text-uppercase <span>
# tag, a newline, and ten spaces of indentation
date_expression = '<span class="u-text-uppercase">\n          (.*)'

# Compile the pattern, then collect every captured date in page order
release_date = re.compile(date_expression).findall(datastring)

# Sanity check: report the number of matches and echo the first one
print(f"Retrieved {len(release_date)} movie release dates. First match is '{release_date[0]}'.")

Retrieved 24 movie release dates. First match is 'Jun 27, 2008'.


In [37]:
# Pull the movie Metascores out of the fetched metacritic page text with a regex

# The score is the token between 'Metascore ' and the next space
# inside the title attribute of a <div>
metascore_expression = '<div title="Metascore (.*?) '

# Compile the pattern, then collect every captured score (as text) in page order
metascore = re.compile(metascore_expression).findall(datastring)

# Sanity check: report the number of matches and echo the first one
print(f"Retrieved {len(metascore)} movie metascores. First match is '{metascore[0]}'.")

Retrieved 24 movie metascores. First match is '95'.


In [38]:
# In this code block, I retrieve the thumbnail URLs of the movies from the text of the metacritic page using regex

# RegEx expression capturing the src attribute of the <img> inside each c-cmsImage <picture>
thumbnails_expression = '<picture class="c-cmsImage"> <img src="(.*?)"'

# Finds matches using expression (raw captures, still containing &amp;)
thumbnails = re.findall(thumbnails_expression, datastring)

# prints how many matches were retrieved
print(f"Retrieved {len(thumbnails)} movie thumbnails - BUT, I now have to replace &amp; with & in the URL!")

# Cleaned copy of every URL with &amp; changed to &; built as a comprehension
# so no loop variable is left behind in the notebook namespace
fixed = [url.replace("&amp;", "&") for url in thumbnails]


Retrieved 24 movie thumbnails - BUT, I now have to replace &amp; with & in the URL!


In [40]:
# Now I print the results in a pretty format
import pandas as pd

# One column per scraped field; every list is expected to have one entry per movie
data = {
    "Title": titles,
    "Date Released": release_date,
    "Metascore": metascore,
    "Description": descriptions,
    # Use the cleaned URLs (`fixed`, where &amp; was replaced with &)
    # rather than the raw regex captures in `thumbnails`
    "Thumbnail": fixed
}

df = pd.DataFrame(data)

# Import style module from pandas (retained from the original cell)
from pandas.io.formats import style

# Display results left-aligned; 'text-align' is the CSS property that controls this
left_aligned_df = df.style.set_properties(**{'text-align': 'left'})
display(left_aligned_df)

Unnamed: 0,Title,Date Released,Metascore,Description,Thumbnail
0,WALL-E,"Jun 27, 2008",95,"After hundreds of lonely years of doing what he was built for, Wall-E discovers a new purpose in life when he meets a sleek search robot named EVE. [Walt Disney Pictures]",https://www.metacritic.com/a/img/resize/2d6454ef8acc62c4b32150898a74c7dd6c01d779/catalog/provider/2/2/2-4edbd70f54caaba2e1896d92332e5523.jpg?auto=webp&fit=cover&height=132&width=88
1,The Hurt Locker,"Jun 26, 2009",95,"When a new sergeant, James (Jeremy Renner), takes over a highly trained bomb disposal team amidst violent conflict, he surprises his two subordinates, Sanborn (Anthony Mackie) and Eldridge (Brian Geraghty), by recklessly plunging them into a deadly game of urban combat. James behaves as if he's indifferent to death. As the men struggle to control their wild new leader, the city explodes into chaos, and James' true character reveals itself in a way that will change each man forever. [Summit Entertainment]",https://www.metacritic.com/a/img/resize/cc33fc019a9e8c0b08b2bc0d2d67330fe79351b4/catalog/provider/2/2/2-adde6969853075cdc993342c0b9221d1.jpg?auto=webp&fit=cover&height=132&width=88
2,Sita Sings the Blues,"Dec 25, 2009",93,"Sita is a goddess separated from her beloved Lord and husband Rama. Nina is an animator whose husband moves to India, then dumps her by email. Three hilarious shadow puppets narrate both ancient tragedy and modern comedy in this beautifully animated interpretation of the Indian epic Ramayana. Set to the 1920's jazz vocals of Annette Hanshaw, Sita Sings the Blues earns its tagline as ""the Greatest Break-Up Story Ever Told.""",https://www.metacritic.com/a/img/resize/2d61205742203f2e592fce84bd0d0d3a00978254/catalog/provider/2/2/2-cba2ff25f20d111f2bccc6beea27e106.jpg?auto=webp&fit=cover&height=132&width=88
3,The Class,"Dec 19, 2008",92,"François and his fellow teachers prepare for a new year at a high school in a tough neighborhood. Armed with the best intentions, they brace themselves to not let discouragement stop them from trying to give the best education to their students. Cultures and attitudes often clash in the classroom, a microcosm of contemporary France. As amusing and inspiring as the teenaged students can be, their difficult behavior can still jeopardize any teacher's enthusiasm for the low-paying job. François insists on an atmosphere of respect and diligence. Neither stuffy nor severe, his extravagant frankness often takes the students by surprise. But his classroom ethics are put to the test when his students begin to challenge his methods. (Sony Classics)",https://www.metacritic.com/a/img/resize/53aa31ad00afe0e7d5c2f7a76fdae08c16f7b446/catalog/provider/2/2/2-76b31c9083d6ba9f630b2183966380d2.jpg?auto=webp&fit=cover&height=132&width=88
4,35 Shots of Rum,"Sep 16, 2009",92,"A widowed conductor, looking forward to retirement, lives with his grown daughter in a Paris suburb. When a neighbor starts to show interest in his ""little girl"", the conductor tries to adjust.",https://www.metacritic.com/a/img/resize/602c5d9343b315e517f61dc99bf713cd8923d3a3/catalog/provider/2/2/2-9ae678e5bdfbc42f2c1a24afe30bb5f6.jpg?auto=webp&fit=cover&height=132&width=88
5,Waltz with Bashir,"Dec 25, 2008",91,"One night at a bar, an old friend tells director Ari about a recurring nightmare in which he is chased by 26 vicious dogs. Every night, the same number of beasts. The two men conclude that there’s a connection to their Israeli Army mission in the first Lebanon War of the early eighties. Ari is surprised that he can’t remember a thing anymore about that period of his life. Intrigued by this riddle, he decides to meet and interview old friends and comrades around the world. He needs to discover the truth about that time and about himself. As Ari delves deeper and deeper into the mystery, his memory begins to creep up in surreal images. [Sony Classics]",https://www.metacritic.com/a/img/resize/befabd95103d13817086fec62cebcb52cc18ead9/catalog/provider/2/2/2-f54ea351385b86b88a94098e752b8b0b.jpg?auto=webp&fit=cover&height=132&width=88
6,Goodbye Solo,"Mar 27, 2009",89,"On the lonely roads of Winston-Salem, North Carolina, two men forge an improbable friendship that will change both of their lives forever. Solo is a Senegalese cab driver working to provide a better life for his young family. William is a tough Southern good ol‘ boy with a lifetime of regrets. One man‘s American dream is just beginning, while the other‘s is quickly winding down. But despite their differences, both men soon realize they need each other more than either is willing to admit. Through this unlikely but unforgettable friendship, Goodbye Solo deftly explores the passing of a generation as well as the rapidly changing face of America. (Roadside Attractions)",https://www.metacritic.com/a/img/resize/875c2e37837fa0198df54bbd7bdd90991e88c98f/catalog/provider/2/2/2-4523bbb63fed7950ed01a885df419094.jpg?auto=webp&fit=cover&height=132&width=88
7,Still Walking,"Aug 21, 2009",89,"Fifteen years ago, Junpei, the youngest son of the Yokoyama family died while rescuing a boy from drowning. On the anniversary of his death, the remaining siblings visit the quaint home of their parents with their families in tow. Over the course of a beautiful day, new relatives become acquainted telling stories and squabbling over sizzling tempura and an elegant graveside ritual is performed for Junpei. (IFC Films)",https://www.metacritic.com/a/img/resize/f3fca107a96abf3e51c0a52dc4f57b8ed7a7d2a7/catalog/provider/2/2/2-7d3ebba92bdfe3156c1f30487cdbf09e.jpg?auto=webp&fit=cover&height=132&width=88
8,Man on Wire,"Jul 25, 2008",89,"On August 7th 1974, a young Frenchman named Philippe Petit stepped out on a wire illegally rigged between New York's twin towers, then the world’s tallest buildings. After nearly an hour dancing on the wire, he was arrested, taken for psychological evaluation, and brought to jail before he was finally released. Following six and a half years of dreaming of the towers, Petit spent eight months in New York City planning the execution of the coup. Aided by a team of friends and accomplices, Petit was faced with numerous extraordinary challenges: he had to find a way to bypass the WTC’s security; smuggle the heavy steel cable and rigging equipment into the towers; pass the wire between the two rooftops; anchor the wire and tension it to withstand the winds and the swaying of the buildings. The rigging was done by night in complete secrecy. At 7:15 AM, Philippe took his first step on the high wire 1,350 feet above the sidewalks of Manhattan. [Magnolia Pictures]",https://www.metacritic.com/a/img/resize/77c99dd651c4878aab87a40ce16235ac47edab4d/catalog/provider/2/2/2-b5aa08a4d39117f96f702134b0312a1e.jpg?auto=webp&fit=cover&height=132&width=88
9,Tulpan,"Apr 1, 2009",88,"Following his Russian naval service, young dreamer Asa returns to his sister’s nomadic brood on the desolate Hunger Steppe to begin a hardscrabble career as a shepherd. But before he can tend a flock of his own, Asa must win the hand of the only eligible bachelorette for miles—his alluringly mysterious neighbor Tulpan. Accompanied by his girlie mag-reading sidekick Boni (and a menagerie of adorable lambs, stampeding camels, mewing kittens and mischievous children), Asa will stop at nothing to prove he is a worthy husband and herder. (Zeitgeist Films)",https://www.metacritic.com/a/img/resize/badbc9c9ae5ebca6172564f4ac6b67572791fac1/catalog/provider/2/2/2-eaea3e7e967a2ef83f1e23e21a558e54.jpg?auto=webp&fit=cover&height=132&width=88
