# Fetch the contents of a webpage

In [163]:

# Import the urllib3 and certifi libraries (install each one with: pip install <library>)
import urllib3
import certifi

# Create a connection pool for HTTP requests. Certificate verification is
# requested explicitly and checked against the CA bundle shipped with certifi.
http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where())

# The page to fetch and the browser identity sent in the User-Agent header.
link = 'https://www.metacritic.com/browse/movies/score/metascore/year/filtered?year_selected=2009&sort=desc&view=detailed'
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0"

# Request the page. The timeout keeps the cell from hanging forever when the
# server does not answer.
response = http.request(
    'GET',
    link,
    headers={"User-Agent": user_agent},
    timeout=urllib3.Timeout(connect=10.0, read=30.0),
)

# Stop here on anything other than 200 so that an error page is not silently
# passed on to the regular-expression cells below.
if response.status != 200:
    raise RuntimeError(f"Request to {link} failed with HTTP status {response.status}")

page_content = response.data.decode('utf-8')

# To inspect the result, run:
# print(f"Fetched {len(page_content)} characters from {link} (status: {response.status})")

Fetched 521387 characters from https://www.metacritic.com/browse/movies/score/metascore/year/filtered?year_selected=2009&sort=desc&view=detailed (status: 200)


# Exploring facts within text using Regular Expressions

In [167]:
 
import re

# Each pattern below was developed on regex101 against the fetched page.
# Quantifiers are kept non-greedy (or limited to characters that cannot close
# the surrounding tag/attribute) so that a match can never run across several
# elements that happen to share one line of HTML.

# Movie titles: the text of the <h3> inside the title link.
regular_expression_title = r'class="title"><h3>(.*?)<\/h3><\/a>'
titles = re.findall(regular_expression_title, page_content)

# Release dates: a <span> whose text ends in ", 2009" (the year requested in the URL).
regular_expression_date = r'<span>([^<\n]*,\s2009)<\/span>'
release_date = re.findall(regular_expression_date, page_content)

# Metascores: the text of the large score badge that precedes the numbered title.
# Any trailing class after "movie " is accepted rather than only "positive", so a
# differently classified score still yields a match and the lists stay aligned.
# NOTE(review): the other class names used by the site are assumed, not verified.
regular_expression_metascore = r'class="metascore_w large movie [^"]*">(.*?)<\/div>\s*<\/a>\s*<\/div>\s*<span class="title numbered">'
metascore = re.findall(regular_expression_metascore, page_content)

# Thumbnail URLs: the src attribute of the image inside each /movie/ link.
regular_expression_url = r'<a href="\/movie\/[^"]*"><img src="([^"]*)" alt="[^"]*" \/><\/a>'
url = re.findall(regular_expression_url, page_content)

# Descriptions: the rest of the first non-blank line after the summary <div> opens.
regular_expression_description1 = r'<div class="summary">\s*(.*)'
description1 = re.findall(regular_expression_description1, page_content)

# To inspect any of the result lists, run for example:
# print(f"Found {len(titles)} matches.  The first match is '{titles[0]}'.")

Found 100 matches.  The first match is 'The Hurt Locker'.
Found 100 matches.  The first match is 'June 26, 2009'.
Found 100 matches.  The first match is '95'.
Found 100 matches.  The first match is 'https://static.metacritic.com/images/products/movies/0/c4d2cbc9f18e46e1f43623bc8d2ef510-98.jpg'.
Found 100 matches.  The first match is 'When a new sergeant, James (Jeremy Renner), takes over a highly trained bomb disposal team amidst violent conflict, he surprises his two subordinates, Sanborn (Anthony Mackie) and Eldridge (Brian Geraghty), by recklessly plunging them into a deadly game of urban combat. James behaves as if he's indifferent to death. As the men struggle to control their wild new leader, the city explodes into chaos, and James' true character reveals itself in a way that will change each man forever. [Summit Entertainment]'.


# Formatting Output using Pandas DataFrames

In [170]:

import pandas as pd

# Map each column heading to the list of regex matches that fills it.
# ("Movie Title" previously carried a stray leading space, which made
# df["Movie Title"] raise a KeyError.)
dataset = {
    "Movie Title": titles,
    "Release Date": release_date,
    "Metascore": metascore,
    "Thumbnail URL": url,
    "Description": description1,
}

# The lists come from independent regex searches, so they only line up row by
# row when every search found the same number of matches. Fail with a message
# that names the offending column instead of pandas' generic length error.
column_lengths = {name: len(values) for name, values in dataset.items()}
if len(set(column_lengths.values())) != 1:
    raise ValueError(f"Scraped columns have different lengths: {column_lengths}")

# Build the table and number the rows from 1 instead of 0.
df = pd.DataFrame(dataset)
df.index += 1

# Show every row and column. option_context lifts the display limits only for
# this one call; pd.set_option('display.max_rows', ...) would change them for
# the whole session instead.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df)




Unnamed: 0,Movie Title,Release Date,Metascore,Thumbnail URL,Description
1,The Hurt Locker,"June 26, 2009",95,https://static.metacritic.com/images/products/...,"When a new sergeant, James (Jeremy Renner), ta..."
2,Sita Sings the Blues,"December 25, 2009",93,https://static.metacritic.com/images/products/...,Sita is a goddess separated from her beloved L...
3,35 Shots of Rum,"September 16, 2009",92,https://static.metacritic.com/images/products/...,"A widowed conductor, looking forward to retire..."
4,Still Walking,"August 21, 2009",89,https://static.metacritic.com/images/products/...,"Fifteen years ago, Junpei, the youngest son of..."
5,Goodbye Solo,"March 27, 2009",89,https://static.metacritic.com/images/products/...,"On the lonely roads of Winston-Salem, North Ca..."
6,Tulpan,"April 1, 2009",88,https://static.metacritic.com/images/products/...,"Following his Russian naval service, young dre..."
7,Up,"May 29, 2009",88,https://static.metacritic.com/images/products/...,Up is a comedy adventure about 78-year-old bal...
8,Gomorrah,"February 13, 2009",87,https://static.metacritic.com/images/products/...,"Power, money and blood: these are the values t..."
9,A Serious Man,"October 2, 2009",86,https://static.metacritic.com/images/products/...,A Serious Man is the story of an ordinary man’...
10,The Beaches of Agnès,"July 1, 2009",86,https://static.metacritic.com/images/products/...,"A reflection on art, life and the movies, The ..."


In [171]:
# Left-align both the cell contents and the header cells of the table.
# The pandas Styler used here needs the jinja2 library to be installed.
header_rule = {'selector': 'th', 'props': [('text-align', 'left')]}
df_style = (
    df.style
    .set_properties(**{'text-align': 'left'})
    .set_table_styles([header_rule])
)
# Render the styled table with: display(df_style)


Unnamed: 0,Movie Title,Release Date,Metascore,Thumbnail URL,Description
1,The Hurt Locker,"June 26, 2009",95,https://static.metacritic.com/images/products/movies/0/c4d2cbc9f18e46e1f43623bc8d2ef510-98.jpg,"When a new sergeant, James (Jeremy Renner), takes over a highly trained bomb disposal team amidst violent conflict, he surprises his two subordinates, Sanborn (Anthony Mackie) and Eldridge (Brian Geraghty), by recklessly plunging them into a deadly game of urban combat. James behaves as if he's indifferent to death. As the men struggle to control their wild new leader, the city explodes into chaos, and James' true character reveals itself in a way that will change each man forever. [Summit Entertainment]"
2,Sita Sings the Blues,"December 25, 2009",93,https://static.metacritic.com/images/products/movies/7/8885d152c0d05eae0d4bd367da70ce80-98.jpg,"Sita is a goddess separated from her beloved Lord and husband Rama. Nina is an animator whose husband moves to India, then dumps her by email. Three hilarious shadow puppets narrate both ancient tragedy and modern comedy in this beautifully animated interpretation of the Indian epic Ramayana. Set to the 1920's jazz vocals of Annette Hanshaw, Sita Sings the Blues earns its tagline as ""the Greatest Break-Up Story Ever Told."""
3,35 Shots of Rum,"September 16, 2009",92,https://static.metacritic.com/images/products/movies/4/f992fc8242586a29353491fb269fb38c-98.jpg,"A widowed conductor, looking forward to retirement, lives with his grown daughter in a Paris suburb. When a neighbor starts to show interest in his ""little girl"", the conductor tries to adjust."
4,Still Walking,"August 21, 2009",89,https://static.metacritic.com/images/products/movies/5/6119c74122f16758cd6c74c370413c5b-98.jpg,"Fifteen years ago, Junpei, the youngest son of the Yokoyama family died while rescuing a boy from drowning. On the anniversary of his death, the remaining siblings visit the quaint home of their parents with their families in tow. Over the course of a beautiful day, new relatives become acquainted telling stories and squabbling over sizzling tempura and an elegant graveside ritual is performed for Junpei. (IFC Films)"
5,Goodbye Solo,"March 27, 2009",89,https://static.metacritic.com/images/products/movies/1/4d2fea77ca17ad03c29bf25dac503d7a-98.jpg,"On the lonely roads of Winston-Salem, North Carolina, two men forge an improbable friendship that will change both of their lives forever. Solo is a Senegalese cab driver working to provide a better life for his young family. William is a tough Southern good ol‘ boy with a lifetime of regrets. One man‘s American dream is just beginning, while the other‘s is quickly winding down. But despite their differences, both men soon realize they need each other more than either is willing to admit. Through this unlikely but unforgettable friendship, Goodbye Solo deftly explores the passing of a generation as well as the rapidly changing face of America. (Roadside Attractions)"
6,Tulpan,"April 1, 2009",88,https://static.metacritic.com/images/products/movies/4/0b3b152667150d3bdf386b9708560de4-98.jpg,"Following his Russian naval service, young dreamer Asa returns to his sister’s nomadic brood on the desolate Hunger Steppe to begin a hardscrabble career as a shepherd. But before he can tend a flock of his own, Asa must win the hand of the only eligible bachelorette for miles—his alluringly mysterious neighbor Tulpan. Accompanied by his girlie mag-reading sidekick Boni (and a menagerie of adorable lambs, stampeding camels, mewing kittens and mischievous children), Asa will stop at nothing to prove he is a worthy husband and herder. (Zeitgeist Films)"
7,Up,"May 29, 2009",88,https://static.metacritic.com/images/products/movies/8/cd07a97dba892bd870dffb1b63a87a5c-98.jpg,"Up is a comedy adventure about 78-year-old balloon salesman Carl Fredricksen, who finally fulfills his lifelong dream of a great adventure when he ties thousands of balloons to his house and flies away to the wilds of South America. But he discovers all too late that his biggest nightmare has stowed away on the trip: an overly optimistic 9-year-old Wilderness Explorer named Russell. (Walt Disney Pictures)"
8,Gomorrah,"February 13, 2009",87,https://static.metacritic.com/images/products/movies/9/af6fee3537c8cbb1e680301e89112054-98.jpg,"Power, money and blood: these are the values that the residents of the province of Naples and Caserta confront every day. They have practically no choice, and are forced to obey the rules of the ""System,"" the Camorra. Only a lucky few can even think of leading a normal life. Five stories are woven together in this violent scenario, set in a cruel and ostensibly invented world, but one that is deeply rooted in reality. (IFC Films)"
9,A Serious Man,"October 2, 2009",86,https://static.metacritic.com/images/products/movies/4/bcc3781dcfbf06df3d556ef69e7d8b4f-98.jpg,"A Serious Man is the story of an ordinary man’s search for clarity in a universe where Jefferson Airplane is on the radio and F-Troop is on TV. It is 1967, and Larry Gopnik, a physics professor at a quiet Midwestern university, has just been informed by his wife Judith that she is leaving him. She has fallen in love with one of his more pompous acquaintances, Sy Ableman, who seems to her a more substantial person than the feckless Larry. Larry’s unemployable brother Arthur is sleeping on the couch, his son Danny is a discipline problem and a shirker at Hebrew school, and his daughter Sarah is filching money from his wallet in order to save up for a nose job. Struggling for equilibrium, Larry seeks advice from three different rabbis. Can anyone help him cope with his afflictions and become a righteous person – a mensch – a serious man? (Focus Features)"
10,The Beaches of Agnès,"July 1, 2009",86,https://static.metacritic.com/images/products/movies/9/869484982fb0c5f433f4c3e201bb3a98-98.jpg,"A reflection on art, life and the movies, The Beaches of Agnes is a magnificent new film from the great Agnes Varda, a richly cinematic self portrait that touches on everything from the feminist movement and the black panthers to the films of husband Jacques Demy and the birth of the French New Wave. (Cinema Guild)"
