# Scpraping


### We use the Box Office Mojo https://www.boxofficemojo.com/ to getall the  2010-2019 movies data.

In [1]:
import requests
from requests import get
from bs4 import BeautifulSoup
from IPython.core.display import display, HTML
import pandas as pd
import numpy as np
import re
import dateutil.parser
import seaborn as sns

from time import sleep
from random import randint

### Define general functions

In [2]:
def get_movie_value(soup, field):
    obj = soup.find(text=re.compile(field))
    if not obj:
        return None
    next_element = obj.findNext()
    if next_element:
        return next_element.text
    else:
        return None
    
def to_date(datestring):
    date = dateutil.parser.parse(datestring)
    return date

### Define atribute functions

In [3]:
def get_movie_title(soup):
    title_element = soup.find("title")
    if not title_element:
        return None

    title_text = title_element.text
    if "-" in title_text:
        title_parts = title_text.split("-")
        if title_parts.count == 2:
            return title_parts[0].strip()
        else:
            return "-".join(title_parts[0:-1]).strip()
    else:
        return title_text.strip()


def get_movie_runtime(soup):
    runtimestring = get_movie_value(soup, "Running")
    try:
        runtime = runtimestring.split()
        minutes = int(runtime[0]) * 60 + int(runtime[2])
        return minutes
    except:
        return None


def get_movie_widest_release(soup):
    widest_release_value = get_movie_value(soup, "Widest Release")
    if not widest_release_value:
        return None
    widest_release_value = widest_release_value.replace(",", "").split(" ")[0]
    widest_release_value = widest_release_value.strip()
    return int(widest_release_value)    
    
    
def get_movie_domestic_gross(soup):
    moneystring = (soup.find(class_="mojo-performance-summary-table").find_all("span", class_="money")[0].text)
    if not moneystring:
        return None
    return int(moneystring.replace("$", "").replace(",", ""))


def get_movie_worldwide_gross(soup):
    moneystring = (soup.find(class_="mojo-performance-summary-table").find_all("span", class_="money")[-1].text)
    if not moneystring:
        return None
    return int(moneystring.replace("$", "").replace(",", ""))

def get_movie_distributor(soup):
    distributor1 = get_movie_value(soup, "Distributor")
    distributor = distributor1.split('See')[0]
    if not distributor:
        return None
    return distributor.strip()

def get_movie_opening_gross(soup):
    element1 = soup.find(class_="a-section a-spacing-none mojo-summary-values mojo-hidden-from-mobile")
    if not element1:
        return None
    element2 = element1.find("span", class_="money")
    if not element2:
        return None
    opening = element2.text.replace("$", "").replace(",", "")
    return int(opening)

def get_movie_budget(soup):
    budget1 = get_movie_value(soup, "Budget")
    try:
        budget = budget1.replace("$", "").replace(",", "")
        return int(budget)
    except:
        return None

def get_release_date(soup):
    date = get_movie_value(soup, "Release Date").split('-')[0].split('(')[0].strip()
    date = to_date(date)
    if not date:
        return None
    return date #.strip()    

def get_movie_rating(soup):
    rating_value = get_movie_value(soup, "MPAA")
    if not rating_value:
        return None
    return rating_value.strip()

def get_movie_genres(soup):
    genres_value = get_movie_value(soup, "Genres")
    if not genres_value:
        return None
    #genres_value = genres_value.replace('\n    \n       ','')
    return genres_value.split()#.strip()

### Combining all those attributes

In [4]:
def get_movie_data(url):
    response = requests.get(url)
    page = response.text
    soup = BeautifulSoup(page, "lxml")

    title = get_movie_title(soup)
    domestic_gross = get_movie_domestic_gross(soup)
    #international_gross = get_movie_international_gross(soup)
    worldwide_gross = get_movie_worldwide_gross(soup)
    movie_distributor = get_movie_distributor(soup)
    movie_opening_gross = get_movie_opening_gross(soup)
    movie_budget =get_movie_budget(soup)
    release_date = get_release_date(soup)
    runtime = get_movie_runtime(soup)
    movie_widest_release = get_movie_widest_release(soup)
    rating = get_movie_rating(soup)
    movie_genres = get_movie_genres(soup)

    headers = ["title", "domestic", "worldwide", "distributor", "opening", "budget", "date", "runtime", "widest", "rating", "genres"] #"international",
    movie_dict = dict(zip(headers, [title, domestic_gross, worldwide_gross, movie_distributor, movie_opening_gross, movie_budget, release_date, runtime, movie_widest_release, rating, movie_genres])) #international_gross

    return movie_dict

### Scrape the atribues

In [5]:
def get_urls():
    # Create a list of all urls
    urls = []
    for year in range(2010, 2020):
        url = (
            "https://www.boxofficemojo.com/year/"
            + str(year)
            + "/?grossesOption=totalGrosses"
        )
        urls.append(url)
        
        
        all_titles = []
    for url in urls:
        page = requests.get(url)
        soup = BeautifulSoup(page.text, "lxml")
        titles = soup.find_all(
            "td", class_="a-text-left mojo-field-type-release mojo-cell-wide"
        )
        for title in titles:
            all_titles.append(
                "https://www.boxofficemojo.com/" + title.select("a")[0].get("href")
            )
    return all_titles


urls = get_urls()

# Use thise code to verify the scraping process
"""count = 1
for url in urls:
    print(f"{count}: {url}")
    movie_data = get_movie_data(url)
    print(movie_data)
    count = count + 1"""
    #sleep(randint(1, 2))

'count = 1\nfor url in urls:\n    print(f"{count}: {url}")\n    movie_data = get_movie_data(url)\n    print(movie_data)\n    count = count + 1'

### Create a dicctionary

In [6]:
movies_list = []
for url in urls:
    movies_list.append(get_movie_data(url))
    
movies_list

[{'title': 'Toy Story 3',
  'domestic': 415004880,
  'worldwide': 1066969703,
  'distributor': 'Walt Disney Studios Motion Pictures',
  'opening': 110307189,
  'budget': 200000000,
  'date': datetime.datetime(2010, 6, 18, 0, 0),
  'runtime': 103,
  'widest': 4028,
  'rating': None,
  'genres': ['Adventure', 'Animation', 'Comedy', 'Family', 'Fantasy']},
 {'title': 'Alice in Wonderland',
  'domestic': 334191110,
  'worldwide': 1025467110,
  'distributor': 'Walt Disney Studios Motion Pictures',
  'opening': 116101023,
  'budget': 200000000,
  'date': datetime.datetime(2010, 3, 5, 0, 0),
  'runtime': 108,
  'widest': 3739,
  'rating': 'PG',
  'genres': ['Adventure', 'Family', 'Fantasy']},
 {'title': 'Iron Man 2',
  'domestic': 312433331,
  'worldwide': 623933331,
  'distributor': 'Paramount Pictures',
  'opening': 128122480,
  'budget': 200000000,
  'date': datetime.datetime(2010, 5, 7, 0, 0),
  'runtime': 124,
  'widest': 4390,
  'rating': 'PG-13',
  'genres': ['Action', 'Adventure', 'Sci

## Simple EDA and Baseline Model

### Create a DataFrame (df)

In [7]:
df = pd.DataFrame(movies_list)
df.set_index('title', inplace=True)

df

Unnamed: 0_level_0,domestic,worldwide,distributor,opening,budget,date,runtime,widest,rating,genres
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Toy Story 3,415004880,1066969703,Walt Disney Studios Motion Pictures,110307189.0,200000000.0,2010-06-18,103.0,4028.0,,"[Adventure, Animation, Comedy, Family, Fantasy]"
Alice in Wonderland,334191110,1025467110,Walt Disney Studios Motion Pictures,116101023.0,200000000.0,2010-03-05,108.0,3739.0,PG,"[Adventure, Family, Fantasy]"
Iron Man 2,312433331,623933331,Paramount Pictures,128122480.0,200000000.0,2010-05-07,124.0,4390.0,PG-13,"[Action, Adventure, Sci-Fi]"
The Twilight Saga: Eclipse,300531751,698491347,Summit Entertainment,64832191.0,68000000.0,2010-06-30,124.0,4468.0,PG-13,"[Action, Adventure, Drama, Fantasy, Romance, T..."
Harry Potter and the Deathly Hallows: Part 1,295983305,976536918,Warner Bros.,125017372.0,,2010-11-19,146.0,4125.0,PG-13,"[Adventure, Family, Fantasy, Mystery]"
...,...,...,...,...,...,...,...,...,...,...
Red Joan,1579730,10647493,IFC Films,38949.0,,2019-04-19,101.0,195.0,R,"[Biography, Drama, History, Romance]"
After the Wedding,1574258,2790019,Sony Pictures Classics,50264.0,,2019-08-09,112.0,428.0,PG-13,[Drama]
Extreme Job,1565885,119932733,CJ Entertainment,42587.0,,2019-01-25,111.0,26.0,,"[Action, Comedy, Crime]"
The Kid,1508593,1553483,Lionsgate,514286.0,,2019-03-08,100.0,268.0,R,"[Biography, Drama, Western]"


In [8]:
# Necessary imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_formats = ['svg']  # or svg
%matplotlib inline

sns.set(context='notebook', style='whitegrid', font_scale=1.2)

In [9]:
df.dtypes

domestic                int64
worldwide               int64
distributor            object
opening               float64
budget                float64
date           datetime64[ns]
runtime               float64
widest                float64
rating                 object
genres                 object
dtype: object

In [10]:
## Examine the columns, look at missing data
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2000 entries, Toy Story 3 to El Chicano
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   domestic     2000 non-null   int64         
 1   worldwide    2000 non-null   int64         
 2   distributor  1999 non-null   object        
 3   opening      1981 non-null   float64       
 4   budget       1187 non-null   float64       
 5   date         2000 non-null   datetime64[ns]
 6   runtime      1947 non-null   float64       
 7   widest       1998 non-null   float64       
 8   rating       1847 non-null   object        
 9   genres       1996 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(2), object(3)
memory usage: 171.9+ KB


### Remove/drop null-values

In [11]:
df = df.dropna()

In [12]:
df.to_csv('data/movies')
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1151 entries, Alice in Wonderland to The Wizard of Oz 2019 Re-release
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   domestic     1151 non-null   int64         
 1   worldwide    1151 non-null   int64         
 2   distributor  1151 non-null   object        
 3   opening      1151 non-null   float64       
 4   budget       1151 non-null   float64       
 5   date         1151 non-null   datetime64[ns]
 6   runtime      1151 non-null   float64       
 7   widest       1151 non-null   float64       
 8   rating       1151 non-null   object        
 9   genres       1151 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(2), object(3)
memory usage: 98.9+ KB


### Create quantitative DataFrame (df_num)

- Domestic Release
- Worldwide Release
- Opening
- Budget
- Runtime
- Widest Release

In [13]:
df_num= df.loc[:,["worldwide", "domestic", "opening", "budget", "runtime", "widest"]]

In [14]:
df_num.head()

Unnamed: 0_level_0,worldwide,domestic,opening,budget,runtime,widest
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alice in Wonderland,1025467110,334191110,116101023.0,200000000.0,108.0,3739.0
Iron Man 2,623933331,312433331,128122480.0,200000000.0,124.0,4390.0
The Twilight Saga: Eclipse,698491347,300531751,64832191.0,68000000.0,124.0,4468.0
Inception,826137188,292576195,62785337.0,160000000.0,148.0,3792.0
Despicable Me,543113985,251513985,56397125.0,69000000.0,95.0,3602.0


In [15]:
df_num.to_csv('data/movies_num')

### Create Qualitative DataFrame (df_cat)

We use dummy variables function for this.

- Distrubutor
- MPAA Rating
- Genres
- Release Date

Distributor

In [16]:
#df_cat = df

In [17]:
dist_quantity = df['distributor'].value_counts()
dist_quantity

Warner Bros.                             159
Universal Pictures                       151
Twentieth Century Fox                    132
Sony Pictures Entertainment (SPE)         95
Paramount Pictures                        94
Lionsgate                                 86
Walt Disney Studios Motion Pictures       81
Screen Gems                               35
Focus Features                            29
Relativity Media                          28
Open Road Films (II)                      22
STX Entertainment                         22
Sony Pictures Classics                    21
Fox Searchlight Pictures                  20
TriStar Pictures                          20
The Weinstein Company                     20
Summit Entertainment                      13
Roadside Attractions                      11
DreamWorks                                10
CBS Films                                 10
FilmDistrict                              10
Dimension Films                            8
A24       

Let's group distributor by less than 9 movies so we can avoid the oulies of the smaller distrubutors.

_Note:_ Seems to be a lot a distrubutor with few films. This outlier remember we are using movie data from 2010-2019 so during these years video streaming services like Netflix got very popular and people began to spend less time and movie at movie theather and more time and movie at streaming services. This is something we can analize in a second part of the current project. let's keep focused on this part for now.

In [18]:
dist_quantity =  df['distributor'].value_counts()
dist_quantity[dist_quantity <= 8]

other_dist = list(dist_quantity[dist_quantity<=8].index)

In [19]:
df_distributor = df.copy()
df_distributor['distributor'] = df['distributor'].replace(other_dist, 'Other')

In [20]:
df_distributor['distributor'].value_counts()

Warner Bros.                           159
Universal Pictures                     151
Twentieth Century Fox                  132
Sony Pictures Entertainment (SPE)       95
Paramount Pictures                      94
Lionsgate                               86
Other                                   82
Walt Disney Studios Motion Pictures     81
Screen Gems                             35
Focus Features                          29
Relativity Media                        28
STX Entertainment                       22
Open Road Films (II)                    22
Sony Pictures Classics                  21
TriStar Pictures                        20
The Weinstein Company                   20
Fox Searchlight Pictures                20
Summit Entertainment                    13
Roadside Attractions                    11
DreamWorks                              10
CBS Films                               10
FilmDistrict                            10
Name: distributor, dtype: int64

In [21]:
df_distributor = pd.get_dummies(df['distributor'], drop_first=True)
df_distributor.head()

Unnamed: 0_level_0,ATO Pictures,Affirm Films,Anchor Bay Films,Annapurna Pictures,Apparition,Aviron Pictures,BH Tilt,Bleecker Street Media,Broad Green Pictures,CBS Films,...,Summit Entertainment,The Samuel Goldwyn Company,The Weinstein Company,TriStar Pictures,Twentieth Century Fox,United Artists Releasing,Universal Pictures,Vivendi Entertainment,Walt Disney Studios Motion Pictures,Warner Bros.
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alice in Wonderland,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Iron Man 2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
The Twilight Saga: Eclipse,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
Inception,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
Despicable Me,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [22]:
df_distributor.to_csv('data/distributor')

MPAA Rating

In [23]:
df['rating'].unique()

array(['PG', 'PG-13', 'R', 'G'], dtype=object)

In [24]:
df['rating'].value_counts()

PG-13    499
R        442
PG       202
G          8
Name: rating, dtype: int64

In [25]:
df_rating = pd.get_dummies(df['rating'], drop_first=True)
df_rating.head()

Unnamed: 0_level_0,PG,PG-13,R
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice in Wonderland,1,0,0
Iron Man 2,0,1,0
The Twilight Saga: Eclipse,0,1,0
Inception,0,1,0
Despicable Me,1,0,0


In [26]:
df_rating.to_csv('data/rating')

Genres

In [27]:
df['genres'].head()

title
Alice in Wonderland                                [Adventure, Family, Fantasy]
Iron Man 2                                          [Action, Adventure, Sci-Fi]
The Twilight Saga: Eclipse    [Action, Adventure, Drama, Fantasy, Romance, T...
Inception                                 [Action, Adventure, Sci-Fi, Thriller]
Despicable Me                       [Animation, Comedy, Crime, Family, Fantasy]
Name: genres, dtype: object

In [28]:
type(df['genres'])

pandas.core.series.Series

In [29]:
type(df.genres[1])

list

In [43]:
df['genres'] = df.genres.astype(str)
df['genres'] = df['genres'].str.replace('[','').str.replace(']','').str.replace("'","")
df['genres']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genres'] = df.genres.astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genres'] = df['genres'].str.replace('[','').str.replace(']','').str.replace("'","")


title
Alice in Wonderland                                        Adventure, Family, Fantasy
Iron Man 2                                                  Action, Adventure, Sci-Fi
The Twilight Saga: Eclipse          Action, Adventure, Drama, Fantasy, Romance, Th...
Inception                                         Action, Adventure, Sci-Fi, Thriller
Despicable Me                               Animation, Comedy, Crime, Family, Fantasy
                                                          ...                        
The Best of Enemies                                         Biography, Drama, History
Motherless Brooklyn                                             Crime, Drama, Mystery
The Goldfinch                                                                   Drama
Don't Let Go                                 Drama, Horror, Mystery, Sci-Fi, Thriller
The Wizard of Oz 2019 Re-release                  Adventure, Family, Fantasy, Musical
Name: genres, Length: 1151, dtype: object

In [44]:
df_genres = df['genres'].str.get_dummies(sep=', ')

In [45]:
df_genres

Unnamed: 0_level_0,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,...,Horror,Music,Musical,Mystery,Romance,Sci-Fi,Sport,Thriller,War,Western
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alice in Wonderland,0,1,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
Iron Man 2,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
The Twilight Saga: Eclipse,1,1,0,0,0,0,0,1,0,1,...,0,0,0,0,1,0,0,1,0,0
Inception,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
Despicable Me,0,0,1,0,1,1,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
The Best of Enemies,0,0,0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Motherless Brooklyn,0,0,0,0,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0
The Goldfinch,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
Don't Let Go,0,0,0,0,0,0,0,1,0,0,...,1,0,0,1,0,1,0,1,0,0


In [46]:
df_genres.to_csv('data/genres')

Release Date

In [47]:
def date_to_season(date):
    winter = [12, 1, 2]
    spring = [3, 4, 5]
    summer = [6, 7, 8]
    fall = [9, 10, 11]
    
    if date.month in winter:
        return "winter"
    elif date.month in spring:
        return "spring"
    elif date.month in summer:
        return "summer"
    else:
        return "fall"

In [49]:
df['date_to_season'] = df['date'].apply(date_to_season)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date_to_season'] = df['date'].apply(date_to_season)


In [51]:
df.head()

Unnamed: 0_level_0,domestic,worldwide,distributor,opening,budget,date,runtime,widest,rating,genres,date_to_season
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alice in Wonderland,334191110,1025467110,Walt Disney Studios Motion Pictures,116101023.0,200000000.0,2010-03-05,108.0,3739.0,PG,"Adventure, Family, Fantasy",spring
Iron Man 2,312433331,623933331,Paramount Pictures,128122480.0,200000000.0,2010-05-07,124.0,4390.0,PG-13,"Action, Adventure, Sci-Fi",spring
The Twilight Saga: Eclipse,300531751,698491347,Summit Entertainment,64832191.0,68000000.0,2010-06-30,124.0,4468.0,PG-13,"Action, Adventure, Drama, Fantasy, Romance, Th...",summer
Inception,292576195,826137188,Warner Bros.,62785337.0,160000000.0,2010-07-16,148.0,3792.0,PG-13,"Action, Adventure, Sci-Fi, Thriller",summer
Despicable Me,251513985,543113985,Universal Pictures,56397125.0,69000000.0,2010-07-09,95.0,3602.0,PG,"Animation, Comedy, Crime, Family, Fantasy",summer


In [52]:
df_season = pd.get_dummies(df['date_to_season'], drop_first=True)
df_season.head()

Unnamed: 0_level_0,spring,summer,winter
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice in Wonderland,1,0,0
Iron Man 2,1,0,0
The Twilight Saga: Eclipse,0,1,0
Inception,0,1,0
Despicable Me,0,1,0


In [53]:
df_season.to_csv('data/season')