# Imports

In [3]:
from bs4 import BeautifulSoup as bs
import pandas as pd
import help_functions as hf

# Catching movie rating

In [4]:
# Loading dataframe stored in pickle file
data = hf.load_data_pickle('dataframe_disney_cleaned.pickle')
print(data[0:5])


                             title       Production company  \
0          Academy Award Review of  Walt Disney Productions   
1  Snow White and the Seven Dwarfs  Walt Disney Productions   
2                        Pinocchio  Walt Disney Productions   
3                         Fantasia  Walt Disney Productions   
4             The Reluctant Dragon  Walt Disney Productions   

       Distributed by Release date  Running time        Country Language  \
0      United Artists   1937-05-19            41  United States  English   
1  RKO Radio Pictures          NaT            83  United States  English   
2  RKO Radio Pictures          NaT            88  United States  English   
3  RKO Radio Pictures   1940-11-13           126  United States  English   
4  RKO Radio Pictures   1941-06-27            74  United States  English   

   Box office                                        Directed by  \
0        45.0                                                NaN   
1       418.0  [David Hand, 

In [5]:
# Transforming the DataFrame into a Pandas DataFrame
df = pd.DataFrame(data)
# Creating a year column in the dataframe, so that the catch_movie() function runs more accurately, selecting the correct year and not the same movie from another year.
df['Release Year'] = df['Release date'].dt.year.astype('Int64')
df['Release Year'] 

0      1937
1      <NA>
2      <NA>
3      1940
4      1941
       ... 
514    1986
516    <NA>
517    1963
518    <NA>
519    1991
Name: Release Year, Length: 499, dtype: Int64

In [6]:
# Create a column named ['Rotten Tomatoes'], where from the catch_movie() function is used as parameter 'Rotten Tomatoes' to return the review score of this site
df['Rotten Tomatoes'] = df.apply(lambda x:hf.catch_rating(x['title'], x['Release Year'],'Rotten Tomatoes' ), axis=1)

# Creates a column called ['Internet Movie Database'], where from the catch_movie() function is used as parameter 'Internet Movie Database' to return the review note of this site
df['Internet Movie Database'] = df.apply(lambda x:hf.catch_rating(x['title'], x['Release Year'], 'Internet Movie Database' ), axis=1)

# Creates a column named ['Metacritic'], where from the catch_movie() function is used as parameter 'Metacritic'] to return the review note of this site
df['Metacritic'] = df.apply(lambda x:hf.catch_rating(x['title'], x['Release Year'],'Metacritic' ), axis=1)

In [7]:
data = df[['title', 'Release date','Rotten Tomatoes','Internet Movie Database','Metacritic']]
data

Unnamed: 0,title,Release date,Rotten Tomatoes,Internet Movie Database,Metacritic
0,Academy Award Review of,1937-05-19,0,7.1,0
1,Snow White and the Seven Dwarfs,NaT,0,0,0
2,Pinocchio,NaT,0,0,0
3,Fantasia,1940-11-13,95,7.7,96
4,The Reluctant Dragon,1941-06-27,0,6.8,0
...,...,...,...,...,...
514,SpaceCamp,1986-06-06,46,5.7,40
516,The Aristocats,NaT,0,0,0
517,The Sword in the Stone,1963-12-25,66,7.2,61
518,Tinker Bell,NaT,0,0,0


In [8]:
# Deleting lines with title == 0
data.drop(data.loc[df['title'] =='0'].index, inplace=True)
data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(data.loc[df['title'] =='0'].index, inplace=True)


Unnamed: 0,title,Release date,Rotten Tomatoes,Internet Movie Database,Metacritic
0,Academy Award Review of,1937-05-19,0,7.1,0
1,Snow White and the Seven Dwarfs,NaT,0,0.0,0
2,Pinocchio,NaT,0,0.0,0
3,Fantasia,1940-11-13,95,7.7,96
4,The Reluctant Dragon,1941-06-27,0,6.8,0


In [9]:
data.to_csv('dataframe_disney_imdb_note.csv')

In [10]:
data.info()
data.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 499 entries, 0 to 519
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   title                    499 non-null    object        
 1   Release date             276 non-null    datetime64[ns]
 2   Rotten Tomatoes          499 non-null    object        
 3   Internet Movie Database  499 non-null    object        
 4   Metacritic               499 non-null    object        
dtypes: datetime64[ns](1), object(4)
memory usage: 23.4+ KB


  data.describe()


Unnamed: 0,title,Release date,Rotten Tomatoes,Internet Movie Database,Metacritic
count,499,276,499.0,499.0,499.0
unique,472,268,80.0,47.0,62.0
top,Hercules,2019-11-12 00:00:00,0.0,0.0,0.0
freq,2,2,312.0,241.0,340.0
first,,1937-05-19 00:00:00,,,
last,,2022-04-22 00:00:00,,,


In [11]:
df

Unnamed: 0,title,Production company,Distributed by,Release date,Running time,Country,Language,Box office,Directed by,Written by,...,Basis,Productions,Awards,Created by,Original work,Owner,Release Year,Rotten Tomatoes,Internet Movie Database,Metacritic
0,Academy Award Review of,Walt Disney Productions,United Artists,1937-05-19,41,United States,English,45.0,,,...,,,,,,,1937,0,7.1,0
1,Snow White and the Seven Dwarfs,Walt Disney Productions,RKO Radio Pictures,NaT,83,United States,English,418.0,"[David Hand, William Cottrell, Wilfred Jackson...","[Ted Sears, Richard Creedon, Otto Englander, D...",...,,,,,,,,0,0,0
2,Pinocchio,Walt Disney Productions,RKO Radio Pictures,NaT,88,United States,English,164.0,"[Ben Sharpsteen, Hamilton Luske, Bill Roberts,...",,...,,,,,,,,0,0,0
3,Fantasia,Walt Disney Productions,RKO Radio Pictures,1940-11-13,126,United States,English,76.0,"[Samuel Armstrong, James Algar, Bill Roberts, ...",,...,,,,,,,1940,95,7.7,96
4,The Reluctant Dragon,Walt Disney Productions,RKO Radio Pictures,1941-06-27,74,United States,English,960.0,Alfred Werker (live action) Hamilton Luske (an...,Live-action: Ted Sears Al Perkins Larry Clemmo...,...,,,,,,,1941,0,6.8,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514,SpaceCamp,ABC Motion Pictures,20th Century Fox,1986-06-06,107,United States,English,9.0,Harry Winer,,...,,,,,,,1986,46,5.7,40
516,The Aristocats,Walt Disney Productions,Buena Vista Distribution,NaT,79,United States,English,191.0,Wolfgang Reitherman,,...,,,,,,,,0,0,0
517,The Sword in the Stone,Walt Disney Productions,Buena Vista Distribution,1963-12-25,74,United States,English,22.0,"[Wolfgang Reitherman, Hamilton Luske (Uncredit...",,...,,,,,,,1963,66,7.2,61
518,Tinker Bell,DisneyToon Studios,Walt Disney Studios Home Entertainment,NaT,468,United States,English,,"[Bradley Raymond ( 1 , 3 & 4 ), Klay Hall ( 2 ...",,...,,,,,,,,0,0,0
