In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from secrets import API_KEY
pd.options.display.max_columns = 999
from urllib.parse import quote_plus
from requests.utils import requote_uri
from datetime import datetime

# Testing how to format the url get from the OMDb API site.

In [22]:
website_url = 'http://www.omdbapi.com/?t=roma&apikey=cd16ab6d'
result = requests.get(website_url)

result.status_code

200

In [23]:
result.content

b'{"Title":"Roma","Year":"2018","Rated":"R","Released":"21 Nov 2018","Runtime":"135 min","Genre":"Drama","Director":"Alfonso Cuar\xc3\xb3n","Writer":"Alfonso Cuar\xc3\xb3n","Actors":"Yalitza Aparicio, Marina de Tavira, Diego Cortina Autrey, Carlos Peralta","Plot":"A year in the life of a middle-class family\'s maid in Mexico City in the early 1970s.","Language":"Spanish, Mixtec, English, Japanese, German, French, Norwegian","Country":"Mexico","Awards":"N/A","Poster":"https://m.media-amazon.com/images/M/MV5BMTU0OTc3ODk4Ml5BMl5BanBnXkFtZTgwMzM4NzI5NjM@._V1_SX300.jpg","Ratings":[{"Source":"Internet Movie Database","Value":"7.7/10"},{"Source":"Metacritic","Value":"96/100"}],"Metascore":"96","imdbRating":"7.7","imdbVotes":"117,294","imdbID":"tt6155172","Type":"movie","DVD":"N/A","BoxOffice":"N/A","Production":"N/A","Website":"N/A","Response":"True"}'

In [24]:
type(result)

requests.models.Response

In [25]:
roma = result.json()
type(roma)

dict

In [26]:
pd.DataFrame.from_dict(roma)

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Poster,Ratings,Metascore,imdbRating,imdbVotes,imdbID,Type,DVD,BoxOffice,Production,Website,Response
0,Roma,2018,R,21 Nov 2018,135 min,Drama,Alfonso Cuarón,Alfonso Cuarón,"Yalitza Aparicio, Marina de Tavira, Diego Cort...",A year in the life of a middle-class family's ...,"Spanish, Mixtec, English, Japanese, German, Fr...",Mexico,,https://m.media-amazon.com/images/M/MV5BMTU0OT...,"{'Source': 'Internet Movie Database', 'Value':...",96,7.7,117294,tt6155172,movie,,,,,True
1,Roma,2018,R,21 Nov 2018,135 min,Drama,Alfonso Cuarón,Alfonso Cuarón,"Yalitza Aparicio, Marina de Tavira, Diego Cort...",A year in the life of a middle-class family's ...,"Spanish, Mixtec, English, Japanese, German, Fr...",Mexico,,https://m.media-amazon.com/images/M/MV5BMTU0OT...,"{'Source': 'Metacritic', 'Value': '96/100'}",96,7.7,117294,tt6155172,movie,,,,,True


# Reading in the Best Pictures list from the Oscar Movie Database

In [27]:
best_pics = pd.read_csv('data/best_pic_list.csv')

In [28]:
best_pics.head()

Unnamed: 0,year,category,winner,entity
0,1927,OUTSTANDING PICTURE,False,The Racket
1,1927,OUTSTANDING PICTURE,False,7th Heaven
2,1927,OUTSTANDING PICTURE,True,Wings
3,1928,OUTSTANDING PICTURE,False,Alibi
4,1928,OUTSTANDING PICTURE,False,In Old Arizona


In [29]:
best_pics['year'] = best_pics['year'].astype(str)

# Creating a new column with a formatted url that includes the API key so that a function can be created to pull the movie info

In [30]:
base_url = 'http://www.omdbapi.com/?t='
api_string = '&apikey='
year_string = '&y='

In [33]:
def url_convert(series):
    for qs in series:
        return f'{base_url}{quote_plus(series)}{api_string}{API_KEY}'

In [34]:
best_pics['url'] = best_pics.entity.apply(url_convert)
best_pics.head()

Unnamed: 0,year,category,winner,entity,url
0,1927,OUTSTANDING PICTURE,False,The Racket,http://www.omdbapi.com/?t=The+Racket&apikey=cd...
1,1927,OUTSTANDING PICTURE,False,7th Heaven,http://www.omdbapi.com/?t=7th+Heaven&apikey=cd...
2,1927,OUTSTANDING PICTURE,True,Wings,http://www.omdbapi.com/?t=Wings&apikey=cd16ab6d
3,1928,OUTSTANDING PICTURE,False,Alibi,http://www.omdbapi.com/?t=Alibi&apikey=cd16ab6d
4,1928,OUTSTANDING PICTURE,False,In Old Arizona,http://www.omdbapi.com/?t=In+Old+Arizona&apike...


In [35]:
best_pics.url.iloc[2]

'http://www.omdbapi.com/?t=Wings&apikey=cd16ab6d'

# Adding the "year" column to the end of each URL so that they correctly match within the OMDb information

In [36]:
best_pics['url'] = best_pics['url'] + year_string + best_pics['year']

In [37]:
best_pics.url.iloc[98]

'http://www.omdbapi.com/?t=The+Wizard+of+Oz&apikey=cd16ab6d&y=1939'

# Verifying the type of each column

In [38]:
best_pics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 546 entries, 0 to 545
Data columns (total 5 columns):
year        546 non-null object
category    546 non-null object
winner      546 non-null bool
entity      546 non-null object
url         546 non-null object
dtypes: bool(1), object(4)
memory usage: 17.7+ KB


# Converting the url column into a string so that it can correctly work in the upcoming function

In [39]:
best_pics['url'] = best_pics['url'].astype(str)

In [40]:
best_pics.head()

Unnamed: 0,year,category,winner,entity,url
0,1927,OUTSTANDING PICTURE,False,The Racket,http://www.omdbapi.com/?t=The+Racket&apikey=cd...
1,1927,OUTSTANDING PICTURE,False,7th Heaven,http://www.omdbapi.com/?t=7th+Heaven&apikey=cd...
2,1927,OUTSTANDING PICTURE,True,Wings,http://www.omdbapi.com/?t=Wings&apikey=cd16ab6...
3,1928,OUTSTANDING PICTURE,False,Alibi,http://www.omdbapi.com/?t=Alibi&apikey=cd16ab6...
4,1928,OUTSTANDING PICTURE,False,In Old Arizona,http://www.omdbapi.com/?t=In+Old+Arizona&apike...


# Pulling out the url column specifically as a Series so that the function can run

In [41]:
urls = best_pics['url']
urls.head()

0    http://www.omdbapi.com/?t=The+Racket&apikey=cd...
1    http://www.omdbapi.com/?t=7th+Heaven&apikey=cd...
2    http://www.omdbapi.com/?t=Wings&apikey=cd16ab6...
3    http://www.omdbapi.com/?t=Alibi&apikey=cd16ab6...
4    http://www.omdbapi.com/?t=In+Old+Arizona&apike...
Name: url, dtype: object

In [42]:
type(urls)

pandas.core.series.Series

In [43]:
urls[0]

'http://www.omdbapi.com/?t=The+Racket&apikey=cd16ab6d&y=1927'

# Selecting just the top 5 and top 40 urls in the list so that a small segment of the list can be tested against the function

In [44]:
top_5 = urls.head()
top_40 = urls.head(40)

# Function designed to scrape the OMDb API site to get each film's associated information. This function also cleans the resulting dataframe by turning the result into a dataframe, dropping unnecessary columns, and removing duplicate rows.

### Returns the dataframe as well as a list of URLs (errors_list) that were unable to process via the function

In [13]:
def webscraper(series):
    # create an empty list to place the scraped info in
    df_list = []
    
    # create an empty list to place the non-working URLs in
    errors_list = []
    for i in series:
        try:
            # since the URLs are in a series, no specific numeric/index values are associated with each URL
            url = i
            result = requests.get(url)
            value = result.json()
            frame = pd.DataFrame.from_dict(value)
            df_list.append(frame)
        
        # if a URL doesn't work above, the function will add it to the error_list so that it can be manually pulled later
        except:
            errors_list.append(i)
    
    # converting the list created into a dataframe
    df = pd.concat(df_list)
    
    # dropping columns that are unnecessary to the analysis and that also result in duplicate rows
    df_drop = df.drop(columns = ['Writer', 'Actors', 'Plot', 'Awards', 'Poster', 'Ratings', 'DVD', 'Website', 'Response',
                                'imdbVotes'])
    
    # removing duplicates so that only single values remain for each URL
    df_clean = df_drop.drop_duplicates()
    
    return df_clean, errors_list

# Running the function with the Top 5 URLS

In [108]:
df_clean_5, errors_list_5 = webscraper(top_5)

In [109]:
df_clean_5

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Language,Country,Metascore,imdbRating,imdbID,Type,BoxOffice,Production
0,7th Heaven,1927,Not Rated,30 Oct 1927,110 min,"Drama, Romance",Frank Borzage,English,USA,,7.8,tt0018379,movie,,Fox
0,Wings,1927,PG-13,05 Jan 1929,144 min,"Drama, Romance, War, Action","William A. Wellman, Harry d'Abbadie d'Arrast",English,USA,,7.5,tt0018578,movie,,Unknown
0,In Old Arizona,1928,PASSED,20 Jan 1929,95 min,"Romance, Western",Irving Cummings,"English, Spanish, Italian",USA,,5.7,tt0020018,movie,,20th Century Fox Film Corporation


In [89]:
df_clean_5.columns

Index(['Title', 'Year', 'Rated', 'Released', 'Runtime', 'Genre', 'Director',
       'Language', 'Country', 'Metascore', 'imdbRating', 'imdbID', 'Type',
       'BoxOffice', 'Production'],
      dtype='object')

# Verifying that there are no error URLs

In [90]:
errors_list_5

['http://www.omdbapi.com/?t=The+Racket&apikey=cd16ab6d&y=1927']

In [91]:
# df_clean_5 = df_clean_5.Released.replace('N/A', '08 Nov 1962')
df_clean_5['Released'] = pd.to_datetime(df_clean_5['Released'], format = '%d %b %Y')
df_clean_5['month'] = pd.DatetimeIndex(df_clean_5['Released']).month

In [92]:
df_clean_5

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Language,Country,Metascore,imdbRating,imdbID,Type,BoxOffice,Production,month
0,7th Heaven,1927,Not Rated,1927-10-30,110 min,"Drama, Romance",Frank Borzage,English,USA,,7.8,tt0018379,movie,,Fox,10
0,Wings,1927,PG-13,1929-01-05,144 min,"Drama, Romance, War, Action","William A. Wellman, Harry d'Abbadie d'Arrast",English,USA,,7.5,tt0018578,movie,,Unknown,1
0,In Old Arizona,1928,PASSED,1929-01-20,95 min,"Romance, Western",Irving Cummings,"English, Spanish, Italian",USA,,5.7,tt0020018,movie,,20th Century Fox Film Corporation,1


# Checking a larger set of URLs to see if errors come up

In [93]:
df_clean_40, errors_list_40 = webscraper(top_40)

In [94]:
df_clean_40

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Language,Country,Metascore,imdbRating,imdbID,Type,BoxOffice,Production
0,7th Heaven,1927,Not Rated,30 Oct 1927,110 min,"Drama, Romance",Frank Borzage,English,USA,,7.8,tt0018379,movie,,Fox
0,Wings,1927,PG-13,05 Jan 1929,144 min,"Drama, Romance, War, Action","William A. Wellman, Harry d'Abbadie d'Arrast",English,USA,,7.5,tt0018578,movie,,Unknown
0,In Old Arizona,1928,PASSED,20 Jan 1929,95 min,"Romance, Western",Irving Cummings,"English, Spanish, Italian",USA,,5.7,tt0020018,movie,,20th Century Fox Film Corporation
0,Disraeli,1929,PASSED,01 Nov 1929,90 min,"Biography, Drama, History",Alfred E. Green,English,USA,,6.4,tt0019823,movie,,A & E
0,The Love Parade,1929,APPROVED,18 Jan 1930,107 min,"Comedy, Musical, Romance",Ernst Lubitsch,"English, French",USA,,7.2,tt0020112,movie,,Paramount Pictures
0,Arrowsmith,1931,PASSED,26 Dec 1931,108 min,Drama,John Ford,"English, Italian, Swedish",USA,,6.2,tt0021622,movie,,United Artists
0,Bad Girl,1931,Passed,13 Sep 1931,90 min,"Drama, Romance",Frank Borzage,English,USA,,6.6,tt0021635,movie,,
0,The Champ,1931,Passed,21 Nov 1931,86 min,"Drama, Sport",King Vidor,English,USA,,7.3,tt0021730,movie,,MGM
0,Five Star Final,1931,NOT RATED,26 Sep 1931,89 min,"Crime, Drama",Mervyn LeRoy,English,USA,,7.3,tt0021873,movie,,Vitaphone Corporation
0,The Smiling Lieutenant,1931,Passed,01 Aug 1931,93 min,"Comedy, Romance, Musical",Ernst Lubitsch,"English, French",USA,,7.7,tt0022074,movie,,Paramount Pictures


In [95]:
errors_list_40

['http://www.omdbapi.com/?t=The+Racket&apikey=cd16ab6d&y=1927',
 'http://www.omdbapi.com/?t=The+Broadway+Melody&apikey=cd16ab6d&y=1928',
 'http://www.omdbapi.com/?t=Hollywood+Revue&apikey=cd16ab6d&y=1928',
 'http://www.omdbapi.com/?t=All+Quiet+on+the+Western+Front&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=The+Big+House&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=The+Divorcee&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=Cimarron&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=East+Lynne&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=The+Front+Page&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Skippy&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Trader+Horn&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Grand+Hotel&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=One+Hour+with+You&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=Shanghai+Express&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=Cavalcade&apikey=cd16ab6

# Running the scrape of all the URLs to create the full list of films that were nominated for Best Picture Oscars

In [110]:
df_clean_all, errors_list_all = webscraper(urls)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




# Previewing the data frame of the Best Picture list and exporting out to csv to reduce the number of times that scraping needs to take place on OMDb

In [111]:
df_clean_all

Unnamed: 0,BoxOffice,Country,Director,Genre,Language,Metascore,Production,Rated,Released,Runtime,Title,Type,Year,imdbID,imdbRating,totalSeasons
0,,USA,Frank Borzage,"Drama, Romance",English,,Fox,Not Rated,30 Oct 1927,110 min,7th Heaven,movie,1927,tt0018379,7.8,
0,,USA,"William A. Wellman, Harry d'Abbadie d'Arrast","Drama, Romance, War, Action",English,,Unknown,PG-13,05 Jan 1929,144 min,Wings,movie,1927,tt0018578,7.5,
0,,USA,Irving Cummings,"Romance, Western","English, Spanish, Italian",,20th Century Fox Film Corporation,PASSED,20 Jan 1929,95 min,In Old Arizona,movie,1928,tt0020018,5.7,
0,,USA,Alfred E. Green,"Biography, Drama, History",English,,A & E,PASSED,01 Nov 1929,90 min,Disraeli,movie,1929,tt0019823,6.4,
0,,USA,Ernst Lubitsch,"Comedy, Musical, Romance","English, French",,Paramount Pictures,APPROVED,18 Jan 1930,107 min,The Love Parade,movie,1929,tt0020112,7.2,
0,,USA,John Ford,Drama,"English, Italian, Swedish",,United Artists,PASSED,26 Dec 1931,108 min,Arrowsmith,movie,1931,tt0021622,6.2,
0,,USA,Frank Borzage,"Drama, Romance",English,,,Passed,13 Sep 1931,90 min,Bad Girl,movie,1931,tt0021635,6.6,
0,,USA,King Vidor,"Drama, Sport",English,,MGM,Passed,21 Nov 1931,86 min,The Champ,movie,1931,tt0021730,7.3,
0,,USA,Mervyn LeRoy,"Crime, Drama",English,,Vitaphone Corporation,NOT RATED,26 Sep 1931,89 min,Five Star Final,movie,1931,tt0021873,7.3,
0,,USA,Ernst Lubitsch,"Comedy, Romance, Musical","English, French",,Paramount Pictures,Passed,01 Aug 1931,93 min,The Smiling Lieutenant,movie,1931,tt0022074,7.7,


In [112]:
df_clean_all.columns

Index(['BoxOffice', 'Country', 'Director', 'Genre', 'Language', 'Metascore',
       'Production', 'Rated', 'Released', 'Runtime', 'Title', 'Type', 'Year',
       'imdbID', 'imdbRating', 'totalSeasons'],
      dtype='object')

In [113]:
df_clean_all.head()

Unnamed: 0,BoxOffice,Country,Director,Genre,Language,Metascore,Production,Rated,Released,Runtime,Title,Type,Year,imdbID,imdbRating,totalSeasons
0,,USA,Frank Borzage,"Drama, Romance",English,,Fox,Not Rated,30 Oct 1927,110 min,7th Heaven,movie,1927,tt0018379,7.8,
0,,USA,"William A. Wellman, Harry d'Abbadie d'Arrast","Drama, Romance, War, Action",English,,Unknown,PG-13,05 Jan 1929,144 min,Wings,movie,1927,tt0018578,7.5,
0,,USA,Irving Cummings,"Romance, Western","English, Spanish, Italian",,20th Century Fox Film Corporation,PASSED,20 Jan 1929,95 min,In Old Arizona,movie,1928,tt0020018,5.7,
0,,USA,Alfred E. Green,"Biography, Drama, History",English,,A & E,PASSED,01 Nov 1929,90 min,Disraeli,movie,1929,tt0019823,6.4,
0,,USA,Ernst Lubitsch,"Comedy, Musical, Romance","English, French",,Paramount Pictures,APPROVED,18 Jan 1930,107 min,The Love Parade,movie,1929,tt0020112,7.2,


In [115]:
df_clean_all.to_csv('data/df_clean_all.csv', index = False)

# Previewing the errors list, converting to a data frame, and exporting

In [100]:
errors_list_all

['http://www.omdbapi.com/?t=The+Racket&apikey=cd16ab6d&y=1927',
 'http://www.omdbapi.com/?t=The+Broadway+Melody&apikey=cd16ab6d&y=1928',
 'http://www.omdbapi.com/?t=Hollywood+Revue&apikey=cd16ab6d&y=1928',
 'http://www.omdbapi.com/?t=All+Quiet+on+the+Western+Front&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=The+Big+House&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=The+Divorcee&apikey=cd16ab6d&y=1929',
 'http://www.omdbapi.com/?t=Cimarron&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=East+Lynne&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=The+Front+Page&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Skippy&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Trader+Horn&apikey=cd16ab6d&y=1930',
 'http://www.omdbapi.com/?t=Grand+Hotel&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=One+Hour+with+You&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=Shanghai+Express&apikey=cd16ab6d&y=1931',
 'http://www.omdbapi.com/?t=Cavalcade&apikey=cd16ab6

In [101]:
error_df = pd.DataFrame(errors_list_all)
error_df

Unnamed: 0,0
0,http://www.omdbapi.com/?t=The+Racket&apikey=cd...
1,http://www.omdbapi.com/?t=The+Broadway+Melody&...
2,http://www.omdbapi.com/?t=Hollywood+Revue&apik...
3,http://www.omdbapi.com/?t=All+Quiet+on+the+Wes...
4,http://www.omdbapi.com/?t=The+Big+House&apikey...
5,http://www.omdbapi.com/?t=The+Divorcee&apikey=...
6,http://www.omdbapi.com/?t=Cimarron&apikey=cd16...
7,http://www.omdbapi.com/?t=East+Lynne&apikey=cd...
8,http://www.omdbapi.com/?t=The+Front+Page&apike...
9,http://www.omdbapi.com/?t=Skippy&apikey=cd16ab...


In [None]:
error_df.to_csv("data/error_df.csv")

# Reading the full list of Best Picture nominees so that I don't have to continue scraping to get the data frame produced

In [7]:
best_films = pd.read_csv('data/df_clean_all.csv')

In [8]:
best_films

Unnamed: 0,Country,Director,Genre,Language,Metascore,Production,Rated,Released,Runtime,Title,Year,imdbID,imdbRating
0,USA,Frank Borzage,"Drama, Romance",English,,Fox,Not Rated,30-Oct-27,110 min,7th Heaven,1927,tt0018379,7.8
1,USA,"William A. Wellman, Harry d'Abbadie d'Arrast","Drama, Romance, War, Action",English,,Unknown,PG-13,5-Jan-29,144 min,Wings,1927,tt0018578,7.5
2,USA,Irving Cummings,"Romance, Western",English,,20th Century Fox Film Corporation,PASSED,20-Jan-29,95 min,In Old Arizona,1928,tt0020018,5.7
3,USA,Alfred E. Green,"Biography, Drama, History",English,,A & E,PASSED,1-Nov-29,90 min,Disraeli,1929,tt0019823,6.4
4,USA,Ernst Lubitsch,"Comedy, Musical, Romance",English,,Paramount Pictures,APPROVED,18-Jan-30,107 min,The Love Parade,1929,tt0020112,7.2
5,USA,John Ford,Drama,English,,United Artists,PASSED,26-Dec-31,108 min,Arrowsmith,1931,tt0021622,6.2
6,USA,Frank Borzage,"Drama, Romance",English,,,Passed,13-Sep-31,90 min,Bad Girl,1931,tt0021635,6.6
7,USA,King Vidor,"Drama, Sport",English,,MGM,Passed,21-Nov-31,86 min,The Champ,1931,tt0021730,7.3
8,USA,Mervyn LeRoy,"Crime, Drama",English,,Vitaphone Corporation,NOT RATED,26-Sep-31,89 min,Five Star Final,1931,tt0021873,7.3
9,USA,Ernst Lubitsch,"Comedy, Romance, Musical",English,,Paramount Pictures,Passed,1-Aug-31,93 min,The Smiling Lieutenant,1931,tt0022074,7.7


In [119]:
best_films.loc[best_films['Released'] == 'NaT']

# # best_films = best_films.Released.replace('N/A', '08 Nov 1962')
# best_films['Released'] = pd.to_datetime(best_films['Released'], format = '%d %b %Y')
# best_films['month'] = pd.DatetimeIndex(best_films['Released']).month

In [121]:
best_films.head(40)

Unnamed: 0,BoxOffice,Country,Director,Genre,Language,Metascore,Production,Rated,Released,Runtime,Title,Type,Year,imdbID,imdbRating,totalSeasons,month
0,,USA,Frank Borzage,"Drama, Romance",English,,Fox,Not Rated,1927-10-30,110 min,7th Heaven,movie,1927,tt0018379,7.8,,10.0
1,,USA,"William A. Wellman, Harry d'Abbadie d'Arrast","Drama, Romance, War, Action",English,,Unknown,PG-13,1929-01-05,144 min,Wings,movie,1927,tt0018578,7.5,,1.0
2,,USA,Irving Cummings,"Romance, Western","English, Spanish, Italian",,20th Century Fox Film Corporation,PASSED,1929-01-20,95 min,In Old Arizona,movie,1928,tt0020018,5.7,,1.0
3,,USA,Alfred E. Green,"Biography, Drama, History",English,,A & E,PASSED,1929-11-01,90 min,Disraeli,movie,1929,tt0019823,6.4,,11.0
4,,USA,Ernst Lubitsch,"Comedy, Musical, Romance","English, French",,Paramount Pictures,APPROVED,1930-01-18,107 min,The Love Parade,movie,1929,tt0020112,7.2,,1.0
5,,USA,John Ford,Drama,"English, Italian, Swedish",,United Artists,PASSED,1931-12-26,108 min,Arrowsmith,movie,1931,tt0021622,6.2,,12.0
6,,USA,Frank Borzage,"Drama, Romance",English,,,Passed,1931-09-13,90 min,Bad Girl,movie,1931,tt0021635,6.6,,9.0
7,,USA,King Vidor,"Drama, Sport",English,,MGM,Passed,1931-11-21,86 min,The Champ,movie,1931,tt0021730,7.3,,11.0
8,,USA,Mervyn LeRoy,"Crime, Drama",English,,Vitaphone Corporation,NOT RATED,1931-09-26,89 min,Five Star Final,movie,1931,tt0021873,7.3,,9.0
9,,USA,Ernst Lubitsch,"Comedy, Romance, Musical","English, French",,Paramount Pictures,Passed,1931-08-01,93 min,The Smiling Lieutenant,movie,1931,tt0022074,7.7,,8.0


In [9]:
error_urls = pd.read_csv('data/error_df.csv')

In [11]:
error_urls.head()

Unnamed: 0,title
0,http://www.omdbapi.com/?t=The+Racket&apikey=cd...
1,http://www.omdbapi.com/?t=The+Broadway+Melody&...
2,http://www.omdbapi.com/?t=Hollywood+Revue&apik...
3,http://www.omdbapi.com/?t=All+Quiet+on+the+Wes...
4,http://www.omdbapi.com/?t=The+Big+House&apikey...


In [15]:
urls_2 = error_urls['title']

In [16]:
errors_info, errors_errors = webscraper(urls_2)

In [18]:
errors_info

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Language,Country,Metascore,imdbRating,imdbID,Type,BoxOffice,Production
0,The Racket,1928,PASSED,01 Nov 1928,84 min,"Crime, Drama, Film-Noir",Lewis Milestone,English,USA,,6.9,tt0019304,movie,,Paramount Pictures
0,The Broadway Melody,1929,Passed,06 Jun 1929,100 min,"Drama, Musical, Romance",Harry Beaumont,English,USA,,5.7,tt0019729,movie,,MGM Home Entertainment
0,The Hollywood Revue of 1929,1929,,23 Nov 1929,130 min,Musical,"Charles Reisner, Christy Cabanne",English,USA,,7.4,tt0019993,movie,,Metro-Goldwyn-Mayer Pictures
0,All Quiet on the Western Front,1930,Not Rated,24 Aug 1930,136 min,"Drama, War",Lewis Milestone,"English, French, German, Latin",USA,91.0,8.0,tt0020629,movie,,Universal Pictures
0,The Big House,1930,Passed,14 Jun 1930,87 min,"Crime, Drama, Thriller","George W. Hill, Ward Wing","English, Russian",USA,,7.2,tt0020686,movie,,Warner Bros.
0,The Divorcee,1930,Passed,19 Apr 1930,84 min,"Romance, Drama",Robert Z. Leonard,"English, French",USA,,6.8,tt0020827,movie,,MGM
0,Cimarron,1931,Passed,09 Feb 1931,123 min,"Drama, Western",Wesley Ruggles,"English, French",USA,,5.9,tt0021746,movie,,MGM Home Entertainment
0,East Lynne,1931,,01 Mar 1931,102 min,Drama,Frank Lloyd,English,USA,,6.9,tt0021826,movie,,
0,The Front Page,1931,TV-PG,04 Apr 1931,101 min,Comedy,Lewis Milestone,English,USA,,6.7,tt0021890,movie,,United Artists
0,Skippy,1931,Approved,25 Apr 1931,85 min,"Comedy, Drama, Family",Norman Taurog,English,USA,,6.3,tt0022397,movie,,Paramount Pictures


In [19]:
errors_info.to_csv("data/errors_info.csv", index = False)