In [1]:
from bs4 import BeautifulSoup
import requests
from urllib.request import urlopen
import pandas as pd
import numpy as np

In [2]:
BASE_URL = 'https://www.imdb.com'

In [3]:
def find_movie_url(title, year):  
    try:
        r = requests.get(url="https://www.imdb.com/find", params={"q": title})
        search_page = urlopen(r.url + "&s=tt")
    except:
        return None
    
    soup = BeautifulSoup(search_page, "lxml")
    movies = soup.find_all('td', class_='result_text')
    for movie in movies:
        search_result_year = get_movie_year(movie)
        if year and search_result_year == year:
            return get_movie_url(movie)
    return None

In [4]:
def get_movie_year(movie):
    movie_string = str(movie)
    start_indices = find_all_indices(movie_string, '(')
    year = None
    for index in start_indices:
        year = movie_string[index + 1 : index + 5]
        try:
            year = int(year)
            break
        except:
            continue
            
    return year

In [5]:
def find_all_indices(string, substring):
    indices = []
    counter = 0
    while string.find(substring) != -1:
        index = string.find(substring)
        indices.append(index + counter)
        counter = counter + index + 1
        string = string[index + 1:]
    return indices

In [6]:
r = requests.get(url="https://www.imdb.com/find", params={"q": "postman"})
search_page = urlopen(r.url + "&s=tt")

soup = BeautifulSoup(search_page, "lxml")
movies = soup.find_all('td', class_='result_text')
print(movies[0])

movie = "<td class=\"result_text\"> <a href=\"/title/tt0119925/\">The Postman</a>(II) (1997) (TV Series)</td>"
print(get_movie_year(movie))

<td class="result_text"> <a href="/title/tt0119925/">The Postman</a> (1997) </td>
1997


In [6]:
def get_movie_url(movie):
    movie_string = str(movie)
    start_index = movie_string.find("href")
    movie_substring = movie_string[start_index:]
    end_index = movie_substring.find("\">") + start_index
    url = movie_string[start_index + 6 : end_index]
    return url

In [7]:
def get_ratings(url):
    if url is None:
        return np.nan, np.nan
    try:
        page_movie = urlopen(BASE_URL + url)
    except:
        return np.nan, np.nan
    soup = BeautifulSoup(page_movie, "lxml")
    
    try:
        user_score_span = soup.find_all('span', itemprop='ratingValue')
        user_score_str = str(user_score_span)
        start_index = user_score_str.find("Value") + 7
        end_index = user_score_str.rfind("span") - 2
        user_score = float(user_score_str[start_index : end_index])
    except:
        user_score = np.nan

    try:
        metascore_div = soup.find_all('div', class_='metacriticScore')
        metascore_str = str(metascore_div)
        start_index = metascore_str.find("span") + 5
        end_index = metascore_str.rfind("span") - 2
        metascore = int(metascore_str[start_index : end_index])
    except:
        metascore = np.nan
        
    return user_score, metascore

In [9]:
url = find_movie_url("Toy Story 3", 2010)
print(get_ratings(url))

(8.3, 92)


In [10]:
df = pd.read_csv("movies.csv")


In [8]:
def clean_title_data(title):
    year = int(title[-5:-1])
    remove_index = title.index("(")
    search_title = title[:remove_index]
    return (search_title, year)

In [14]:
movies_df = df["title"]
user_scores = []
metascores = []

index = 0

while index < movies_df.size / 2:
    try:
        movie = movies_df.iloc[index]
        title, year = clean_title_data(movie)
        url = find_movie_url(title, year)
        user_score, metascore = get_ratings(url)

        user_scores.append(user_score)
        metascores.append(metascore)
        index = index + 1
    except:
        print(index, movie)
        user_scores.append(np.nan)
        metascores.append(np.nan)
        index = index + 1

8463 Babe Ruth Story, The (1948) 
8602 Heroes of Telemark, The (1965) 
8910 Jack's Back (1988) 
9120 After the Rain (Ame agaru) (1999) 
9763 Love Letter (1995) 
9876 Acts of Worship (2001) 
9996 Godzilla vs. Biollante (Gojira vs. Biorante) (1989) 
10057 Godzilla vs. Hedorah (Gojira tai Hedorâ) (Godzilla vs. The Smog Monster) (1971) 
10641 Le créateur (1999) 
11494 R.S.V.P. (2002) 
11603 Blood Trails (2006) 
11630 Godzilla vs. Destroyah (Gojira vs. Desutoroiâ) (1995) 
11912 Return to the 36th Chamber (Shao Lin da peng da shi) (1980) 
12562 Frozen City (Valkoinen kaupunki) (2006) 
13999 Straight from the Barrio (Talento de barrio) (2008) 
15036 Millions Game, The (Das Millionenspiel)
15182 Off and Running (2009) 
15183 Truth, The (2010) 
15184 Wedding Song, The (2008) 
15192 Four Stories of St. Julian (2010) 
15698 Life in Flight (2008) 
15936 City of Pirates (La ville des pirates) (1983) 
16057 Open House (2010) 
16283 White Hell of Pitz Palu, The (Die weiße Hölle vom Piz Palü) (1929) 


In [15]:
df["imdb_user_rating"] = np.nan
df["imdb_metascore"] = np.nan
new_df = pd.DataFrame()
new_df["imdb_user_rating"] = user_scores
new_df["imdb_metascore"] = metascores
df.update(new_df)

In [16]:
df.to_csv('imdb_first_half.csv')

In [20]:
print(len(user_scores), len(metascores))

31020 31020


In [27]:
f = open("imdb.txt", "r")
line = f.readline()
cnt = 1

while line:
    index = int(line.split()[0])
    user_scores.insert(index, np.nan)
    metascores.insert(index, np.nan)
    line = f.readline()
    cnt += 1
    
print(cnt)
f.close()

193


In [35]:
df["imdb_user_rating"] = np.nan
df["imdb_metascore"] = np.nan

In [36]:
new_df = pd.DataFrame()
new_df["imdb_user_rating"] = user_scores
new_df["imdb_metascore"] = metascores
df.update(new_df)

In [38]:
df.to_csv('imdb_first_half.csv')

In [39]:
int(round(movies_df.size / 2))

31212

In [40]:
movies_df = df["title"]
user_scores = []
metascores = []

index = int(round(movies_df.size / 2))

while index < movies_df.size:
    try:
        movie = movies_df.iloc[index]
        title, year = clean_title_data(movie)
        url = find_movie_url(title, year)
        user_score, metascore = get_ratings(url)

        user_scores.append(user_score)
        metascores.append(metascore)
        index = index + 1
    except:
        print(index, movie)
        user_scores.append(np.nan)
        metascores.append(np.nan)
        index = index + 1
    
print(user_scores)
print(metascores)

31220 The Ritual
31251 The Expedition
31399 Chinese Boxes
31845 Danielův svět
32073 The Near Room
32118 Filmage: The Story of Descendents/All
32189 About Sarah
32298 La steppa
32421 Swallows and Amazons
32456 Ready Player One
32660 Los tontos y los estúpidos
32690 Ride in a Pink Car
32741 The Naked Truth (1957) (Your Past Is Showing)
32781 Disaster Playground
32799 Nice Guy
32847 OMG, I'm a Robot!
32880 KillerSaurus
33142 Viva
33335 Ollaan vapaita
33545 Hyena Road
33819 Asphalt Angels
34038 Fakta Ladh Mhana
34136 Man Crazy
34527 Roger la Honte
34669 Sentimentalnyy roman
34705 Yedyanchi Jatra
34717 Dhadakebaaz
34720 Ittefaq
34830 Elämältä kaiken sain
34840 Dil Kya Kare
34843 Hogi Pyar Ki Jeet
34871 Monk by Blood
34885 I Am Syd Stone
34890 Alone With People
35004 Proibito Rubare - Luigi Comencini
35022 Le strelle nel fosso
35194 38 Parrots
35225 The Adventures of Sherlock Holmes and Doctor Watson
35310 Amigo Undead
35555 The Republic 
35620 A Dark Place Inside
35783 Ουζερί Τσιτσάνης
3593

58288 Underneath
58474 The Art of McCartney
58873 Romeo Akbar Walter
58973 The Man with Half a Body
59058 Anatolian Trip
59660 The Guy Who Didn't Like Musicals
59837 A Place of Truth
59905 Мiсто, в якому не ходять грошi
59911 D-Day: The Total Story
60026 Kid Brother
60063 The Saint Lies In Wait
60146 Kopy Kings
60204 RWBY: Volume 5
60247 Tenth of a Second
60248 Time of tears
60317 Man's First Friend
60614 Knox: The Life and Legacy of Scotland's Controversial Reformer
60696 Sherlock: The Blind Banker
60772 cursor
60826 Waiting for the Storm
61001 Eli
61002 In the Tall Grass
61027 1013 Briar Lane
61079 Burdock
61100 Десять минут первого
61143 Falling Inn Love
61324 The Fellowship of the Farmers
61540 NOVA: Prediction by the Numbers
61549 The Night Bulletin
61629 Royal Shakespeare Company: The Tempest
61684 Egy másik életben
61732 Twice
61742 The Black Legend of Mexican Cinema
61756 Chef vs. Science: The Ultimate Kitchen Challenge
61859 Hotel Camarillo
61868 The Evil Inside
61895 Along Ca

In [50]:
ratings_df = df.loc[:, 'imdb_user_rating':'imdb_metascore']
ratings_df.drop(df.index[[31212, 62422]])
ratings_df

Unnamed: 0,imdb_user_rating,imdb_metascore
0,8.3,95.0
1,7.0,
2,6.7,
3,5.9,
4,6.1,
5,8.2,76.0
6,6.3,
7,5.5,
8,5.8,
9,7.2,65.0


In [61]:
temp_df = pd.DataFrame()
temp_df["imdb_user_rating"] = user_scores
temp_df["imdb_metascore"] = metascores
metascores

[nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 70,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 70,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 74,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 64,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 80,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 na

In [62]:
df.loc[df.index[31212]:, "imdb_user_rating"] = user_scores
df.loc[df.index[31212]:, "imdb_metascore"] = metascores

df.iloc[31212:31222]

Unnamed: 0,movieId,title,genres,imdb_user_rating,imdb_metascore
31212,138024,"Michael Jackson: Dangerous Tour (Bucharest, 19...",(no genres listed),9.1,
31213,138026,Michael Jackson: Number Ones (2003),(no genres listed),8.7,
31214,138028,Tsumugi (2004),Drama|Romance,3.7,
31215,138030,The Chinese Mayor (2015),Documentary|Drama,7.7,
31216,138032,Hunger (1974),Animation,6.9,
31217,138034,The Border (2007),Drama,6.7,
31218,138036,The Man from U.N.C.L.E. (2015),Action|Adventure|Comedy,7.3,
31219,138038,The Widowmaker (2015),Documentary,8.1,70.0
31220,138042,The Ritual,(no genres listed),,
31221,138044,Reuniting the Rubins (2012),Children|Comedy|Drama,,


In [63]:
df.to_csv('imdb.csv')

In [68]:
def get_metascore(url):
    if url is None:
        return np.nan, np.nan
    try:
        page_movie = urlopen(BASE_URL + url)
    except:
        return np.nan, np.nan
    soup = BeautifulSoup(page_movie, "lxml")

    try:
        metascore_div = soup.find_all('div', class_='metacriticScore')
        metascore_str = str(metascore_div)
        start_index = metascore_str.find("span") + 5
        end_index = metascore_str.rfind("span") - 2
        metascore = int(metascore_str[start_index : end_index])
    except:
        metascore = np.nan
        
    return metascore

In [None]:
metascores_df = df["imdb_metascore"]
index = 0

while index < metascores_df.size:
    score = metascores_df.iloc[index]
    try:
        if np.isnan(score):
            movie = movies_df.iloc[index]
            title, year = clean_title_data(movie)
            url = find_movie_url(title, year)
            metascore = get_metascore(url)
            df.loc[df.index[index], "imdb_metascore"] = metascore
        index = index + 1
    except:
        print(index, movie)
        index = index + 1

106 Catwalk (1996)
126 Jupiter's Wife (1994)
140 Shadows (Cienie) (1988)
221 Don Juan DeMarco (1995)
240 Gordy (1995)
282 Beyond Bedlam (1993)
341 Backbeat (1993)
397 Open Season (1996)
398 Two Crimes (Dos crímenes) (1995)
417 Blink (1994)
464 House of the Spirits, The (1993)
564 Wedding Gift, The (1994)
575 Dear Diary (Caro Diario) (1994)
590 Window to Paris (Okno v Parizh) (1994)
607 Bread and Chocolate (Pane e cioccolata) (1973)
646 Yankee Zulu (1994)
663 Hostile Intentions (1994)
665 Some Folks Call It a Sling Blade (1993)
675 Promise, The (Versprechen, Das) (1995)
707 Haunted World of Edward D. Wood Jr., The (1996)
740 Carmen Miranda: Bananas Is My Business (1994)
803 Crude Oasis, The (1995)
807 Regular Guys (Echte Kerle) (1996)
808 Women Robbers (Diebinnen) (1995)
823 Eyes Without a Face (Yeux sans visage, Les) (1959)
846 Small Faces (1996)
849 Death in Brunswick (1991)
869 Baton Rouge (Bâton rouge) (1988)
987 Love Bug, The (1969)
1001 Three Caballeros, The (1945)
1003 So Dear to

7141 Lost Skeleton of Cadavra, The (2002)
7164 Silent Night, Bloody Night (1973)
7170 After the Life (2002)
7179 Allegro non troppo (1977)
7181 Herod's Law (Ley de Herodes, La) (2000)
7231 Night Crossing (1981)
7331 Foreign Land (Terra Estrangeira) (1996)
7339 Naked Prey, The (1966)
7369 In This Our Life (1942)
7417 Comedy of Terrors, The (1964)
7436 Tuesdays with Morrie (1999)
7443 Winter Sleepers (Winterschläfer) (2000)
7485 Thin Man Goes Home, The (1945)
7508 Countess Dracula (1972)
7521 Master of the Flying Guillotine (Du bi quan wang da po xue di zi) (1975)
7559 Yakuza, The (1975)
7596 10 Rillington Place (1971)
7635 Sex: The Annabel Chong Story (1999)
7654 Jin Roh: The Wolf Brigade (Jin-Rô) (1998)
7672 Golden Coach, The (Le carrosse d'or) (1953)
7748 Bright Future (Akarui mirai) (2003)
7806 Johnny Eager (1942)
7861 Plain Dirty (a.k.a. Briar Patch) (2003)
7893 Pull My Daisy (1958)
7939 The Night of the Following Day (1968)
8003 Plainsman, The (1937)
8027 Ruby Cairo (1993)
8037 Gun

14886 Whirlpool (1949)
14963 Twilight People, The (1973)
15010 Lies (Gojitmal) (1999)
15026 Solstice (1994)
15036 Millions Game, The (Das Millionenspiel)
15040 Kautokeino Rebellion, The (Kautokeino-opprøret) (2008)
15065 Thorn in the Heart, The (2009)
15066 See What I'm Saying: The Deaf Entertainers Documentary (2010)
15074 Crossworlds (1997)
15148 King Lear (Korol Lir) (1971)
15159 Hypothesis of the Stolen Painting, The (L'hypothèse du tableau volé) (1979)
15171 No One Dies in Lily Dale (2010)
15182 Off and Running (2009) 
15183 Truth, The (2010) 
15184 Wedding Song, The (2008) 
15192 Four Stories of St. Julian (2010) 
15292 (Untitled) (2009)
15385 Moth, The (Cma) (1980)
15437 Perfect Human, The (Perfekte Menneske, Det) (1967)
15496 Tramp, The (Awaara) (Awara) (1951)
15572 Mendy: A Question of Faith (2003)
15617 Man I Love, The (1947)
15640 Nightfall (1957)
15676 It's Alive III: Island of the Alive (1987)
15698 Life in Flight (2008) 
15741 Tiger's Tail, The (2006)
15789 State Witness,

19627 Castle of Cloads, The (Pilvilinna) (1970)
19638 Carne de gallina (Chicken Skin) (2002)
19686 Three Degrees Colder (3° kälter) (2005)
19727 Lesson Plan (2011) 
19749 Harvest: 3,000 Years (Mirt Sost Shi Amit) (1976)
19771 Tie Xi Qu: West of the Tracks (Tiexi qu) (2003)
19772 Night at the Crossroads (La nuit du carrefour) (1932)
19810 Modify (2005) 
19811 Hiroshima (2005) 
19845 London Paris New York (2012)
19863 Bad Blood: A Cautionary Tale (2010) 
19874 Ill-Fated Love (Doomed Love) (Amor de Perdição) (1979)
19877 Wedding Trough (Vase de noces) (1975)
19880 Days of Grace (Días de gracia) (2011) 
19908 Liz & Dick (2012) 
19913 Projectionist, The (1971)
19917 Blutzbrüdaz (2013)
19922 Back to 1942 (2012) 
19937 Woody Allen: A Documentary (2012)
19941 Ghoulies III: Ghoulies Go to College (1991)
19942 How to Make Money Selling Drugs (2013)
19959 Ed Hardy: Tattoo the World (2010) 
19960 Vile (2011) 
19970 Please Remove Your Shoes (2010) 
19975 ATF (1999) 
20010 Bashu, the Little Stranger

24539 Mr. Denning Drives North (1952)
24553 Vice Raid (1960)
24567 Maniac Cop 3: Badge of Silence (1993)
24717 Little Fridolf Becomes a Grandfather (1957)
24736 Mask of the Avenger (1951)
24817 Gun for a Coward (1957)
24826 Scorpion Spring (1996)
24828 The Naughty Flirt (1931)
24868 Tomorrow You're Gone (2013)
24931 Pretty Devils (2000)
24941 The Bates Haunting (2013)
24942 The Rainbow Tribe (2011)
24954 Heaven with a Gun (1969)
24975 The Piano Player (2003)
24981 National Lampoon's Movie Madness (1983)
25000 Jack Frost (1979)
25074 Untitled Spider-Man Reboot (2017)
25102 Magic Boy (1961)
25130 Stalingrad (1989)
25134 Goodbye, Mr. President (1987)
25155 The Geisha Boy (1959)
25162 The Expedition to the End of the World (2014)
25165 The Radio Burglary (1951)
25180 Julius Caesar (1971)
25196 See Girl Run (2013)
25210 The Flesh and the Fiends (1961)
25216 Cry, the Beloved Country (1952)
25230 Katherine (1975)
25273 The House Under the Rocks (1959)
25292 Our Italian Husband (2006)
25318 Th

In [None]:
df

In [12]:
# Load user, ratings, and movie data
users_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    './ml-100k/u.user', sep='|', names=users_cols, encoding='latin-1')

ratings_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(
    './ml-100k/u.data', sep='\t', names=ratings_cols, encoding='latin-1')

# The movies file contains a binary feature for each genre.
genre_cols = [
    "genre_unknown", "Action", "Adventure", "Animation", "Children", "Comedy",
    "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
    "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"
]
movies_cols = [
    'movie_id', 'title', 'release_date', "video_release_date", "imdb_url"
] + genre_cols
movies = pd.read_csv('./ml-100k/u.item', sep='|', names=movies_cols, encoding='latin-1')

In [17]:
movies

Unnamed: 0,movie_id,title,release_date,video_release_date,imdb_url,genre_unknown,Action,Adventure,Animation,Children,...,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,year,imdb_user_rating,imdb_metascore
0,0,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,1995,,
1,1,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,1,0,0,1995,,
2,2,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,1,0,0,1995,,
3,3,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,1995,,
4,4,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,1,0,0,1995,,
5,5,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,01-Jan-1995,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,...,0,0,0,0,0,0,0,1995,,
6,6,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,1,0,0,0,1995,,
7,7,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,0,0,0,0,1995,,
8,8,Dead Man Walking (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,...,0,0,0,0,0,0,0,1995,,
9,9,Richard III (1995),22-Jan-1996,,http://us.imdb.com/M/title-exact?Richard%20III...,0,0,0,0,0,...,0,0,0,0,0,1,0,1996,,


In [14]:
# 0-index data
users["user_id"] = users["user_id"].apply(lambda x: str(x-1))
movies["movie_id"] = movies["movie_id"].apply(lambda x: str(x-1))
movies["year"] = movies['release_date'].apply(lambda x: str(x).split('-')[-1])
ratings["movie_id"] = ratings["movie_id"].apply(lambda x: str(x-1))
ratings["user_id"] = ratings["user_id"].apply(lambda x: str(x-1))
ratings["rating"] = ratings["rating"].apply(lambda x: float(x))

In [16]:
movies["imdb_user_rating"] = np.nan
movies["imdb_metascore"] = np.nan

In [18]:
movies_df = movies["title"]
user_scores = []
metascores = []
index = 0

while index < movies_df.size:
    try:
        movie = movies_df.iloc[index]
        title, year = clean_title_data(movie)
        url = find_movie_url(title, year)
        user_score, metascore = get_ratings(url)

        user_scores.append(user_score)
        metascores.append(metascore)
        #movies.loc[movies.index[index], "imdb_user_rating"] = user_score
        #movies.loc[movies.index[index], "imdb_metascore"] = metascore
        print(index, movie)
        index = index + 1
        
    except:
        print('ERROR')
        print(index, movie)
        user_scores.append(np.nan)
        metascores.append(np.nan)
        index = index + 1

0 Toy Story (1995)
1 GoldenEye (1995)
2 Four Rooms (1995)
3 Get Shorty (1995)
4 Copycat (1995)
5 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
6 Twelve Monkeys (1995)
7 Babe (1995)
8 Dead Man Walking (1995)
9 Richard III (1995)
10 Seven (Se7en) (1995)
11 Usual Suspects, The (1995)
12 Mighty Aphrodite (1995)
13 Postino, Il (1994)
14 Mr. Holland's Opus (1995)
15 French Twist (Gazon maudit) (1995)
16 From Dusk Till Dawn (1996)
17 White Balloon, The (1995)
18 Antonia's Line (1995)
19 Angels and Insects (1995)
20 Muppet Treasure Island (1996)
21 Braveheart (1995)
22 Taxi Driver (1976)
23 Rumble in the Bronx (1995)
24 Birdcage, The (1996)
25 Brothers McMullen, The (1995)
26 Bad Boys (1995)
27 Apollo 13 (1995)
28 Batman Forever (1995)
29 Belle de jour (1967)
30 Crimson Tide (1995)
31 Crumb (1994)
32 Desperado (1995)
33 Doom Generation, The (1995)
34 Free Willy 2: The Adventure Home (1995)
35 Mad Love (1995)
36 Nadja (1994)
37 Net, The (1995)
38 Strange Days (1995)
39 To Wong Foo, Thank

287 Scream (1996)
288 Evita (1996)
289 Fierce Creatures (1997)
290 Absolute Power (1997)
291 Rosewood (1997)
292 Donnie Brasco (1997)
293 Liar Liar (1997)
294 Breakdown (1997)
295 Promesse, La (1996)
296 Ulee's Gold (1997)
297 Face/Off (1997)
298 Hoodlum (1997)
299 Air Force One (1997)
300 In & Out (1997)
301 L.A. Confidential (1997)
302 Ulee's Gold (1997)
303 Fly Away Home (1996)
304 Ice Storm, The (1997)
305 Mrs. Brown (Her Majesty, Mrs. Brown) (1997)
306 Devil's Advocate, The (1997)
307 FairyTale: A True Story (1997)
308 Deceiver (1997)
309 Rainmaker, The (1997)
310 Wings of the Dove, The (1997)
311 Midnight in the Garden of Good and Evil (1997)
312 Titanic (1997)
313 3 Ninjas: High Noon At Mega Mountain (1998)
314 Apt Pupil (1998)
315 As Good As It Gets (1997)
316 In the Name of the Father (1993)
317 Schindler's List (1993)
318 Everyone Says I Love You (1996)
319 Paradise Lost: The Child Murders at Robin Hood Hills (1996)
320 Mother (1996)
321 Murder at 1600 (1997)
322 Dante's Peak

574 City Slickers II: The Legend of Curly's Gold (1994)
575 Cliffhanger (1993)
576 Coneheads (1993)
577 Demolition Man (1993)
578 Fatal Instinct (1993)
579 Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)
580 Kalifornia (1993)
581 Piano, The (1993)
582 Romeo Is Bleeding (1993)
583 Secret Garden, The (1993)
584 Son in Law (1993)
585 Terminal Velocity (1994)
586 Hour of the Pig, The (1993)
587 Beauty and the Beast (1991)
588 Wild Bunch, The (1969)
589 Hellraiser: Bloodline (1996)
590 Primal Fear (1996)
591 True Crime (1995)
592 Stalingrad (1993)
593 Heavy (1995)
594 Fan, The (1996)
595 Hunchback of Notre Dame, The (1996)
596 Eraser (1996)
597 Big Squeeze, The (1996)
598 Police Story 4: Project S (Chao ji ji hua) (1993)
599 Daniel Defoe's Robinson Crusoe (1996)
600 For Whom the Bell Tolls (1943)
601 American in Paris, An (1951)
602 Rear Window (1954)
603 It Happened One Night (1934)
604 Meet Me in St. Louis (1944)
605 All About Eve (1950)
606 Rebecca (1940)
607 Spellbou

869 Touch (1997)
870 Vegas Vacation (1997)
871 Love Jones (1997)
872 Picture Perfect (1997)
873 Career Girls (1997)
874 She's So Lovely (1997)
875 Money Talks (1997)
876 Excess Baggage (1997)
877 That Darn Cat! (1997)
878 Peacemaker, The (1997)
879 Soul Food (1997)
880 Money Talks (1997)
881 Washington Square (1997)
882 Telling Lies in America (1997)
883 Year of the Horse (1997)
884 Phantoms (1998)
885 Life Less Ordinary, A (1997)
886 Eve's Bayou (1997)
887 One Night Stand (1997)
888 Tango Lesson, The (1997)
889 Mortal Kombat: Annihilation (1997)
890 Bent (1997)
891 Flubber (1997)
892 For Richer or Poorer (1997)
893 Home Alone 3 (1997)
894 Scream 2 (1997)
895 Sweet Hereafter, The (1997)
896 Time Tracers (1995)
897 Postman, The (1997)
898 Winter Guest, The (1997)
899 Kundun (1997)
900 Mr. Magoo (1997)
901 Big Lebowski, The (1998)
902 Afterglow (1997)
903 Ma vie en rose (My Life in Pink) (1997)
904 Great Expectations (1998)
905 Oscar & Lucinda (1997)
906 Vermin (1998)
907 Half Baked (199

1157 Fille seule, La (A Single Girl) (1995)
1158 Stalker (1979)
1159 Love! Valour! Compassion! (1997)
1160 Palookaville (1996)
1161 Phat Beach (1996)
1162 Portrait of a Lady, The (1996)
1163 Zeus and Roxanne (1997)
1164 Big Bully (1996)
1165 Love & Human Remains (1993)
1166 Sum of Us, The (1994)
1167 Little Buddha (1993)
1168 Fresh (1994)
1169 Spanking the Monkey (1994)
1170 Wild Reeds (1994)
1171 Women, The (1939)
1172 Bliss (1997)
1173 Caught (1996)
1174 Hugo Pool (1997)
1175 Welcome To Sarajevo (1997)
1176 Dunston Checks In (1996)
1177 Major Payne (1994)
1178 Man of the House (1995)
1179 I Love Trouble (1994)
1180 Low Down Dirty Shame, A (1994)
1181 Cops and Robbersons (1994)
1182 Cowboy Way, The (1994)
1183 Endless Summer 2, The (1994)
1184 In the Army Now (1994)
1185 Inkwell, The (1994)
1186 Switchblade Sisters (1975)
1187 Young Guns II (1990)
1188 Prefontaine (1997)
1189 That Old Feeling (1997)
1190 Letter From Death Row, A (1998)
1191 Boys of St. Vincent, The (1993)
1192 Before 

1433 Shooting Fish (1997)
1434 Steal Big, Steal Little (1995)
1435 Mr. Jones (1993)
1436 House Party 3 (1994)
1437 Panther (1995)
1438 Jason's Lyric (1994)
1439 Above the Rim (1994)
1440 Moonlight and Valentino (1995)
1441 Scarlet Letter, The (1995)
1442 8 Seconds (1994)
1443 That Darn Cat! (1965)
1444 Ladybird Ladybird (1994)
1445 Bye Bye, Love (1995)
1446 Century (1993)
1447 My Favorite Season (1993)
1448 Pather Panchali (1955)
1449 Golden Earrings (1947)
1450 Foreign Correspondent (1940)
1451 Lady of Burlesque (1943)
1452 Angel on My Shoulder (1946)
1453 Angel and the Badman (1947)
1454 Outlaw, The (1943)
1455 Beat the Devil (1954)
1456 Love Is All There Is (1996)
1457 Damsel in Distress, A (1937)
1458 Madame Butterfly (1995)
1459 Sleepover (1995)
1460 Here Comes Cookie (1935)
1461 Thieves (Voleurs, Les) (1996)
1462 Boys, Les (1997)
1463 Stars Fell on Henrietta, The (1995)
1464 Last Summer in the Hamptons (1995)
1465 Margaret's Museum (1995)
1466 Saint of Fort Washington, The (1993)

In [20]:
movies["imdb_user_rating"] = user_scores
movies["imdb_metascore"] = metascores

In [21]:
movies

Unnamed: 0,movie_id,title,release_date,video_release_date,imdb_url,genre_unknown,Action,Adventure,Animation,Children,...,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,year,imdb_user_rating,imdb_metascore
0,0,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,1995,8.3,95.0
1,1,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,1,0,0,1995,7.2,65.0
2,2,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,1,0,0,1995,6.8,
3,3,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,1995,6.9,82.0
4,4,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,1,0,0,1995,6.6,54.0
5,5,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,01-Jan-1995,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,...,0,0,0,0,0,0,0,1995,7.1,77.0
6,6,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,1,0,0,0,1995,8.0,74.0
7,7,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,0,0,0,0,1995,6.8,83.0
8,8,Dead Man Walking (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,...,0,0,0,0,0,0,0,1995,7.5,80.0
9,9,Richard III (1995),22-Jan-1996,,http://us.imdb.com/M/title-exact?Richard%20III...,0,0,0,0,0,...,0,0,0,0,0,1,0,1996,7.4,


In [22]:
movies.to_csv("100k_imdb_ratings.csv")