## MovieMetadata

Purpose: To extract relevant information about a movie given its movie ID.

In [4]:
import pandas as pd
import numpy as np


Pull IMDb movie data and add column labels.

{*movieID, title, release date, link to IMDb page, multi-hot encoded genre classification*}

In [5]:
# NOTE: absolute, machine-specific location of the '|'-separated metadata file.
path = 'C:\\Python310\\Projects\\UBarcelona\\ADS\\MovieRecommender\\movieMetadata.item'
# The file carries no header row, so every column is labelled by hand below.
movieMeta = pd.read_csv(path, sep='|', header=None, encoding='latin1')
# Labels for the multi-hot genre indicator columns, in file order.
genres = [
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-fi', 'Thriller', 'War', 'Western',
]
movieMeta.columns = ['movieID', 'title', 'release', 'drop', 'IMDbLink', *genres]
# The fourth column is only a placeholder; discard it right after labelling.
movieMeta = movieMeta.drop(columns=['drop'])
movieMeta.head()


Unnamed: 0,movieID,title,release,IMDbLink,unknown,Action,Adventure,Animation,Children's,Comedy,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [97]:

def get_genres(movieData, genres):
    '''
    Function to extract a list of all genres that are encoded in the multi-hot genre encoding.
    This function returns a dataframe of  'movieID' and listed 'genre'.

    Parameters:
        movieData: dataframe with a 'movieID' column and one 0/1 column per entry of 'genres'.
        genres: sequence of genre column names, in the order they should be listed.

    Returns:
        A new dataframe that keeps the index of 'movieData' and has two columns:
        'movieID' and 'genre' (list of the genre names flagged 1 in that row).
    '''
    genres = list(genres)

    #Pull multi-hot genres as a boolean (rows x genres) matrix. They are read from the
    #dataframe that was passed in, not from a notebook-level global, and row by row,
    #so repeated movieIDs do not break the decoding.
    is_flagged = movieData[genres].to_numpy() == 1

    #Translate the flagged positions of every row into genre names
    listed_genre = [[genres[ind] for ind in np.flatnonzero(row)] for row in is_flagged]

    #Add listed genres to backbone
    df = movieData[['movieID']].copy()
    df['genre'] = listed_genre
    return df

In [98]:
# Decode the multi-hot genre columns of movieMeta into one list of genre names per row.
all_genres = get_genres(movieMeta, genres)

In [99]:
# Display the movieID / listed-genre table built by get_genres.
all_genres

Unnamed: 0,movieID,genre
0,1,"[Animation, Children's, Comedy]"
1,2,"[Action, Adventure, Thriller]"
2,3,[Thriller]
3,4,"[Action, Comedy, Drama]"
4,5,"[Crime, Drama, Thriller]"
...,...,...
1677,1678,[Drama]
1678,1679,"[Romance, Thriller]"
1679,1680,"[Drama, Romance]"
1680,1681,[Comedy]


In [105]:
# Preview of the metadata without the multi-hot genre columns (movieMeta itself is not modified).
movieMeta.drop(columns=genres)

Unnamed: 0,movieID,title,release,IMDbLink
0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...
1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...
2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...
3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...
4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995)
...,...,...,...,...
1677,1678,Mat' i syn (1997),06-Feb-1998,http://us.imdb.com/M/title-exact?Mat%27+i+syn+...
1678,1679,B. Monkey (1998),06-Feb-1998,http://us.imdb.com/M/title-exact?B%2E+Monkey+(...
1679,1680,Sliding Doors (1998),01-Jan-1998,http://us.imdb.com/Title?Sliding+Doors+(1998)
1680,1681,You So Crazy (1994),01-Jan-1994,http://us.imdb.com/M/title-exact?You%20So%20Cr...


In [107]:
# Swap the multi-hot genre columns for the listed 'genre' column by joining on 'movieID'.
allMetadata = movieMeta.drop(columns=genres).merge(all_genres, on='movieID')

In [120]:
def get_movie(movie_id, metadata):
    '''
    Function to look up a movie in a metadata dataframe.
    This function returns the rows of 'metadata' whose 'movieID' equals 'movie_id'
    (an empty dataframe when no row matches).
    '''
    is_requested = metadata['movieID'] == movie_id
    return metadata.loc[is_requested]

In [121]:
# Example lookup: the combined metadata row(s) for movieID 172.
get_movie(172, allMetadata)

Unnamed: 0,movieID,title,release,IMDbLink,genre
171,172,"Empire Strikes Back, The (1980)",01-Jan-1980,http://us.imdb.com/M/title-exact?Empire%20Stri...,"[Action, Adventure, Drama, Romance, Sci-fi, War]"
