In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast #to process trees of the Python abstract syntax grammar

## **Import Dataset**

In [None]:
! pip install -q kaggle

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/kaggle'

In [None]:
!kaggle datasets download -d rounakbanik/the-movies-dataset

Dataset URL: https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset
License(s): CC0-1.0
Downloading the-movies-dataset.zip to /content
 96% 218M/228M [00:01<00:00, 93.0MB/s]
100% 228M/228M [00:01<00:00, 133MB/s] 


In [None]:
import zipfile

# Unpack the downloaded Kaggle archive into ./content (relative to the current
# working directory). The context manager guarantees the archive handle is
# closed even if extraction fails part-way.
with zipfile.ZipFile('the-movies-dataset.zip', 'r') as zip_ref:
  zip_ref.extractall('./content')



In [None]:
# Load the cast/crew table and the per-movie metadata table.
df_credits = pd.read_csv('/content/content/credits.csv')
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-type DtypeWarning that
# this file otherwise triggers.
df_metadata = pd.read_csv('/content/content//movies_metadata.csv', low_memory=False)
df_credits.head()

  df_metadata = pd.read_csv('/content/content//movies_metadata.csv')


Unnamed: 0,cast,crew,id
0,"[{'cast_id': 14, 'character': 'Woody (voice)',...","[{'credit_id': '52fe4284c3a36847f8024f49', 'de...",862
1,"[{'cast_id': 1, 'character': 'Alan Parrish', '...","[{'credit_id': '52fe44bfc3a36847f80a7cd1', 'de...",8844
2,"[{'cast_id': 2, 'character': 'Max Goldman', 'c...","[{'credit_id': '52fe466a9251416c75077a89', 'de...",15602
3,"[{'cast_id': 1, 'character': ""Savannah 'Vannah...","[{'credit_id': '52fe44779251416c91011acb', 'de...",31357
4,"[{'cast_id': 1, 'character': 'George Banks', '...","[{'credit_id': '52fe44959251416c75039ed7', 'de...",11862


In [None]:
df_metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  45466 non-null  object 
 1   belongs_to_collection  4494 non-null   object 
 2   budget                 45466 non-null  object 
 3   genres                 45466 non-null  object 
 4   homepage               7782 non-null   object 
 5   id                     45466 non-null  object 
 6   imdb_id                45449 non-null  object 
 7   original_language      45455 non-null  object 
 8   original_title         45466 non-null  object 
 9   overview               44512 non-null  object 
 10  popularity             45461 non-null  object 
 11  poster_path            45080 non-null  object 
 12  production_companies   45463 non-null  object 
 13  production_countries   45463 non-null  object 
 14  release_date           45379 non-null  object 
 15  re

## **Data Cleaning**

*  The release date is stored as an `object` dtype; convert it to `datetime`.
*  Create a **year** column using the release date column.



In [None]:
# Parse release_date into datetime64; errors='coerce' turns values that cannot
# be parsed into NaT instead of raising.
df_metadata['release_date'] = pd.to_datetime(df_metadata['release_date'], errors='coerce')
df_metadata['release_date'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 45466 entries, 0 to 45465
Series name: release_date
Non-Null Count  Dtype         
--------------  -----         
45376 non-null  datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 355.3 KB


In [None]:
df_metadata['year'] = df_metadata['release_date'].dt.year

In [None]:
yearcount_df = df_metadata['year'].value_counts().sort_index()
yearcount_df.head()

Unnamed: 0_level_0,count
year,Unnamed: 1_level_1
1874.0,1
1878.0,1
1883.0,1
1887.0,1
1888.0,2


In [None]:
# Keep only the 2017 releases and the columns needed downstream. The explicit
# .copy() makes new_metadata an independent frame, so assigning to one of its
# columns in a later cell neither raises SettingWithCopyWarning nor depends on
# pandas' view-versus-copy behaviour.
new_metadata = df_metadata.loc[df_metadata['year'] == 2017, ['genres','id','title','year']].copy()
new_metadata.head()

Unnamed: 0,genres,id,title,year
26560,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0
26561,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",141052,Justice League,2017.0
26565,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",284053,Thor: Ragnarok,2017.0
26566,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",283995,Guardians of the Galaxy Vol. 2,2017.0
30536,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'na...",245842,The King's Daughter,2017.0


In [None]:
new_metadata['id'] = new_metadata['id'].astype(int)

In [None]:
df = pd.merge(new_metadata,df_credits,on='id')

In [None]:
pd.set_option('display.max_colwidth',75)
df.head()

Unnamed: 0,genres,id,title,year,cast,crew
0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, 'name': 'Action'}, {'id': ...",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,"[{'cast_id': 1, 'character': 'Captain Jack Sparrow', 'credit_id': '52fe...","[{'credit_id': '52fe4c9cc3a36847f8236a65', 'department': 'Production', ..."
1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': ...",141052,Justice League,2017.0,"[{'cast_id': 2, 'character': 'Bruce Wayne / Batman', 'credit_id': '535e...","[{'credit_id': '55ef66dbc3a3686f1700a52d', 'department': 'Production', ..."
2,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': ...",284053,Thor: Ragnarok,2017.0,"[{'cast_id': 0, 'character': 'Thor Odinson', 'credit_id': '545d46a80e0a...","[{'credit_id': '56a93fa4c3a36872db001e7a', 'department': 'Writing', 'ge..."
3,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': ...",283995,Guardians of the Galaxy Vol. 2,2017.0,"[{'cast_id': 3, 'character': 'Peter Quill / Star-Lord', 'credit_id': '5...","[{'credit_id': '59171547925141583c0315a6', 'department': 'Sound', 'gend..."
4,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'name': 'Action'}, {'id': 12...",245842,The King's Daughter,2017.0,"[{'cast_id': 0, 'character': 'King Louis XIV', 'credit_id': '5431dd580e...","[{'credit_id': '5431de49c3a36825d300007e', 'department': 'Directing', '..."


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531 entries, 0 to 530
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   genres  531 non-null    object 
 1   id      531 non-null    int64  
 2   title   531 non-null    object 
 3   year    531 non-null    float64
 4   cast    531 non-null    object 
 5   crew    531 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 25.0+ KB


In [None]:
# These three columns hold Python-literal strings (lists of dicts); parse each
# cell into the real list object so the entries can be iterated later.
cols = ['genres', 'cast', 'crew']

for col in cols:
  df[col] = df[col].map(ast.literal_eval)

df.head(3)

Unnamed: 0,genres,id,title,year,cast,crew
0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, 'name': 'Action'}, {'id': ...",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,"[{'cast_id': 1, 'character': 'Captain Jack Sparrow', 'credit_id': '52fe...","[{'credit_id': '52fe4c9cc3a36847f8236a65', 'department': 'Production', ..."
1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': ...",141052,Justice League,2017.0,"[{'cast_id': 2, 'character': 'Bruce Wayne / Batman', 'credit_id': '535e...","[{'credit_id': '55ef66dbc3a3686f1700a52d', 'department': 'Production', ..."
2,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': ...",284053,Thor: Ragnarok,2017.0,"[{'cast_id': 0, 'character': 'Thor Odinson', 'credit_id': '545d46a80e0a...","[{'credit_id': '56a93fa4c3a36872db001e7a', 'department': 'Writing', 'ge..."


In [None]:
pd.set_option('display.max_colwidth',200)
df['genres'].head(3)

Unnamed: 0,genres
0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, 'name': 'Action'}, {'id': 14, 'name': 'Fantasy'}, {'id': 35, 'name': 'Comedy'}]"
1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 878, 'name': 'Science Fiction'}]"
2,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 878, 'name': 'Science Fiction'}]"


In [None]:
def make_genresList(x):
  """Join the genre names of one movie into a single space-separated string.

  Args:
    x: iterable of dict-like entries; each entry's 'name' key is read
      with ``.get``.

  Returns:
    The names joined by a single space, with 'Science Fiction' shortened to
    'Sci-Fi'. Returns ``np.nan`` when ``x`` contains no entries.
  """
  gen = []
  for i in x:
    name = i.get('name')
    gen.append('Sci-Fi' if name == 'Science Fiction' else name)
  if not gen:
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    return np.nan
  return " ".join(gen)

# Apply make_genresList to every row's parsed genre list.
df['genres_list'] = df['genres'].map(make_genresList)
df['genres_list'].head()

Unnamed: 0,genres_list
0,Adventure Action Fantasy Comedy
1,Action Adventure Fantasy Sci-Fi
2,Action Adventure Fantasy Sci-Fi
3,Action Adventure Comedy Sci-Fi
4,Fantasy Action Adventure


### **Main Actors**

In [None]:
pd.set_option('display.max_colwidth',250)
df['cast'].head(3)



Unnamed: 0,cast
0,"[{'cast_id': 1, 'character': 'Captain Jack Sparrow', 'credit_id': '52fe4c9cc3a36847f8236a53', 'gender': 2, 'id': 85, 'name': 'Johnny Depp', 'order': 0, 'profile_path': '/kbWValANhZI8rbWZXximXuMN4UN.jpg'}, {'cast_id': 9, 'character': 'Captain Arma..."
1,"[{'cast_id': 2, 'character': 'Bruce Wayne / Batman', 'credit_id': '535e68db0e0a264fe10065f5', 'gender': 2, 'id': 880, 'name': 'Ben Affleck', 'order': 0, 'profile_path': '/cPuPt6mYJ83DjvO3hbjNGug6Fbi.jpg'}, {'cast_id': 1, 'character': 'Clark Kent ..."
2,"[{'cast_id': 0, 'character': 'Thor Odinson', 'credit_id': '545d46a80e0a261fb3004e81', 'gender': 2, 'id': 74568, 'name': 'Chris Hemsworth', 'order': 0, 'profile_path': '/tlkDiLn2G75Xr7m1ybK8QFzZBso.jpg'}, {'cast_id': 1, 'character': 'Loki', 'credi..."


In [None]:
def actor_list(x):
  """Return the name of the first entry in a movie's cast list.

  Args:
    x: iterable of dict-like cast entries; each entry's 'name' key is read
      with ``.get``.

  Returns:
    The 'name' of the first entry, or ``np.nan`` when ``x`` is empty.
  """
  cast = [i.get('name') for i in x]
  if not cast:
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    return np.nan
  return cast[0]

# Apply actor_list to every row's parsed cast list.
df['actor_1_name'] = df['cast'].map(actor_list)
df['actor_1_name'].head(3)

Unnamed: 0,actor_1_name
0,Johnny Depp
1,Ben Affleck
2,Chris Hemsworth


In [None]:
df.head(3)

Unnamed: 0,genres,id,title,year,cast,crew,genres_list,actor_1_name
0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, 'name': 'Action'}, {'id': 14, 'name': 'Fantasy'}, {'id': 35, 'name': 'Comedy'}]",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,"[{'cast_id': 1, 'character': 'Captain Jack Sparrow', 'credit_id': '52fe4c9cc3a36847f8236a53', 'gender': 2, 'id': 85, 'name': 'Johnny Depp', 'order': 0, 'profile_path': '/kbWValANhZI8rbWZXximXuMN4UN.jpg'}, {'cast_id': 9, 'character': 'Captain Arma...","[{'credit_id': '52fe4c9cc3a36847f8236a65', 'department': 'Production', 'gender': 2, 'id': 770, 'job': 'Producer', 'name': 'Jerry Bruckheimer', 'profile_path': '/lQu9eRzRX264j2LFQwvKC50wb9s.jpg'}, {'credit_id': '592f443cc3a3685cf7000019', 'departm...",Adventure Action Fantasy Comedy,Johnny Depp
1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 878, 'name': 'Science Fiction'}]",141052,Justice League,2017.0,"[{'cast_id': 2, 'character': 'Bruce Wayne / Batman', 'credit_id': '535e68db0e0a264fe10065f5', 'gender': 2, 'id': 880, 'name': 'Ben Affleck', 'order': 0, 'profile_path': '/cPuPt6mYJ83DjvO3hbjNGug6Fbi.jpg'}, {'cast_id': 1, 'character': 'Clark Kent ...","[{'credit_id': '55ef66dbc3a3686f1700a52d', 'department': 'Production', 'gender': 0, 'id': 282, 'job': 'Producer', 'name': 'Charles Roven', 'profile_path': '/4uJLoVstC1CBcArXFOe53N2fDr1.jpg'}, {'credit_id': '594287cfc3a3686c2c00bc9f', 'department'...",Action Adventure Fantasy Sci-Fi,Ben Affleck
2,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 878, 'name': 'Science Fiction'}]",284053,Thor: Ragnarok,2017.0,"[{'cast_id': 0, 'character': 'Thor Odinson', 'credit_id': '545d46a80e0a261fb3004e81', 'gender': 2, 'id': 74568, 'name': 'Chris Hemsworth', 'order': 0, 'profile_path': '/tlkDiLn2G75Xr7m1ybK8QFzZBso.jpg'}, {'cast_id': 1, 'character': 'Loki', 'credi...","[{'credit_id': '56a93fa4c3a36872db001e7a', 'department': 'Writing', 'gender': 2, 'id': 105643, 'job': 'Storyboard', 'name': 'Jay Oliva', 'profile_path': '/y7EbnuoKFMvU7uZ4aD50jThyNoF.jpg'}, {'credit_id': '551b780b9251414ae6001b7c', 'department': ...",Action Adventure Fantasy Sci-Fi,Chris Hemsworth


In [None]:
def actor_list2(x):
  """Return the name of the second entry in a movie's cast list.

  Args:
    x: iterable of dict-like cast entries; each entry's 'name' key is read
      with ``.get``.

  Returns:
    The 'name' of the second entry, or ``np.nan`` when ``x`` has fewer than
    two entries.
  """
  cast = [i.get('name') for i in x]
  if len(cast) <= 1:
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    return np.nan
  return cast[1]

# Apply actor_list2 to every row's parsed cast list.
df['actor_2_name'] = df['cast'].map(actor_list2)
df['actor_2_name'].head(3)

Unnamed: 0,actor_2_name
0,Javier Bardem
1,Henry Cavill
2,Tom Hiddleston


In [None]:
def actor_list3(x):
  """Return the name of the third entry in a movie's cast list.

  Args:
    x: iterable of dict-like cast entries; each entry's 'name' key is read
      with ``.get``.

  Returns:
    The 'name' of the third entry, or ``np.nan`` when ``x`` has fewer than
    three entries.
  """
  cast = [i.get('name') for i in x]
  if len(cast) <= 2:
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    return np.nan
  # Index 2 is the third entry; index 1 would repeat the second actor.
  return cast[2]

# Apply actor_list3 to every row's parsed cast list.
df['actor_3_name'] = df['cast'].map(actor_list3)
df['actor_3_name'].head(3)

Unnamed: 0,actor_3_name
0,Javier Bardem
1,Henry Cavill
2,Tom Hiddleston


## **Director**

In [None]:
pd.set_option('display.max_colwidth',250)
df['crew'].head(3)

Unnamed: 0,crew
0,"[{'credit_id': '52fe4c9cc3a36847f8236a65', 'department': 'Production', 'gender': 2, 'id': 770, 'job': 'Producer', 'name': 'Jerry Bruckheimer', 'profile_path': '/lQu9eRzRX264j2LFQwvKC50wb9s.jpg'}, {'credit_id': '592f443cc3a3685cf7000019', 'departm..."
1,"[{'credit_id': '55ef66dbc3a3686f1700a52d', 'department': 'Production', 'gender': 0, 'id': 282, 'job': 'Producer', 'name': 'Charles Roven', 'profile_path': '/4uJLoVstC1CBcArXFOe53N2fDr1.jpg'}, {'credit_id': '594287cfc3a3686c2c00bc9f', 'department'..."
2,"[{'credit_id': '56a93fa4c3a36872db001e7a', 'department': 'Writing', 'gender': 2, 'id': 105643, 'job': 'Storyboard', 'name': 'Jay Oliva', 'profile_path': '/y7EbnuoKFMvU7uZ4aD50jThyNoF.jpg'}, {'credit_id': '551b780b9251414ae6001b7c', 'department': ..."


In [None]:
def director(x):
  """Collect the names of all crew entries whose job is 'Director'.

  Args:
    x: iterable of dict-like crew entries; each entry's 'job' and 'name'
      keys are read with ``.get``.

  Returns:
    The matching names joined by a single space (several directors yield one
    combined string), or ``np.nan`` when no entry has job 'Director'.
  """
  # Named `directors` rather than `dir` so the builtin dir() is not shadowed.
  directors = [i.get('name') for i in x if i.get('job') == 'Director']
  if not directors:
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    return np.nan
  return " ".join(directors)

# Apply director to every row's parsed crew list.
df['director_name'] = df['crew'].map(director)
df['director_name'].head(3)

Unnamed: 0,director_name
0,Joachim Rønning Espen Sandberg
1,Zack Snyder
2,Taika Waititi


In [None]:
df.head(1)

Unnamed: 0,genres,id,title,year,cast,crew,genres_list,actor_1_name,actor_2_name,actor_3_name,director_name
0,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, 'name': 'Action'}, {'id': 14, 'name': 'Fantasy'}, {'id': 35, 'name': 'Comedy'}]",166426,Pirates of the Caribbean: Dead Men Tell No Tales,2017.0,"[{'cast_id': 1, 'character': 'Captain Jack Sparrow', 'credit_id': '52fe4c9cc3a36847f8236a53', 'gender': 2, 'id': 85, 'name': 'Johnny Depp', 'order': 0, 'profile_path': '/kbWValANhZI8rbWZXximXuMN4UN.jpg'}, {'cast_id': 9, 'character': 'Captain Arma...","[{'credit_id': '52fe4c9cc3a36847f8236a65', 'department': 'Production', 'gender': 2, 'id': 770, 'job': 'Producer', 'name': 'Jerry Bruckheimer', 'profile_path': '/lQu9eRzRX264j2LFQwvKC50wb9s.jpg'}, {'credit_id': '592f443cc3a3685cf7000019', 'departm...",Adventure Action Fantasy Comedy,Johnny Depp,Javier Bardem,Javier Bardem,Joachim Rønning Espen Sandberg


In [None]:
df_movie = df.loc[:,['director_name','actor_1_name','actor_2_name','actor_3_name','genres_list','title']]

In [None]:
# Drop every row that has a missing value in any of the selected columns,
# then confirm that no NaNs remain in any column.
df_movie = df_movie.dropna(how='any')
df_movie.isna().sum()

Unnamed: 0,0
director_name,0
actor_1_name,0
actor_2_name,0
actor_3_name,0
genres_list,0
title,0


In [None]:
df_movie.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres_list,title
0,Joachim Rønning Espen Sandberg,Johnny Depp,Javier Bardem,Javier Bardem,Adventure Action Fantasy Comedy,Pirates of the Caribbean: Dead Men Tell No Tales
1,Zack Snyder,Ben Affleck,Henry Cavill,Henry Cavill,Action Adventure Fantasy Sci-Fi,Justice League
2,Taika Waititi,Chris Hemsworth,Tom Hiddleston,Tom Hiddleston,Action Adventure Fantasy Sci-Fi,Thor: Ragnarok
3,James Gunn,Chris Pratt,Zoe Saldana,Zoe Saldana,Action Adventure Comedy Sci-Fi,Guardians of the Galaxy Vol. 2
4,Sean McNamara,Pierce Brosnan,William Hurt,William Hurt,Fantasy Action Adventure,The King's Daughter


In [None]:
# Rename the columns, lower-case the titles and build the 'combination' text
# feature (three actor names followed by the genre string) in one chain.
df_movie = (
    df_movie
    .rename(columns={'genres_list': 'genres', 'title': 'movie_title'})
    .assign(
        movie_title=lambda d: d['movie_title'].str.lower(),
        combination=lambda d: (
            d['actor_1_name'] + ' ' + d['actor_2_name'] + ' '
            + d['actor_3_name'] + ' ' + d['genres']
        ),
    )
)
df_movie.head(3)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,combination
0,Joachim Rønning Espen Sandberg,Johnny Depp,Javier Bardem,Javier Bardem,Adventure Action Fantasy Comedy,pirates of the caribbean: dead men tell no tales,Johnny Depp Javier Bardem Javier Bardem Adventure Action Fantasy Comedy
1,Zack Snyder,Ben Affleck,Henry Cavill,Henry Cavill,Action Adventure Fantasy Sci-Fi,justice league,Ben Affleck Henry Cavill Henry Cavill Action Adventure Fantasy Sci-Fi
2,Taika Waititi,Chris Hemsworth,Tom Hiddleston,Tom Hiddleston,Action Adventure Fantasy Sci-Fi,thor: ragnarok,Chris Hemsworth Tom Hiddleston Tom Hiddleston Action Adventure Fantasy Sci-Fi


In [None]:
# Load a previously prepared movie table with the same columns as df_movie.
# NOTE(review): df_movie_1.csv is not created anywhere in the visible cells;
# presumably it was uploaded or produced by an earlier notebook. Confirm it
# exists at this path before running.
old_df = pd.read_csv('/content/content/df_movie_1.csv')
old_df.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises
4,Doug Walker,Doug Walker,Rob Walker,unknown,Documentary,star wars: episode vii - the force awakens


In [None]:
# Build the 'combination' text feature for the older table the same way as for
# df_movie: the three actor names followed by the genre string, separated by
# single spaces.
old_df['combination'] = old_df['actor_1_name'] + ' ' + old_df['actor_2_name'] + ' ' + old_df['actor_3_name'] + ' ' + old_df['genres']
old_df.head(3)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,combination
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi Action Adventure Fantasy Sci-Fi
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Action Adventure Fantasy
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman Action Adventure Thriller


In [None]:
df_movie.head(3)

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,combination
0,Joachim Rønning Espen Sandberg,Johnny Depp,Javier Bardem,Javier Bardem,Adventure Action Fantasy Comedy,pirates of the caribbean: dead men tell no tales,Johnny Depp Javier Bardem Javier Bardem Adventure Action Fantasy Comedy
1,Zack Snyder,Ben Affleck,Henry Cavill,Henry Cavill,Action Adventure Fantasy Sci-Fi,justice league,Ben Affleck Henry Cavill Henry Cavill Action Adventure Fantasy Sci-Fi
2,Taika Waititi,Chris Hemsworth,Tom Hiddleston,Tom Hiddleston,Action Adventure Fantasy Sci-Fi,thor: ragnarok,Chris Hemsworth Tom Hiddleston Tom Hiddleston Action Adventure Fantasy Sci-Fi


In [None]:
# Stack the older movie table on top of the 2017 rows. ignore_index=True
# renumbers the rows, so the combined frame has unique index labels instead of
# labels that overlap between the two inputs.
frames = [old_df, df_movie]
df_new = pd.concat(frames, ignore_index=True)
df_new.head()

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,combination
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi Action Adventure Fantasy Sci-Fi
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Action Adventure Fantasy
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman Action Adventure Thriller
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises,Tom Hardy Christian Bale Joseph Gordon-Levitt Action Thriller
4,Doug Walker,Doug Walker,Rob Walker,unknown,Documentary,star wars: episode vii - the force awakens,Doug Walker Rob Walker unknown Documentary


In [None]:
# When a title appears more than once, keep only its last occurrence.
df_new = df_new.drop_duplicates(subset='movie_title', keep='last')


In [None]:
# Persist the combined, de-duplicated table; index=False keeps the row index
# out of the CSV.
df_new.to_csv('/content/df_movie_2.csv',index=False)