# Project: TMDB 5000 Movie Dataset


In [1]:
import pandas as pd
import seaborn as sns

In [2]:
# Load the TMDB movies table from the CSV in the working directory,
# then show its (rows, columns) shape as the cell output.
movies_csv = 'tmdb_5000_movies.csv'
df = pd.read_csv(movies_csv)
df.shape

(4803, 20)

In [3]:
# Preview the first two rows to see what each column holds.
df.iloc[:2]

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


In [4]:
# Summarise the frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   original_title        4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

#### Checking for null values

In [5]:
# Count missing values per column: isna() flags each missing cell,
# sum() totals the flags column by column.
null_counts = df.isna().sum()
null_counts

budget                     0
genres                     0
homepage                3091
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
dtype: int64

### Dropping Nulls

In [6]:
# Keep only rows that have no missing value in any column. Reassigning the
# result (instead of inplace=True) yields the same frame while avoiding
# in-place mutation, which pandas discourages and which blocks chaining.
# NOTE(review): the per-column null counts shown earlier in this notebook are
# dominated by `homepage` (3091) and `tagline` (844), so this step discards
# most of the 4803 rows. Consider dropna(subset=[...]) restricted to the
# columns the rating analysis actually uses.
df = df.dropna()
df.shape

(1493, 20)

In [7]:
# Re-count missing values per column to confirm the drop left none behind.
remaining_nulls = df.isna().sum()
remaining_nulls

budget                  0
genres                  0
homepage                0
id                      0
keywords                0
original_language       0
original_title          0
overview                0
popularity              0
production_companies    0
production_countries    0
release_date            0
revenue                 0
runtime                 0
spoken_languages        0
status                  0
tagline                 0
title                   0
vote_average            0
vote_count              0
dtype: int64

### Checking for Duplicates

In [8]:
# Count fully duplicated rows: duplicated() flags rows that repeat an earlier
# row across all columns, and sum() totals those flags.
df.duplicated().sum()

0

## Which movie had the highest rating?

In [9]:
# Highest average vote among the remaining movies.
highest_rated = df.loc[:, 'vote_average'].max()
print(f'The max rating of movies is {highest_rated} rating')

The max rating of movies is 8.4 rating


In [10]:
# Take the single row with the largest vote_average and print its original title.
top_row = df.nlargest(1, 'vote_average')
highest_rated = top_row['original_title']
print(highest_rated)

3337    The Godfather
Name: original_title, dtype: object


- The highest-rated movie is The Godfather.

### How many movies received the highest rating?

In [11]:
# Count how many movies share each vote_average, then show the row for the
# highest rating. The key and count columns are named explicitly because
# pandas >= 2.0 changed the names value_counts().reset_index() produces,
# which made sort_values(by='index') raise KeyError on newer versions.
rating_counts = (
    df['vote_average']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='vote_average')
)
rating_counts.sort_values(by='index').tail(1)


Unnamed: 0,index,vote_average
51,8.4,1


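- Exactly one movie has a `vote_average` of 8.4. Note that `value_counts()` counts movies per rating, not reviewers; the number of votes a movie received is stored in `vote_count`.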

## What is the lowest rating, and which movies received it?

In [12]:
# Lowest average vote among the remaining movies.
least_rated = df.loc[:, 'vote_average'].min()
print(f'The min rating of movies is {least_rated} rating')

The min rating of movies is 0.0 rating


In [13]:
# Count how many movies share each vote_average, then show the row for the
# lowest rating. The key and count columns are named explicitly because
# pandas >= 2.0 changed the names value_counts().reset_index() produces,
# which made sort_values(by='index') raise KeyError on newer versions.
rating_counts = (
    df['vote_average']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='vote_average')
)
# We can see 6 movies have a rating of zero.
rating_counts.sort_values(by='index').head(1)

Unnamed: 0,index,vote_average
40,0.0,6


In [14]:
# Select every movie tied at the minimum vote_average instead of hard-coding
# the count (6) read off an earlier output, so the cell stays correct if the
# data or the cleaning step changes. Rows keep their original order.
lowest_rating = df['vote_average'].min()
vote_average_data = df[df['vote_average'] == lowest_rating]
vote_average_data

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
3669,0,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...",http://www.romeothemovie.com/,113406,[],en,Should've Been Romeo,"A self-centered, middle-aged pitchman for a po...",0.40703,"[{""name"": ""Phillybrook Films"", ""id"": 65147}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-04-28,0,0.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Even Shakespeare didn't see this one coming.,Should've Been Romeo,0.0,0
3855,3500000,"[{""id"": 99, ""name"": ""Documentary""}]",http://www.timetochoose.com,370662,"[{""id"": 2210, ""name"": ""climate change""}, {""id""...",en,Time to Choose,Academy Award® winning director Charles Fergus...,1.618138,"[{""name"": ""Representational Pictures"", ""id"": 8...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2015-09-04,0,100.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Climate change for good.,Time to Choose,0.0,0
4266,0,"[{""id"": 53, ""name"": ""Thriller""}, {""id"": 27, ""n...",http://houseattheendofthedrive.com/,280381,"[{""id"": 156580, ""name"": ""ghost story""}]",en,House at the End of the Drive,When David King purchases a house in the hills...,0.43049,"[{""name"": ""Allure Entertainment"", ""id"": 26679}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2014-01-11,0,91.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,It's where they died.,House at the End of the Drive,0.0,0
4486,700000,"[{""id"": 18, ""name"": ""Drama""}]",http://naturallynative.com/home.html,302579,"[{""id"": 10322, ""name"": ""native american""}, {""i...",en,Naturally Native,"Naturally Native follows the lives, loves, pai...",0.578798,"[{""name"": ""Red-horse Native"", ""id"": 38147}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1999-10-08,10508,107.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Sometimes When You're Going Home, It's Importa...",Naturally Native,0.0,0
4559,0,"[{""id"": 53, ""name"": ""Thriller""}, {""id"": 35, ""n...",http://soundandtheshadow.com/,362765,[],en,The Sound and the Shadow,"An allergy-ridden, eavesdropping sound enginee...",0.072359,"[{""name"": ""Veracity Productions"", ""id"": 64898}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2014-10-19,0,90.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Be careful what you listen for.,The Sound and the Shadow,0.0,0
4569,0,[],http://mutualfriendsmovie.com/,198370,[],en,Mutual Friends,Surprise parties rarely go well. This one is n...,0.136721,[],[],2014-04-15,0,0.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Surprise parties rarely go well.,Mutual Friends,0.0,0


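- 6 movies have a `vote_average` of 0, as shown above. Each of them also has a `vote_count` of 0, so they are unrated rather than rated zero.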