In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
import cufflinks as cf
import plotly
import plotly.offline as py
import plotly.graph_objs as go

%matplotlib inline

In [23]:
py.init_notebook_mode(connected=True)
cf.go_offline()

In [24]:
glob('*_cleaned.csv')

['action-comedy_movies_cleaned.csv',
 'action_movies_cleaned.csv',
 'adventure_movies_cleaned.csv',
 'animation_movies_cleaned.csv',
 'comedy-romance_movies_cleaned.csv',
 'comedy_movies_cleaned.csv',
 'crime_movies_cleaned.csv',
 'drama_movies_cleaned.csv',
 'fantasy_movies_cleaned.csv',
 'horror_movies_cleaned.csv',
 'mystery_movies_cleaned.csv',
 'romance_movies_cleaned.csv',
 'scifi_movies_cleaned.csv',
 'thriller_movies_cleaned.csv']

In [25]:
# Reading csv files

df = pd.read_csv('all_movies_data.csv')

In [26]:
df.head()

Unnamed: 0,movie,year,category,duration,genres,rating,tagline,director,stars,votes
0,Range 15,2016,TV-MA,89,"Action, Comedy, Horror",4.3,Veterans wake up after a night of partying to ...,Ross Patterson,"Sean Astin, Keith David, Danny Trejo, William ...",5010
1,Snake in the Eagle's Shadow,1978,PG,90,"Action, Comedy",7.3,An orphan who has been raised at a kung fu sch...,Woo-Ping Yuen,"Jackie Chan, Siu-Tin Yuen, Jeong-lee Hwang, De...",12191
2,Twinkle Twinkle Lucky Stars,1985,TV-14,105,"Action, Comedy",6.2,5 HK cops (4 horny males) on vacation in Patta...,Sammo Kam-Bo Hung,"Sammo Kam-Bo Hung, Richard Ng, Eric Tsang, Kiu...",4164
3,McHale's Navy,1997,PG,108,"Action, Comedy",4.5,"A retired Navy officer returns to active duty,...",Bryan Spicer,"Tom Arnold, Dean Stockwell, Ernest Borgnine, D...",6891
4,Fastlane,2002,TV-14,60,"Action, Comedy, Crime",7.4,Two hotshot undercover cops and their equally ...,,"Peter Facinelli, Bill Bellamy, Tiffani Thiesse...",3600


In [27]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23096 entries, 0 to 23095
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   movie     23096 non-null  object 
 1   year      23096 non-null  int64  
 2   category  23096 non-null  object 
 3   duration  23096 non-null  int64  
 4   genres    23096 non-null  object 
 5   rating    23096 non-null  float64
 6   tagline   23096 non-null  object 
 7   director  23096 non-null  object 
 8   stars     23096 non-null  object 
 9   votes     23096 non-null  int64  
dtypes: float64(1), int64(3), object(6)
memory usage: 1.8+ MB
None


In [28]:
def change_dtypes(df):
    df['year'] = df['year'].astype('category')
    df['duration'] = df['duration'].astype(np.int64)
    df['rating'] = df['rating'].astype(float)

In [29]:
def print_number_of_unique_values(col, df):
    print(f"{col} --> {df[col].nunique()}")

In [30]:
for col in df.columns:
    print_number_of_unique_values(col, df)

movie --> 21673
year --> 109
category --> 28
duration --> 408
genres --> 812
rating --> 86
tagline --> 21269
director --> 6974
stars --> 22866
votes --> 16420


In [31]:
df[df['movie']=='The Equalizer']

Unnamed: 0,movie,year,category,duration,genres,rating,tagline,director,stars,votes
19024,The Equalizer,2021,TV-14,43,"Action, Crime, Drama",5.3,An enigmatic figure who uses her extensive ski...,,"Adam Goldberg, Liza Lapira, Queen Latifah, Tor...",13909
19251,The Equalizer,2014,R,132,"Action, Crime, Thriller",7.2,A man who believes he has put his mysterious p...,Antoine Fuqua,"Denzel Washington, Marton Csokas, Chloë Grace ...",378782
20805,The Equalizer,1985,TV-PG,48,"Action, Crime, Drama",7.8,A retired Intelligence Agent turned private de...,,"Edward Woodward, Keith Szarabajka, Robert Lans...",5425


In [42]:
plt.figure(figsize=(20,8))
df['year'].iplot(kind='hist')
plt.show()

<Figure size 1440x576 with 0 Axes>

- More action-comedy movies were released in recent years

In [44]:
plt.figure(figsize=(20,8))
df['category'].value_counts().drop('None').iplot(kind='bar', fontsize=16)

<Figure size 1440x576 with 0 Axes>

- The most occuring category is the 'R' category

In [50]:
plt.figure(figsize=(20,8))
df['duration'].iplot(kind='hist')
plt.show()

<Figure size 1440x576 with 0 Axes>

In [53]:
plt.figure(figsize=(20,8))
df['rating'].iplot(kind='hist')
plt.show()

<Figure size 1440x576 with 0 Axes>

In [58]:
plt.figure(figsize=(20,8))
df['votes'].iplot(kind='hist')
plt.show()

<Figure size 1440x576 with 0 Axes>

In [62]:
df['director'].value_counts().sort_values(ascending=False)[1:10].iplot(kind='bar')
plt.show()