In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the IMDB movie dataset into a DataFrame.
# A forward slash is used as the separator: it works on Windows, macOS and Linux,
# and it avoids the invalid "\9" escape sequence that the backslash form produced.
data = pd.read_csv('Datasets/9_IMDB-Movie-Data.csv')

In [None]:
# Preview the first 10 rows of the dataset.
data.head(10)

In [None]:
# Preview the last 10 rows of the dataset.
data.tail(10)

In [None]:
# Report the dimensions of the dataset: shape is (row count, column count).
n_rows, n_cols = data.shape
print('The total number of rows are: ', n_rows)
print('The total number of columns are: ', n_cols)

In [None]:
# Summarise column dtypes and non-null counts.
data.info()

In [None]:
# Bar chart of the number of missing values in each column.
missing_per_column = data.isna().sum()
missing_per_column.plot.bar()
plt.show()

In [None]:
# Drop every row that contains at least one missing value.
# inplace=True mutates `data` itself, so all later cells work on the reduced frame.
data.dropna(axis=0,inplace=True)

In [None]:
# Show rows that are exact duplicates of an earlier row (empty result means none).
data.loc[data.duplicated()]

In [None]:
# Descriptive statistics for the numeric columns.
data.describe()

In [None]:
# Show the column labels of `data` for reference.
data.columns

In [None]:
# Titles of the movies whose runtime is at least 180 minutes.
is_long = data['Runtime (Minutes)'] >= 180
data.loc[is_long, 'Title']

In [None]:
# Year of the movie(s) that received the highest number of votes.
votes = data['Votes']
data.loc[votes == votes.max(), 'Year']

In [None]:
# Mean number of votes per release year, shown from highest to lowest.
votes_by_year = data.groupby("Year")['Votes']
avg_votesperyear = votes_by_year.mean()
avg_votesperyear.sort_values(ascending=False)

Seaborn's `barplot` aggregates automatically: each bar below shows the mean of `Votes` for that year, so the chart matches the per-year averages computed above.

In [None]:

# Bar chart of votes by year.
sns.barplot(data=data, x='Year', y='Votes')

In [None]:
# Bar chart of revenue by year on a 10x5 inch figure, with grid lines enabled.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=data, x='Year', y='Revenue (Millions)', ax=ax)
ax.grid(True)

In [None]:
# Mean rating for each director.
data.groupby('Director')['Rating'].mean()

In [None]:
# Show the column labels of `data` for reference.
data.columns

In [None]:
# The ten longest movies, keeping only title and runtime, indexed by title.
top_10_by_runtime = (
    data
    .nlargest(10, 'Runtime (Minutes)')
    .loc[:, ['Title', 'Runtime (Minutes)']]
    .set_index('Title')
)

In [None]:
# Horizontal bar chart of the ten longest movies (titles on the y axis).
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=top_10_by_runtime,
    x='Runtime (Minutes)',
    y=top_10_by_runtime.index,
    ax=ax,
)

In [None]:
# Show the column labels of `data` for reference.
data.columns

In [None]:
# Count the titles released in each year, then turn the result into a
# two-column frame ('Year', 'Title') so it can be passed to seaborn.
movies_year = data.groupby('Year')['Title'].count()
num_movies_year = movies_year.to_frame().reset_index()

num_movies_year

In [None]:
# Bar chart of the number of titles per year, with the count written above each bar.
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=num_movies_year, x='Year', y='Title', ax=ax)
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_top:.0f}',
        (bar_centre, bar_top),
        ha='center',
        va='center',
        xytext=(0, 9),  # shift the label 9 points up from the top of the bar
        textcoords='offset points',
    )
fig.tight_layout()
ax.set_ylabel('Number of titles released per year')
ax.grid(True)

In [None]:
# Show the column labels of `data` for reference.
data.columns

In [None]:
# Title of the movie(s) with the highest revenue.
revenue = data['Revenue (Millions)']
data.loc[revenue == revenue.max(), 'Title']

In [None]:
# The ten highest-rated movies with their rating and director, indexed by title.
top_10_by_rating = (
    data
    .nlargest(10, 'Rating')
    .loc[:, ['Title', 'Rating', 'Director']]
    .set_index('Title')
)

In [None]:
# Display the ten highest-rated titles with their rating and director.
top_10_by_rating


In [None]:
# Horizontal bar chart of the ten highest-rated movies, coloured by director.
# dodge=False keeps one full-width bar per title instead of one slot per hue level.
rating_ax = sns.barplot(
    data=top_10_by_rating,
    x='Rating',
    y=top_10_by_rating.index,
    hue='Director',
    dodge=False,
)
# Anchor the legend just outside the right edge of the axes so it does not cover bars.
rating_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

In [None]:
# Scatter plot of revenue against rating, one point per movie.
sns.scatterplot(data=data, x='Rating', y='Revenue (Millions)')

In [None]:
# Show the column labels of `data` for reference.
data.columns

In [None]:
def rate(rating):
    """Map a numeric rating to a coarse quality label.

    Args:
        rating: Numeric rating value.

    Returns:
        'Excellent' when rating >= 8.0, 'Good' when rating >= 6.0,
        otherwise 'Average' (this includes values that compare false
        against both thresholds, such as NaN).
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    for threshold, label in ((8.0, 'Excellent'), (6.0, 'Good')):
        if rating >= threshold:
            return label
    return 'Average'

In [None]:
# Add a categorical column by labelling each movie's numeric rating with rate().
data['New_rating'] = data['Rating'].map(rate)

In [None]:
# Display the derived rating-category column.
data['New_rating']

In [None]:
# Number of movies whose genre string mentions "Action" (case-insensitive match).
is_action = data['Genre'].str.contains('Action', case=False)
is_action.sum()

In [212]:
# Display the raw 'Genre' column (comma-separated genre names per movie).
data['Genre']

0       Action,Adventure,Sci-Fi
1      Adventure,Mystery,Sci-Fi
2               Horror,Thriller
3       Animation,Comedy,Family
4      Action,Adventure,Fantasy
                 ...           
993     Action,Adventure,Horror
994                      Comedy
996                      Horror
997         Drama,Music,Romance
999       Comedy,Family,Fantasy
Name: Genre, Length: 838, dtype: object

The code below does not split a single cell into three separate columns. Instead, it splits each row's comma-separated `Genre` string into a list of genres and appends that list to an outer list, producing a 2-D (nested) list with one inner list per movie.

In [213]:
# Split every movie's comma-separated genre string into a list of genre names,
# giving one inner list per movie.
# NOTE(review): the name `list` shadows the Python builtin; it is kept because a
# later cell iterates over this variable by that name.
list = [value.split(',') for value in data['Genre']]
list


[['Action', 'Adventure', 'Sci-Fi'],
 ['Adventure', 'Mystery', 'Sci-Fi'],
 ['Horror', 'Thriller'],
 ['Animation', 'Comedy', 'Family'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Drama', 'Music'],
 ['Action', 'Adventure', 'Biography'],
 ['Adventure', 'Drama', 'Romance'],
 ['Adventure', 'Family', 'Fantasy'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Comedy', 'Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Biography', 'Drama', 'History'],
 ['Action', 'Thriller'],
 ['Biography', 'Drama'],
 ['Drama', 'Mystery', 'Sci-Fi'],
 ['Adventure', 'Drama', 'Thriller'],
 ['Drama'],
 ['Animation', 'Adventure', 'Comedy'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Comedy'],
 ['Action', 'Adventure', 'Drama'],
 ['Comedy'],
 ['Drama', 'Thriller'],
 ['Action', 'Adventure', 'Sci-Fi'],
 ['Action', 'Adventure', 'Comedy'],
 ['Action', 'Horror', 'Sci-Fi'],
 ['Action', 'Adventure', 'Sci-Fi']

In [217]:
list_1=[]
for item in list:
    for item1 in item:
        list_1.append(item1)
list_1

['Action',
 'Adventure',
 'Sci-Fi',
 'Adventure',
 'Mystery',
 'Sci-Fi',
 'Horror',
 'Thriller',
 'Animation',
 'Comedy',
 'Family',
 'Action',
 'Adventure',
 'Fantasy',
 'Action',
 'Adventure',
 'Fantasy',
 'Comedy',
 'Drama',
 'Music',
 'Action',
 'Adventure',
 'Biography',
 'Adventure',
 'Drama',
 'Romance',
 'Adventure',
 'Family',
 'Fantasy',
 'Biography',
 'Drama',
 'History',
 'Action',
 'Adventure',
 'Sci-Fi',
 'Animation',
 'Adventure',
 'Comedy',
 'Action',
 'Comedy',
 'Drama',
 'Animation',
 'Adventure',
 'Comedy',
 'Biography',
 'Drama',
 'History',
 'Action',
 'Thriller',
 'Biography',
 'Drama',
 'Drama',
 'Mystery',
 'Sci-Fi',
 'Adventure',
 'Drama',
 'Thriller',
 'Drama',
 'Animation',
 'Adventure',
 'Comedy',
 'Action',
 'Adventure',
 'Sci-Fi',
 'Comedy',
 'Action',
 'Adventure',
 'Drama',
 'Comedy',
 'Drama',
 'Thriller',
 'Action',
 'Adventure',
 'Sci-Fi',
 'Action',
 'Adventure',
 'Comedy',
 'Action',
 'Horror',
 'Sci-Fi',
 'Action',
 'Adventure',
 'Sci-Fi',
 'Advent

In [None]:
# Distinct genres in order of first appearance: dict keys are unique and keep
# insertion order. Unpacking is used instead of calling list(...) because the
# name `list` is rebound to a plain list earlier in this notebook.
uni_list = [*dict.fromkeys(list_1)]

len(uni_list)

In [214]:
from collections import Counter

In [216]:
# Count how many times each genre appears in the flattened genre list.
Counter(list_1)

Counter({'Action': 277,
         'Adventure': 244,
         'Sci-Fi': 107,
         'Mystery': 86,
         'Horror': 87,
         'Thriller': 148,
         'Animation': 45,
         'Comedy': 250,
         'Family': 48,
         'Fantasy': 92,
         'Drama': 419,
         'Music': 15,
         'Biography': 67,
         'Romance': 120,
         'History': 25,
         'Western': 4,
         'Crime': 126,
         'War': 10,
         'Musical': 5,
         'Sport': 15})