# Chapter 1: Pandas Foundations

## Recipes
* [Dissecting the anatomy of a DataFrame](#Dissecting-the-anatomy-of-a-DataFrame)
* [Accessing the main DataFrame components](#Accessing-the-main-DataFrame-components)
* [Understanding data types](#Understanding-data-types)
* [Selecting a single column of data as a Series](#Selecting-a-single-column-of-data-as-a-Series)
* [Calling Series methods](#Calling-Series-methods)
* [Working with operators on a Series](#Working-with-operators-on-a-Series)
* [Chaining Series methods together](#Chaining-Series-methods-together)
* [Making the index meaningful](#Making-the-index-meaningful)
* [Renaming row and column names](#Renaming-row-and-column-names)
* [Creating and deleting columns](#Creating-and-deleting-columns)

In [1]:
import pandas as pd
import numpy as np

# Dissecting the anatomy of a DataFrame

#### Change options to get specific output for book

In [2]:
pd.set_option('display.max_columns', 8); pd.set_option('display.max_rows', 10)

In [3]:
movie = pd.read_csv('../notebooks/../notebooks/data/movie.csv')
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
1,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
2,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
3,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
4,,Doug Walker,,,...,12.0,7.1,,0


# Accessing the main DataFrame components

In [4]:
columns = movie.columns
index = movie.index
data = movie.values

In [5]:
columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [6]:
index

RangeIndex(start=0, stop=5043, step=1)

In [7]:
data

array([['Color', 'James Cameron', 723.0, ..., 7.9, 1.78, 33000],
       ['Color', 'Gore Verbinski', 302.0, ..., 7.1, 2.35, 0],
       ['Color', 'Sam Mendes', 602.0, ..., 6.8, 2.35, 85000],
       ...,
       ['Color', 'Benjamin Roberds', 13.0, ..., 6.3, nan, 16],
       ['Color', 'Daniel Hsia', 14.0, ..., 6.3, 2.35, 660],
       ['Color', 'Jon Gunn', 43.0, ..., 6.6, 1.85, 456]], dtype=object)

In [8]:
type(index)

pandas.core.indexes.range.RangeIndex

In [9]:
type(columns)

pandas.core.indexes.base.Index

In [10]:
type(data)

numpy.ndarray

In [11]:
issubclass(pd.RangeIndex, pd.Index)

True

## There's more

In [12]:
index.values

array([   0,    1,    2, ..., 5040, 5041, 5042])

In [13]:
columns.values

array(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes',
       'actor_2_name', 'actor_1_facebook_likes', 'gross', 'genres',
       'actor_1_name', 'movie_title', 'num_voted_users',
       'cast_total_facebook_likes', 'actor_3_name',
       'facenumber_in_poster', 'plot_keywords', 'movie_imdb_link',
       'num_user_for_reviews', 'language', 'country', 'content_rating',
       'budget', 'title_year', 'actor_2_facebook_likes', 'imdb_score',
       'aspect_ratio', 'movie_facebook_likes'], dtype=object)

# Understanding data types

In [14]:
movie.dtypes

color                       object
director_name               object
num_critic_for_reviews     float64
duration                   float64
director_facebook_likes    float64
                            ...   
title_year                 float64
actor_2_facebook_likes     float64
imdb_score                 float64
aspect_ratio               float64
movie_facebook_likes         int64
Length: 28, dtype: object

In [15]:
movie.value_counts()

color  director_name        num_critic_for_reviews  duration  director_facebook_likes  actor_3_facebook_likes  actor_2_name      actor_1_facebook_likes  gross        genres                                  actor_1_name       movie_title       num_voted_users  cast_total_facebook_likes  actor_3_name        facenumber_in_poster  plot_keywords                                                               movie_imdb_link                                       num_user_for_reviews  language  country  content_rating  budget       title_year  actor_2_facebook_likes  imdb_score  aspect_ratio  movie_facebook_likes
Color  Josh Trank           369.0                   100.0     128.0                    78.0                    Reg E. Cathey     596.0                   56114221.0   Action|Adventure|Sci-Fi                 Tim Blake Nelson   Fantastic Four    110486           1261                       Tim Heidecker       3.0                   box office flop|critically bashed|portal|teleportation|trou

# Selecting a single column of data as a Series

In [16]:
movie['director_name']

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4             Doug Walker
              ...        
5038          Scott Smith
5039                  NaN
5040     Benjamin Roberds
5041          Daniel Hsia
5042             Jon Gunn
Name: director_name, Length: 5043, dtype: object

In [17]:
movie.director_name

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4             Doug Walker
              ...        
5038          Scott Smith
5039                  NaN
5040     Benjamin Roberds
5041          Daniel Hsia
5042             Jon Gunn
Name: director_name, Length: 5043, dtype: object

In [18]:
type(movie['director_name'])

pandas.core.series.Series

## There's more

In [19]:
director = movie['director_name'] # save Series to variable
director.name

'director_name'

In [20]:
director.to_frame().head()

Unnamed: 0,director_name
0,James Cameron
1,Gore Verbinski
2,Sam Mendes
3,Christopher Nolan
4,Doug Walker


# Calling Series methods

## Getting ready...

In [21]:
s_attr_methods = set(dir(pd.Series))
len(s_attr_methods)

421

In [22]:
df_attr_methods = set(dir(pd.DataFrame))
len(df_attr_methods)

439

In [23]:
len(s_attr_methods & df_attr_methods)

364

## How to do it...

In [24]:
director = movie['director_name']
actor_1_fb_likes = movie['actor_1_facebook_likes']

In [25]:
director.head()

0        James Cameron
1       Gore Verbinski
2           Sam Mendes
3    Christopher Nolan
4          Doug Walker
Name: director_name, dtype: object

In [26]:
actor_1_fb_likes.head()

0     1000.0
1    40000.0
2    11000.0
3    27000.0
4      131.0
Name: actor_1_facebook_likes, dtype: float64

In [27]:
pd.set_option('display.max_rows', 8)
director.value_counts()

director_name
Steven Spielberg    26
Woody Allen         22
Clint Eastwood      20
Martin Scorsese     20
                    ..
Rob Pritts           1
David S. Ward        1
R.J. Cutler          1
Daniel Hsia          1
Name: count, Length: 2398, dtype: int64

In [28]:
actor_1_fb_likes.value_counts()

actor_1_facebook_likes
1000.0     449
11000.0    211
2000.0     197
3000.0     155
          ... 
175.0        1
961.0        1
230.0        1
291.0        1
Name: count, Length: 878, dtype: int64

In [29]:
director.size

5043

In [30]:
director.shape

(5043,)

In [31]:
len(director)

5043

In [32]:
director.count()

np.int64(4939)

In [33]:
actor_1_fb_likes.count()

np.int64(5036)

In [34]:
actor_1_fb_likes.quantile()

np.float64(988.0)

In [35]:
actor_1_fb_likes.min(), actor_1_fb_likes.max(), \
actor_1_fb_likes.mean(), actor_1_fb_likes.median(), \
actor_1_fb_likes.std(), actor_1_fb_likes.sum()

(np.float64(0.0),
 np.float64(640000.0),
 np.float64(6560.04706115965),
 np.float64(988.0),
 np.float64(15020.759119984092),
 np.float64(33036397.0))

In [36]:
actor_1_fb_likes.describe()

count      5036.000000
mean       6560.047061
std       15020.759120
min           0.000000
25%         614.000000
50%         988.000000
75%       11000.000000
max      640000.000000
Name: actor_1_facebook_likes, dtype: float64

In [37]:
director.describe()

count                 4939
unique                2398
top       Steven Spielberg
freq                    26
Name: director_name, dtype: object

In [38]:
actor_1_fb_likes.quantile(.2)

np.float64(522.0)

In [39]:
actor_1_fb_likes.quantile([.1, .2, .3, .4, .5, .6, .7, .8, .9])

0.1      248.5
0.2      522.0
0.3      700.0
0.4      862.0
        ...   
0.6     2000.0
0.7     8000.0
0.8    13000.0
0.9    18500.0
Name: actor_1_facebook_likes, Length: 9, dtype: float64

In [40]:
director.isnull()

0       False
1       False
2       False
3       False
        ...  
5039     True
5040    False
5041    False
5042    False
Name: director_name, Length: 5043, dtype: bool

In [41]:
actor_1_fb_likes_filled = actor_1_fb_likes.fillna(0)
actor_1_fb_likes_filled.count()

np.int64(5043)

In [42]:
actor_1_fb_likes_dropped = actor_1_fb_likes.dropna()
actor_1_fb_likes_dropped.size

5036

## There's more...

In [43]:
director.value_counts(normalize=True)

director_name
Steven Spielberg    0.005264
Woody Allen         0.004454
Clint Eastwood      0.004049
Martin Scorsese     0.004049
                      ...   
Rob Pritts          0.000202
David S. Ward       0.000202
R.J. Cutler         0.000202
Daniel Hsia         0.000202
Name: proportion, Length: 2398, dtype: float64

In [44]:
director.hasnans

True

In [45]:
director.notnull()

0        True
1        True
2        True
3        True
        ...  
5039    False
5040     True
5041     True
5042     True
Name: director_name, Length: 5043, dtype: bool

# Working with operators on a Series

In [46]:
pd.options.display.max_rows = 6

In [47]:
5 + 9    # plus operator example. Adds 5 and 9

14

In [48]:
4 ** 2   # exponentiation operator. Raises 4 to the second power

16

In [49]:
a = 10   # assignment operator.

In [50]:
5 <= 9   # less than or equal to operator

True

In [51]:
'abcde' + 'fg'    # plus operator for strings. C

'abcdefg'

In [52]:
not (5 <= 9)      # not is an operator that is a reserved keyword and reverse a boolean

False

In [53]:
7 in [1, 2, 6]    # in operator checks for membership of a list

False

In [54]:
set([1,2,3]) & set([2,3,4])

{2, 3}

In [55]:
my_list = [1, 2, 6]
result = [x - 3 for x in my_list]  # Result: [-2, -1, 0]

## Getting ready...

In [56]:
imdb_score = movie['imdb_score']
imdb_score

0       7.9
1       7.1
2       6.8
       ... 
5040    6.3
5041    6.3
5042    6.6
Name: imdb_score, Length: 5043, dtype: float64

In [57]:
imdb_score + 1

0       8.9
1       8.1
2       7.8
       ... 
5040    7.3
5041    7.3
5042    7.6
Name: imdb_score, Length: 5043, dtype: float64

In [58]:
imdb_score * 2.5

0       19.75
1       17.75
2       17.00
        ...  
5040    15.75
5041    15.75
5042    16.50
Name: imdb_score, Length: 5043, dtype: float64

In [59]:
imdb_score // 7

0       1.0
1       1.0
2       0.0
       ... 
5040    0.0
5041    0.0
5042    0.0
Name: imdb_score, Length: 5043, dtype: float64

In [60]:
imdb_score > 7

0        True
1        True
2       False
        ...  
5040    False
5041    False
5042    False
Name: imdb_score, Length: 5043, dtype: bool

In [61]:
director = movie['director_name']

In [62]:
director == 'James Cameron'

0        True
1       False
2       False
        ...  
5040    False
5041    False
5042    False
Name: director_name, Length: 5043, dtype: bool

## There's more...

In [63]:
imdb_score.add(1)              # imdb_score + 1

0       8.9
1       8.1
2       7.8
       ... 
5040    7.3
5041    7.3
5042    7.6
Name: imdb_score, Length: 5043, dtype: float64

In [64]:
imdb_score.mul(2.5)            # imdb_score * 2.5

0       19.75
1       17.75
2       17.00
        ...  
5040    15.75
5041    15.75
5042    16.50
Name: imdb_score, Length: 5043, dtype: float64

In [65]:
imdb_score.floordiv(7)         # imdb_score // 7

0       1.0
1       1.0
2       0.0
       ... 
5040    0.0
5041    0.0
5042    0.0
Name: imdb_score, Length: 5043, dtype: float64

In [66]:
imdb_score.gt(7)               # imdb_score > 7

0        True
1        True
2       False
        ...  
5040    False
5041    False
5042    False
Name: imdb_score, Length: 5043, dtype: bool

In [67]:
director.eq('James Cameron')   # director == 'James Cameron'

0        True
1       False
2       False
        ...  
5040    False
5041    False
5042    False
Name: director_name, Length: 5043, dtype: bool

In [68]:
imdb_score.astype(int).mod(5)

0       2
1       2
2       1
       ..
5040    1
5041    1
5042    1
Name: imdb_score, Length: 5043, dtype: int64

In [69]:
a = type(1)

In [70]:
type(a)

type

In [71]:
a = type(imdb_score)

In [72]:
a([1,2,3])

0    1
1    2
2    3
dtype: int64

# Chaining Series methods together

In [73]:
actor_1_fb_likes = movie['actor_1_facebook_likes']
director = movie['director_name']

In [74]:
director.value_counts().head(3)

director_name
Steven Spielberg    26
Woody Allen         22
Clint Eastwood      20
Name: count, dtype: int64

In [75]:
actor_1_fb_likes.isnull().sum()

np.int64(7)

In [76]:
actor_1_fb_likes.dtype

dtype('float64')

In [77]:
actor_1_fb_likes.fillna(0)\
                .astype(int)\
                .head()

0     1000
1    40000
2    11000
3    27000
4      131
Name: actor_1_facebook_likes, dtype: int64

## There's more...

In [78]:
actor_1_fb_likes.isnull().mean()

np.float64(0.001388062661114416)

In [79]:
(actor_1_fb_likes.fillna(0)
                 .astype(int)
                 .head())

0     1000
1    40000
2    11000
3    27000
4      131
Name: actor_1_facebook_likes, dtype: int64

# Making the index meaningful

In [80]:
movie.shape

(5043, 28)

In [81]:
movie2 = movie.set_index('movie_title')
movie2

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
...,...,...,...,...,...,...,...,...,...
A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,...,0.0,6.3,,16
Shanghai Calling,Color,Daniel Hsia,14.0,100.0,...,719.0,6.3,2.35,660
My Date with Drew,Color,Jon Gunn,43.0,90.0,...,23.0,6.6,1.85,456


In [82]:
pd.read_csv('../notebooks/../notebooks/data/movie.csv', index_col='movie_title')

Unnamed: 0_level_0,color,director_name,num_critic_for_reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
...,...,...,...,...,...,...,...,...,...
A Plague So Pleasant,Color,Benjamin Roberds,13.0,76.0,...,0.0,6.3,,16
Shanghai Calling,Color,Daniel Hsia,14.0,100.0,...,719.0,6.3,2.35,660
My Date with Drew,Color,Jon Gunn,43.0,90.0,...,23.0,6.6,1.85,456


# There's more...

In [83]:
movie2.reset_index()

Unnamed: 0,movie_title,color,director_name,num_critic_for_reviews,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
0,Avatar,Color,James Cameron,723.0,...,936.0,7.9,1.78,33000
1,Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,...,5000.0,7.1,2.35,0
2,Spectre,Color,Sam Mendes,602.0,...,393.0,6.8,2.35,85000
...,...,...,...,...,...,...,...,...,...
5040,A Plague So Pleasant,Color,Benjamin Roberds,13.0,...,0.0,6.3,,16
5041,Shanghai Calling,Color,Daniel Hsia,14.0,...,719.0,6.3,2.35,660
5042,My Date with Drew,Color,Jon Gunn,43.0,...,23.0,6.6,1.85,456


# Renaming row and column names

In [84]:
movie = pd.read_csv('../notebooks/../notebooks/data/movie.csv', index_col='movie_title')

In [85]:
idx_rename = {'Avatar':'Ratava', 'Spectre': 'Ertceps'} 
col_rename = {'director_name':'Director Name', 
              'num_critic_for_reviews': 'Critical Reviews'} 

In [86]:
movie.rename(index=idx_rename, 
             columns=col_rename).head()

Unnamed: 0_level_0,color,Director Name,Critical Reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
movie_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avatar,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Spectre,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,...,12.0,7.1,,0


# There's more

In [87]:
movie = pd.read_csv('../notebooks/../notebooks/data/movie.csv', index_col='movie_title')
index = movie.index
columns = movie.columns

index_list = index.tolist()
column_list = columns.tolist()

index_list[0] = 'Ratava'
index_list[2] = 'Ertceps'
column_list[1] = 'Director Name'
column_list[2] = 'Critical Reviews'

In [88]:
print(index_list[:5])

['Ratava', "Pirates of the Caribbean: At World's End\xa0", 'Ertceps', 'The Dark Knight Rises\xa0', 'Star Wars: Episode VII - The Force Awakens\xa0            ']


In [89]:
print(column_list)

['color', 'Director Name', 'Critical Reviews', 'duration', 'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name', 'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name', 'num_voted_users', 'cast_total_facebook_likes', 'actor_3_name', 'facenumber_in_poster', 'plot_keywords', 'movie_imdb_link', 'num_user_for_reviews', 'language', 'country', 'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes', 'imdb_score', 'aspect_ratio', 'movie_facebook_likes']


In [90]:
movie.index = index_list
movie.columns = column_list

In [91]:
movie.head()

Unnamed: 0,color,Director Name,Critical Reviews,duration,...,actor_2_facebook_likes,imdb_score,aspect_ratio,movie_facebook_likes
Ratava,Color,James Cameron,723.0,178.0,...,936.0,7.9,1.78,33000
Pirates of the Caribbean: At World's End,Color,Gore Verbinski,302.0,169.0,...,5000.0,7.1,2.35,0
Ertceps,Color,Sam Mendes,602.0,148.0,...,393.0,6.8,2.35,85000
The Dark Knight Rises,Color,Christopher Nolan,813.0,164.0,...,23000.0,8.5,2.35,164000
Star Wars: Episode VII - The Force Awakens,,Doug Walker,,,...,12.0,7.1,,0


# Creating and deleting columns

In [92]:
movie = pd.read_csv('../notebooks/../notebooks/data/movie.csv')

In [93]:
movie['has_seen'] = 0

In [94]:
movie.columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes', 'has_seen'],
      dtype='object')

In [95]:
movie['actor_director_facebook_likes'] = (movie['actor_1_facebook_likes'] + 
                                              movie['actor_2_facebook_likes'] + 
                                              movie['actor_3_facebook_likes'] + 
                                              movie['director_facebook_likes'])

In [96]:
movie['actor_director_facebook_likes'].isnull().sum()

np.int64(124)

In [97]:
movie['actor_director_facebook_likes'] = movie['actor_director_facebook_likes'].fillna(0)

In [98]:
movie['is_cast_likes_more'] = (movie['cast_total_facebook_likes'] >= 
                                  movie['actor_director_facebook_likes'])

In [99]:
movie['is_cast_likes_more'].all()

np.False_

In [100]:
movie = movie.drop('actor_director_facebook_likes', axis='columns')

In [101]:
movie['actor_total_facebook_likes'] = (movie['actor_1_facebook_likes'] + 
                                       movie['actor_2_facebook_likes'] + 
                                       movie['actor_3_facebook_likes'])

movie['actor_total_facebook_likes'] = movie['actor_total_facebook_likes'].fillna(0)

In [102]:
movie['is_cast_likes_more'] = movie['cast_total_facebook_likes'] >= \
                                  movie['actor_total_facebook_likes']
    
movie['is_cast_likes_more'].all()

np.True_

In [103]:
movie['pct_actor_cast_like'] = (movie['actor_total_facebook_likes'] / 
                                movie['cast_total_facebook_likes'])

In [104]:
movie['pct_actor_cast_like'].min(), movie['pct_actor_cast_like'].max() 

(np.float64(0.0), np.float64(1.0))

In [105]:
movie.set_index('movie_title')['pct_actor_cast_like'].head()

movie_title
Avatar                                                     0.577369
Pirates of the Caribbean: At World's End                   0.951396
Spectre                                                    0.987521
The Dark Knight Rises                                      0.683783
Star Wars: Episode VII - The Force Awakens                 0.000000
Name: pct_actor_cast_like, dtype: float64

## There's more...

In [106]:
profit_index = movie.columns.get_loc('gross') + 1
profit_index

9

In [107]:
movie.insert(loc=profit_index,
                 column='profit',
                 value=movie['gross'] - movie['budget'])

In [108]:
movie.head()

Unnamed: 0,color,director_name,num_critic_for_reviews,duration,...,has_seen,is_cast_likes_more,actor_total_facebook_likes,pct_actor_cast_like
0,Color,James Cameron,723.0,178.0,...,0,True,2791.0,0.577369
1,Color,Gore Verbinski,302.0,169.0,...,0,True,46000.0,0.951396
2,Color,Sam Mendes,602.0,148.0,...,0,True,11554.0,0.987521
3,Color,Christopher Nolan,813.0,164.0,...,0,True,73000.0,0.683783
4,,Doug Walker,,,...,0,True,0.0,0.0
