## Pandas Techniques

In [29]:
import pandas as pd

### Inspecting the DataFrame

In [30]:
# Load the movies dataset from movies.csv (relative path) into a DataFrame
movies = pd.read_csv('movies.csv')

In [31]:
# Preview the top of the table: the first 5 rows
movies.head(n=5)

Unnamed: 0,id,name,genre,year,imdb_rating
0,1,Avatar,action,2009,7.9
1,2,Jurassic World,action,2015,7.3
2,3,The Avengers,action,2012,8.1
3,4,The Dark Knight,action,2008,9.0
4,5,Star Wars: Episode I - The Phantom Menace,action,1999,6.6


In [32]:
# Preview the bottom of the table: the last 5 rows
movies.tail(n=5)

Unnamed: 0,id,name,genre,year,imdb_rating
215,216,Hannibal,drama,2001,6.7
216,217,Catch Me If You Can,drama,2002,8.0
217,218,Big Daddy,drama,1999,6.4
218,219,Se7en,drama,1995,8.6
219,220,Seven,drama,1979,6.1


In [33]:
# View summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
movies.describe()

Unnamed: 0,id,year,imdb_rating
count,220.0,220.0,220.0
mean,110.5,2002.963636,6.993636
std,63.652704,11.132598,0.96975
min,1.0,1937.0,4.2
25%,55.75,1999.0,6.4
50%,110.5,2005.5,7.1
75%,165.25,2011.0,7.7
max,220.0,2015.0,9.0


In [34]:
# Show the index range, column names, non-null counts, dtypes and memory usage
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           220 non-null    int64  
 1   name         220 non-null    object 
 2   genre        220 non-null    object 
 3   year         220 non-null    int64  
 4   imdb_rating  220 non-null    float64
dtypes: float64(1), int64(2), object(2)
memory usage: 8.7+ KB


### Filtering the DataFrame

In [35]:
# Select a single column as a Series; bracket access works for any column label
movies['name']

0                                         Avatar
1                                 Jurassic World
2                                   The Avengers
3                                The Dark Knight
4      Star Wars: Episode I - The Phantom Menace
                         ...                    
215                                     Hannibal
216                          Catch Me If You Can
217                                    Big Daddy
218                                        Se7en
219                                        Seven
Name: name, Length: 220, dtype: object

In [36]:
# Select several columns at once (all rows) as a new DataFrame
movies.loc[:, ['name', 'year', 'imdb_rating']]

Unnamed: 0,name,year,imdb_rating
0,Avatar,2009,7.9
1,Jurassic World,2015,7.3
2,The Avengers,2012,8.1
3,The Dark Knight,2008,9.0
4,Star Wars: Episode I - The Phantom Menace,1999,6.6
...,...,...,...
215,Hannibal,2001,6.7
216,Catch Me If You Can,2002,8.0
217,Big Daddy,1999,6.4
218,Se7en,1995,8.6


In [37]:
# Keep only the first 100 rows by position
movies.head(100)

Unnamed: 0,id,name,genre,year,imdb_rating
0,1,Avatar,action,2009,7.9
1,2,Jurassic World,action,2015,7.3
2,3,The Avengers,action,2012,8.1
3,4,The Dark Knight,action,2008,9.0
4,5,Star Wars: Episode I - The Phantom Menace,action,1999,6.6
...,...,...,...,...,...
95,96,Ice Age: Dawn of the Dinosaurs,comedy,2009,7.0
96,97,Ice Age: The Meltdown,comedy,2006,6.9
97,98,World War Z,horror,2013,7.0
98,99,What Lies Beneath,horror,2000,6.6


In [38]:
# Rows for movies released in 2010 or later
movies.loc[movies['year'] >= 2010]

Unnamed: 0,id,name,genre,year,imdb_rating
1,2,Jurassic World,action,2015,7.3
2,3,The Avengers,action,2012,8.1
6,7,Avengers: Age of Ultron,action,2015,7.9
7,8,The Dark Knight Rises,action,2012,8.5
9,10,Iron Man 3,action,2013,7.3
...,...,...,...,...,...
206,207,The Karate Kid,drama,2010,6.2
209,210,True Grit,drama,2010,7.7
211,212,The Help,drama,2011,8.1
213,214,Gone Girl,drama,2014,8.2


In [39]:
# Rows for movies with an IMDb rating of at least 8
movies.loc[movies['imdb_rating'].ge(8)]

Unnamed: 0,id,name,genre,year,imdb_rating
2,3,The Avengers,action,2012,8.1
3,4,The Dark Knight,action,2008,9.0
5,6,Star Wars,action,1977,8.7
7,8,The Dark Knight Rises,action,2012,8.5
18,19,Guardians of the Galaxy,action,2014,8.1
25,26,Star Wars: Episode VI - Return of the Jedi,action,1983,8.4
27,28,Pirates of the Caribbean: The Curse of the Bla...,action,2003,8.1
29,30,Inception,action,2010,8.8
31,32,Star Wars: Episode V - The Empire Strikes Back,action,1980,8.8
34,35,The Incredibles,action,2004,8.0


In [40]:
# Rows that satisfy both conditions: released in 2010 or later AND rated at least 8
movies.loc[movies['year'].ge(2010) & movies['imdb_rating'].ge(8)]

Unnamed: 0,id,name,genre,year,imdb_rating
2,3,The Avengers,action,2012,8.1
7,8,The Dark Knight Rises,action,2012,8.5
18,19,Guardians of the Galaxy,action,2014,8.1
29,30,Inception,action,2010,8.8
47,48,X-Men: Days of Future Past,action,2014,8.1
51,52,Toy Story 3,comedy,2010,8.4
55,56,Inside Out,comedy,2015,8.6
191,192,Harry Potter and the Deathly Hallows: Part 2,drama,2011,8.1
200,201,Interstellar,drama,2014,8.7
211,212,The Help,drama,2011,8.1


In [41]:
# Rows whose rating equals the highest rating in the table (ties are all kept)
movies.loc[movies['imdb_rating'].eq(movies['imdb_rating'].max())]

Unnamed: 0,id,name,genre,year,imdb_rating
3,4,The Dark Knight,action,2008,9.0


### Manipulating and Transforming the DataFrame

In [42]:
# Average IMDb rating per genre, returned as a flat two-column DataFrame
movies.groupby('genre', as_index=False)['imdb_rating'].mean()

Unnamed: 0,genre,imdb_rating
0,action,7.396
1,comedy,7.140426
2,drama,7.563333
3,horror,6.34898
4,romance,6.709091


In [43]:
# Load the employees dataset from employees.csv (relative path) and preview it
employees = pd.read_csv('employees.csv')
employees.head()

Unnamed: 0,id,name,hourly_wage,hours_worked
0,10310,Lauren Durham,19,43
1,18656,Grace Sellers,17,40
2,61254,Shirley Rasmussen,16,30
3,16886,Brian Rojas,18,47
4,89010,Samantha Mosley,11,38


In [44]:
# Build a user name from each full name: the first letter of the first word
# followed by the whole last word (e.g. "Lauren Durham" -> "LDurham").
# The vectorised .str accessor splits every name only once and yields NaN for
# missing or empty names instead of raising inside a per-row lambda.
name_parts = employees['name'].str.split()
employees['user_name'] = name_parts.str[0].str[0] + name_parts.str[-1]
employees.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,user_name
0,10310,Lauren Durham,19,43,LDurham
1,18656,Grace Sellers,17,40,GSellers
2,61254,Shirley Rasmussen,16,30,SRasmussen
3,16886,Brian Rojas,18,47,BRojas
4,89010,Samantha Mosley,11,38,SMosley


In [45]:
# Pay earned = hourly wage x hours worked.
# Multiplying the two columns directly is vectorised, so it avoids calling a
# Python lambda once per row as DataFrame.apply(axis=1) does.
employees['total_earned'] = employees['hourly_wage'] * employees['hours_worked']
employees.head()

Unnamed: 0,id,name,hourly_wage,hours_worked,user_name,total_earned
0,10310,Lauren Durham,19,43,LDurham,817
1,18656,Grace Sellers,17,40,GSellers,680
2,61254,Shirley Rasmussen,16,30,SRasmussen,480
3,16886,Brian Rojas,18,47,BRojas,846
4,89010,Samantha Mosley,11,38,SMosley,418
