# Pandas Tutorial - Part 1

# DataFrame and Series Basics - Selecting Rows and Columns

### Implementation: Ali Moghanni

*Resources:*

This Jupyter notebook can be obtained at [https://github.com/alimoghanni/Pandas](https://github.com/alimoghanni/Pandas). 

updated: **2020-03-31**

In [1]:
# Preamble: useful toolboxes, libraries, functions, etc.

import pandas as pd
import numpy as np

In [2]:
# Python dictionary 1
# A flat mapping describing one person: every key holds a single scalar string.

person = dict(
    first="Albert",
    last="Einstein",
    email="AlbertEinstein@mail.com",
)

In [3]:
# Python dictionary 2
# Same person, but every key now holds a (one-element) list of values.
# Note that this rebinds the name `person`.

person = dict(
    first=["Albert"],
    last=["Einstein"],
    email=["AlbertEinstein@mail.com"],
)

In [4]:
# Python dictionary 3
# Column-oriented records: each key maps to a list with one entry per person
# (9 entries per list, kept in the same person order across all keys).
# Missing ages are real NaN values (np.nan); the "NA" e-mail and the "Missing"
# birth date are ordinary strings, not missing-value markers.

people = dict(
    first=[
        "Agatha", "Hercule", "Jane", "David", "Nicholle",
        "Bruce", "Jacob", "Maggie", "Cristiano",
    ],
    last=[
        "Christie", "Poirot", "Marple", "Tom", "Tom",
        "Lee", "Gyllenhaal", "Gyllenhaal", "Ronaldo",
    ],
    email=[
        "AgathaChristie@mail.com",
        "Hercule Poirot@mail.com",
        "NA",
        "DavidTom@gmail.com",
        "NicholleTom@gmail.com",
        "BruceLee@yahoo.com",
        "JacobGyllenhaal@mail.com",
        "MaggieGyllenhaal@mail.com",
        "CristianoRonaldo@mail.com",
    ],
    age=[85, 54, np.nan, np.nan, 42, 32, 39, 42, 35],
    occupation=[
        "Novelist", "Private investigator", "Amateur detective",
        "Actor", "Actress", "Martial artist",
        "Actor", "producer", "footballer",
    ],
    nationality=[
        "English", "Belgian", "British", "American", "American",
        "Chinese", "American", "American", "Portuguese",
    ],
    born=[
        "15 September 1890",
        "Missing",
        "December 1927",
        "March 23, 1978",
        "March 23, 1978",
        "November 27, 1940",
        "December 19, 1980",
        "November 16, 1977",
        "February 5, 1985",
    ],
    male=["No", "Yes", "No", "Yes", "No", "Yes", "Yes", "No", "Yes"],
)

In [5]:
# Find the values stored under a key of the plain Python dictionary
# (ordinary dict lookup; the result is the list held under "last")

people["last"]

['Christie',
 'Poirot',
 'Marple',
 'Tom',
 'Tom',
 'Lee',
 'Gyllenhaal',
 'Gyllenhaal',
 'Ronaldo']

In [6]:
# Create pandas DataFrame from dictionary:
# each dict key becomes a column label and its list supplies that column's values.
# No index is passed, so the rows get the default integer labels 0..8.

df = pd.DataFrame(people)

# A bare name as the last expression of a cell displays the whole frame
df

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
0,Agatha,Christie,AgathaChristie@mail.com,85.0,Novelist,English,15 September 1890,No
1,Hercule,Poirot,Hercule Poirot@mail.com,54.0,Private investigator,Belgian,Missing,Yes
2,Jane,Marple,,,Amateur detective,British,December 1927,No
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No
5,Bruce,Lee,BruceLee@yahoo.com,32.0,Martial artist,Chinese,"November 27, 1940",Yes
6,Jacob,Gyllenhaal,JacobGyllenhaal@mail.com,39.0,Actor,American,"December 19, 1980",Yes
7,Maggie,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American,"November 16, 1977",No
8,Cristiano,Ronaldo,CristianoRonaldo@mail.com,35.0,footballer,Portuguese,"February 5, 1985",Yes


In [7]:
# Show first 5 rows

df.head()  # called without an argument it returns 5 rows, same as df.head(5)

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
0,Agatha,Christie,AgathaChristie@mail.com,85.0,Novelist,English,15 September 1890,No
1,Hercule,Poirot,Hercule Poirot@mail.com,54.0,Private investigator,Belgian,Missing,Yes
2,Jane,Marple,,,Amateur detective,British,December 1927,No
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No


In [8]:
# Show last 5 rows (called without an argument, tail() returns 5 rows);
# the rows keep their original index labels 4..8

df.tail()

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No
5,Bruce,Lee,BruceLee@yahoo.com,32.0,Martial artist,Chinese,"November 27, 1940",Yes
6,Jacob,Gyllenhaal,JacobGyllenhaal@mail.com,39.0,Actor,American,"December 19, 1980",Yes
7,Maggie,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American,"November 16, 1977",No
8,Cristiano,Ronaldo,CristianoRonaldo@mail.com,35.0,footballer,Portuguese,"February 5, 1985",Yes


In [9]:
# Get the number of rows and columns of the DataFrame
# .shape is an attribute (no parentheses) holding the tuple (n_rows, n_columns)

df.shape

(9, 8)

In [10]:
# Print a concise summary of a DataFrame: index range, per-column non-null
# counts, dtypes and memory usage. The non-null counts are a quick way to spot
# missing values (here only "age" has fewer non-null entries than rows).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   first        9 non-null      object 
 1   last         9 non-null      object 
 2   email        9 non-null      object 
 3   age          7 non-null      float64
 4   occupation   9 non-null      object 
 5   nationality  9 non-null      object 
 6   born         9 non-null      object 
 7   male         9 non-null      object 
dtypes: float64(1), object(7)
memory usage: 704.0+ bytes


In [11]:
# Get the column labels of the DataFrame (returned as a pandas Index object)

df.columns

Index(['first', 'last', 'email', 'age', 'occupation', 'nationality', 'born',
       'male'],
      dtype='object')

In [12]:
# Access the values of a single column
# Using square brackets with the column label; the result is a pandas Series

df['email']

0      AgathaChristie@mail.com
1      Hercule Poirot@mail.com
2                           NA
3           DavidTom@gmail.com
4        NicholleTom@gmail.com
5           BruceLee@yahoo.com
6     JacobGyllenhaal@mail.com
7    MaggieGyllenhaal@mail.com
8    CristianoRonaldo@mail.com
Name: email, dtype: object

In [13]:
# Show the type of a single selected column:
# selecting one column by label gives a Series, not a DataFrame

type(df['email'])

pandas.core.series.Series

In [14]:
# Access the values of a single column
# Using dot (attribute) notation; this gives the same Series as df['email'].
# NOTE: attribute access only works when the column label is a valid Python
# identifier that does not collide with an existing DataFrame attribute or
# method; the square-bracket form works for any label.

df.email

0      AgathaChristie@mail.com
1      Hercule Poirot@mail.com
2                           NA
3           DavidTom@gmail.com
4        NicholleTom@gmail.com
5           BruceLee@yahoo.com
6     JacobGyllenhaal@mail.com
7    MaggieGyllenhaal@mail.com
8    CristianoRonaldo@mail.com
Name: email, dtype: object

In [15]:
# Access the values of multiple columns
# Pass a list of labels inside the brackets (hence the double brackets);
# the result is a DataFrame with the columns in the listed order

df[["first", "email"]]

Unnamed: 0,first,email
0,Agatha,AgathaChristie@mail.com
1,Hercule,Hercule Poirot@mail.com
2,Jane,
3,David,DavidTom@gmail.com
4,Nicholle,NicholleTom@gmail.com
5,Bruce,BruceLee@yahoo.com
6,Jacob,JacobGyllenhaal@mail.com
7,Maggie,MaggieGyllenhaal@mail.com
8,Cristiano,CristianoRonaldo@mail.com


In [16]:
# Get the rows of the DataFrame

# iloc: integer-location based indexing / selection by (0-based) position

df.iloc[0]  # gives first row, returned as a Series indexed by the column labels

first                           Agatha
last                          Christie
email          AgathaChristie@mail.com
age                               85.0
occupation                    Novelist
nationality                    English
born                 15 September 1890
male                                No
Name: 0, dtype: object

In [17]:
# Get multiple rows by position:
# pass a list of integer positions; the result is a DataFrame

df.iloc[[0, 1]]

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
0,Agatha,Christie,AgathaChristie@mail.com,85.0,Novelist,English,15 September 1890,No
1,Hercule,Poirot,Hercule Poirot@mail.com,54.0,Private investigator,Belgian,Missing,Yes


In [18]:
# Get multiple rows of a single column by position:
# first argument = row positions, second = column position
# (2 is the third column, "email"); a single column position returns a Series

df.iloc[[0, 1], 2]

0    AgathaChristie@mail.com
1    Hercule Poirot@mail.com
Name: email, dtype: object

In [19]:
# Show the first 5 rows again (same output as the earlier df.head() call)
df.head()

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
0,Agatha,Christie,AgathaChristie@mail.com,85.0,Novelist,English,15 September 1890,No
1,Hercule,Poirot,Hercule Poirot@mail.com,54.0,Private investigator,Belgian,Missing,Yes
2,Jane,Marple,,,Amateur detective,British,December 1927,No
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
4,Nicholle,Tom,NicholleTom@gmail.com,42.0,Actress,American,"March 23, 1978",No


In [20]:
# Get the rows of the DataFrame

# loc: label-based indexing; selects by index label rather than by position.
# With the default integer index, label 0 happens to coincide with position 0.

df.loc[0]

first                           Agatha
last                          Christie
email          AgathaChristie@mail.com
age                               85.0
occupation                    Novelist
nationality                    English
born                 15 September 1890
male                                No
Name: 0, dtype: object

In [21]:
# Get multiple rows by label:
# pass a list of index labels; the result is a DataFrame

df.loc[[3, 7]]

Unnamed: 0,first,last,email,age,occupation,nationality,born,male
3,David,Tom,DavidTom@gmail.com,,Actor,American,"March 23, 1978",Yes
7,Maggie,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American,"November 16, 1977",No


In [22]:
# Get multiple rows of a single column by label:
# first argument = row labels, second = column label
# (a single column label returns a Series)

df.loc[[0, 1], 'email']

0    AgathaChristie@mail.com
1    Hercule Poirot@mail.com
Name: email, dtype: object

In [23]:
# Get multiple rows and multiple columns by label:
# a list of column labels returns a DataFrame, with columns in the listed order
df.loc[[0, 1], ['email', 'last']]

Unnamed: 0,email,last
0,AgathaChristie@mail.com,Christie
1,Hercule Poirot@mail.com,Poirot


In [24]:
# Row label slice: unlike ordinary Python slicing, a loc slice includes BOTH
# endpoints, so 0:2 returns the rows labelled 0, 1 and 2
df.loc[0:2, ['email', 'last']]

Unnamed: 0,email,last
0,AgathaChristie@mail.com,Christie
1,Hercule Poirot@mail.com,Poirot
2,,Marple


In [25]:
# Rows and columns can both be label slices (both ends inclusive):
# 'last':'nationality' selects every column from 'last' through 'nationality',
# in the DataFrame's column order
df.loc[3:7, 'last':'nationality']

Unnamed: 0,last,email,age,occupation,nationality
3,Tom,DavidTom@gmail.com,,Actor,American
4,Tom,NicholleTom@gmail.com,42.0,Actress,American
5,Lee,BruceLee@yahoo.com,32.0,Martial artist,Chinese
6,Gyllenhaal,JacobGyllenhaal@mail.com,39.0,Actor,American
7,Gyllenhaal,MaggieGyllenhaal@mail.com,42.0,producer,American
