### Installing pandas library using pip

In [62]:
import sys
!{sys.executable} -m pip install pandas



### Importing pandas library
NOTE: We can use the as keyword 'as' to use the shorthand 'pd'.

In [63]:
import pandas as pd

# Creating a pandas DataFrame object for patient info
###### We use pd.DataFrame() constructor, which takes in some keyword arguments: data & columns
###### NOTE: In Jupyter Notebook, we write df rather than print(df)


In [64]:
df = pd.DataFrame(columns=['patientID', 'name', 'age'], data=[[123,'Ben',21],[456,'Bob',19],[789,'Sam',20]])
df

Unnamed: 0,patientID,name,age
0,123,Ben,21
1,456,Bob,19
2,789,Sam,20


## 'Attributes' of our DataFrame
###### Click this link and scroll down to 'attributes' (a.k.a. properties): https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html


###### df.columns: returrns the NAMES of columns in the DataFrame

In [65]:
df.columns

Index(['patientID', 'name', 'age'], dtype='object')

###### df.shape: returns the (number of rows, number of columns) in the DataFrame

In [66]:
df.shape

(3, 3)

# Accessing cells in the DataFrame

In [67]:
df

Unnamed: 0,patientID,name,age
0,123,Ben,21
1,456,Bob,19
2,789,Sam,20


## Using df.iloc[]
###### (stands for integer-location)
We can pass in 1 or 2 parameters in the square brackets: df.iloc[row_index, column_index (optional)]

In [68]:
df.iloc[0,1]

'Ben'

1. Accessing element in 1st row (0th index), 1st column (0th index):

In [69]:
df.iloc[0,0:]

patientID    123
name         Ben
age           21
Name: 0, dtype: object

2. Accessing 1st row. NOTE: leaving column_index blank means all columns are accessed

In [70]:
df.iloc[0]

patientID    123
name         Ben
age           21
Name: 0, dtype: object

In [71]:
df

Unnamed: 0,patientID,name,age
0,123,Ben,21
1,456,Bob,19
2,789,Sam,20


3. Accessing 2nd and 3rd rows & all columns. NOTE: use list slicing syntax to select multiple columns

In [72]:
df.iloc[1:]

Unnamed: 0,patientID,name,age
1,456,Bob,19
2,789,Sam,20


4. CHALLENGE: Can you access 'Bob' from the DataFrame?

In [73]:
df.iloc[1,1]

'Bob'

5. CHALLENGE: Can you access the name & age columns of the 2nd and 3rd rows?

In [74]:
df.iloc[1:,1:]

Unnamed: 0,name,age
1,Bob,19
2,Sam,20


## Accessing rows using df.loc[]

In [75]:
df

Unnamed: 0,patientID,name,age
0,123,Ben,21
1,456,Bob,19
2,789,Sam,20


In [76]:
df.iloc[0]

patientID    123
name         Ben
age           21
Name: 0, dtype: object

In [77]:
df

Unnamed: 0,patientID,name,age
0,123,Ben,21
1,456,Bob,19
2,789,Sam,20


### Must use df.set_index() to set the index column of the DataFrame
In this case, we pass in the column name 'patientID'

In [83]:
df = df.set_index('patientID')

In [84]:
df

Unnamed: 0_level_0,name,age
patientID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
456,Bob,19
789,Sam,20


### Accessing rows using df.loc[]

1. Accessing the row corresponding to a patientID of 123 (1st row)

In [85]:
df.loc[123]

name    Ben
age      21
Name: 123, dtype: object

2. CHALLENGE: Accessing the row corresponding to a patientID of 789 (3rd row)

In [86]:
df.loc[789]

name    Sam
age      20
Name: 789, dtype: object

## Accessing columns

In [87]:
df

Unnamed: 0_level_0,name,age
patientID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
456,Bob,19
789,Sam,20


### Can access the 'age' column using:

In [88]:
df['age']

patientID
123    21
456    19
789    20
Name: age, dtype: int64

In [91]:
df

Unnamed: 0_level_0,name,age
patientID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
456,Bob,19
789,Sam,20


## Accessing rows based on the values in columns
### Syntax is: df[condition to satisfy]

1: accessing rows where 'age' column = 20

In [92]:
df[df['age' == 20]]

KeyError: False

CHALLENGE: Can you access the row where the 'name' column = 'Ben'