### Installing pandas library using pip

In [7]:
# sys.executable is the path of the Python interpreter running this kernel.
# Invoking pip through it (rather than a bare `pip`) installs pandas into the
# same environment this notebook imports from.
import sys
!{sys.executable} -m pip install pandas



### Importing pandas library
NOTE: We can use the 'as' keyword to give pandas the shorthand alias 'pd'.

In [4]:
import pandas as pd

# Creating a pandas DataFrame object for patient info
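###### We use the pd.DataFrame() constructor, which takes keyword arguments including data (a list of rows) & columns (the column names)
###### NOTE: In a Jupyter Notebook, writing df as the last line of a cell displays it as a formatted table, so we write df rather than print(df)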


In [9]:
# Build the patient table: each inner list in `data` is one row, and
# `columns` names the fields in the same left-to-right order.
df = pd.DataFrame(
    data=[
        [123, 'Ben', 21],
        [456, 'Alex', 20],
        [789, 'Will', 20],
    ],
    columns=['ID', 'name', 'age'],
)
df

Unnamed: 0,ID,name,age
0,123,Ben,21
1,456,Alex,20
2,789,Will,20


## 'Attributes' of our DataFrame
###### Click this link and scroll down to 'attributes' (a.k.a. properties): https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html


###### df.columns: returns the NAMES of columns in the DataFrame

In [8]:
df.columns

Index(['ID', 'name', 'age'], dtype='object')

###### df.shape: returns the (number of rows, number of columns) in the DataFrame

In [10]:
df.shape

(3, 3)

# Accessing cells in the DataFrame

## Using df.iloc[]
###### (stands for integer location: rows and columns are selected by their 0-based position)
We can pass in 1 or 2 parameters in the square brackets: df.iloc[row_index, column_index (optional)]

In [18]:
df

Unnamed: 0,ID,name,age
0,123,Ben,21
1,456,Alex,20
2,789,Will,20


1. Accessing element in 1st row (0th index), 1st column (0th index):

In [11]:
df.iloc[0,0]

123

2. Accessing 1st row. NOTE: leaving column_index blank means all columns are accessed

In [12]:
df.iloc[0]

ID      123
name    Ben
age      21
Name: 0, dtype: object

3. Accessing 2nd and 3rd rows & all columns. NOTE: use list slicing syntax to select multiple rows

In [14]:
df.iloc[1:]

Unnamed: 0,ID,name,age
1,456,Alex,20
2,789,Will,20


In [19]:
df

Unnamed: 0,ID,name,age
0,123,Ben,21
1,456,Alex,20
2,789,Will,20


4. CHALLENGE: Can you access 'Alex' from the DataFrame?

In [16]:
df.iloc[1, 1]

'Alex'

5. CHALLENGE: Can you access the name & age columns of the 2nd and 3rd rows?

In [15]:
df.iloc[1:, 1:]

Unnamed: 0,name,age
1,Alex,20
2,Will,20


## Accessing rows using df.loc[]

In [21]:
df

Unnamed: 0_level_0,name,age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
456,Alex,20
789,Will,20


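### Must use df.set_index() to set the index column of the DataFrame
df.loc[] looks rows up by their index label, so we first choose which column provides those labels. In this case, we pass in the column name 'ID'.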

In [20]:
# Promote the 'ID' column to the row index so that df.loc[] can look rows up by ID.
# set_index() returns a new DataFrame, so the result is assigned back to df.
# The guard keeps this cell safe to re-run: once 'ID' is the index it is no longer
# a column, and calling set_index('ID') a second time would raise a KeyError.
if 'ID' in df.columns:
    df = df.set_index('ID')
df

Unnamed: 0_level_0,name,age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
456,Alex,20
789,Will,20


### Accessing rows using df.loc[]

1. Accessing the row corresponding to a patientID of 123 (1st row)

In [22]:
df.loc[123]

name    Ben
age      21
Name: 123, dtype: object

2. Accessing the row corresponding to a patientID of 789 (3rd row)

In [25]:
df.loc[789]

name    Will
age       20
Name: 789, dtype: object

## Accessing columns

### Can access the 'age' column using:

In [26]:
df['age']

ID
123    21
456    20
789    20
Name: age, dtype: int64

## Accessing rows based on the values in columns
### Syntax is: df[condition to satisfy]

### Example: accessing rows where 'age' column = 20

In [27]:
# df['age'] == 20 produces one True/False value per row; indexing df with that
# boolean Series keeps only the rows marked True.
df[df['age'] == 20]

Unnamed: 0_level_0,name,age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
456,Alex,20
789,Will,20


### CHALLENGE: Can you access the row where the 'name' column = 'Ben'?

In [30]:
df[df['name'] == 'Ben']

Unnamed: 0_level_0,name,age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
123,Ben,21
