In [2]:
# A single record: each key maps to one scalar value.
person = dict(
    first="Griffin",
    last="Sargent",
    email="griffin.a.sargent@gmail.com",
)

In [3]:
# Same record as before, but every value is now a one-element list so that
# each key can later hold values for several people.
people = dict(
    first=["Griffin"],
    last=["Sargent"],
    email=["griffin.a.sargent@gmail.com"],
)

In [4]:
# Three people: each key holds a list, and position i across the lists
# describes the i-th person.
people = dict(
    first=["Griffin", "Jane", "Mike"],
    last=["Sargent", "Doe", "Jones"],
    email=["griffin.a.sargent@gmail.com", "jane@email.com", "MikeJ@email.com"],
)

In [5]:
# Plain dict lookup: returns the list stored under the 'email' key.
people['email']

['griffin.a.sargent@gmail.com', 'jane@email.com', 'MikeJ@email.com']

## Dataframes *feel* similar to python dictionaries
DataFrames are somewhat like Python dictionaries in the way you reference their items: a column is looked up by its name, much like a dict value is looked up by its key.
However, DataFrames are MUCH more powerful.

In [6]:
import pandas as pd

In [8]:
# Build a DataFrame from the dict of lists: keys become column names and
# each list supplies that column's values.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,Mike,Jones,MikeJ@email.com


In [9]:
# Access an individual column with the same bracket syntax used to look up a Python dict item.
df['email']

0    griffin.a.sargent@gmail.com
1                 jane@email.com
2                MikeJ@email.com
Name: email, dtype: object

In [12]:
# Notice that a single selected column is a Series (1-D array), not a DataFrame.
type(df['email'])

pandas.core.series.Series

In [14]:
# Turn the single 'email' column (a Series) back into a one-column DataFrame.
emails = df['email'].to_frame()
emails

Unnamed: 0,email
0,griffin.a.sargent@gmail.com
1,jane@email.com
2,MikeJ@email.com


In [16]:
# Columns can be accessed with either the dict item notation OR the dot (.) notation.
# Don't use the dot notation - it's ambiguous, and could lead to issues when a column is named the same as a DataFrame method.
df.email

0    griffin.a.sargent@gmail.com
1                 jane@email.com
2                MikeJ@email.com
Name: email, dtype: object

In [19]:
# Instead of a single column name (a string), you can provide a list of names (notice the extra brackets).
# Selecting with a list returns a DataFrame rather than a Series.
df[['first', 'email']]

Unnamed: 0,first,email
0,Griffin,griffin.a.sargent@gmail.com
1,Jane,jane@email.com
2,Mike,MikeJ@email.com


In [20]:
# Show the column labels of the DataFrame.
df.columns

Index(['first', 'last', 'email'], dtype='object')

## What about filtering rows?
`loc` and `iloc` return a Series when you select a single row, or a DataFrame when you pass a list of rows (e.g. `[[0, 1]]`) or a slice (e.g. `[0:]`).

**iloc** : pass integer positions to specify rows and columns

**loc** : pass labels (names) to specify rows and columns

In [21]:
# Select the first row by integer position; a single row comes back as a Series.
df.iloc[0]

first                        Griffin
last                         Sargent
email    griffin.a.sargent@gmail.com
Name: 0, dtype: object

In [24]:
# Passing a list of positions selects several rows and returns a DataFrame.
df.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com


In [26]:
# Adding a 2nd argument lets you pick specific columns by integer position.
# A single position (here 0, the first column) returns a Series.
df.iloc[[0, 1], 0]

0    Griffin
1       Jane
Name: first, dtype: object

In [30]:
# With loc, rows are selected by index label (here the default labels 0 and 1)
# and the 2nd argument picks specific columns by name.
# The columns come back in the order given in the list.
df.loc[[0, 1], ['email','last']]

Unnamed: 0,email,last
0,griffin.a.sargent@gmail.com,Sargent
1,jane@email.com,Doe
