# Video 2: DataFrame and Series Basics

In [252]:
import pandas as pd

In [253]:
# Column-oriented sample data: every key is a column name and every list
# holds that column's values, one entry per person.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@gmail.com"],
)

# Plain dict lookup: yields the list stored under the "first" key.
people["first"]

['Corey', 'Jane', 'John']

In [254]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values. Rows get the default integer index 0..2.
df = pd.DataFrame(people)

In [255]:
# Single brackets with one column label select that column.
df["first"]  # one label -> returns a Series

0    Corey
1     Jane
2     John
Name: first, dtype: object

In [256]:
# The inner brackets are a Python list of column labels.
df[["first", "last"]]  # list of labels -> returns a DataFrame

Unnamed: 0,first,last
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [257]:
# Column labels of the frame, returned as an Index object.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [258]:
# Replace the default integer row labels with custom string labels, one per row.
df.index = ['cs', 'janed', 'johnd']

In [259]:
# .loc selects by label; a list of row labels returns those rows as a DataFrame.
df.loc[['cs', 'janed']]

Unnamed: 0,first,last,email
cs,Corey,Schafer,CoreyMSchafer@gmail.com
janed,Jane,Doe,JaneDoe@email.com


In [260]:
# .iloc selects by integer position: rows 0 and 1, columns 0 and 1.
df.iloc[[0, 1], [0, 1]]

Unnamed: 0,first,last
cs,Corey,Schafer
janed,Jane,Doe


In [261]:
# Positional slice of rows: positions 0 and 1 (the stop position 2 is excluded).
df.iloc[0:2]

Unnamed: 0,first,last,email
cs,Corey,Schafer,CoreyMSchafer@gmail.com
janed,Jane,Doe,JaneDoe@email.com


In [262]:
# The 'email' column as a Series; it carries the custom row labels set on df.index.
df['email']

cs       CoreyMSchafer@gmail.com
janed          JaneDoe@email.com
johnd          JohnDoe@gmail.com
Name: email, dtype: object

In [263]:
# Make the 'email' column the row index so rows can be looked up by email address.
# The result is reassigned instead of using inplace=True: the effect on `df` is the
# same, but the statement reads as an ordinary expression and can be chained.
# Note: 'email' stops being a regular column, so running this cell a second time
# on the already-indexed frame raises KeyError.
df = df.set_index('email')

In [264]:
# Display the frame: 'email' is now the index rather than a regular column.
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
CoreyMSchafer@gmail.com,Corey,Schafer
JaneDoe@email.com,Jane,Doe
JohnDoe@gmail.com,John,Doe


In [265]:
# The index now holds the email values and is named 'email'.
df.index

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@gmail.com'], dtype='object', name='email')

In [266]:
# reset_index moves 'email' back to a regular column and restores the default integer index.
# It returns a new DataFrame and leaves df untouched; pass inplace=True to modify df instead.
df.reset_index()

Unnamed: 0,email,first,last
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


In [267]:
# Boolean mask: True for every row whose 'last' value equals 'Doe'.
filt = df['last'] == 'Doe'

In [268]:
# Indexing with a boolean mask keeps only the rows where the mask is True.
df[filt]

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
JaneDoe@email.com,Jane,Doe
JohnDoe@gmail.com,John,Doe


In [269]:
# The same row filter through .loc; the output matches df[filt].
df.loc[filt]

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
JaneDoe@email.com,Jane,Doe
JohnDoe@gmail.com,John,Doe


In [270]:
# .loc also accepts a column selector: filtered rows, 'first' column only (a Series).
df.loc[filt, 'first']

email
JaneDoe@email.com    Jane
JohnDoe@gmail.com    John
Name: first, dtype: object

In [271]:
# Combine two conditions element-wise with | (or). Each comparison needs its own
# parentheses because | binds more tightly than == in Python.
# Note: this rebinds `filt`, replacing the earlier 'Doe' mask.
filt = (df['last'] == 'Schafer') | (df['first'] == 'John')

In [272]:
# ~ negates the mask, selecting the rows that match neither condition.
df.loc[~filt, 'first']

email
JaneDoe@email.com    Jane
Name: first, dtype: object

In [273]:
# Rebuild df from the dict, discarding the 'email' index set earlier:
# the frame is back to the default integer index with 'email' as a column.
df = pd.DataFrame(people)

In [274]:
# Display the rebuilt frame: default integer index, 'email' is a regular column again.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@gmail.com


In [275]:
# + on string columns concatenates row by row; the ' ' literal is placed
# between the two values of every row. Nothing is stored yet.
df['first'] + ' ' + df['last']

0    Corey Schafer
1         Jane Doe
2         John Doe
dtype: object

In [276]:
# Assigning to a column label that does not exist yet adds it as a new column.
df['full_name'] = df['first'] + ' ' + df['last']

In [277]:
# Returns a copy without these columns. The result is not assigned,
# so df itself still has 'first' and 'last' afterwards.
df.drop(columns=['first', 'last'])

Unnamed: 0,email,full_name
0,CoreyMSchafer@gmail.com,Corey Schafer
1,JaneDoe@email.com,Jane Doe
2,JohnDoe@gmail.com,John Doe


In [278]:
# .str.split applies the split to every value, giving a list of parts per row.
df['full_name'].str.split(' ')

0    [Corey, Schafer]
1         [Jane, Doe]
2         [John, Doe]
Name: full_name, dtype: object

In [279]:
# expand=True spreads the parts into separate columns (labelled 0 and 1)
# instead of returning one list per row.
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [280]:
# df still has first, last, email and full_name: the drop and split cells
# above only displayed their results without assigning them.
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe


In [281]:
# Write the two halves of full_name back into the 'first' and 'last' columns.
# n=1 splits on the first space only, so the result never has more than two
# columns and everything after the first space goes to 'last'. Without n, a name
# containing an extra space (e.g. a middle name) expands to three columns and
# the two-column assignment raises ValueError.
df[['first', 'last']] = df['full_name'].str.split(' ', n=1, expand=True)

In [282]:
# Display df after writing the split parts back; for this data the
# 'first' and 'last' values are the same as before.
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe


In [283]:
# Add a single row. DataFrame.append was deprecated in pandas 1.4 and removed in
# 2.0, so the dict is wrapped in a one-row DataFrame and stacked with pd.concat.
# Columns missing from the new row (last, email, full_name) are filled with NaN,
# and ignore_index=True renumbers the rows from 0.
# The result is only displayed; df itself is not modified.
pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe
3,Tony,,,


In [284]:
# Second batch of sample rows with the same three columns (no full_name).
# Note: this rebinds `people`, replacing the dict defined at the top of the notebook.
people = dict(
    first=["Tony", "Steve"],
    last=["Stark", "Rogers"],
    email=["ironman@avenge.com", "cap@avenge.com"],
)
# Keys become columns, lists become the column values.
df2 = pd.DataFrame(data=people)


In [285]:
# Stack df2's rows under df's. DataFrame.append was removed in pandas 2.0;
# pd.concat is the supported replacement.
# Columns are aligned by name: df2 has no 'full_name', so its rows get NaN there.
# ignore_index=True renumbers the combined rows from 0.
# The result is only displayed; df itself is not modified.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe
3,Tony,Stark,ironman@avenge.com,
4,Steve,Rogers,cap@avenge.com,


In [286]:
# Add df2's rows to df and keep the result. DataFrame.append was removed in
# pandas 2.0; pd.concat is the supported replacement.
# ignore_index=True renumbers the combined rows from 0.
# NOTE: df is rebuilt from itself, so re-running this cell adds df2's rows again.
df = pd.concat([df, df2], ignore_index=True)

In [287]:
# Mask of the rows whose last name is 'Doe'.
the_does = df['last'] == 'Doe'
# drop removes rows by index label, so pass the labels of the matching rows.
# The remaining rows keep their original labels (no renumbering).
# The result is reassigned instead of using inplace=True; the effect on `df` is the same.
df = df.drop(index=df[the_does].index)

In [288]:
# Only the non-Doe rows remain, and they keep their original labels (0, 3, 4).
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
3,Tony,Stark,ironman@avenge.com,
4,Steve,Rogers,cap@avenge.com,
