In [136]:
# A single person stored as a flat dict: one scalar value per field.
person = dict(
    first="Corey",
    last="Sachafer",
    email="corey@gmail.com",
)

In [137]:
# The same record in "column" form: every field maps to a one-element list of values.
people = {
    field: [value]
    for field, value in (
        ("first", "Corey"),
        ("last", "Sachafer"),
        ("email", "corey@gmail.com"),
    )
}

In [138]:
# Three people in column form: position i of every list belongs to the same person.
people = dict(
    first=["Atul", "Jane", "John"],
    last=["Beniwal", "Doe", "Doe"],
    email=["Corey@gmail.com", "JaneDoe@gmail.com", "JohnDoe@gmail.com"],
)

In [139]:
# Plain dict lookup: returns the list stored under the 'email' key.
people['email']

['Corey@gmail.com', 'JaneDoe@gmail.com', 'JohnDoe@gmail.com']

In [140]:
import pandas as pd

In [141]:
# Build a DataFrame from the dict: each key becomes a column and each list supplies that column's values.
df = pd.DataFrame(people)

In [142]:
# Display the frame: one row per person, labelled 0, 1, 2.
df

Unnamed: 0,first,last,email
0,Atul,Beniwal,Corey@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [143]:
# Bracket access with a single column name returns that column's values together with the row labels.
df['email']

0      Corey@gmail.com
1    JaneDoe@gmail.com
2    JohnDoe@gmail.com
Name: email, dtype: object

In [144]:
# A single selected column is a pandas Series.
type(df['email'])

pandas.core.series.Series

In [145]:
# Attribute-style column access; [0] then picks the entry labelled 0 (the first row here).
df.email[0]

'Corey@gmail.com'

In [146]:
# Passing a list of column names (note the double brackets) selects several columns at once.
df[['last','email']]

Unnamed: 0,last,email
0,Beniwal,Corey@gmail.com
1,Doe,JaneDoe@gmail.com
2,Doe,JohnDoe@gmail.com


In [147]:
# A multi-column selection is a DataFrame, not a Series.
type(df[['last','email']])

pandas.core.frame.DataFrame

In [148]:
# List the column labels of the frame.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [149]:
# iloc selects by integer position; here, the first row.
df.iloc[0]

first               Atul
last             Beniwal
email    Corey@gmail.com
Name: 0, dtype: object

In [150]:
# Rows at positions 0 and 1, restricted to the column at position 2 ('email').
df.iloc[[0, 1], 2]

0      Corey@gmail.com
1    JaneDoe@gmail.com
Name: email, dtype: object

In [151]:
# loc selects by label; redisplay the frame to see the available row and column labels.
df

Unnamed: 0,first,last,email
0,Atul,Beniwal,Corey@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [152]:
# Label-based slices include both endpoints: rows 0 through 2 and columns 'last' through 'email'.
df.loc[0:2, 'last':'email']

Unnamed: 0,last,email
0,Beniwal,Corey@gmail.com
1,Doe,JaneDoe@gmail.com
2,Doe,JohnDoe@gmail.com


## Indexes - How to Set, Reset, and Use Indexes

In [153]:
# Starting point for this section: the frame still has its default integer row labels.
df

Unnamed: 0,first,last,email
0,Atul,Beniwal,Corey@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [154]:
# Make 'email' the row index. set_index returns a new frame, so rebind df
# instead of mutating it through inplace=True.
df = df.set_index('email')

In [155]:
# 'email' is now the row index instead of a regular column.
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Corey@gmail.com,Atul,Beniwal
JaneDoe@gmail.com,Jane,Doe
JohnDoe@gmail.com,John,Doe


In [156]:
# Inspect the index object, which now holds the e-mail addresses.
df.index

Index(['Corey@gmail.com', 'JaneDoe@gmail.com', 'JohnDoe@gmail.com'], dtype='object', name='email')

In [157]:
# With e-mails as the index, loc looks a row up by its e-mail address.
df.loc['Corey@gmail.com']

first       Atul
last     Beniwal
Name: Corey@gmail.com, dtype: object

In [158]:
# iloc ignores the index labels: position 0 is still the first row.
df.iloc[0]

first       Atul
last     Beniwal
Name: Corey@gmail.com, dtype: object

In [159]:
# Move 'email' back into a regular column and restore the default integer index.
# reset_index returns a new frame, so rebind df instead of using inplace=True.
df = df.reset_index()

In [160]:
# 'email' is a regular column again (now the first one) and the rows are labelled 0, 1, 2.
df

Unnamed: 0,email,first,last
0,Corey@gmail.com,Atul,Beniwal
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


## Filtering - Using Conditionals to Filter Rows and Columns

In [161]:
# Boolean mask: True only where last == 'Doe' AND first == 'John'.
# Each comparison needs its own parentheses because & binds tighter than ==.
filt = (df['last'] == 'Doe') & (df['first'] == 'John')

In [162]:
# Inspect the mask: one boolean per row.
filt

0    False
1    False
2     True
dtype: bool

In [163]:
# ~ negates the mask, so this returns the 'email' value of every row that does NOT match the filter.
df.loc[~filt, 'email']

0      Corey@gmail.com
1    JaneDoe@gmail.com
Name: email, dtype: object

## Updating Rows and Columns - Modifying Data Within DataFrames

In [164]:
# Current column labels, before renaming.
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [165]:
# Assigning a list to df.columns renames every column at once (one name per existing column, in order).
df.columns = ['email', 'first_name', 'last_name']

In [166]:
# Verify the new column names.
df.columns

Index(['email', 'first_name', 'last_name'], dtype='object')

In [167]:
# Lower-case every column label using the vectorized string accessor of the column Index,
# then display the frame.
df.columns = df.columns.str.lower()
df

Unnamed: 0,email,first_name,last_name
0,Corey@gmail.com,Atul,Beniwal
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


In [168]:
# rename changes only the columns named in the mapping ('email' is left as is).
# It returns a new frame, so rebind df instead of using inplace=True.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [169]:
# Overwrite several columns of the row labelled 2 in one assignment.
# The values given here equal the existing ones, so the frame looks the same afterwards.
df.loc[2, ['last','email']] = ['Doe', 'JohnDoe@gmail.com']

In [170]:
# The frame after the rename and the row assignment.
df

Unnamed: 0,email,first,last
0,Corey@gmail.com,Atul,Beniwal
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


In [171]:
# Set a single cell: row label 2, column 'last'.
df.loc[2, 'last'] = 'Doe'

In [172]:
# Row 2 still reads 'Doe', since that is the value that was assigned.
df

Unnamed: 0,email,first,last
0,Corey@gmail.com,Atul,Beniwal
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Doe


In [173]:
# Build a mask for the row whose e-mail matches, then update 'last' for the matching row only.
# Assigning through df.loc[mask, column] writes to the frame itself.
filt = (df['email'] == 'JohnDoe@gmail.com')
df.loc[filt, 'last'] = 'Smith'
# Evaluating df.loc[filt] on its own would display just the matching row(s).

In [174]:
# John's last name is now 'Smith'.
df

Unnamed: 0,email,first,last
0,Corey@gmail.com,Atul,Beniwal
1,JaneDoe@gmail.com,Jane,Doe
2,JohnDoe@gmail.com,John,Smith


In [175]:
# Vectorized string method: lower-case every e-mail and assign the result back to the column.
df['email'] = df['email'].str.lower()

In [176]:
# All e-mail addresses are now lower-case.
df

Unnamed: 0,email,first,last
0,corey@gmail.com,Atul,Beniwal
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


In [177]:
# apply calls a function on each value of a Series (it can also be used on a whole DataFrame).
# Here it returns the length of every e-mail address.
df['email'].apply(len)

0    15
1    17
2    17
Name: email, dtype: int64

In [178]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [179]:
# Apply the custom function to each e-mail. The result is only displayed here, not stored.
df['email'].apply(update_email)

0      COREY@GMAIL.COM
1    JANEDOE@GMAIL.COM
2    JOHNDOE@GMAIL.COM
Name: email, dtype: object

In [180]:
# Assign the result back so the upper-cased addresses are stored in the frame, then display it.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,email,first,last
0,COREY@GMAIL.COM,Atul,Beniwal
1,JANEDOE@GMAIL.COM,Jane,Doe
2,JOHNDOE@GMAIL.COM,John,Smith


In [181]:
# A lambda works for short one-off functions; this one lower-cases the addresses again.
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,email,first,last
0,corey@gmail.com,Atul,Beniwal
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


In [182]:
# On a Series, apply(len) gives the length of each individual value.
df['email'].apply(len)

0    15
1    17
2    17
Name: email, dtype: int64

In [183]:
# On a DataFrame with axis='columns' the function receives one row at a time,
# so len is the number of columns in each row (3).
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [184]:
# Without an axis argument the function receives one column at a time;
# pd.Series.min returns the smallest (lexicographically first) value of each column.
df.apply(pd.Series.min)

email    corey@gmail.com
first               Atul
last             Beniwal
dtype: object

In [185]:
# The same per-column minimum, written with a lambda; x is a whole column (a Series).
df.apply(lambda x: x.min())

email    corey@gmail.com
first               Atul
last             Beniwal
dtype: object

In [186]:
# applymap applies a function that accepts and returns a scalar to every element of a DataFrame;
# here it returns the length of each string.
# NOTE: pandas 2.1 deprecated DataFrame.applymap in favour of DataFrame.map.
df.applymap(len)

Unnamed: 0,email,first,last
0,15,4,7
1,17,4,3
2,17,4,5


In [187]:
# Lower-case every cell. The result is only displayed, not assigned, so df itself is unchanged.
df.applymap(str.lower)

Unnamed: 0,email,first,last
0,corey@gmail.com,atul,beniwal
1,janedoe@gmail.com,jane,doe
2,johndoe@gmail.com,john,smith


In [188]:
# replace substitutes only the values listed in the dict and leaves all other values as they are
# (Series.map is the Series-only alternative for dict-based substitution).
# NOTE(review): the keys are lower-case ('atul', 'john') while the column holds 'Atul' and 'John',
# so nothing matches and the column is left unchanged.
df['first'] = df['first'].replace({'atul': 'Alex', 'john': 'mary'})

In [189]:
# 'first' is unchanged: the lower-case replace keys did not match any value in the column.
df

Unnamed: 0,email,first,last
0,corey@gmail.com,Atul,Beniwal
1,janedoe@gmail.com,Jane,Doe
2,johndoe@gmail.com,John,Smith


## Add/Remove Rows and Columns From DataFrames

In [190]:
# String columns can be concatenated element-wise with +.
df['first'] + ' ' + df['last']

0    Atul Beniwal
1        Jane Doe
2      John Smith
dtype: object

In [191]:
# Assigning to a column name that does not exist yet adds it as a new column.
df['full_name'] = df['first'] + ' ' + df['last']

In [192]:
# The frame now has a 'full_name' column.
df

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith


In [193]:
# drop returns a new frame without the listed columns; df is not modified because the result is not assigned.
df.drop(columns=['first','last'])

Unnamed: 0,email,full_name
0,corey@gmail.com,Atul Beniwal
1,janedoe@gmail.com,Jane Doe
2,johndoe@gmail.com,John Smith


In [194]:
# Split each full name on a space; expand=True returns the pieces as separate columns (0 and 1).
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Atul,Beniwal
1,Jane,Doe
2,John,Smith


In [195]:
# Assign the two pieces to the 'first' and 'last' columns in one statement.
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)

In [196]:
# 'first' and 'last' now hold the pieces split from 'full_name'.
df

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith


In [197]:
# Add a single row built from a dict; columns missing from the dict are left empty (NaN).
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
# The result is only displayed, not stored, so df keeps its three rows.
pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith
3,,Tony,,


In [198]:
# A second frame with two more people; its columns are ordered first, last, email.
people = dict(
    first=["Tony", "Steve"],
    last=["Stark", "Rogers"],
    email=["Tony@gmail.com", "Steve@gmail.com"],
)
df2 = pd.DataFrame(data=people)

In [199]:
# Display the second frame.
df2

Unnamed: 0,first,last,email
0,Tony,Stark,Tony@gmail.com
1,Steve,Rogers,Steve@gmail.com


In [200]:
# Stack df2 underneath df. Columns are matched by name, so the different column order does not matter,
# and 'full_name' is left empty (NaN) for the new rows. ignore_index=True renumbers the rows 0..4.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
# The result is only displayed here, not stored.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith
3,Tony@gmail.com,Tony,Stark,
4,Steve@gmail.com,Steve,Rogers,


In [201]:
# Store the combined frame back in df, then display it.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df = pd.concat([df, df2], ignore_index=True)
df

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith
3,Tony@gmail.com,Tony,Stark,
4,Steve@gmail.com,Steve,Rogers,


In [202]:
# Remove the row labelled 4. The result is only displayed, so df keeps all five rows.
df.drop(index=4)

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith
3,Tony@gmail.com,Tony,Stark,


In [203]:
# Drop every row whose last name is 'Doe': select the matching rows with the mask
# and hand their index labels to drop. The result is only displayed, not stored.
filt = df['last'].eq('Doe')
df.drop(index=df.loc[filt].index)

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
2,johndoe@gmail.com,John,Smith,John Smith
3,Tony@gmail.com,Tony,Stark,
4,Steve@gmail.com,Steve,Rogers,


## Sorting Data

In [204]:
# Sort the rows by last name in descending order; the original row labels travel with the rows.
df.sort_values(by='last', ascending=False)

Unnamed: 0,email,first,last,full_name
3,Tony@gmail.com,Tony,Stark,
2,johndoe@gmail.com,John,Smith,John Smith
4,Steve@gmail.com,Steve,Rogers,
1,janedoe@gmail.com,Jane,Doe,Jane Doe
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal


In [206]:
# Sort by last name descending, breaking ties by first name ascending (one flag per sort key).
df.sort_values(by=['last', 'first'], ascending=[False, True])

Unnamed: 0,email,first,last,full_name
3,Tony@gmail.com,Tony,Stark,
2,johndoe@gmail.com,John,Smith,John Smith
4,Steve@gmail.com,Steve,Rogers,
1,janedoe@gmail.com,Jane,Doe,Jane Doe
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal


In [207]:
# Sort the rows by their index labels (ascending by default).
df.sort_index()

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Atul,Beniwal,Atul Beniwal
1,janedoe@gmail.com,Jane,Doe,Jane Doe
2,johndoe@gmail.com,John,Smith,John Smith
3,Tony@gmail.com,Tony,Stark,
4,Steve@gmail.com,Steve,Rogers,
