In [36]:
import pandas as pd

In [37]:
# Sample records in column-oriented form: each key is a column label and each
# list holds that column's values, one entry per person.
people = dict(
    first=["John", "Jane", "Max"],
    last=["Doe", "Smith", "Payne"],
    email=["john@gmail.com", "jane@gmail.com", "max@gmail.com"],
)

In [38]:
# A plain dict lookup returns the raw Python list stored under the key.
people['email']

['john@gmail.com', 'jane@gmail.com', 'max@gmail.com']

In [39]:
# Build a DataFrame from the dict: the keys become column labels and the rows
# get a default integer index (0, 1, 2).
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [6]:
# Bracket access with a single column label returns that column.
df['email']

0    john@gmail.com
1    jane@gmail.com
2     max@gmail.com
Name: email, dtype: object

In [7]:
# A single column is a pandas Series.
type(df['email'])

pandas.core.series.Series

In [8]:
# Attribute (dot) access returns the same column as df['email'].
df.email

0    john@gmail.com
1    jane@gmail.com
2     max@gmail.com
Name: email, dtype: object

In [9]:
# Passing a list of labels selects several columns and returns a DataFrame.
df[['last', 'email']]

Unnamed: 0,last,email
0,Doe,john@gmail.com
1,Smith,jane@gmail.com
2,Payne,max@gmail.com


In [10]:
# Access Columns: the column labels are exposed as an Index object.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [11]:
# iloc - access rows using integer location (position, not label).
# A single position returns that row as a Series.
df.iloc[0]

first              John
last                Doe
email    john@gmail.com
Name: 0, dtype: object

In [12]:
# A list of positions returns the matching rows as a DataFrame.
df.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com


In [13]:
# Access columns with iloc: the second argument is the column position
# (2 is the 'email' column here).
df.iloc[[0, 1], 2]

0    john@gmail.com
1    jane@gmail.com
Name: email, dtype: object

In [14]:
# loc - look rows up by index label rather than by position.
# With the default integer index the labels coincide with the positions.
df.loc[0]

first              John
last                Doe
email    john@gmail.com
Name: 0, dtype: object

In [15]:
# A list of labels returns the matching rows as a DataFrame.
df.loc[[0, 1]]

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com


In [16]:
# With loc the second argument selects a column by label.
df.loc[[0, 1], 'email']

0    john@gmail.com
1    jane@gmail.com
Name: email, dtype: object

In [17]:
# Columns come back in the order they are requested (email, then first).
df.loc[[0, 1], ['email', 'first']]

Unnamed: 0,email,first
0,john@gmail.com,John
1,jane@gmail.com,Jane


## Set, reset and use Indexes

In [18]:
# Without inplace=True, set_index returns a new frame indexed by email;
# df itself is not modified.
df.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
john@gmail.com,John,Doe
jane@gmail.com,Jane,Smith
max@gmail.com,Max,Payne


In [19]:
# inplace=True applies the change to df itself and returns None, so this cell
# shows no output. (The non-mutating spelling is df = df.set_index('email').)
df.set_index('email', inplace=True)

In [20]:
# The index now holds the email addresses and carries the name 'email'.
df.index

Index(['john@gmail.com', 'jane@gmail.com', 'max@gmail.com'], dtype='object', name='email')

In [21]:
# With the email index, loc takes an email address as the row label.
df.loc['john@gmail.com']

first    John
last      Doe
Name: john@gmail.com, dtype: object

In [22]:
# A row label plus a column label returns a single scalar value.
df.loc['john@gmail.com', 'first']

'John'

In [23]:
# iloc is positional, so it still works after the index change.
df.iloc[0]
# df.loc[0] would fail here because 0 is not a label in the email index
# (KeyError on current pandas; older releases raised TypeError).

first    John
last      Doe
Name: john@gmail.com, dtype: object

In [24]:
# reset_index moves 'email' back into a regular column (it becomes the first
# column) and restores the default integer index.
# inplace=True applies the change to df itself instead of returning a new frame.
df.reset_index(inplace=True)
df

Unnamed: 0,email,first,last
0,john@gmail.com,John,Doe
1,jane@gmail.com,Jane,Smith
2,max@gmail.com,Max,Payne


## Filtering

In [25]:
# Comparing a column to a value yields a boolean mask with one entry per row.
filt = df['last'] == 'Doe'

In [26]:
# Indexing with the mask keeps only the rows where it is True.
df[filt]

Unnamed: 0,email,first,last
0,john@gmail.com,John,Doe


In [27]:
# loc accepts the same boolean mask and returns the same rows.
df.loc[filt]

Unnamed: 0,email,first,last
0,john@gmail.com,John,Doe


In [28]:
# loc can combine a boolean mask (rows) with a column label.
df.loc[filt, 'email']

0    john@gmail.com
Name: email, dtype: object

In [29]:
# & is the element-wise "and". Each comparison needs its own parentheses
# because & binds more tightly than ==.
filt = (df['last'] == 'Doe') & (df['first'] == 'John')

In [30]:
# Rows that satisfy both conditions.
df.loc[filt]

Unnamed: 0,email,first,last
0,john@gmail.com,John,Doe


In [31]:
# | is the element-wise "or": a row matches if either comparison is True.
filt = (df['last'] == 'Doe') | (df['first'] == 'Jane')

In [32]:
# Rows that satisfy at least one of the conditions.
df.loc[filt]

Unnamed: 0,email,first,last
0,john@gmail.com,John,Doe
1,jane@gmail.com,Jane,Smith


In [33]:
# ~ negates the mask, selecting the rows the filter did not match.
df.loc[~filt]

Unnamed: 0,email,first,last
2,max@gmail.com,Max,Payne


## Update Rows and Columns

In [40]:
# Rebuild the frame from `people` so this section starts from the original
# column order (first, last, email). The index section above leaves the
# columns as (email, first, last) after reset_index(), while the positional
# assignments further down (df.columns = [...], df.loc[2] = [...]) rely on
# the original order. Without this, a fresh Restart & Run All would attach
# the wrong labels to the columns.
df = pd.DataFrame(people)
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [41]:
# Assigning a list to df.columns relabels every column positionally, so the
# list needs one name per column, given in the current column order.
df.columns = ['first_name', 'last_name', 'email']

In [42]:
# Show the frame with its new column labels.
df

Unnamed: 0,first_name,last_name,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [43]:
# Upper-case every column label.
df.columns = list(map(str.upper, df.columns))

In [44]:
# Show the frame with upper-cased column labels.
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [45]:
# Lower-case every column label again.
df.columns = list(map(str.lower, df.columns))

In [46]:
# Show the frame with lower-cased column labels.
df

Unnamed: 0,first_name,last_name,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [49]:
# rename takes an {old: new} mapping, so only the listed columns change and
# column order does not matter. inplace=True applies it to df itself.
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)

In [50]:
# Show the frame after the rename.
df

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [54]:
# Overwrite the whole row labelled 2. The values are matched to the columns
# by position, so they must follow the column order (first, last, email).
df.loc[2] = ['Tim', 'Payne', 'tim@gmail.com']

In [55]:
# Show the frame with row 2 replaced.
df

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Tim,Payne,tim@gmail.com


In [56]:
# Update only selected columns of row 2; the values line up with the listed
# column labels, and 'last' is left as it was.
df.loc[2, ['first', 'email']] = ['Max', 'max@gmail.com']

In [57]:
# Show the frame with row 2 restored to Max.
df

Unnamed: 0,first,last,email
0,John,Doe,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [61]:
# Build a mask for the row to change, then assign through loc so the update
# is written to df itself (selecting with df[filt]['last'] = ... would be
# chained indexing and is not reliable).
filt = (df['email'] == 'john@gmail.com')
df.loc[filt, 'last'] = "Smith"

In [62]:
# Show the frame: John's last name is now Smith.
df

Unnamed: 0,first,last,email
0,John,Smith,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [63]:
# The .str accessor applies a string method to every value in the column;
# assigning the result back replaces the column.
df['email'] = df['email'].str.upper()
df

Unnamed: 0,first,last,email
0,John,Smith,JOHN@GMAIL.COM
1,Jane,Smith,JANE@GMAIL.COM
2,Max,Payne,MAX@GMAIL.COM


In [64]:
# Back to lower case through the same .str accessor.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,John,Smith,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


### apply

In [65]:
# Series.apply calls the function once per value; here it gives the length of
# each email address.
df['email'].apply(len)

0    14
1    14
2    13
Name: email, dtype: int64

In [67]:
def update_email(val):
    """Return the given email string converted to upper case."""
    upper_cased = val.upper()
    return upper_cased

# Pass the function object itself (no parentheses); apply calls it once for
# each value and the result replaces the email column.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,John,Smith,JOHN@GMAIL.COM
1,Jane,Smith,JANE@GMAIL.COM
2,Max,Payne,MAX@GMAIL.COM


In [68]:
# Using lambda function: the same per-value apply, with an inline anonymous
# function instead of a named one.
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,John,Smith,john@gmail.com
1,Jane,Smith,jane@gmail.com
2,Max,Payne,max@gmail.com


In [69]:
# apply on data frame: the function receives each column as a Series, so len
# reports the number of rows in each column (not the length of each value).
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [70]:
# axis='columns' passes each row to the function instead, so len counts the
# values in a row (one per column).
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

### applymap

In [71]:
# Apply len to every individual value in the frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated, so prefer map and fall back to applymap on older pandas.
df.map(len) if hasattr(df, "map") else df.applymap(len)

Unnamed: 0,first,last,email
0,4,5,14
1,4,5,14
2,3,5,13


In [73]:
# Upper-case every individual value in the frame (all columns hold strings).
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated, so prefer map and fall back to applymap on older pandas.
df.map(str.upper) if hasattr(df, "map") else df.applymap(str.upper)

Unnamed: 0,first,last,email
0,JOHN,SMITH,JOHN@GMAIL.COM
1,JANE,SMITH,JANE@GMAIL.COM
2,MAX,PAYNE,MAX@GMAIL.COM


### map

In [75]:
# map substitutes values through the dict; any value without a matching key
# becomes NaN. The result is a new Series, df is not changed.
df['first'].map({'John': 'Will'})

0    Will
1     NaN
2     NaN
Name: first, dtype: object

### replace

In [76]:
# replace swaps only the listed values and keeps everything else as it is.
# The result is a new Series, df is not changed.
df['first'].replace({'John': 'Will'})

0    Will
1    Jane
2     Max
Name: first, dtype: object