# Traditional Way in Python without Pandas

In [116]:
# A single record: one scalar value per field.
person = dict(
    first="Ashish",
    last="Rai",
    email="ashishrai@email.com",
)

In [117]:
# The same record, but each value is wrapped in a list so that every key
# can hold several entries.
people = dict(
    first=["Ashish"],
    last=["Rai"],
    email=["ashishrai@email.com"],
)

In [118]:
# Three records stored column-wise: position i of every list belongs to person i.
people = dict(
    first=["Ashish", "Anjali", "Sudha"],
    last=["Rai", "Rai", "Sharma"],
    email=[
        "ashishrai@email.com",
        "anjalirai@email.com",
        "sudhasharma@email.com",
    ],
)

In [119]:
people #prints whole dictionary 

{'first': ['Ashish', 'Anjali', 'Sudha'],
 'last': ['Rai', 'Rai', 'Sharma'],
 'email': ['ashishrai@email.com',
  'anjalirai@email.com',
  'sudhasharma@email.com']}

In [120]:
people['email'] #prints the 'email' column

['ashishrai@email.com', 'anjalirai@email.com', 'sudhasharma@email.com']

## Similar thing using Pandas

In [121]:
import pandas as pd

### Creating DataFrame

In [122]:
# Each dict key becomes a column; the i-th items of the lists form row i.
df = pd.DataFrame(data=people)

In [123]:
df # prints whole dataframe

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


### Parsing Through a DataFrame

In [124]:
df['email'] # prints the 'email' column

0      ashishrai@email.com
1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

In [125]:
df.email # another way to print a particular column, less suitable

0      ashishrai@email.com
1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

In [126]:
type(df) # gives type of dataframe

pandas.core.frame.DataFrame

In [127]:
type(df['email']) # gives the type of a single dataframe column, which is a Series

# A Series can be thought of as a single column of a dataframe

pandas.core.series.Series

In [128]:
df[['last', 'first']] # gives the columns named in the inner list (note the double square brackets)

# the output is a DataFrame rather than a Series, because a list of columns was passed

Unnamed: 0,last,first
0,Rai,Ashish
1,Rai,Anjali
2,Sharma,Sudha


In [129]:
df.columns # gives the column labels as an Index; dtype='object' describes the labels, not the data in the columns

Index(['first', 'last', 'email'], dtype='object')

### The iloc attribute

In [130]:
df.iloc[0] # gives first row of all columns

first                 Ashish
last                     Rai
email    ashishrai@email.com
Name: 0, dtype: object

In [131]:
df.iloc[[0, 1]] # gives the rows at the listed integer positions, with all columns (note the double square brackets)

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com


In [132]:
df.iloc[[0, 1], 1] # rows at positions 0 and 1 of the single column at position 1 ('last'); the result is a Series

0    Rai
1    Rai
Name: last, dtype: object

In [133]:
df.iloc[[0, 1], [1, 2]] # rows at positions 0 and 1 of the columns at positions 1 and 2 ('last', 'email'); the result is a DataFrame

Unnamed: 0,last,email
0,Rai,ashishrai@email.com
1,Rai,anjalirai@email.com


### The loc attribute 

In [134]:
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [135]:
df.loc[0] # loc selects by index label: the row labelled 0, with all columns

first                 Ashish
last                     Rai
email    ashishrai@email.com
Name: 0, dtype: object

In [136]:
df.loc[[0, 1]] # the rows labelled 0 and 1, with all columns

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com


In [137]:
df.loc[[0, 1], ['email', 'last']] # rows labelled 0 and 1; columns chosen by name, returned in the order listed

Unnamed: 0,email,last
0,ashishrai@email.com,Rai
1,anjalirai@email.com,Rai


In [138]:
df


Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


### set_index method 

In [139]:
df_copy = df.copy()

In [140]:
df_copy.set_index('email') # returns a new dataframe indexed by 'email'; df_copy itself is not modified

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
ashishrai@email.com,Ashish,Rai
anjalirai@email.com,Anjali,Rai
sudhasharma@email.com,Sudha,Sharma


In [141]:
df_copy

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


We can see that the set_index method didn't change the index of the original dataframe itself. To do so, we can pass inplace=True, e.g. df_copy.set_index('email', inplace=True)

In [142]:
df_copy.set_index('email', inplace=True)
df_copy

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
ashishrai@email.com,Ashish,Rai
anjalirai@email.com,Anjali,Rai
sudhasharma@email.com,Sudha,Sharma


In [143]:
df_copy.loc['anjalirai@email.com', 'first'] # email is used as index

'Anjali'

Now we cannot use the previous index values 0, 1, ... in loc, as they have been replaced by the 'email' values, but the integer positions can still be used in iloc

In [144]:
df_copy.iloc[1, 0] # iloc still works by integer position: second row, first column

'Anjali'

If we want to restore the default integer index, we can do that with reset_index(), passing inplace=True so that the change is actually applied to the dataframe

In [145]:
df_copy.reset_index(inplace=True)
df_copy

Unnamed: 0,email,first,last
0,ashishrai@email.com,Ashish,Rai
1,anjalirai@email.com,Anjali,Rai
2,sudhasharma@email.com,Sudha,Sharma


## Filtering 

In [146]:
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [147]:
df[df['last'] == 'Sharma'] # first way: pass the boolean condition directly inside the brackets

Unnamed: 0,first,last,email
2,Sudha,Sharma,sudhasharma@email.com


In [148]:
filt = df['last'] == 'Sharma' # second way: store the boolean condition in a variable 'filt'
df[filt] # first way to use 'filt': plain bracket indexing

Unnamed: 0,first,last,email
2,Sudha,Sharma,sudhasharma@email.com


In [149]:
df.loc[filt] # second way to call 'filt' variable (using .loc) and getting same result

Unnamed: 0,first,last,email
2,Sudha,Sharma,sudhasharma@email.com


In [150]:
filt # 'filt' itself is a boolean Series (True where the condition holds), which is what gets displayed here

0    False
1    False
2     True
Name: last, dtype: bool

In [151]:
df.loc[filt, 'email'] # the second way, i.e. using .loc, we can access a particular column if we want as well

2    sudhasharma@email.com
Name: email, dtype: object

### ANDing and ORing using '&' and '|'

In [152]:
# each comparison needs its own parentheses because & binds more tightly than ==
filt = (df['last'] == 'Rai') & (df['first'] == 'Anjali')
df.loc[filt, 'email']

1    anjalirai@email.com
Name: email, dtype: object

In [153]:
filt = (df['last'] == 'Sharma') | (df['first'] == 'Anjali')
df.loc[filt, 'email']

1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

If we want the opposite of the previous filter, we could rewrite the query by hand (changing each '==' to '!=' and the '|' to '&'), or simply negate the existing filter with a tilde sign '~' as follows

In [154]:
df.loc[~filt, 'email']

0    ashishrai@email.com
Name: email, dtype: object

## Updating Rows and Columns

### 1. Updating Columns 

In [155]:
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [156]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

Changing all column names

In [157]:
df.columns = ['first_name', 'last_name', 'email']
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [158]:
df

Unnamed: 0,first_name,last_name,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


Making column names uppercase, lowercase or title

In [159]:
# Upper-case every column label through the Index's vectorized string accessor.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [160]:
# Title-case every column label through the Index's vectorized string accessor.
df.columns = df.columns.str.title()
df

Unnamed: 0,First_Name,Last_Name,Email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [161]:
# Lower-case every column label through the Index's vectorized string accessor.
df.columns = df.columns.str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


Replacing underscores by spaces and vice versa in column names

In [162]:
df.columns = df.columns.str.replace('_', ' ')
df

Unnamed: 0,first name,last name,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [163]:
df.columns = df.columns.str.replace(' ', '_')
df

Unnamed: 0,first_name,last_name,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


Renaming particular columns can be done as follows

In [164]:
df.rename(columns={'first_name': 'first', 'last_name' : 'last'})
# passing a dictionary with current and new names as key-value pair

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [165]:
df # the rename was not applied to df itself

Unnamed: 0,first_name,last_name,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [166]:
# therefore we will use inplace=True
df.rename(columns={'first_name': 'first', 'last_name' : 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


### 2.Updating data in rows 

#### Single Row Operations 

Changing the data in every column of a row

In [167]:
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [169]:
df.loc[2] = ['Mummy', 'Sharma', 'mummyrai@email.com']
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Mummy,Sharma,mummyrai@email.com


Changing particular columns

In [170]:
df.loc[2, ['first', 'email']] = ['Sudha', 'sudhasharma@email.com']
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


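When we want to change a single value, we can use .loc, but we can also use .at, which is an accessor meant specifically for getting or setting one value by row and column label (and is typically faster than .loc for that purpose)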

In [172]:
df.at[0, 'first'] = 'Tony'
df

Unnamed: 0,first,last,email
0,Tony,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [173]:
filt = (df['email'] == 'ashishrai@email.com')
df.loc[filt, 'first'] = 'Ashish'
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


#### Multiple Row Operations 

In [176]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


#### 1.Apply 

Using the apply method on a Series

In [174]:
df['email'] = df['email'].apply(lambda x: x.upper())  # Using lambda function
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ASHISHRAI@EMAIL.COM
1,Anjali,Rai,ANJALIRAI@EMAIL.COM
2,Sudha,Sharma,SUDHASHARMA@EMAIL.COM


In [175]:
def email_lower(email):
    """Return `email` in lowercase; a regular named function used in place of a lambda."""
    lowered = email.lower()
    return lowered

df['email'] = df['email'].apply(email_lower)
df

Unnamed: 0,first,last,email
0,Ashish,Rai,ashishrai@email.com
1,Anjali,Rai,anjalirai@email.com
2,Sudha,Sharma,sudhasharma@email.com


In [177]:
df['email'].apply(len)

0    19
1    19
2    21
Name: email, dtype: int64

Using the apply method on a DataFrame

In [178]:
df.apply(len) # len is called once per column (a Series), so every result is the number of rows, 3

first    3
last     3
email    3
dtype: int64

In [179]:
df.apply(pd.Series.min) # minimum of each column; for strings that is the smallest value in string sort order

first                 Anjali
last                     Rai
email    anjalirai@email.com
dtype: object

In [181]:
df.apply(lambda x: x.min()) # same result as above: here x is each column (a Series) in turn

first                 Anjali
last                     Rai
email    anjalirai@email.com
dtype: object

The apply method applies a function to each element of a series when called on a series, or to each column (or to each row if axis='columns' is used) when called on a dataframe. If we want to apply a function to each element of a dataframe, then the applymap method is used. **ApplyMap only works on dataframes**

#### 2.ApplyMap 

In [184]:
df.applymap(len) # applies len to every individual cell, unlike df.apply(len), which is called once per column
# NOTE: pandas 2.1+ deprecates DataFrame.applymap in favour of DataFrame.map (same element-wise behaviour)

Unnamed: 0,first,last,email
0,6,3,19
1,6,3,19
2,5,6,21


In [185]:
# converting every element of dataframe into lowercase
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,ashish,rai,ashishrai@email.com
1,anjali,rai,anjalirai@email.com
2,sudha,sharma,sudhasharma@email.com


In [186]:
# converting every element of dataframe into title
df.applymap(str.title)

Unnamed: 0,first,last,email
0,Ashish,Rai,Ashishrai@Email.Com
1,Anjali,Rai,Anjalirai@Email.Com
2,Sudha,Sharma,Sudhasharma@Email.Com


In [188]:
df['email'].apply(str.lower)

0      ashishrai@email.com
1      anjalirai@email.com
2    sudhasharma@email.com
Name: email, dtype: object

#### 3.Map 

**Map Method only works on a series**. It can be used to substitute each value of a series with another value

In [189]:
df['first'].map({'Ashish': 'Tony', 'Anjali': 'Didi'}) # 'Sudha' has no entry in the dict, so it comes back as NaN

0    Tony
1    Didi
2     NaN
Name: first, dtype: object

The map method converts the mentioned values into the new values but also **changes any value not mentioned into NaN** (Not a Number, which pandas uses to mark a missing value). If we want to avoid this, we can use the .replace() method instead. Also, without assigning the result back (like df['first'] = df['first'].map({'Ashish': 'Tony', 'Anjali': 'Didi'})), our original dataframe is not going to be changed

In [190]:
df['first'].replace({'Ashish': 'Tony', 'Anjali': 'Didi'}) # values without an entry in the dict ('Sudha') are kept unchanged

0     Tony
1     Didi
2    Sudha
Name: first, dtype: object