In [None]:
# Topics covered in this notebook:
# (1) Label Encoder
# (2) One Hot Encoder (using pd.get_dummies)
# (3) Concatenate
# (4) Append
# (5) Converting data types (astype, pd.to_numeric)

In [6]:
# Importing important libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [7]:
# Read the mall-customers CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer='Mall_Customers_BP.csv')
df.head(n=5)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [8]:
# Count the missing values in each column (isna is the name isnull aliases)
df.isna().sum()

CustomerID                0
Genre                     0
Age                       0
Annual Income (k$)        0
Spending Score (1-100)    0
dtype: int64

In [9]:
# Label-encode the 'Genre' column: learn the distinct labels first, then
# overwrite the column with their integer codes and preview the result
encoder = LabelEncoder()
encoder.fit(df['Genre'])
df['Genre'] = encoder.transform(df['Genre'])
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,1,19,15,39
1,2,1,21,15,81
2,3,0,20,16,6
3,4,0,23,16,77
4,5,0,31,17,40


In [11]:
# Re-read the CSV so that 'Genre' holds its original string labels again
# (the label-encoding cell above overwrote that column with integer codes)
df = pd.read_csv(filepath_or_buffer='Mall_Customers_BP.csv')
df.head(n=5)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [17]:
# One-hot encode 'Genre' with pd.get_dummies: the result has one indicator
# column per distinct label found in the column
genre_column = df['Genre']
dummy_gender = pd.get_dummies(genre_column)
print(dummy_gender)

     Female  Male
0         0     1
1         0     1
2         1     0
3         1     0
4         1     0
..      ...   ...
195       1     0
196       1     0
197       0     1
198       0     1
199       0     1

[200 rows x 2 columns]


In [18]:
# Attach the indicator columns to the customer frame by matching rows on
# their index labels (index-on-index merge, default inner join)
df = df.merge(dummy_gender, left_index=True, right_index=True)
print(df)

     CustomerID   Genre  Age  Annual Income (k$)  Spending Score (1-100)  \
0             1    Male   19                  15                      39   
1             2    Male   21                  15                      81   
2             3  Female   20                  16                       6   
3             4  Female   23                  16                      77   
4             5  Female   31                  17                      40   
..          ...     ...  ...                 ...                     ...   
195         196  Female   35                 120                      79   
196         197  Female   45                 126                      28   
197         198    Male   32                 126                      74   
198         199    Male   32                 137                      18   
199         200    Male   30                 137                      83   

     Female  Male  
0         0     1  
1         0     1  
2         1     0  
3      

In [19]:
# Two small example tables with identical columns, held as column -> values dicts
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}
data2 = {
    'Name': ['Rahul', 'Ayushi', 'Sharvari', 'Dhiraj'],
    'Age': [22, 23, 24, 25],
    'Address': ['Mumbai', 'Delhi', 'Wardha', 'Vizag'],
    'Qualification': ['BE', 'BA', 'BCOM', 'BCA'],
}

# Build the dataframes; the index labels do not overlap (0-3 versus 4-7)
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[4, 5, 6, 7])

# Show both frames, separated by a blank line
print(df, df1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

        Name  Age Address Qualification
4     Rahul   22  Mumbai            BE
5    Ayushi   23   Delhi            BA
6  Sharvari   24  Wardha          BCOM
7    Dhiraj   25   Vizag           BCA


In [20]:
# Stack the two dataframes on top of each other (row-wise, axis 0);
# each row keeps the index label it had in its source frame
frames = [df, df1]
res1 = pd.concat(objs=frames, axis=0)
res1

Unnamed: 0,Name,Age,Address,Qualification
0,Pranav,27,Nagpur,ME
1,Sai,28,Kanpur,MBA
2,Niraj,29,Satara,MSC
3,Vijay,30,Pune,MCA
4,Rahul,22,Mumbai,BE
5,Ayushi,23,Delhi,BA
6,Sharvari,24,Wardha,BCOM
7,Dhiraj,25,Vizag,BCA


In [None]:
# Exploring the axis and join arguments of pd.concat

In [21]:
# Same two example tables as before, held as column -> values dicts
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}
data2 = {
    'Name': ['Rahul', 'Ayushi', 'Sharvari', 'Dhiraj'],
    'Age': [22, 23, 24, 25],
    'Address': ['Mumbai', 'Delhi', 'Wardha', 'Vizag'],
    'Qualification': ['BE', 'BA', 'BCOM', 'BCA'],
}

# Build the dataframes; df1 shares index labels 2 and 3 with df,
# while labels 6 and 7 exist only in df1
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[2, 3, 6, 7])

# Show both frames, separated by a blank line
print(df, df1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

        Name  Age Address Qualification
2     Rahul   22  Mumbai            BE
3    Ayushi   23   Delhi            BA
6  Sharvari   24  Wardha          BCOM
7    Dhiraj   25   Vizag           BCA


In [22]:
# Place the frames side by side (column-wise) and keep only the index labels
# that appear in both of them (inner join on the index)
res2 = pd.concat(objs=[df, df1], axis='columns', join='inner')
res2

Unnamed: 0,Name,Age,Address,Qualification,Name.1,Age.1,Address.1,Qualification.1
2,Niraj,29,Satara,MSC,Rahul,22,Mumbai,BE
3,Vijay,30,Pune,MCA,Ayushi,23,Delhi,BA


In [23]:
# Column-wise concatenation with the default outer join and sort=False:
# every index label from either frame is kept, and cells with no matching
# row come out as NaN (see the output below)
res2 = pd.concat(objs=[df, df1], axis='columns', sort=False)
res2

Unnamed: 0,Name,Age,Address,Qualification,Name.1,Age.1,Address.1,Qualification.1
0,Pranav,27.0,Nagpur,ME,,,,
1,Sai,28.0,Kanpur,MBA,,,,
2,Niraj,29.0,Satara,MSC,Rahul,22.0,Mumbai,BE
3,Vijay,30.0,Pune,MCA,Ayushi,23.0,Delhi,BA
6,,,,,Sharvari,24.0,Wardha,BCOM
7,,,,,Dhiraj,25.0,Vizag,BCA


In [None]:
# append() function

In [25]:
# Example tables with identical columns, held as column -> values dicts
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}
data2 = {
    'Name': ['Rahul', 'Ayushi', 'Sharvari', 'Dhiraj'],
    'Age': [22, 23, 24, 25],
    'Address': ['Mumbai', 'Delhi', 'Wardha', 'Vizag'],
    'Qualification': ['BE', 'BA', 'BCOM', 'BCA'],
}

# Build the dataframes with non-overlapping index labels (0-3 and 4-7)
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[4, 5, 6, 7])

# Show both frames, separated by a blank line
print(df, df1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

        Name  Age Address Qualification
4     Rahul   22  Mumbai            BE
5    Ayushi   23   Delhi            BA
6  Sharvari   24  Wardha          BCOM
7    Dhiraj   25   Vizag           BCA


In [26]:
# Appending the rows of df1 below the rows of df.
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so `df.append(df1)` raises AttributeError on current pandas. pd.concat is
# the documented replacement: for two frames it gives the same row-wise
# stacking, and each row keeps the index label from its source frame.
res = pd.concat([df, df1])
res

Unnamed: 0,Name,Age,Address,Qualification
0,Pranav,27,Nagpur,ME
1,Sai,28,Kanpur,MBA
2,Niraj,29,Satara,MSC
3,Vijay,30,Pune,MCA
4,Rahul,22,Mumbai,BE
5,Ayushi,23,Delhi,BA
6,Sharvari,24,Wardha,BCOM
7,Dhiraj,25,Vizag,BCA


In [27]:
# Example tables with identical columns, held as column -> values dicts
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}
data2 = {
    'Name': ['Rahul', 'Ayushi', 'Sharvari', 'Dhiraj'],
    'Age': [22, 23, 24, 25],
    'Address': ['Mumbai', 'Delhi', 'Wardha', 'Vizag'],
    'Qualification': ['BE', 'BA', 'BCOM', 'BCA'],
}

# Build the dataframes; index labels 2 and 3 occur in both frames
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[2, 3, 6, 7])

# Show both frames, separated by a blank line
print(df, df1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

        Name  Age Address Qualification
2     Rahul   22  Mumbai            BE
3    Ayushi   23   Delhi            BA
6  Sharvari   24  Wardha          BCOM
7    Dhiraj   25   Vizag           BCA


In [28]:
# ignore_index=True throws away the source index labels (which overlap here)
# and numbers the stacked rows 0..n-1 instead
frames_to_stack = [df, df1]
res = pd.concat(frames_to_stack, ignore_index=True)
res

Unnamed: 0,Name,Age,Address,Qualification
0,Pranav,27,Nagpur,ME
1,Sai,28,Kanpur,MBA
2,Niraj,29,Satara,MSC
3,Vijay,30,Pune,MCA
4,Rahul,22,Mumbai,BE
5,Ayushi,23,Delhi,BA
6,Sharvari,24,Wardha,BCOM
7,Dhiraj,25,Vizag,BCA


In [29]:
# Example tables with identical columns, held as column -> values dicts
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}
data2 = {
    'Name': ['Rahul', 'Ayushi', 'Sharvari', 'Dhiraj'],
    'Age': [22, 23, 24, 25],
    'Address': ['Mumbai', 'Delhi', 'Wardha', 'Vizag'],
    'Qualification': ['BE', 'BA', 'BCOM', 'BCA'],
}

# Build the dataframes with non-overlapping index labels (0-3 and 4-7)
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data=data2, index=[4, 5, 6, 7])

# Show both frames, separated by a blank line
print(df, df1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

        Name  Age Address Qualification
4     Rahul   22  Mumbai            BE
5    Ayushi   23   Delhi            BA
6  Sharvari   24  Wardha          BCOM
7    Dhiraj   25   Vizag           BCA


In [30]:
# Tag each source frame with a key while stacking: the result's row index
# gains an outer level ('x' for df, 'y' for df1) above the original labels
frames = [df, df1]
res = pd.concat(objs=frames, keys=['x', 'y'])
res

Unnamed: 0,Unnamed: 1,Name,Age,Address,Qualification
x,0,Pranav,27,Nagpur,ME
x,1,Sai,28,Kanpur,MBA
x,2,Niraj,29,Satara,MSC
x,3,Vijay,30,Pune,MCA
y,4,Rahul,22,Mumbai,BE
y,5,Ayushi,23,Delhi,BA
y,6,Sharvari,24,Wardha,BCOM
y,7,Dhiraj,25,Vizag,BCA


In [31]:
# Example table held as a column -> values dict
data1 = {
    'Name': ['Pranav', 'Sai', 'Niraj', 'Vijay'],
    'Age': [27, 28, 29, 30],
    'Address': ['Nagpur', 'Kanpur', 'Satara', 'Pune'],
    'Qualification': ['ME', 'MBA', 'MSC', 'MCA'],
}

# Build the dataframe with explicit index labels 0-3
df = pd.DataFrame(data=data1, index=[0, 1, 2, 3])

# A named Series with the default 0..3 index; its name becomes the column
# header when it is later concatenated onto df
s1 = pd.Series(data=[1000, 2000, 3000, 4000], name='Salary')
print(df, s1, sep=' \n\n ')

     Name  Age Address Qualification
0  Pranav   27  Nagpur            ME
1     Sai   28  Kanpur           MBA
2   Niraj   29  Satara           MSC
3   Vijay   30    Pune           MCA 

 0    1000
1    2000
2    3000
3    4000
Name: Salary, dtype: int64


In [32]:
# Add the Salary series to df as an extra column (column-wise concat,
# rows matched on index label)
res = pd.concat(objs=[df, s1], axis='columns')
res

Unnamed: 0,Name,Age,Address,Qualification,Salary
0,Pranav,27,Nagpur,ME,1000
1,Sai,28,Kanpur,MBA,2000
2,Niraj,29,Satara,MSC,3000
3,Vijay,30,Pune,MCA,4000


In [33]:
# Show the dtype of every column in res; the bare last expression is what
# the cell renders as its output
res.dtypes

Name             object
Age               int64
Address          object
Qualification    object
Salary            int64
dtype: object

In [36]:
# Shrink 'Age' from int64 to int8 (the ages in this frame, 27-30, fit in
# int8), then list the dtypes to confirm the change
res['Age'] = res['Age'].astype('int8')
res.dtypes

Name             object
Age                int8
Address          object
Qualification    object
Salary            int64
dtype: object

In [39]:
# Convert 'Salary' from int64 to float64, then list the dtypes to confirm
res['Salary'] = res['Salary'].astype('float64')
res.dtypes

Name              object
Age                 int8
Address           object
Qualification     object
Salary           float64
dtype: object

In [40]:
# Rebuild res from the untouched df and s1, so the next conversion example
# starts again from the original int64 columns
res = pd.concat(objs=[df, s1], axis='columns')
res

Unnamed: 0,Name,Age,Address,Qualification,Salary
0,Pranav,27,Nagpur,ME,1000
1,Sai,28,Kanpur,MBA,2000
2,Niraj,29,Satara,MSC,3000
3,Vijay,30,Pune,MCA,4000


In [41]:
# Show the column dtypes of the rebuilt res (Age and Salary are int64 again,
# as the output below shows)
res.dtypes

Name             object
Age               int64
Address          object
Qualification    object
Salary            int64
dtype: object

In [42]:
# Convert several columns to float in one step by selecting them with a
# list of column names, then list the dtypes to confirm
numeric_cols = ['Age', 'Salary']
res[numeric_cols] = res[numeric_cols].astype(float)
res.dtypes

Name              object
Age              float64
Address           object
Qualification     object
Salary           float64
dtype: object

In [43]:
# Rebuild res from the untouched df and s1, so the next conversion example
# starts again from the original int64 columns
res = pd.concat(objs=[df, s1], axis='columns')
res

Unnamed: 0,Name,Age,Address,Qualification,Salary
0,Pranav,27,Nagpur,ME,1000
1,Sai,28,Kanpur,MBA,2000
2,Niraj,29,Satara,MSC,3000
3,Vijay,30,Pune,MCA,4000


In [44]:
# Show the column dtypes of the rebuilt res (Age and Salary are int64 again,
# as the output below shows)
res.dtypes

Name             object
Age               int64
Address          object
Qualification    object
Salary            int64
dtype: object

In [45]:
# pd.to_numeric with downcast='float' converts 'Age' to the smallest float
# dtype that can hold its values (float32 here, per the output below)
age_as_float = pd.to_numeric(res['Age'], downcast='float')
res['Age'] = age_as_float
res.dtypes

Name              object
Age              float32
Address           object
Qualification     object
Salary             int64
dtype: object

In [46]:
# Rebuild res once more from the untouched df and s1, restoring the
# original int64 columns
res = pd.concat(objs=[df, s1], axis='columns')
res

Unnamed: 0,Name,Age,Address,Qualification,Salary
0,Pranav,27,Nagpur,ME,1000
1,Sai,28,Kanpur,MBA,2000
2,Niraj,29,Satara,MSC,3000
3,Vijay,30,Pune,MCA,4000


In [47]:
# Show the column dtypes of the rebuilt res (Age and Salary are int64 again,
# as the output below shows)
res.dtypes

Name             object
Age               int64
Address          object
Qualification    object
Salary            int64
dtype: object