In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Toy sales data, written one record per row and then pivoted into the
# column -> values dict that pd.DataFrame expects.
column_names = ['State', 'Prod_Type', 'Month', 'Sales', 'Profit(%)']
records = [
    ('UP',          'a', 'Jan',  1350, 23),
    ('MP',          'b', 'Feb',  2450, 14),
    ('AP',          'd', 'Mar',  4530, 35),
    ('Maharashtra', 'a', 'Apr',  4352, 15),
    ('UP',          'c', 'May',  5420, 18),
    ('MP',          'b', 'Jun',  2094, 17.2),
    ('Tamil Nadu',  'a', 'Jul',  5609, 9.5),
    ('HP',          'd', 'Aug',  6050, -0.3),
    ('UP',          'c', 'Sept', 3020, 4.3),
    ('AP',          'c', 'Oct',  5000, 17.5),
]

# zip(*records) transposes the rows into one tuple per column.
d1 = {name: list(values) for name, values in zip(column_names, zip(*records))}

df = pd.DataFrame(d1)
df

Unnamed: 0,State,Prod_Type,Month,Sales,Profit(%)
0,UP,a,Jan,1350,23.0
1,MP,b,Feb,2450,14.0
2,AP,d,Mar,4530,35.0
3,Maharashtra,a,Apr,4352,15.0
4,UP,c,May,5420,18.0
5,MP,b,Jun,2094,17.2
6,Tamil Nadu,a,Jul,5609,9.5
7,HP,d,Aug,6050,-0.3
8,UP,c,Sept,3020,4.3
9,AP,c,Oct,5000,17.5


In [3]:
# Dimensions of the frame as (n_rows, n_columns).
df.shape

(10, 5)

In [4]:
# Number of missing values in each column.
df.isnull().sum()

State        0
Prod_Type    0
Month        0
Sales        0
Profit(%)    0
dtype: int64

In [5]:
# Column dtypes before any encoding; the object columns are the ones encoded below.
df.dtypes

State         object
Prod_Type     object
Month         object
Sales          int64
Profit(%)    float64
dtype: object

### 1) Find and Replace

In [6]:
# Frequency of each distinct Month label (checked before encoding).
df['Month'].value_counts()

May     1
Jun     1
Jan     1
Mar     1
Feb     1
Jul     1
Aug     1
Sept    1
Apr     1
Oct     1
Name: Month, dtype: int64

In [7]:
# Integer code for each month abbreviation that occurs in the data (Jan -> 0 ... Oct -> 9).
month_codes = {'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4,
               'Jun': 5, 'Jul': 6, 'Aug': 7, 'Sept': 8, 'Oct': 9}

# Encode only while the column still holds labels, so re-running this cell is a no-op
# instead of mapping the already-encoded integers to NaN.
if not pd.api.types.is_numeric_dtype(df['Month']):
    # Series.map builds the numeric column directly. replace() on an object column only
    # became int64 through implicit downcasting, which pandas 2.2 deprecates.
    # Note: a label that is missing from month_codes becomes NaN here.
    df['Month'] = df['Month'].map(month_codes)

In [8]:
# Disabled alternative to replace() (Month is already encoded at this point): hand the
# dict's bound `.get` method to apply(), which then looks every value up in the mapping.
# `.get` must be passed uncalled (no parentheses); `.get()` with no argument raises TypeError.
# df['Month'] = df['Month'].apply({'Jan':0,'Feb':1,'Mar':2,'Apr':3,'May':4,
#                                   'Jun':5,'Jul':6,'Aug':7,'Sept':8,'Oct':9}.get)

In [9]:
# Same frequency check after encoding: the index now shows the integer codes 0-9.
df['Month'].value_counts()

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: Month, dtype: int64

In [10]:
# Re-inspect dtypes: Month should now be an integer column.
df.dtypes

State         object
Prod_Type     object
Month          int64
Sales          int64
Profit(%)    float64
dtype: object

### 2) LabelEncoder

In [11]:
from sklearn.preprocessing import LabelEncoder

In [12]:
# Single encoder instance; it is fitted on the State column further down.
lb = LabelEncoder()

In [13]:
# Frequency of each state name before label encoding.
df['State'].value_counts()

UP             3
AP             2
MP             2
Maharashtra    1
Tamil Nadu     1
HP             1
Name: State, dtype: int64

In [14]:
# Overwrite the state names with integer codes. As the later table outputs show, the codes
# follow the sorted labels: AP=0, HP=1, MP=2, Maharashtra=3, Tamil Nadu=4, UP=5.
# The numbers are identifiers only; their ordering carries no meaning.
df['State'] = lb.fit_transform(df['State'])

In [15]:
# Frequencies after encoding: same counts as before, now keyed by integer code.
df['State'].value_counts()

5    3
0    2
2    2
1    1
3    1
4    1
Name: State, dtype: int64

In [16]:
# Re-inspect dtypes: State is now integer; Prod_Type is the only object column left.
df.dtypes

State          int32
Prod_Type     object
Month          int64
Sales          int64
Profit(%)    float64
dtype: object

### 3) get_dummies()

In [17]:
# Preview the frame before one-hot encoding Prod_Type.
df.head()

Unnamed: 0,State,Prod_Type,Month,Sales,Profit(%)
0,5,a,0,1350,23.0
1,2,b,1,2450,14.0
2,0,d,2,4530,35.0
3,3,a,3,4352,15.0
4,5,c,4,5420,18.0


In [18]:
# Disabled check: frequency of each product type before one-hot encoding.
# df['Prod_Type'].value_counts()

In [19]:
# One-hot encode Prod_Type: the column is replaced by one indicator column per category
# (Prod_Type_a ... Prod_Type_d) in a new frame; `df` itself is left untouched.
# dtype is pinned because the get_dummies default changed across pandas versions
# (uint8 0/1 before 2.0, bool True/False from 2.0 on), which would otherwise change
# the 0/1 table this cell displays.
df2 = pd.get_dummies(df, columns=['Prod_Type'], dtype='uint8')
df2

Unnamed: 0,State,Month,Sales,Profit(%),Prod_Type_a,Prod_Type_b,Prod_Type_c,Prod_Type_d
0,5,0,1350,23.0,1,0,0,0
1,2,1,2450,14.0,0,1,0,0
2,0,2,4530,35.0,0,0,0,1
3,3,3,4352,15.0,1,0,0,0
4,5,4,5420,18.0,0,0,1,0
5,2,5,2094,17.2,0,1,0,0
6,4,6,5609,9.5,1,0,0,0
7,1,7,6050,-0.3,0,0,0,1
8,5,8,3020,4.3,0,0,1,0
9,0,9,5000,17.5,0,0,1,0


In [20]:
# `df` is unchanged by get_dummies above: Prod_Type is still a single column here.
df.head()

Unnamed: 0,State,Prod_Type,Month,Sales,Profit(%)
0,5,a,0,1350,23.0
1,2,b,1,2450,14.0
2,0,d,2,4530,35.0
3,3,a,3,4352,15.0
4,5,c,4,5420,18.0


In [21]:
# Small product/sales table used to demonstrate dict-based encoding.
d = dict(
    Type=['Mobile', 'Laptop', 'Mobile', 'Games', 'Foot Wear'],
    Sales=[1234, 3432, 4354, 2132, 4565],
)
df1 = pd.DataFrame(data=d)
df1

Unnamed: 0,Type,Sales
0,Mobile,1234
1,Laptop,3432
2,Mobile,4354
3,Games,2132
4,Foot Wear,4565


In [22]:
# Column dtypes of the product table: Type is object (strings) until it is mapped below.
df1.dtypes

Type     object
Sales     int64
dtype: object

In [23]:
# Hand-written integer code for every product category.
type_codes = {'Mobile': 0, 'Laptop': 1, 'Games': 2, 'Foot Wear': 3}

# apply() calls the dict's bound get() once per value, i.e. a plain lookup per row.
encoded_type = df1['Type'].apply(type_codes.get)
df1['Type'] = encoded_type
df1.head()

Unnamed: 0,Type,Sales
0,0,1234
1,1,3432
2,0,4354
3,2,2132
4,3,4565


### 4) OneHotEncoder

In [24]:
from sklearn.preprocessing import OneHotEncoder

In [25]:
# Encoder with default settings; its output is converted with .toarray() where it is used below.
ohe = OneHotEncoder()

In [26]:
# Fresh copy of the product/sales table (Type still as strings) for the encoder demo.
product_sales = [
    ('Mobile', 1234),
    ('Laptop', 3432),
    ('Mobile', 4354),
    ('Games', 2132),
    ('Foot Wear', 4565),
]
d = {
    'Type': [product for product, _ in product_sales],
    "Sales": [amount for _, amount in product_sales],
}
df3 = pd.DataFrame(data=d)
df3

Unnamed: 0,Type,Sales
0,Mobile,1234
1,Laptop,3432
2,Mobile,4354
3,Games,2132
4,Foot Wear,4565


In [27]:
# Fit the encoder on the single-column frame df3[['Type']] and encode it in one step.
type_encoded = ohe.fit_transform(df3[['Type']])

# Densify with .toarray() before wrapping in a DataFrame. The columns get positional
# labels 0..3; per the displayed result they correspond to the sorted categories
# (0=Foot Wear, 1=Games, 2=Laptop, 3=Mobile).
type_dense = type_encoded.toarray()
new_df3 = pd.DataFrame(type_dense)
new_df3

Unnamed: 0,0,1,2,3
0,0.0,0.0,0.0,1.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,1.0
3,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0
