## Import the libraries and create the sample data

In [2]:
import pandas as pd

# Toy dataset, one tuple per row: (City, Education, Price).
# City is a nominal category, Education an ordinal one, Price the numeric target.
df = pd.DataFrame(
    [
        ("Mumbai", "Bachelor", 10),
        ("Delhi", "Master", 7),
        ("Mumbai", "PhD", 12),
        ("Chennai", "Bachelor", 6),
        ("Delhi", "Master", 8),
    ],
    columns=["City", "Education", "Price"],
)

df


Unnamed: 0,City,Education,Price
0,Mumbai,Bachelor,10
1,Delhi,Master,7
2,Mumbai,PhD,12
3,Chennai,Bachelor,6
4,Delhi,Master,8


## Task 1: Create a manual mapping for Education (ORDINAL)

In [3]:
# Ordinal encoding for Education: labels listed from lowest to highest level,
# numbered from 1 so the integer codes preserve that order.
education_map = {
    level: rank
    for rank, level in enumerate(["Bachelor", "Master", "PhD"], start=1)
}

In [4]:
# Replace the Education labels with their ordinal integer codes.
# Series.map() silently produces NaN for any label that is not a key of the
# mapping. That is also what happens if this cell is run a second time, since
# the column already holds integers by then. Validate before overwriting so a
# bad mapping or an accidental re-run fails loudly instead of corrupting df.
education_codes = df["Education"].map(education_map)
unmapped = df.loc[education_codes.isna() & df["Education"].notna(), "Education"]
if not unmapped.empty:
    raise ValueError(
        "Education contains values missing from education_map: "
        f"{unmapped.unique().tolist()}"
    )
df["Education"] = education_codes

In [5]:
# Display the frame to check that Education now holds the integer codes.
df

Unnamed: 0,City,Education,Price
0,Mumbai,1,10
1,Delhi,2,7
2,Mumbai,3,12
3,Chennai,1,6
4,Delhi,2,8


## Task 2: Encode City (NOMINAL)

In [6]:
# One-hot encode the nominal City column: one indicator column per city,
# with the remaining columns passed through unchanged.
# The dtype is pinned because the get_dummies default differs between pandas
# versions (uint8 before 2.0, bool from 2.0 on). With bool dummies next to the
# integer Education column, `.values` on the frame becomes an object array
# instead of a numeric matrix.
df_encoded = pd.get_dummies(df, columns=["City"], dtype="uint8")
df_encoded

Unnamed: 0,Education,Price,City_Chennai,City_Delhi,City_Mumbai
0,1,10,0,0,1
1,2,7,0,1,0
2,3,12,0,0,1
3,1,6,1,0,0
4,2,8,0,1,0


## Task 3: Create ML-ready matrices

In [12]:
# Split the encoded frame into NumPy arrays: the Price column is the target
# vector y, and every other column goes into the feature matrix X.
y = df_encoded["Price"].to_numpy()
X = df_encoded.drop(columns=["Price"]).to_numpy()
X, y

(array([[1, 0, 0, 1],
        [2, 0, 1, 0],
        [3, 0, 0, 1],
        [1, 1, 0, 0],
        [2, 0, 1, 0]], dtype=int64),
 array([10,  7, 12,  6,  8], dtype=int64))

In [13]:
# Check dimensions: X is expected to be 2-D (rows, features) and y 1-D (rows,).
X.shape, y.shape

((5, 4), (5,))

In [14]:
# Split features and target while keeping the pandas containers
# (a DataFrame for X, a Series for y), so column names and the index are kept.
y = df_encoded["Price"]
X = df_encoded.drop(columns=["Price"])

In [15]:
# Display the features, the target, and both shapes in a single output tuple.
X, y, X.shape, y.shape

(   Education  City_Chennai  City_Delhi  City_Mumbai
 0          1             0           0            1
 1          2             0           1            0
 2          3             0           0            1
 3          1             1           0            0
 4          2             0           1            0,
 0    10
 1     7
 2    12
 3     6
 4     8
 Name: Price, dtype: int64,
 (5, 4),
 (5,))

In [16]:
# Preview the first rows of the encoded frame (head() shows at most 5 by default).
df_encoded.head()

Unnamed: 0,Education,Price,City_Chennai,City_Delhi,City_Mumbai
0,1,10,0,0,1
1,2,7,0,1,0
2,3,12,0,0,1
3,1,6,1,0,0
4,2,8,0,1,0


## Task 4: Mini coding task — encode JobLevel (ORDINAL) and City (NOMINAL)

In [40]:
# Toy dataset for the ordinal-encoding exercise, one tuple per row:
# (JobLevel, Salary).
df = pd.DataFrame(
    [
        ("Junior", 5),
        ("Senior", 12),
        ("Mid", 8),
        ("Senior", 11),
        ("Junior", 6),
    ],
    columns=["JobLevel", "Salary"],
)
df

Unnamed: 0,JobLevel,Salary
0,Junior,5
1,Senior,12
2,Mid,8
3,Senior,11
4,Junior,6


In [41]:
# Ordinal encoding for JobLevel: labels listed from most junior to most
# senior, paired with the codes 1..3 so the integers preserve that order.
job_level_map = dict(zip(("Junior", "Mid", "Senior"), range(1, 4)))

In [42]:
# Replace the JobLevel labels with their ordinal integer codes.
# Series.map() silently produces NaN for any label that is not a key of the
# mapping. That is also what happens if this cell is run a second time, since
# the column already holds integers by then. Validate before overwriting so a
# bad mapping or an accidental re-run fails loudly instead of corrupting df.
job_level_codes = df["JobLevel"].map(job_level_map)
unmapped = df.loc[job_level_codes.isna() & df["JobLevel"].notna(), "JobLevel"]
if not unmapped.empty:
    raise ValueError(
        "JobLevel contains values missing from job_level_map: "
        f"{unmapped.unique().tolist()}"
    )
df["JobLevel"] = job_level_codes

In [43]:
# Display the frame to check that JobLevel now holds the integer codes.
df

Unnamed: 0,JobLevel,Salary
0,1,5
1,3,12
2,2,8
3,3,11
4,1,6


In [44]:
# Target vector: the Salary column as a 1-D NumPy array.
y = df["Salary"].to_numpy()
# Feature matrix: selecting with a list of columns yields a DataFrame, so X
# stays two-dimensional (n_rows, 1) even though it has a single feature.
X = df.loc[:, ["JobLevel"]].to_numpy()

In [45]:
# Toy dataset for the nominal-encoding exercise, one tuple per row:
# (City, Purchased), where Purchased is a 0/1 flag.
df = pd.DataFrame(
    [
        ("Mumbai", 1),
        ("Delhi", 0),
        ("Mumbai", 1),
        ("Pune", 0),
    ],
    columns=["City", "Purchased"],
)
df

Unnamed: 0,City,Purchased
0,Mumbai,1
1,Delhi,0
2,Mumbai,1
3,Pune,0


In [33]:
# One-hot encode the nominal City column: one indicator column per city,
# with Purchased passed through unchanged.
# The dtype is pinned because the get_dummies default differs between pandas
# versions (uint8 before 2.0, bool from 2.0 on). Without it, newer pandas
# yields True/False indicators rather than a 0/1 numeric feature matrix.
df_encoded = pd.get_dummies(df, columns=["City"], dtype="uint8")
df_encoded

Unnamed: 0,Purchased,City_Delhi,City_Mumbai,City_Pune
0,1,0,1,0
1,0,1,0,0
2,1,0,1,0
3,0,0,0,1


In [38]:
# Split the encoded frame into NumPy arrays: the Purchased column is the
# target vector y, and the remaining indicator columns form the matrix X.
y = df_encoded["Purchased"].to_numpy()
X = df_encoded.drop(columns=["Purchased"]).to_numpy()

In [36]:
# Display the features, the target, and both shapes in a single output tuple.
X, y, X.shape, y.shape

(array([[0, 1, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]], dtype=uint8),
 array([1, 0, 1, 0], dtype=int64),
 (4, 3),
 (4,))