In [1]:
# import libraries
import pandas as pd

In [2]:
# Read the example CSV file into a DataFrame named `data`
data = pd.read_csv(filepath_or_buffer='example_dataset.csv')

## Identifying missing values

In [3]:
# Count the missing entries in each column (isna flags them, sum tallies per column)
missing_values = data.isna().sum(axis=0)
print(missing_values)

Name      0
Age       2
Gender    1
Salary    1
dtype: int64


## Dropping missing values

In [4]:
# Keep only the rows where every column has a value.
# dropna returns a new frame, so `data` itself still holds the missing values.
cleaned_data = data.dropna(axis="index", how="any")
print(cleaned_data)

   Name   Age  Gender   Salary
0  John  28.0    Male  60000.0
3   Eve  24.0  Female  70000.0


## Imputing missing values

### Impute numerical values with mean

In [5]:
# Impute the missing numerical values with the column mean.
# The filled Series is assigned back to its column instead of calling
# fillna(..., inplace=True) on data["col"]: that chained in-place form operates
# on an intermediate Series, raises a FutureWarning on pandas 2.x, and leaves
# `data` unchanged once Copy-on-Write is active (the default from pandas 3.0).
data["Age"] = data["Age"].fillna(data["Age"].mean())
data["Salary"] = data["Salary"].fillna(data["Salary"].mean())
data

Unnamed: 0,Name,Age,Gender,Salary
0,John,28.0,Male,60000.0
1,Alice,28.0,Female,55000.0
2,Bob,32.0,Male,65000.0
3,Eve,24.0,Female,70000.0
4,Mike,28.0,,75000.0


### Impute categorical values with mode

In [6]:
# Impute missing categorical values with the mode (most frequent value).
# mode() returns a Series because several values can tie; [0] takes its first entry.
# The result is assigned back to the column rather than using the chained
# data["Gender"].fillna(..., inplace=True) form, which warns on pandas 2.x and
# stops modifying `data` once Copy-on-Write is active (the default from pandas 3.0).
data["Gender"] = data["Gender"].fillna(data["Gender"].mode()[0])
print(data)

    Name   Age  Gender   Salary
0   John  28.0    Male  60000.0
1  Alice  28.0  Female  55000.0
2    Bob  32.0    Male  65000.0
3    Eve  24.0  Female  70000.0
4   Mike  28.0  Female  75000.0


## Data Normalisation

### Min-Max Scaling

In [9]:
# import library for MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

In [10]:
# Initialize the MinMaxScaler with its default settings.
# MinMaxScaler is already imported by the cell directly above, so the
# duplicate import that used to sit here has been removed.
scaler = MinMaxScaler()

In [11]:
# Fit `scaler` on the two numeric columns, then transform those same columns
numeric_columns = data[["Age", "Salary"]]
normalized_data = scaler.fit(numeric_columns).transform(numeric_columns)
print(normalized_data)

[[0.5  0.25]
 [0.5  0.  ]
 [1.   0.5 ]
 [0.   0.75]
 [0.5  1.  ]]


### Z-score Normalisation

In [12]:
# Bring in scikit-learn's StandardScaler
from sklearn.preprocessing import StandardScaler

# Create the scaler; this rebinds the `scaler` name to a StandardScaler
scaler = StandardScaler()

# Fit on the two numeric columns, then transform those same columns.
# The result is stored in `normalized_data`, replacing any previous value.
numeric_columns = data.loc[:, ["Age", "Salary"]]
normalized_data = scaler.fit(numeric_columns).transform(numeric_columns)
print(normalized_data)

[[ 0.         -0.70710678]
 [ 0.         -1.41421356]
 [ 1.58113883  0.        ]
 [-1.58113883  0.70710678]
 [ 0.          1.41421356]]
