# <h4> Tasks :
            - Check for missing values and handle them by either filling with the mean or dropping rows
            - Normalize the numerical columns so that each feature falls within the range [0, 1].
            - Add a new column called sepal_to_petal_ratio that calculates the ratio of sepal length to petal length.

In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [32]:
df = pd.read_csv('./dataset/iris.csv')
df.drop('Unnamed: 0', axis=1, inplace=True)
df.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    float64
dtypes: float64(5)
memory usage: 6.0 KB


In [34]:
df.isnull().sum()


sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64

In [35]:
X = df.drop('target', axis=1)
y = df.target

In [36]:
# Normalize the numerical columns
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

In [39]:
X.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [40]:
X_normalized = pd.DataFrame(X_normalized, columns = X.columns)
X_normalized.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.222222,0.625,0.067797,0.041667
1,0.166667,0.416667,0.067797,0.041667
2,0.111111,0.5,0.050847,0.041667
3,0.083333,0.458333,0.084746,0.041667
4,0.194444,0.666667,0.067797,0.041667


In [55]:
dff = pd.merge(X_normalized, y,left_index=True, right_index=True)

In [58]:
dff['sepal_to_petal_ratio'] = dff['sepal length (cm)'] / dff['petal length (cm)']
dff

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target,sepal_to_petal_ratio
0,0.222222,0.625000,0.067797,0.041667,0.0,3.277778
1,0.166667,0.416667,0.067797,0.041667,0.0,2.458333
2,0.111111,0.500000,0.050847,0.041667,0.0,2.185185
3,0.083333,0.458333,0.084746,0.041667,0.0,0.983333
4,0.194444,0.666667,0.067797,0.041667,0.0,2.868056
...,...,...,...,...,...,...
145,0.666667,0.416667,0.711864,0.916667,2.0,0.936508
146,0.555556,0.208333,0.677966,0.750000,2.0,0.819444
147,0.611111,0.416667,0.711864,0.791667,2.0,0.858466
148,0.527778,0.583333,0.745763,0.916667,2.0,0.707702


In [59]:
dff.to_csv('./dataset/preprocessed_iris.csv')