# **Iris Dataset**

# **Load Dataset and Inspect**

In [21]:
import pandas as pd

# Load the Iris data from 'Iris.csv'; the path is relative, so the file must
# sit in the notebook's working directory.
df = pd.read_csv('Iris.csv')

In [22]:
# Preview the first rows (head() defaults to 5) to sanity-check the parsed columns.
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [2]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(150, 6)

# **Handling Missing and Duplicated Values**

In [4]:
# Number of missing values in each column.
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [20]:
# Count fully duplicated rows *before* dropping them. A cell only displays its
# last expression, so the count is stored and shown at the end of the cell
# instead of being computed and silently discarded.
# NOTE(review): the comparison includes the `Id` column; if `Id` is unique per
# row, no row can ever be flagged as a duplicate -- confirm whether
# `subset=` should exclude it.
n_duplicates = df.duplicated().sum()
df = df.drop_duplicates()
n_duplicates

# **Creating a New Column in the DataFrame**

In [25]:
# Derive PetalArea as the element-wise product of petal length and petal width,
# then display the updated frame.
df['PetalArea'] = df['PetalLengthCm'].mul(df['PetalWidthCm'])
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,PetalArea
0,1,5.1,3.5,1.4,0.2,Iris-setosa,0.28
1,2,4.9,3.0,1.4,0.2,Iris-setosa,0.28
2,3,4.7,3.2,1.3,0.2,Iris-setosa,0.26
3,4,4.6,3.1,1.5,0.2,Iris-setosa,0.30
4,5,5.0,3.6,1.4,0.2,Iris-setosa,0.28
...,...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica,11.96
146,147,6.3,2.5,5.0,1.9,Iris-virginica,9.50
147,148,6.5,3.0,5.2,2.0,Iris-virginica,10.40
148,149,6.2,3.4,5.4,2.3,Iris-virginica,12.42


# **Converting Categorical Column to Numerical Column**

In [9]:
# List the distinct labels present in the Species column.
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [12]:
# Integer code assigned to each species label.
mapping = {'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2}
# Look labels up with a fallback instead of passing the dict straight to
# Series.map: map(dict) turns every value that is not a key into NaN, so
# running this cell a second time (when the column already holds 0/1/2) would
# wipe out the whole column. With mapping.get(label, label), already-encoded
# values pass through unchanged and the cell is safe to re-run.
df['Species'] = df['Species'].map(lambda label: mapping.get(label, label))
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species,PetalArea
0,1,5.1,3.5,1.4,0.2,0,0.28
1,2,4.9,3.0,1.4,0.2,0,0.28
2,3,4.7,3.2,1.3,0.2,0,0.26
3,4,4.6,3.1,1.5,0.2,0,0.30
4,5,5.0,3.6,1.4,0.2,0,0.28
...,...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,2,11.96
146,147,6.3,2.5,5.0,1.9,2,9.50
147,148,6.5,3.0,5.2,2.0,2,10.40
148,149,6.2,3.4,5.4,2.3,2,12.42


# **Performing Aggregation**

In [14]:
# Mean of every measurement column (including the derived PetalArea) per species.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'PetalArea']
df.groupby('Species')[feature_columns].mean()

Unnamed: 0_level_0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,PetalArea
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,5.006,3.418,1.464,0.244,0.3628
1,5.936,2.77,4.26,1.326,5.7204
2,6.588,2.974,5.552,2.026,11.2962


# **Reshaping DataFrame**

In [26]:
# Reshape from wide to long: one row per (Id, Measurement) pair, with the
# corresponding reading stored in the Value column.
df.melt(
    id_vars=['Id'],
    value_vars=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'PetalArea'],
    var_name='Measurement',
    value_name='Value',
)

Unnamed: 0,Id,Measurement,Value
0,1,SepalLengthCm,5.10
1,2,SepalLengthCm,4.90
2,3,SepalLengthCm,4.70
3,4,SepalLengthCm,4.60
4,5,SepalLengthCm,5.00
...,...,...,...
745,146,PetalArea,11.96
746,147,PetalArea,9.50
747,148,PetalArea,10.40
748,149,PetalArea,12.42
