# Iris Flower Dataset

## Import numpy, pandas, and matplotlib

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Read CSV data

In [2]:
# Load the Iris measurements from a CSV file; the path is relative, so it is
# resolved against the notebook's current working directory.
irisDf = pd.read_csv("./data/iris.csv")

In [3]:
# Preview the first rows to confirm the expected columns were loaded.
irisDf.head()

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Get data from dataframe

In [4]:
# Pull the sepal-length column out as its own Series and preview it.
sepalLength = irisDf['sepal length in cm']
sepalLength.head()

0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: sepal length in cm, dtype: float64

In [5]:
# Pull the sepal-width column out as its own Series and preview it.
sepalWidth = irisDf['sepal width in cm']
sepalWidth.head()

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
Name: sepal width in cm, dtype: float64

In [6]:
# Pull the petal-length column out as its own Series and preview it.
petalLength = irisDf['petal length in cm']
petalLength.head()

0    1.4
1    1.4
2    1.3
3    1.5
4    1.4
Name: petal length in cm, dtype: float64

In [7]:
# Pull the petal-width column out as its own Series and preview it.
petalWidth = irisDf['petal width in cm']
petalWidth.head()

0    0.2
1    0.2
2    0.2
3    0.2
4    0.2
Name: petal width in cm, dtype: float64

##  Get max and min data

In [8]:
# Largest sepal length in the column; the bare name on the last line displays it.
maxSepalLength =sepalLength.max()
maxSepalLength

7.9

In [9]:
# Smallest sepal length in the column; the bare name on the last line displays it.
minSepalLength =sepalLength.min()
minSepalLength

4.3

In [10]:
# Largest sepal width in the column; the bare name on the last line displays it.
maxSepalWidth = sepalWidth.max()
maxSepalWidth

4.4

In [11]:
# Smallest sepal width in the column; the bare name on the last line displays it.
minSepalWidth = sepalWidth.min()
minSepalWidth

2.0

In [12]:
# Largest petal length in the column; the bare name on the last line displays it.
maxPetalLength = petalLength.max()
maxPetalLength

6.9

In [13]:
# Smallest petal length in the column; the bare name on the last line displays it.
minPetalLength = petalLength.min()
minPetalLength

1.0

In [14]:
# Largest petal width in the column; the bare name on the last line displays it.
maxPetalWidth = petalWidth.max()
maxPetalWidth

2.5

In [15]:
# Smallest petal width in the column; the bare name on the last line displays it.
minPetalWidth = petalWidth.min()
minPetalWidth

0.1

In [16]:
# Gather the eight extremes into one array, one (max, min) pair per row of the
# literal: sepal length, sepal width, petal length, petal width.
minmaxData = np.array((
    maxSepalLength, minSepalLength,
    maxSepalWidth, minSepalWidth,
    maxPetalLength, minPetalLength,
    maxPetalWidth, minPetalWidth,
))
print(minmaxData)

[7.9 4.3 4.4 2.  6.9 1.  2.5 0.1]


In [17]:
# Show the same array through its repr (the previous cell used print()).
minmaxData

array([7.9, 4.3, 4.4, 2. , 6.9, 1. , 2.5, 0.1])

# Normalization

## Minmax Normalization

In [18]:
def minmax(data, new_min=0, new_max=2):
    """Linearly rescale ``data`` so its minimum maps to ``new_min`` and its maximum to ``new_max``.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to rescale. The input is not modified.
    new_min, new_max : float, optional
        Bounds of the target range. The defaults (0 and 2) are the values
        this function previously hard-coded, so existing calls are unchanged.

    Returns
    -------
    Same container type as ``data``
        The rescaled values as floats; a Series keeps its index and name.

    Notes
    -----
    If every value is identical the source range has zero width, so all
    values are mapped to ``new_min`` instead of dividing by zero.
    """
    old_min = data.min()
    old_max = data.max()
    old_span = old_max - old_min

    if old_span == 0:
        # Constant input: (x - min) is already all zeros, so shifting by
        # new_min puts every value on the lower bound without producing NaN.
        return (data - old_min) + float(new_min)

    # Whole-array arithmetic instead of a `data[i]` loop: on a Series,
    # `data[i]` looks up index *labels*, which fails (or silently picks the
    # wrong row) as soon as the index is not exactly 0..n-1.
    return (data - old_min) / old_span * (new_max - new_min) + new_min

In [19]:
# Side-by-side table: raw sepal length next to its min-max rescaled value.
dataSepalLengthMinMax = pd.DataFrame(
    {
        'Sepal Length': sepalLength,
        'Normalisasi (Minmax)': minmax(sepalLength),
    }
)
dataSepalLengthMinMax

Unnamed: 0,Sepal Length,Normalisasi (Minmax)
0,5.1,0.444444
1,4.9,0.333333
2,4.7,0.222222
3,4.6,0.166667
4,5.0,0.388889
...,...,...
145,6.7,1.333333
146,6.3,1.111111
147,6.5,1.222222
148,6.2,1.055556


## Binary Normalization 

In [20]:
# Median of the sepal lengths, shown for reference. binary() below derives the
# median from the data it is given rather than reading this variable.
medianSepalLength = sepalLength.median()
medianSepalLength

5.8

In [21]:
# NOTE(review): this binds a second name to the same Series (no copy is made),
# and no later cell in this section reads it — confirm whether it is still needed.
sepalLengthData = sepalLength

In [22]:
def binary(data, threshold=None):
    """Binarize ``data``: 1 where a value is strictly greater than the threshold, else 0.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to binarize. The input is not modified.
    threshold : float, optional
        Cut-off value. When omitted, the median of ``data`` is used, which is
        what this function always did before the parameter existed.

    Returns
    -------
    Same container type as ``data``
        Ones and zeros stored in the dtype of ``data`` (float input gives
        1.0/0.0); a Series keeps its index and name. Values equal to the
        threshold map to 0.
    """
    if threshold is None:
        # Computed once up front; the previous loop re-evaluated the median
        # for every element.
        threshold = np.median(data)

    # Vectorised comparison instead of `data[i]`, which is a label lookup on a
    # Series and breaks when the index is not 0..n-1.
    return (data > threshold).astype(data.dtype)

In [23]:
# Side-by-side table: raw sepal length next to its median-thresholded 0/1 flag.
dataSepalLengthBinerisasi = pd.DataFrame(
    {
        'Sepal Length': sepalLength,
        'Normalisasi (Binerisasi)': binary(sepalLength),
    }
)
dataSepalLengthBinerisasi

Unnamed: 0,Sepal Length,Normalisasi (Binerisasi)
0,5.1,0.0
1,4.9,0.0
2,4.7,0.0
3,4.6,0.0
4,5.0,0.0
...,...,...
145,6.7,1.0
146,6.3,1.0
147,6.5,1.0
148,6.2,1.0


## Decimal Point Normalization 

In [24]:
def decimal(data, d=1):
    """Decimal-scaling normalization: divide every value by ``10 ** d``.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to scale. The input is not modified.
    d : int or None, optional
        Power of ten to divide by. The default of 1 is the value this function
        previously hard-coded. Pass ``None`` to infer the smallest
        non-negative ``d`` for which every scaled magnitude is below 1.

    Returns
    -------
    Same container type as ``data``
        The scaled values; a Series keeps its index and name.

    Raises
    ------
    ValueError
        If ``d`` is ``None`` and the largest magnitude in ``data`` is not a
        finite number (empty, all-NaN, or infinite input), because no exponent
        can be inferred from it.
    """
    if d is None:
        peak = abs(data).max()
        if not np.isfinite(peak):
            raise ValueError("cannot infer d: data has no finite maximum magnitude")
        # Integer search rather than log10 so exact powers of ten (10, 100, ...)
        # are not affected by floating-point rounding of the logarithm.
        d = 0
        while peak >= 10 ** d:
            d += 1

    # Whole-array division instead of `data[i]`, which is a label lookup on a
    # Series and breaks when the index is not 0..n-1.
    return data / 10 ** d

In [25]:
# Side-by-side table: raw sepal length next to its decimal-scaled value.
# Stored under its own name so the binarization table built earlier
# (dataSepalLengthBinerisasi) is not overwritten by this cell.
dataSepalLengthDecimal = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Decimal)': decimal(sepalLength)})
dataSepalLengthDecimal

Unnamed: 0,Sepal Length,Normalisasi (Decimal)
0,5.1,0.51
1,4.9,0.49
2,4.7,0.47
3,4.6,0.46
4,5.0,0.50
...,...,...
145,6.7,0.67
146,6.3,0.63
147,6.5,0.65
148,6.2,0.62


## Z-Score Normalization

In [26]:
import statistics    

In [27]:
def zscore(data):
    """Standardize ``data`` to zero mean and unit sample standard deviation.

    Parameters
    ----------
    data : pandas.Series or numpy.ndarray
        Numeric values to standardize. The input is not modified.

    Returns
    -------
    Same container type as ``data``
        ``(x - mean) / stdev`` for every value; a Series keeps its index and
        name. If the standard deviation is zero (all values identical), every
        z-score is returned as 0 instead of dividing by zero.

    Raises
    ------
    statistics.StatisticsError
        Propagated from ``statistics.stdev`` when ``data`` has fewer than two
        values.
    """
    centered = data - data.mean()
    # Sample standard deviation (n - 1 denominator), as computed by the
    # standard-library helper this notebook imports.
    deviation = statistics.stdev(data)

    if deviation == 0:
        # Constant input: subtracting the array from itself gives clean zeros
        # even if the floating-point mean is off by a rounding error.
        return centered - centered

    # Whole-array division instead of `data[i]`, which is a label lookup on a
    # Series and breaks when the index is not 0..n-1.
    return centered / deviation

In [28]:
# Side-by-side table: raw sepal length next to its z-score. The column label
# now names the z-score; it previously repeated the "Decimal" label from the
# decimal-scaling table.
dataSepalLengthZScore = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Z-Score)': zscore(sepalLength)})
dataSepalLengthZScore

Unnamed: 0,Sepal Length,Normalisasi (Decimal)
0,5.1,-0.897674
1,4.9,-1.139200
2,4.7,-1.380727
3,4.6,-1.501490
4,5.0,-1.018437
...,...,...
145,6.7,1.034539
146,6.3,0.551486
147,6.5,0.793012
148,6.2,0.430722
