# Iris Flower Dataset

## Load in numpy pandas matplotlib

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Read csv data

In [2]:
irisDf = pd.read_csv("./data/iris.csv")

In [3]:
irisDf.head()

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Get data from dataframe

In [4]:
sepalLength = irisDf['sepal length in cm']
sepalLength.head()

0    5.1
1    4.9
2    4.7
3    4.6
4    5.0
Name: sepal length in cm, dtype: float64

In [5]:
sepalWidth = irisDf['sepal width in cm']
sepalWidth.head()

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
Name: sepal width in cm, dtype: float64

In [6]:
petalLength = irisDf['petal length in cm']
petalLength.head()

0    1.4
1    1.4
2    1.3
3    1.5
4    1.4
Name: petal length in cm, dtype: float64

In [7]:
petalWidth = irisDf['petal width in cm']
petalWidth.head()

0    0.2
1    0.2
2    0.2
3    0.2
4    0.2
Name: petal width in cm, dtype: float64

##  Get max and min data

In [8]:
maxSepalLength =sepalLength.max()
maxSepalLength

7.9

In [9]:
minSepalLength =sepalLength.min()
minSepalLength

4.3

In [10]:
maxSepalWidth = sepalWidth.max()
maxSepalWidth

4.4

In [11]:
minSepalWidth = sepalWidth.min()
minSepalWidth

2.0

In [12]:
maxPetalLength = petalLength.max()
maxPetalLength

6.9

In [13]:
minPetalLength = petalLength.min()
minPetalLength

1.0

In [14]:
maxPetalWidth = petalWidth.max()
maxPetalWidth

2.5

In [15]:
minPetalWidth = petalWidth.min()
minPetalWidth

0.1

In [16]:
minmaxData = np.array([
    maxSepalLength, minSepalLength, maxSepalWidth, minSepalWidth, 
    maxPetalLength, minPetalLength, maxPetalWidth, minPetalWidth
])
print(minmaxData)

[7.9 4.3 4.4 2.  6.9 1.  2.5 0.1]


In [17]:
minmaxData

array([7.9, 4.3, 4.4, 2. , 6.9, 1. , 2.5, 0.1])

# Normalization

## Minmax Normalization

In [18]:
def minmax(data) :
    result = data.copy()
    minA = data.min()
    maxA = data.max()
    minB = 0
    maxB = 2
    
    for i in range(len(data)):
        result[i] = ((data[i] - minA)/(maxA-minA))*(maxB-minB)+minB
    return result

In [19]:
dataSepalLengthMinMax = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Minmax)': minmax(sepalLength)})
dataSepalLengthMinMax

Unnamed: 0,Sepal Length,Normalisasi (Minmax)
0,5.1,0.444444
1,4.9,0.333333
2,4.7,0.222222
3,4.6,0.166667
4,5.0,0.388889
...,...,...
145,6.7,1.333333
146,6.3,1.111111
147,6.5,1.222222
148,6.2,1.055556


## Binary Normalization 

In [20]:
medianSepalLength = sepalLength.median()
medianSepalLength

5.8

In [21]:
sepalLengthData = sepalLength

In [22]:
def binary(data):
    result = data.copy()
    for i in range(len(data)):
        if data[i] > np.median(data):
            result[i] = 1
        else:
            result[i] = 0
    return result

In [23]:
dataSepalLengthBinerisasi = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Binerisasi)': binary(sepalLength)})
dataSepalLengthBinerisasi

Unnamed: 0,Sepal Length,Normalisasi (Binerisasi)
0,5.1,0.0
1,4.9,0.0
2,4.7,0.0
3,4.6,0.0
4,5.0,0.0
...,...,...
145,6.7,1.0
146,6.3,1.0
147,6.5,1.0
148,6.2,1.0


## Decimal Point Normalization 

In [24]:
def decimal(data):
    result = data.copy()
    d = 1
    for i in range(len(data)):
        result[i] = data[i]/10**d
    return  result

In [25]:
dataSepalLengthDecimali = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Decimal)': decimal(sepalLength)})
dataSepalLengthDecimali

Unnamed: 0,Sepal Length,Normalisasi (Decimal)
0,5.1,0.51
1,4.9,0.49
2,4.7,0.47
3,4.6,0.46
4,5.0,0.50
...,...,...
145,6.7,0.67
146,6.3,0.63
147,6.5,0.65
148,6.2,0.62


##  Z Score Normalization

In [26]:
import statistics    

In [27]:
def zscore(data):
    result = data.copy()
    rerata = data.mean()
    deviasi = statistics.stdev(data)
    
    for i in range(len(data)):
        result[i] = (data[i]-rerata)/deviasi
    return result

In [28]:
dataSepalLengthZScore = pd.DataFrame({'Sepal Length': sepalLength, 'Normalisasi (Decimal)': zscore(sepalLength)})
dataSepalLengthZScore

Unnamed: 0,Sepal Length,Normalisasi (Decimal)
0,5.1,-0.897674
1,4.9,-1.139200
2,4.7,-1.380727
3,4.6,-1.501490
4,5.0,-1.018437
...,...,...
145,6.7,1.034539
146,6.3,0.551486
147,6.5,0.793012
148,6.2,0.430722


# Forrest Fires Dataset

In [29]:
forrestDf = pd.read_csv("./data/forestfires.csv")
forrestDf.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


## Get data from dataframe

In [30]:
windSpeed = forrestDf['wind']
windSpeed.head()

0    6.7
1    0.9
2    1.3
3    4.0
4    1.8
Name: wind, dtype: float64

In [31]:
rain = forrestDf['rain']
rain.head()

0    0.0
1    0.0
2    0.0
3    0.2
4    0.0
Name: rain, dtype: float64

## Get max and min data

In [32]:
windSpeedMax = windSpeed.max()
windSpeedMax

9.4

In [33]:
windSpeedMin = windSpeed.min()
windSpeedMin

0.4

In [34]:
rainMax = rain.max()
rainMax

6.4

In [35]:
rainMin = rain.min()
rainMin

0.0

# Normalization

## Minmax Normalization

In [49]:
dataWindSpeedMinMax = pd.DataFrame({'Wind Speed': windSpeed, 'Normalisasi (Minmax)': minmax(windSpeed)})
dataWindSpeedMinMax

Unnamed: 0,Wind Speed,Normalisasi (Minmax)
0,6.7,1.400000
1,0.9,0.111111
2,1.3,0.200000
3,4.0,0.800000
4,1.8,0.311111
...,...,...
512,2.7,0.511111
513,5.8,1.200000
514,6.7,1.400000
515,4.0,0.800000


## Binary Normalization 

In [37]:
dataWindSpeedBiner = pd.DataFrame({'Wind Speed': windSpeed, 'Normalisasi (Binerisasi)': binary(windSpeed)})
dataWindSpeedBiner

Unnamed: 0,Wind Speed,Normalisasi (Binerisasi)
0,6.7,1.0
1,0.9,0.0
2,1.3,0.0
3,4.0,0.0
4,1.8,0.0
...,...,...
512,2.7,0.0
513,5.8,1.0
514,6.7,1.0
515,4.0,0.0


## Decimal Point Normalization 

In [38]:
dataWindSpeedDecimali = pd.DataFrame({'Wind Speed': windSpeed, 'Normalisasi (Decimal)': decimal(windSpeed)})
dataWindSpeedDecimali

Unnamed: 0,Wind Speed,Normalisasi (Decimal)
0,6.7,0.67
1,0.9,0.09
2,1.3,0.13
3,4.0,0.40
4,1.8,0.18
...,...,...
512,2.7,0.27
513,5.8,0.58
514,6.7,0.67
515,4.0,0.40


## Z Score Normalization

In [39]:
dataWindSpeedZ = pd.DataFrame({'Wind Speed': windSpeed, 'Normalisasi (Decimal)': zscore(windSpeed)})
dataWindSpeedZ

Unnamed: 0,Wind Speed,Normalisasi (Decimal)
0,6.7,1.497164
1,0.9,-1.740070
2,1.3,-1.516813
3,4.0,-0.009824
4,1.8,-1.237741
...,...,...
512,2.7,-0.735411
513,5.8,0.994835
514,6.7,1.497164
515,4.0,-0.009824


# Sungai Cibeureum Dataset

In [40]:
sungaiDf = pd.read_csv("./data/data_sungaicibereum.csv")
sungaiDf.head()

Unnamed: 0,LOCATION,DAY,DATE,TIME_WIB,TDS_PPM,TEMP_RIVER_C,TEMP_AIR_C
0,Sungai Cibeureum,Sun,3/19/17,10:00,88,22,25
1,Sungai Cibeureum,Sun,3/19/17,12:00,97,22,30
2,Sungai Cibeureum,Sun,3/19/17,14:00,83,22,33
3,Sungai Cibeureum,Sun,3/19/17,16:00,100,22,27
4,Sungai Cibeureum,Sun,4/9/17,10:00,100,22,32


## Get data from dataframe

In [41]:
tempRiver = sungaiDf['TEMP_RIVER_C']
tempRiver.head()

0    22
1    22
2    22
3    22
4    22
Name: TEMP_RIVER_C, dtype: int64

In [42]:
tempAir = sungaiDf['TEMP_AIR_C']
tempAir.head()

0    25
1    30
2    33
3    27
4    32
Name: TEMP_AIR_C, dtype: int64

## Get max and min data 

In [43]:
tempRiverMax = tempRiver.max()
tempRiverMax

25

In [44]:
tempRiverMin = tempRiver.min()
tempRiverMin

15

In [45]:
tempAirMax = tempAir.max()
tempAirMax

41

In [46]:
tempAirMin = tempAir.min()
tempAirMin

24

# Normalization

## Minmax Normalization 

In [50]:
dataTempRiverMinMax = pd.DataFrame({'River Temperature': tempRiver, 'Normalisasi (Minmax)': minmax(tempRiver)})
dataTempRiverMinMax

Unnamed: 0,River Temperature,Normalisasi (Minmax)
0,22,1
1,22,1
2,22,1
3,22,1
4,22,1
...,...,...
375,22,1
376,19,0
377,19,0
378,20,1


## Binary Normalization

In [51]:
dataTempRiverBinary = pd.DataFrame({'River Temperature': tempRiver, 'Normalisasi (Binerisasi)': binary(tempRiver)})
dataTempRiverBinary

Unnamed: 0,River Temperature,Normalisasi (Binerisasi)
0,22,1
1,22,1
2,22,1
3,22,1
4,22,1
...,...,...
375,22,1
376,19,0
377,19,0
378,20,0


## Decimal Point Normalization

In [52]:
dataTempRiverDecimal = pd.DataFrame({'River Temperature': tempRiver, 'Normalisasi (Decimal)': decimal(tempRiver)})
dataTempRiverDecimal

Unnamed: 0,River Temperature,Normalisasi (Decimal)
0,22,2
1,22,2
2,22,2
3,22,2
4,22,2
...,...,...
375,22,2
376,19,1
377,19,1
378,20,2


## Z Score Normalization

In [53]:
dataTempRiverZScore = pd.DataFrame({'River Temperature': tempRiver, 'Normalisasi (Z Score)': zscore(tempRiver)})
dataTempRiverZScore

Unnamed: 0,River Temperature,Normalisasi (Z Score)
0,22,0
1,22,0
2,22,0
3,22,0
4,22,0
...,...,...
375,22,0
376,19,0
377,19,0
378,20,0
