In [None]:
import pandas as pd
import numpy as np

In [None]:
# Column labels passed to read_csv via `names=`.
columns = [
    "sepal-length",
    "sepal-width",
    "petal-length",
    "petal-width",
    "class",
]
# Read the iris data straight from the UCI repository URL.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
    names=columns,
)
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


### Assigning Column

In [None]:
# New derived column: 0.5 * petal-width * petal-length, computed row by row.
df["petal-area"] = df["petal-width"].mul(0.5).mul(df["petal-length"])
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,petal-area
0,5.1,3.5,1.4,0.2,Iris-setosa,0.14
1,4.9,3.0,1.4,0.2,Iris-setosa,0.14
2,4.7,3.2,1.3,0.2,Iris-setosa,0.13
3,4.6,3.1,1.5,0.2,Iris-setosa,0.15
4,5.0,3.6,1.4,0.2,Iris-setosa,0.14
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,5.98
146,6.3,2.5,5.0,1.9,Iris-virginica,4.75
147,6.5,3.0,5.2,2.0,Iris-virginica,5.20
148,6.2,3.4,5.4,2.3,Iris-virginica,6.21


In [None]:
# Collapse the three species labels into a two-valued setosa / not-setosa label.
rename_species = {
    'Iris-setosa': 'setosa',
    'Iris-versicolor': 'not-setosa',
    'Iris-virginica': 'not-setosa',
}
df["is_setosa"] = df["class"].replace(rename_species)
# Only the last expression of a cell is displayed, so a bare `.unique()` here
# would be silently discarded. Check the mapping explicitly instead: any label
# not covered by `rename_species` is left unchanged by `replace` and fails here.
assert set(df["is_setosa"].unique()) <= {"setosa", "not-setosa"}, \
    "unexpected class label left unmapped by rename_species"
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,petal-area,is_setosa
0,5.1,3.5,1.4,0.2,Iris-setosa,0.14,setosa
1,4.9,3.0,1.4,0.2,Iris-setosa,0.14,setosa
2,4.7,3.2,1.3,0.2,Iris-setosa,0.13,setosa
3,4.6,3.1,1.5,0.2,Iris-setosa,0.15,setosa
4,5.0,3.6,1.4,0.2,Iris-setosa,0.14,setosa
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,5.98,not-setosa
146,6.3,2.5,5.0,1.9,Iris-virginica,4.75,not-setosa
147,6.5,3.0,5.2,2.0,Iris-virginica,5.20,not-setosa
148,6.2,3.4,5.4,2.3,Iris-virginica,6.21,not-setosa


In [None]:
def kategori_luas(luas):
    """Bucket a numeric area into a size category.

    Parameters
    ----------
    luas : scalar number
        The area value to categorise.

    Returns
    -------
    str or missing value
        "small" when ``luas < 2``, "medium" when ``2 <= luas < 6`` and
        "large" when ``luas >= 6``. A missing value (NaN / None / pd.NA) is
        returned unchanged so the function can be used with ``Series.apply``
        on columns that contain gaps.
    """
    # Every comparison against NaN is False, so without this guard no branch
    # would assign a category and the function would fail.
    if pd.isna(luas):
        return luas
    if luas < 2:
        return "small"
    if luas < 6:
        return "medium"
    return "large"

In [None]:
# Replace the numeric "petal-area" column with its size category.
# The area is recomputed from the measurement columns instead of being read
# back from "petal-area": once this cell has run, that column holds labels,
# and applying kategori_luas to it again would compare strings with numbers
# and raise. Computing from the source columns makes the cell safe to re-run.
df["petal-area"] = (0.5 * df["petal-width"] * df["petal-length"]).apply(kategori_luas)
df

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,petal-area,is_setosa
0,5.1,3.5,1.4,0.2,Iris-setosa,small,setosa
1,4.9,3.0,1.4,0.2,Iris-setosa,small,setosa
2,4.7,3.2,1.3,0.2,Iris-setosa,small,setosa
3,4.6,3.1,1.5,0.2,Iris-setosa,small,setosa
4,5.0,3.6,1.4,0.2,Iris-setosa,small,setosa
...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,medium,not-setosa
146,6.3,2.5,5.0,1.9,Iris-virginica,medium,not-setosa
147,6.5,3.0,5.2,2.0,Iris-virginica,medium,not-setosa
148,6.2,3.4,5.4,2.3,Iris-virginica,large,not-setosa


# Regresi


In [None]:
# Features: every column of df except the two label columns, the regression
# target, and the categorised petal-area column.
X = df.drop(columns=['is_setosa', "class", 'petal-width', 'petal-area'])
# Target of the regression: petal width.
y = df["petal-width"]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split, then predict the test split.
linear_model = LinearRegression().fit(X_train, y_train)
y_predict = linear_model.predict(X_test)
# Show the fitted coefficient of each feature column.
pd.Series(linear_model.coef_, index=X.columns, name='Coefficient').to_frame()

Unnamed: 0,Coefficient
sepal-length,-0.217074
sepal-width,0.26947
petal-length,0.539593


In [None]:
# Display the model's predictions for the rows of X_test (predicted petal-width).
y_predict

array([ 0.3063928 ,  0.19329989,  0.33421927,  1.98820898,  1.49196615,
        1.61710157,  1.28823683,  1.19733969,  1.61716676,  0.11327303,
        1.81640555,  0.39852734,  0.16648322,  2.31141097,  1.74135756,
        1.32553269,  1.41822075,  1.42352553,  0.22174505,  2.01128419,
        1.11636823,  0.26903176,  1.49401781,  1.36826369,  1.36894757,
        1.12147745,  0.92816214,  1.86050432,  0.19948409, -0.0424857 ])

In [None]:
from sklearn import metrics

# Error metrics of the predictions against the held-out targets.
# MSE is computed once and reused for the RMSE line.
mse = metrics.mean_squared_error(y_test, y_predict)
print('MAE:', metrics.mean_absolute_error(y_test, y_predict))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

MAE: 0.18416301535664786
MSE: 0.056154057654557436
RMSE: 0.2369684739676513


# Buat Data Prediksi

In [None]:
# Wrap the predictions in a DataFrame labelled with the index of the X_test
# rows they were computed from. With a default 0..n-1 index, an index-aligned
# operation such as pd.concat(axis=1) with df would attach each prediction to
# an unrelated row; keeping X_test's index ties every value to its flower.
new = pd.DataFrame(y_predict, index=X_test.index, columns=['prediksi'])
new

Unnamed: 0,prediksi
0,0.306393
1,0.1933
2,0.334219
3,1.988209
4,1.491966
5,1.617102
6,1.288237
7,1.19734
8,1.617167
9,0.113273


In [None]:
# Column-wise concat: rows are matched by index label, and rows of df that
# have no matching label in `new` get NaN in the 'prediksi' column.
prediksi = pd.concat([df, new], axis="columns")
prediksi

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,petal-area,is_setosa,prediksi
0,5.1,3.5,1.4,0.2,Iris-setosa,small,setosa,0.306393
1,4.9,3.0,1.4,0.2,Iris-setosa,small,setosa,0.193300
2,4.7,3.2,1.3,0.2,Iris-setosa,small,setosa,0.334219
3,4.6,3.1,1.5,0.2,Iris-setosa,small,setosa,1.988209
4,5.0,3.6,1.4,0.2,Iris-setosa,small,setosa,1.491966
...,...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,medium,not-setosa,
146,6.3,2.5,5.0,1.9,Iris-virginica,medium,not-setosa,
147,6.5,3.0,5.2,2.0,Iris-virginica,medium,not-setosa,
148,6.2,3.4,5.4,2.3,Iris-virginica,large,not-setosa,


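Tabel hasil concat ini tidak terlalu penting: `concat` menyelaraskan baris berdasarkan indeks, sehingga jika indeks kolom prediksi tidak sama dengan indeks baris data uji, nilai prediksi tidak sesuai dengan baris (class) tempat ia ditampilkan. Fungsi dari prediksi tersebut adalah memperkirakan petal-width yang akan muncul pada masing-masing bunga jika bunga-bunga tersebut tumbuh, dengan metode regresi.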

Namun, dalam beberapa kasus, jika data yang digunakan berupa time series, metode concat ini bisa dilakukan untuk melakukan prediksi di tiap hari/minggu/bulan/tahunnya.

# Prediksi Lain
Kenapa cuma 30 data? Karena data sudah di-split menjadi data latih dan data uji (`test_size=0.2`), dan prediksi hanya dibuat untuk data uji. Pembagian data tersebut di-handle oleh library sklearn.

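Kalau kita mau prediksi penuh, misalnya 150 data, kita tinggal panggil lagi fungsi learning-nya dan mengganti variabel pada baris ke-4 yang tadinya `X_test` menjadi `X`. Perhatikan bahwa pada kode di bawah model juga dilatih ulang dengan seluruh data (`fit(X, y)`), jadi prediksi ini dibuat pada data yang sama dengan data latihnya.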

In [None]:
from sklearn.linear_model import LinearRegression

# Refit the regression on all rows of X / y and predict those same rows.
# Note: this rebinds `linear_model`, replacing the model fitted on X_train.
linear_model = LinearRegression().fit(X, y)
y_predictnew = linear_model.predict(X)
# Show the fitted coefficient of each feature column.
pd.DataFrame({'Coefficient': linear_model.coef_}, index=X.columns)

Unnamed: 0,Coefficient
sepal-length,-0.210271
sepal-width,0.228777
petal-length,0.526088


In [None]:
# Wrap the full-data predictions in a DataFrame that carries X's own index,
# so each prediction stays attached to the row it was computed from even if
# df/X no longer has a default 0..n-1 index (e.g. after filtering rows).
new150 = pd.DataFrame(y_predictnew, index=X.index, columns=['prediksi'])
new150

Unnamed: 0,prediksi
0,0.216136
1,0.143802
2,0.179003
3,0.282370
4,0.260041
...,...
145,1.764449
146,1.628951
147,1.806503
148,2.066313


In [None]:
# Column-wise concat: append the 'prediksi' column to df, matching rows by
# index label.
prediksisemua = pd.concat([df, new150], axis="columns")
prediksisemua

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class,petal-area,is_setosa,prediksi
0,5.1,3.5,1.4,0.2,Iris-setosa,small,setosa,0.216136
1,4.9,3.0,1.4,0.2,Iris-setosa,small,setosa,0.143802
2,4.7,3.2,1.3,0.2,Iris-setosa,small,setosa,0.179003
3,4.6,3.1,1.5,0.2,Iris-setosa,small,setosa,0.282370
4,5.0,3.6,1.4,0.2,Iris-setosa,small,setosa,0.260041
...,...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica,medium,not-setosa,1.764449
146,6.3,2.5,5.0,1.9,Iris-virginica,medium,not-setosa,1.628951
147,6.5,3.0,5.2,2.0,Iris-virginica,medium,not-setosa,1.806503
148,6.2,3.4,5.4,2.3,Iris-virginica,large,not-setosa,2.066313
