# Deteksi Outlier ABOD 

### Import library yang dibutuhkan 

In [5]:
import os
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

### Load dataset

In [7]:
load_dotenv()

# Ambil variabel dari .env
user_postgres = os.getenv("USER_POSTGRES")
password_postgres = os.getenv("PASS_POSTGRES")
host_postgres = os.getenv("HOST_POSTGRES")
port_postgres = os.getenv("PORT_POSTGRES")
db_postgres = os.getenv("DB_POSTGRES")

user_mysql = os.getenv("USER_MYSQL")
password_mysql = os.getenv("PASS_MYSQL")
host_mysql = os.getenv("HOST_MYSQL")
port_mysql = os.getenv("PORT_MYSQL")
db_mysql = os.getenv("DB_MYSQL")


postgres_conn = f"postgresql+psycopg2://{user_postgres}:{password_postgres}@{host_postgres}:{port_postgres}/{db_postgres}"

mysql_conn = f"mysql+pymysql://{user_mysql}:{password_mysql}@{host_mysql}:{port_mysql}/{db_mysql}"

postgres_engine = create_engine(postgres_conn)
postgres_engine = create_engine(mysql_conn)

### Membuat koneksi (engine) dan mengambil dari database

In [19]:
# Engine untuk PostgreSQL
postgres_engine = create_engine(postgres_conn)

# Engine untuk MySQL
mysql_engine = create_engine(mysql_conn)


# Ambil data dari MySQL
mysql_df_iris = pd.read_sql("SELECT * FROM iris_full", mysql_engine)

# Ambil data dari PostgreSQL
postgres_df_iris = pd.read_sql("SELECT * FROM iris_full", postgres_engine)


### Mengabungkan 2 database

In [None]:

merge_df = pd.merge(mysql_df_iris, postgres_df_iris, left_on="id", right_on='id', how='outer')
print(merge_df)

      id  petal length  petal width  sepal length  sepal width
0      1           1.4          0.2           5.1          3.5
1      2           1.4          0.2           4.9          3.0
2      3           1.3          0.2           4.7          3.2
3      4           1.5          0.2           4.6          3.1
4      5           1.4          0.2           5.0          3.6
..   ...           ...          ...           ...          ...
145  146           5.2          2.3           6.7          3.0
146  147           5.0          1.9           6.3          2.5
147  148           5.2          2.0           6.5          3.0
148  149           5.4          2.3           6.2          3.4
149  150           5.1          1.8           5.9          3.0

[150 rows x 5 columns]


### Salinan data
Buat salinan data dari hasil penggabungan 

In [20]:
data_train = merge_df.copy()

### Model ABOD

In [14]:
from pycaret.anomaly import *

# Setup PyCaret untuk anomaly detection
exp_ano = setup(data=data_train)

# Buat model ABOD
abod_model = create_model('abod')

# Assign label outlier ke setiap baris
dataset_outliers = assign_model(abod_model)

# Hapus baris yang dianggap outlier
dataset_clean = dataset_outliers[dataset_outliers['Anomaly'] == 1]

dataset_clean

Unnamed: 0,Description,Value
0,Session id,890
1,Original data shape,"(150, 5)"
2,Transformed data shape,"(150, 5)"
3,Numeric features,5
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,id,petal length,petal width,sepal length,sepal width,Anomaly,Anomaly_Score
98,99,3.0,1.1,5.1,2.5,1,-0.006736
106,107,4.5,1.7,4.9,2.5,1,-0.001276
117,118,6.7,2.2,7.7,3.8,1,-0.005361
118,119,6.9,2.3,7.7,2.6,1,-0.005204
119,120,5.0,1.5,6.0,2.2,1,-0.00608
122,123,6.7,2.0,7.7,2.8,1,-0.004978
135,136,6.1,2.3,7.7,3.0,1,-0.006185
149,150,5.1,1.8,5.9,3.0,1,-0.006176


### Menghapus data dengan skor outlier tertinggi

In [None]:
# Urutkan dataset_outliers berdasarkan skor outlier (descending)
outliers_sorted = dataset_outliers.sort_values(by="Anomaly_Score", ascending=False)

# Ambil index 2 skor outlier tertinggi
top2_outliers_index = outliers_sorted.head(2).index

# Hapus 2 data tersebut dari dataset_outliers
dataset_clean_top2 = dataset_outliers.drop(index=top2_outliers_index)

print("Data setelah 2 outlier tertinggi dihapus:")
print(dataset_clean_top2.head())


Data setelah 2 outlier tertinggi dihapus:
   id  petal length  petal width  sepal length  sepal width  Anomaly  \
0   1           1.4          0.2           5.1          3.5        0   
1   2           1.4          0.2           4.9          3.0        0   
2   3           1.3          0.2           4.7          3.2        0   
3   4           1.5          0.2           4.6          3.1        0   
4   5           1.4          0.2           5.0          3.6        0   

   Anomaly_Score  
0      -0.010918  
1      -0.105879  
2      -0.160850  
3      -0.108965  
4      -0.103372  
