In [None]:
import configparser

import pandas as pd
import numpy as np
from numpy import where

from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM

import vertica_python
from vertica_python.errors import MissingRelation
import verticapy as vp
from verticapy import pandas_to_vertica, insert_into, drop

import matplotlib.pyplot as plt

### Helper functions

In [None]:
config_path = 'config.ini'


def get_VRT_cursor(path=config_path):
    """Open a Vertica connection from the credentials stored in an INI file.

    Parameters
    ----------
    path : str
        Location of the INI file. Its ``[VERTICA]`` section must provide
        ``database``, ``user``, ``password``, ``host`` and ``port``.

    Returns
    -------
    tuple
        ``(cursor, connection)``: a cursor created from the new connection,
        followed by the connection itself.

    Raises
    ------
    KeyError
        If the ``[VERTICA]`` section or one of its options is missing
        (``ConfigParser.read`` does not fail on an unreadable file, so a
        wrong path surfaces here as well).
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    vertica_section = parser['VERTICA']
    option_names = ('database', 'user', 'password', 'host', 'port')
    # Values are forwarded exactly as read from the file, i.e. as strings.
    connect_kwargs = {name: vertica_section[name] for name in option_names}

    connection = vertica_python.connect(**connect_kwargs)
    return connection.cursor(), connection
    
def get_df_from_sql(cursor, sql='SELECT now(), version()'):
    """Execute ``sql`` on ``cursor`` and return the full result as a DataFrame.

    Parameters
    ----------
    cursor
        DB-API style cursor exposing ``execute``, ``description`` and
        ``fetchall``.
    sql : str
        Statement to run.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record; column names are the first element of
        each entry in ``cursor.description``.
    """
    cursor.execute(sql)
    column_meta = cursor.description
    rows = cursor.fetchall()
    column_names = [column[0] for column in column_meta]
    return pd.DataFrame(rows, columns=column_names)

# Open the Vertica connection (using the default config path) and hand the
# same connection object to verticapy.
VRT_cursor, conn = get_VRT_cursor()
vp.set_connection(conn)

### Get data

In [None]:
# Query text: the `date` and `value` columns of `test_table`, ordered by `date`.
sql = """
select date,
    value
from test_table
order by date
"""

In [None]:
# Run the query through the notebook-wide cursor and display the resulting frame.
df = get_df_from_sql(cursor=VRT_cursor, sql=sql)
df

### Isolation forest

In [None]:
# Isolation Forest fitted on the single `value` feature; the contamination
# level and the random seed are fixed here.
model = IsolationForest(contamination=0.05, random_state=0)
model.fit(df[['value']])

In [None]:
# Add two columns to `df` in place: the model's decision_function output and
# its predicted label (a later cell treats label -1 as an outlier).
df['score_isol_forest'] = model.decision_function(df[['value']])
df['anomaly_value_isol_forest'] = model.predict(df[['value']])
df.head()

In [None]:
# Keep only the rows the Isolation Forest labelled -1 (anomalous).
outliers = df[df['anomaly_value_isol_forest'].eq(-1)]
outlier_index = list(outliers.index)
outlier_date = outliers['date']

# Label counts, printed first and then drawn as a bar chart.
print(df['anomaly_value_isol_forest'].value_counts())
df['anomaly_value_isol_forest'].value_counts().plot.bar()

In [None]:
# Whole series as a line with point markers; Isolation Forest outliers overlaid in red.
plt.figure(figsize=(16, 8))

plt.plot(df['date'], df['value'], marker='.')
plt.plot(
    outlier_date,
    outliers['value'],
    'o',
    label='outlier',
    color='red',
)

plt.xlabel('Date')
# NOTE(review): the y-axis shows df['value']; confirm 'Neutral Current' is the intended label.
plt.ylabel('Neutral Current')
plt.title('Anomaly detection By Isolation Forest')
plt.legend()

### One-Class SVM

In [None]:
# One-class SVM with an RBF kernel; gamma and nu are hard-coded here.
# Printing the estimator shows the configuration that will be fitted.
svm = OneClassSVM(kernel='rbf', gamma=0.001, nu=0.03)
print(svm)

In [None]:
# Fit on the single `value` feature, then label the same rows and store the
# label on `df` in place (a later cell treats label -1 as an outlier).
svm.fit(df[['value']])
df['anomaly_value_svm'] = svm.predict(df[['value']])
df.head()

In [None]:
# Keep only the rows the one-class SVM labelled -1 (anomalous).
outliers_svm = df[df['anomaly_value_svm'].eq(-1)]
outliers_svm_date = outliers_svm['date']

# Label counts, printed first and then drawn as a bar chart.
print(df['anomaly_value_svm'].value_counts())
df['anomaly_value_svm'].value_counts().plot.bar()

In [None]:
# Whole series as a line with point markers; one-class SVM outliers overlaid in red.
plt.figure(figsize=(16, 8))

plt.plot(df['date'], df['value'], marker='.')
plt.plot(
    outliers_svm_date,
    outliers_svm['value'],
    'o',
    label='outlier',
    color='red',
)

plt.xlabel('Date')
# NOTE(review): the y-axis shows df['value']; confirm 'Neutral Current' is the intended label.
plt.ylabel('Neutral Current')
plt.title('Anomaly detection By SVM')
plt.legend()

### Percentile

In [None]:
# Lower and upper cut-offs for the percentile rule: the 1st and 99th
# percentiles of `value` (the next cell flags anything outside this band).
percentile_1 = np.percentile(df['value'], 1)
percentile_99 = np.percentile(df['value'], 99)
# Print the bounds that were actually computed, low first. The previous
# print referenced percentile_5 / percentile_95, which are never defined
# and raised NameError on a fresh kernel.
print(percentile_1, percentile_99)

In [None]:
# Label each row: -1 when `value` is below percentile_1 or above percentile_99, else 1.
df['anomaly_percentile'] = np.where(
    df['value'].lt(percentile_1) | df['value'].gt(percentile_99),
    -1,
    1,
)
outliers_percentile = df[df['anomaly_percentile'].eq(-1)]
outliers_percentile_date = outliers_percentile['date']

# Label counts, printed first and then drawn as a bar chart.
print(df['anomaly_percentile'].value_counts())
df['anomaly_percentile'].value_counts().plot.bar()

In [None]:
# Whole series as a line with point markers; percentile-rule outliers overlaid in red.
plt.figure(figsize=(16, 8))

plt.plot(df['date'], df['value'], marker='.')
plt.plot(
    outliers_percentile_date,
    outliers_percentile['value'],
    'o',
    label='outlier',
    color='red',
)

plt.xlabel('Date')
# NOTE(review): the y-axis shows df['value']; confirm 'Neutral Current' is the intended label.
plt.ylabel('Neutral Current')
plt.title('Anomaly detection By Percentile')
plt.legend()