You can use Jupyter Notebook to load and run this sample.

#### Environment setup

In [None]:
! pip install scikit-learn

In [None]:
###### Import necessary libraries
from IPython.core.debugger import set_trace
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Append the parent directory to sys.path (once) so modules located there can
# be imported from this notebook.
module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

# matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*" and
# later releases removed the old names; use whichever this install provides.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)
%matplotlib inline

#### Data preparation

Synthetic Financial Datasets For Fraud Detection

It simulates mobile money transactions based on a sample of real transactions extracted from one month of financial logs from a mobile money service implemented in an African country. 

Src: https://www.kaggle.com/datasets/ealaxi/paysim1

In [None]:
# Load the transaction log CSV from the notebook's working directory.
# `on_bad_lines="warn"` replaces `error_bad_lines=False`, which was deprecated
# in pandas 1.3 and removed in 2.0: malformed rows are reported and skipped
# instead of aborting the load.
df = pd.read_csv(
    "PS_20174392719_1491204439457_log.csv",
    encoding="ISO-8859-1",
    on_bad_lines="warn",
)
df.shape

In [None]:
# Preview the first five rows of the loaded table.
df.head(5)

In [None]:
# Show the dtype of each column of the loaded table.
df.dtypes

In [None]:
####### Resample 'amount' field with interval=10000

# single-column frame holding only the transaction amounts
amount = df[['amount']]
# keep every 10000th row and renumber the kept rows from 0
subsample = amount.iloc[::10000].reset_index(drop=True)
subsample.head(20)

In [None]:
####### Visualize

# one figure with two stacked panels
f, (ax1, ax2) = plt.subplots(2, 1, num=1, figsize=(16, 6))

# upper panel: line plot of the subsampled values
ax1.plot(subsample)
ax1.title.set_text("Amount - line plot")

# lower panel: box plot of the same values
_ = ax2.boxplot(subsample)
ax2.title.set_text("Amount - Box plot")

plt.show()

### Boxplot definition
![box_plot.png](attachment:box_plot.png)

In [None]:
####### iForest train

from sklearn.ensemble import IsolationForest

# definition
# contamination=0.01: expected share of outliers in the data (1%).
# n_estimators=100: number of trees in the forest.
# random_state fixes the otherwise random tree construction so the flagged
# points are the same on every run.
model = IsolationForest(
    contamination=0.01,
    n_estimators=100,
    random_state=42,
)

# train
# Fit on the 'amount' column only, so that re-running this cell after a label
# column has been added to `subsample` does not train on that column.
model.fit(subsample[["amount"]])

In [None]:
####### Prediction

# predict() returns 1 for inliers and -1 for outliers; store the result as a
# 0/1 flag where 1 marks an anomaly. Only the 'amount' column is passed to the
# model, so this cell can be re-run after 'iforest' has been added without a
# feature-mismatch error.
subsample["iforest"] = (model.predict(subsample[["amount"]]) == -1).astype(int)

In [None]:
####### Visualize anomalies

####### draw line plot of amount
f = plt.figure(1, figsize=(16, 6))
ax = f.add_subplot(2,1,1)
# Plot only the 'amount' column: plotting the whole frame would also draw the
# 0/1 'iforest' flag as a second line.
ax.plot(subsample['amount'], label='amount')
ax.title.set_text("Amount")
ax.set_ylabel("amount")

####### plot anomaly with marker
# rows flagged as anomalous (iforest == 1)
anomaly = subsample[subsample['iforest']==1]

ax.scatter(
    anomaly.index,
    anomaly['amount'],
    marker='o',
    c='red',
    s=40,
    label='anomaly'
)
ax.legend()

plt.show()

In [None]:
##### Plot the boxplot after removing the extreme anomalies
# keep only the rows labelled as normal (iforest == 0)
normal = subsample.loc[subsample['iforest'] == 0]

# single-panel figure with the box plot of the remaining amounts
f, ax = plt.subplots(1, 1, num=1, figsize=(16, 6))
ax.boxplot(normal['amount'])
plt.show()

### Task: 
    1. Set up the Python environment with Anaconda, and run the sample code in this notebook to get to know the basic visualization and anomaly detection.
    2. Review the code snippets above, and read the paper and the web article to study the details of iForest.
        a). https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf?q=isolation-forest
        b). https://www.analyticsvidhya.com/blog/2021/07/anomaly-detection-using-isolation-forest-a-complete-guide/
    3. Visualize the synthetic financial dataset with multiple fields.
    4. Perform iForest on these multivariate time series to detect the anomaly.
    5. Visualize the multivariate anomalies and compare the results with those based on only the 'amount' series.
    
    