### Install and import pycatcher package

In [1]:
pip install pycatcher --upgrade

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import pycatcher as pc
import pandas as pd

ModuleNotFoundError: No module named 'sklearn.externals'

### Package functions to detect outliers

- detect_outliers_classic(df): Detect outliers in a time-series dataframe using seasonal trend decomposition. 
- detect_outliers_today_classic(df): Detect outliers for the current date using Classical Seasonal Decomposition.
- detect_outliers_latest_classic(df): Detect latest outliers using Classical Seasonal Decomposition.
- detect_outliers_stl(df): Detect outliers using Seasonal-Trend Decomposition using LOESS (STL).
- detect_outliers_mstl(df): Detect outliers using Multiple Seasonal-Trend Decomposition using LOESS (MSTL).
- detect_outliers_esd(df): Detect outliers using the Extreme Studentized Deviate (ESD) algorithm.
- detect_outliers_today_stl(df): Detect outliers for the current date using Seasonal-Trend Decomposition using LOESS (STL).
- detect_outliers_latest_stl(df): Detect latest outliers using Seasonal-Trend Decomposition using LOESS (STL).
- detect_outliers_today_mstl(df): Detect outliers for the current date using MSTL.
- detect_outliers_latest_mstl(df): Detect latest outliers using MSTL.
- detect_outliers_latest_esd(df): Detect latest outliers using ESD.
- detect_outliers_today_esd(df): Detect outliers for the current date using ESD.
- detect_outliers_iqr(df): Detect outliers in a time-series dataframe when there's less than 2 years of data.
- detect_outliers_moving_average(df): Detect outliers using moving average method.

##### Note - Use the seasonal decomposition algorithms when there are at least 2 years of data; otherwise, use the Interquartile Range (IQR) or Moving Average methods for shorter time frames.

### Diagnostic functions for plotting

In [None]:
# Diagnostic helpers imported by name so they can be called without the
# `pc.` prefix in the cells below.
from pycatcher.diagnostics import (
    build_decomposition_results,
    build_iqr_plot,
    build_monthwise_plot,
    build_outliers_plot_classic,
    build_outliers_plot_esd,
    build_outliers_plot_moving_average,
    build_outliers_plot_mstl,
    build_outliers_plot_stl,
    build_seasonal_plot_classic,
    build_seasonal_plot_mstl,
    build_seasonal_plot_stl,
    conduct_stationarity_check,
)

### Example 1 - Finding Outliers Using Classical Seasonal Decomposition Algorithm

In [None]:
!pip install fsspec

In [None]:
!pip install --upgrade certifi

In [None]:
# Load the TSA air passenger dataset (CSV hosted on Hugging Face).
df_tsa = pd.read_csv(
    "https://huggingface.co/datasets/bbgatch/tsa-passengers/resolve/main/tsa.csv",
    sep=",",
)

# Preview the first five rows.
df_tsa.head(5)

In [None]:
# Load the AirPassengers dataset (CSV hosted on GitHub).
df_air = pd.read_csv(
    "https://raw.githubusercontent.com/kennedykwangari/Time-Series-Analysis-and-Forecasting-with-Python/refs/heads/master/AirPassengers.csv",
    sep=",",
)

# Preview the first five rows.
df_air.head(5)

In [None]:
# Load the ice cream interest dataset (CSV hosted on GitHub).
df_ice = pd.read_csv(
    "https://raw.githubusercontent.com/ritvikmath/Time-Series-Analysis/master/ice_cream_interest.csv"
)

# Preview the first ten rows.
df_ice.head(10)

In [None]:
# Show the first five rows of the TSA passenger data again before running the detectors on it.
df_tsa.head(5)

In [None]:
# Latest outliers in the TSA data, using classical seasonal decomposition.
x = pc.detect_outliers_latest_classic(df_tsa) 
print(x)

In [None]:
# Outliers across the TSA time series, using seasonal trend decomposition.
x = pc.detect_outliers_classic(df_tsa) 
print(x)

In [None]:
# Outliers for the current date only, using classical seasonal decomposition.
x = pc.detect_outliers_today_classic(df_tsa) 
print(x)

In [None]:
# Latest outliers in the TSA data, using classical seasonal decomposition.
# NOTE(review): this is the same call as an earlier cell in this example — confirm whether the repeat is needed.
x = pc.detect_outliers_latest_classic(df_tsa) 
print(x)

In [None]:
# Call the classic outlier-plot diagnostic on the TSA data.
pc.build_outliers_plot_classic(df_tsa)

In [None]:
# Seasonal-trend-decomposition outlier detection on the AirPassengers data;
# the return value is displayed as the cell output.
pc.detect_outliers_classic(df_air) 

In [None]:
# Call the classic outlier-plot diagnostic on the AirPassengers data.
pc.build_outliers_plot_classic(df_air)

In [None]:
# Diagnostic plot: call the month-wise plot helper on the TSA data.
pc.build_monthwise_plot(df_tsa)

In [None]:
# Diagnostic plot: call the classic seasonal-plot helper on the TSA data.
pc.build_seasonal_plot_classic(df_tsa)

In [None]:
# Diagnostic plot: call the classic seasonal-plot helper on the AirPassengers data.
pc.build_seasonal_plot_classic(df_air)

In [None]:
# Outliers across the ice cream interest series, using seasonal trend decomposition.
x = pc.detect_outliers_classic(df_ice) 
print(x)

### Example 2 - Finding Outliers Using Seasonal-Trend Decomposition using LOESS (STL)

In [None]:
# STL (Seasonal-Trend decomposition using LOESS) outlier detection on the ice cream interest data.
pc.detect_outliers_stl(df_ice)

In [None]:
# Call the STL outlier-plot diagnostic on the ice cream interest data
# (uses the name imported directly from pycatcher.diagnostics).
build_outliers_plot_stl(df_ice)

In [None]:
# STL outlier detection on the TSA data.
pc.detect_outliers_stl(df_tsa)

In [None]:
# Outliers for the current date only, using STL, on the TSA data.
pc.detect_outliers_today_stl(df_tsa) 

In [None]:
# Less than 2 years of data: a frame with only five daily observations.
df = pd.DataFrame({
    'Dt': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
    'Value': [100, 120, 140, 250, 450],
})

# Run the STL detector on the short series.
pc.detect_outliers_stl(df)

In [None]:
# Call the STL outlier-plot diagnostic on the TSA data.
pc.build_outliers_plot_stl(df_tsa)

In [None]:
# Call the STL outlier-plot diagnostic on the AirPassengers data.
pc.build_outliers_plot_stl(df_air)

In [None]:
# Diagnostic plot: call the STL seasonal-plot helper on the TSA data.
pc.build_seasonal_plot_stl(df_tsa)

In [None]:
# Diagnostic plot: call the STL seasonal-plot helper on the AirPassengers data.
pc.build_seasonal_plot_stl(df_air)

### Example 3 - Finding Outliers Using Multiple Seasonal-Trend Decomposition using LOESS (MSTL)

In [None]:
# MSTL (Multiple Seasonal-Trend decomposition using LOESS) outlier detection on the ice cream interest data.
pc.detect_outliers_mstl(df_ice)

In [None]:
# Less than 2 years of data: a frame with only five daily observations.
df = pd.DataFrame({
    'Dt': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
    'Value': [100, 120, 140, 750, 160],
})

# Run the MSTL detector on the short series.
pc.detect_outliers_mstl(df)

In [None]:
# Diagnostic plot: call the MSTL outlier-plot helper on the ice cream interest data
# (uses the name imported directly from pycatcher.diagnostics).
build_outliers_plot_mstl(df_ice)

In [None]:
# Call the MSTL outlier-plot diagnostic on the TSA data.
pc.build_outliers_plot_mstl(df_tsa)

In [None]:
# Latest outliers in the TSA data, using MSTL.
x = pc.detect_outliers_latest_mstl(df_tsa) 
print(x)

In [None]:
# Outliers for the current date only, using MSTL, on the TSA data.
pc.detect_outliers_today_mstl(df_tsa) 

In [None]:
# Diagnostic plot: call the MSTL seasonal-plot helper on the ice cream interest data.
build_seasonal_plot_mstl(df_ice)

In [None]:
# Diagnostic plot: call the MSTL seasonal-plot helper on the AirPassengers data.
build_seasonal_plot_mstl(df_air)

In [None]:
# Diagnostic plot: call the MSTL outlier-plot helper on the AirPassengers data.
build_outliers_plot_mstl(df_air)

In [None]:
# Diagnostic plot: call the MSTL seasonal-plot helper on the TSA data.
build_seasonal_plot_mstl(df_tsa)

### Example 4 - Finding Outliers Using the Generalized ESD or Seasonal ESD algorithm (Used by Twitter)

In [None]:
# Outlier detection on the TSA data using the Extreme Studentized Deviate (ESD) algorithm.
pc.detect_outliers_esd(df_tsa)

In [None]:
# Call the ESD outlier-plot diagnostic on the TSA data.
pc.build_outliers_plot_esd(df_tsa)

### Example 5 - Finding Outliers Using IQR

In [None]:
import pandas as pd

# Five daily observations: well under the two years of history that the
# seasonal decomposition detectors are meant for, so this example uses IQR.
df = pd.DataFrame({
    'Dt': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
    'Value': [100, 120, 140, 450, 160],
})

# Call the IQR detector directly so the cell demonstrates the method named in
# this example's heading (the original called detect_outliers_classic here).
pc.detect_outliers_iqr(df)

In [None]:
# Call the IQR plot diagnostic on the five-row frame built in the previous cell.
build_iqr_plot(df)

### Example 6 - Finding Outliers Using Moving Average

In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the sample is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# 200 draws from the standard normal distribution in a single column.
# NOTE(review): the moving-average cells that follow run on df_tsa, not on
# this frame — confirm whether this sample is still needed.
df = pd.DataFrame({'random numbers': rng.standard_normal(200)})
df.head(20)

In [None]:
# Outlier detection on the TSA data using the moving average method.
pc.detect_outliers_moving_average(df_tsa)

In [None]:
# Call the moving-average outlier-plot diagnostic on the TSA data.
pc.build_outliers_plot_moving_average(df_tsa)

### Example 7 - Miscellaneous diagnostic function - Stationarity Check

In [None]:
# Call the stationarity-check diagnostic on the ice cream interest data
# (uses the name imported directly from pycatcher.diagnostics).
conduct_stationarity_check(df_ice)