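# Exploration: anomaly detection on one ticker's daily prices

This notebook loads the daily price CSV for a single ticker, computes features with `compute_features`, flags anomalous days with `detect_anomalies`, plots the close price with the detected anomalies, and lists the flagged dates.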

# Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from src.features import compute_features
from src.detect import detect_anomalies
from src.config import TICKERS, DATA_DIR
from src.visualize import plot_anomalies

# Load data for one ticker (e.g. AAPL)

In [2]:
# Symbol to explore; its prices are read from "<DATA_DIR>/<ticker>.csv".
ticker = "AAPL"

csv_path = f"{DATA_DIR}/{ticker}.csv"
column_names = ["Date", "Close", "High", "Low", "Open", "Volume"]

# The first three lines of the file are skipped and no header row is read, so
# the column names are supplied explicitly (presumably the file starts with a
# multi-line export header -- confirm against the raw CSV).
# Column 0 ("Date") becomes the index and is parsed as dates.
df = pd.read_csv(
    csv_path,
    names=column_names,
    header=None,
    skiprows=3,
    index_col=0,
    parse_dates=True,
)
print(df.head())

                 Close        High         Low        Open     Volume
Date                                                                 
2022-01-03  178.645645  179.499574  174.425140  174.542917  104487900
2022-01-04  176.378342  179.558457  175.809061  179.254190   99310400
2022-01-05  171.686691  176.839648  171.411868  176.290001   94537600
2022-01-06  168.820694  172.059699  168.467348  169.507752   96904000
2022-01-07  168.987534  170.921120  167.868606  169.694226   86709100


# Compute features

In [3]:
# Add the engineered feature columns to the price frame.
# The preview printed below starts later than the raw data's first date, so
# the leading rows are absent after this step (presumably indicator warm-up
# rows dropped inside compute_features -- confirm in src.features).
# NOTE: `df` is rebound here; re-running this cell on its own feeds the
# already-featurized frame back in, so re-run the load cell first.
df = df.pipe(compute_features)
print(df.head())

                 Close        High         Low        Open     Volume  \
Date                                                                    
2022-02-18  164.416763  167.600916  163.325892  166.893338   82772700   
2022-02-22  161.488113  163.817264  159.355498  162.136728   91162800   
2022-02-23  157.311371  163.286575  156.996879  162.687087   90009200   
2022-02-24  159.935364  160.043469  149.380450  149.950456  141147500   
2022-02-25  162.008987  162.274323  158.097568  161.016384   91974200   

              return  volatility        rsi  macd_diff  bollinger_h  \
Date                                                                  
2022-02-18 -0.009356    0.021099  42.446365  -0.240071   177.424642   
2022-02-22 -0.017812    0.021508  38.208366  -0.538626   177.200743   
2022-02-23 -0.025864    0.022183  33.128249  -0.958377   177.132327   
2022-02-24  0.016680    0.022478  38.647232  -0.995896   176.736804   
2022-02-25  0.012965    0.022603  42.673683  -0.824726   176.0

# Detect anomalies

In [4]:
# Run the detector on the feature frame. It returns two objects: the frame,
# rebound to `df` (later cells read its "anomaly" column, presumably added
# here -- confirm in src.detect), and a second result kept as `anomalies`.
detection_output = detect_anomalies(df)
df, anomalies = detection_output

# Plot results

In [5]:
# Plot the close price with the detected anomalies for the selected ticker.
# `plot_anomalies` is imported in the imports cell at the top of the notebook
# so that every dependency is declared in one place.
# The recorded output shows that the helper also saves the figure as a PNG.
plot_anomalies(df, title=f"{ticker} Close Price with Detected Anomalies")

Saved plot: data/raw/AAPL_Close_Price_with_Detected_Anomalies.png


In [6]:
# List every row whose "anomaly" flag equals 1, showing only the close price.
print("Anomaly dates and close prices:")
flagged_rows = df["anomaly"] == 1
# One .loc selection picks the flagged rows and the "Close" column together.
print(df.loc[flagged_rows, ["Close"]])

Anomaly dates and close prices:
                 Close
Date                  
2023-08-04  180.185928
2024-01-02  184.290405
2024-06-11  206.185730
2024-06-12  212.078201
