In [1]:
import pandas as pd
from anomaly_detector import AnomalyDetector
from signal_classifier import SignalClassifier

In [2]:
# Read the semicolon-delimited combined dataset, then preview its first rows.
df = pd.read_csv(
    "./raw_data/combined_data.csv",
    delimiter=";",  # `delimiter` is pandas' documented alias for `sep`
)
df.head()

Unnamed: 0,UID,UTC,open,close,high,low,volume
0,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:01:00+00:00,0.01638,0.01648,0.01648,0.01638,21
1,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:02:00+00:00,0.01638,0.01648,0.01648,0.01638,15
2,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:03:00+00:00,0.01648,0.01648,0.01648,0.01648,5
3,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:04:00+00:00,0.01648,0.01648,0.01648,0.016385,26
4,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:05:00+00:00,0.01648,0.01648,0.01648,0.01648,3


In [3]:
# Model settings later passed to AnomalyDetector through `model_params`.
# NOTE(review): the key names match scikit-learn's IsolationForest arguments;
# confirm in anomaly_detector which estimator actually consumes them.
custom_params = dict(
    n_estimators=300,
    contamination=0.1,  # 10% anomalies
    max_samples=0.8,
    random_state=123,
)

# Build the detector over the raw frame, passing custom_params as `model_params`.
anomaly_detector = AnomalyDetector(df, model_params=custom_params)

In [4]:
# generate_features() returns a pair: a frame and a list of feature names.
# In the outputs below the frame carries two extra columns (`return`, `amplitude`)
# and the list is ['return', 'amplitude'].
extended_df, features = anomaly_detector.generate_features()

In [5]:
# Inspect the frame returned by generate_features() (pandas truncates the repr).
extended_df

Unnamed: 0,UID,UTC,open,close,high,low,volume,return,amplitude
0,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:01:00+00:00,0.01638,0.01648,0.01648,0.016380,21,0.006105,0.006105
1,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:02:00+00:00,0.01638,0.01648,0.01648,0.016380,15,0.006105,0.006105
2,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:03:00+00:00,0.01648,0.01648,0.01648,0.016480,5,0.000000,0.000000
3,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:04:00+00:00,0.01648,0.01648,0.01648,0.016385,26,0.000000,0.005798
4,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:05:00+00:00,0.01648,0.01648,0.01648,0.016480,3,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
802354,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:36:00+00:00,279.00000,279.00000,279.00000,279.000000,1,0.000000,0.000000
802355,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:39:00+00:00,279.85000,279.85000,279.85000,279.850000,1,0.000000,0.000000
802356,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:40:00+00:00,279.85000,279.85000,279.85000,279.850000,3,0.000000,0.000000
802357,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:44:00+00:00,279.85000,279.85000,279.85000,279.850000,1,0.000000,0.000000


In [6]:
# Show the feature names returned by generate_features().
features

['return', 'amplitude']

In [7]:
# Run detection; the returned frame adds `anomaly_score` and `anomaly` columns.
# NOTE(review): in the displayed rows `anomaly_score` only takes the values -1/1
# and mirrors `anomaly` (-1 <-> 1, 1 <-> 0), so it looks like a predicted label
# rather than a continuous score -- confirm in AnomalyDetector.detect_anomalies.
df_with_anomalies = anomaly_detector.detect_anomalies()
df_with_anomalies

Unnamed: 0,UID,UTC,open,close,high,low,volume,return,amplitude,anomaly_score,anomaly
0,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:01:00+00:00,0.01638,0.01648,0.01648,0.016380,21,0.006105,0.006105,-1,1
1,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:02:00+00:00,0.01638,0.01648,0.01648,0.016380,15,0.006105,0.006105,-1,1
2,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:03:00+00:00,0.01648,0.01648,0.01648,0.016480,5,0.000000,0.000000,1,0
3,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:04:00+00:00,0.01648,0.01648,0.01648,0.016385,26,0.000000,0.005798,-1,1
4,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:05:00+00:00,0.01648,0.01648,0.01648,0.016480,3,0.000000,0.000000,1,0
...,...,...,...,...,...,...,...,...,...,...,...
802354,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:36:00+00:00,279.00000,279.00000,279.00000,279.000000,1,0.000000,0.000000,1,0
802355,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:39:00+00:00,279.85000,279.85000,279.85000,279.850000,1,0.000000,0.000000,1,0
802356,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:40:00+00:00,279.85000,279.85000,279.85000,279.850000,3,0.000000,0.000000,1,0
802357,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 20:44:00+00:00,279.85000,279.85000,279.85000,279.850000,1,0.000000,0.000000,1,0


In [8]:
# Fetch the anomaly subset; every row displayed below has anomaly == 1 and the
# original row index is kept (0, 1, 3, 5, ...).
anomalies_df = anomaly_detector.get_anomalies()
anomalies_df

Unnamed: 0,UID,UTC,open,close,high,low,volume,return,amplitude,anomaly_score,anomaly
0,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:01:00+00:00,0.016380,0.01648,0.01648,0.016380,21,0.006105,0.006105,-1,1
1,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:02:00+00:00,0.016380,0.01648,0.01648,0.016380,15,0.006105,0.006105,-1,1
3,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:04:00+00:00,0.016480,0.01648,0.01648,0.016385,26,0.000000,0.005798,-1,1
5,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:06:00+00:00,0.016385,0.01648,0.01648,0.016385,9,0.005798,0.005798,-1,1
6,8e2b0325-0292-4654-8a18-4f63ed3b0e09,2023-01-01 09:07:00+00:00,0.016385,0.01648,0.01648,0.016385,10,0.005798,0.005798,-1,1
...,...,...,...,...,...,...,...,...,...,...,...
802303,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 18:46:00+00:00,278.320000,279.83000,279.83000,278.320000,6,0.005425,0.005425,-1,1
802304,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 18:47:00+00:00,279.830000,279.84000,279.84000,279.830000,6,0.000036,0.000036,-1,1
802322,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 19:23:00+00:00,279.850000,278.90000,279.85000,278.900000,2,-0.003395,0.003406,-1,1
802325,a78b8349-a1dc-447d-9277-1d75826d089a,2024-12-30 19:30:00+00:00,278.900000,279.84000,279.84000,278.900000,4,0.003370,0.003370,-1,1


In [9]:
# Visualize anomalies for a single instrument, selected by its UID
# (this UID is the one shown in the tail rows of the frames above).
anomaly_detector.visualize_anomalies(instrument_id="a78b8349-a1dc-447d-9277-1d75826d089a")

In [10]:
# Feature names passed to SignalClassifier, built per indicator family and window.
# NOTE(review): this rebinds `features`, which previously held the
# ['return', 'amplitude'] list returned by generate_features().
features = [
    *(f"sma_{window}" for window in (3, 5, 7, 10)),
    *(f"ema_{window}" for window in (3, 5, 7, 10)),
    *(f"rsi_{window}" for window in (7, 14, 28)),
    *(f"atr_{window}" for window in (7, 14, 28)),
    *(f"volume_ratio_{window}" for window in (10, 20, 30)),
    *(f"amplitude_mean_{window}" for window in (10, 20, 30)),
    *(f"return_lag_{window}" for window in (3, 5, 7, 10)),
]

In [11]:
# Build the classifier on the anomaly-annotated frame with the feature names above.
# NOTE(review): lookahead=5 is presumably the label horizon (rows/minutes ahead)
# -- confirm in signal_classifier.
signal_classifier = SignalClassifier(df_with_anomalies, features=features, lookahead=5)





In [12]:
# Fit the model using two date cut-offs.
# NOTE(review): presumably data up to train_period_end is used for fitting and the
# span up to eval_period_end serves as the "test" set in the log below, where an
# overfitting detector stops training early -- confirm the split in SignalClassifier.train.
signal_classifier.train(train_period_end="2024-08-31", eval_period_end="2024-09-30")

0:	learn: 0.6873788	test: 0.6872651	best: 0.6872651 (0)	total: 223ms	remaining: 1m 51s
100:	learn: 0.6203562	test: 0.6199562	best: 0.6199562 (100)	total: 7.07s	remaining: 27.9s
200:	learn: 0.6112377	test: 0.6201243	best: 0.6195106 (178)	total: 14.1s	remaining: 21s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6195105913
bestIteration = 178

Shrink model to first 179 iterations.
Модель успешно обучена!


In [13]:
# Returns a nested dict with per-class precision/recall/f1-score/support plus
# accuracy and macro/weighted averages (see output below).
# NOTE(review): which date range is scored is not visible here -- confirm in evaluate().
signal_classifier.evaluate()

{'0': {'precision': 0.6767515923566879,
  'recall': 0.7012237041110958,
  'f1-score': 0.6887703423593761,
  'support': 7273.0},
 '1': {'precision': 0.6820310213637694,
  'recall': 0.656756375933493,
  'f1-score': 0.6691551216710933,
  'support': 7097.0},
 'accuracy': 0.6792623521224774,
 'macro avg': {'precision': 0.6793913068602286,
  'recall': 0.6789900400222943,
  'f1-score': 0.6789627320152347,
  'support': 14370.0},
 'weighted avg': {'precision': 0.679358976327687,
  'recall': 0.6792623521224774,
  'f1-score': 0.6790828530605073,
  'support': 14370.0}}

In [14]:
# Show the feature-importance table (columns `Feature Id`, `Importances`).
# NOTE(review): the output lists `return`, `amplitude` and `UID`, none of which are in
# the `features` list passed to SignalClassifier -- the classifier apparently adds
# them itself; confirm this is intended.
signal_classifier.feature_importance()

Unnamed: 0,Feature Id,Importances
0,return,18.186085
1,rsi_7,15.768933
2,rsi_28,6.589752
3,amplitude,5.01781
4,amplitude_mean_30,4.689341
5,rsi_14,4.573413
6,amplitude_mean_20,3.653304
7,volume_ratio_30,3.64969
8,UID,3.165539
9,volume_ratio_20,2.94031
