In [None]:
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from typing import Tuple, Dict, List
import warnings

class PhaseSpaceAnalyzer:
    """Grid-based transition model over a two-dimensional phase space.

    The phase space is derived from a price series: the first coordinate is
    the smoothed relative gap between a short-window and a long-window
    Savitzky-Golay filter of the prices, the second coordinate is the first
    derivative of that gap. After scaling with ``self.scaler`` every point is
    mapped to a cell of a ``grid_size`` x ``grid_size`` grid, and ``fit``
    estimates, for each visited cell, the empirical distribution of the cell
    occupied ``tau`` samples later.
    """

    def __init__(
        self,
        window_short: int = 30,
        window_long: int = 60,
        tau: int = 10,
        grid_size: int = 500,
        poly_order: int = 3
    ):
        """
        Initialise the phase-space analyzer.

        Args:
            window_short: length of the short smoothing window (in samples)
            window_long: length of the long smoothing window (in samples)
            tau: forecast horizon, i.e. the lag in samples between the
                current cell and the future cell counted in ``fit``
            grid_size: number of cells along each axis of the phase-space grid
            poly_order: polynomial order of the Savitzky-Golay filter
        """
        self.window_short = window_short
        self.window_long = window_long
        self.tau = tau
        self.grid_size = grid_size
        self.poly_order = poly_order
        self.scaler = MinMaxScaler()
        # Maps a grid cell to {future cell: probability}; filled by ``fit``.
        self.cache = {}
        # Not read or written by any method of this class.
        self.grid_bounds = None

    def prepare_phase_space_data(self, prices: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Build the two phase-space coordinates from a price series.

        Args:
            prices: one-dimensional sequence of prices

        Returns:
            ``(diff_smooth, diff_deriv)``: the smoothed relative gap between
            the short and the long smooth, and the first derivative of that
            gap. Both have the same length as ``prices``.

        Raises:
            ValueError: if ``prices`` has fewer than
                ``max(window_short, window_long) + poly_order + 1`` samples.

        NOTE(review): non-finite prices and a zero long smooth are not
        rejected here; they propagate as NaN/inf into the returned arrays.
        ``fit`` and ``predict_probabilities`` reject such output explicitly.
        """
        # Make sure the series is long enough for the widest filter window
        min_length = max(self.window_short, self.window_long) + self.poly_order + 1
        if len(prices) < min_length:
            raise ValueError(f"Недостаточно данных. Минимальная длина: {min_length}")

        # Smooth the prices; the window is capped at len(prices) - 2
        smoothed_short = savgol_filter(
            prices,
            min(self.window_short, len(prices) - 2),
            self.poly_order
        )
        smoothed_long = savgol_filter(
            prices,
            min(self.window_long, len(prices) - 2),
            self.poly_order
        )

        # Relative gap between the two smooths, then its smooth and derivative
        diff = (smoothed_short - smoothed_long) / smoothed_long
        diff_smooth = savgol_filter(diff, self.window_short, self.poly_order)
        diff_deriv = savgol_filter(diff, self.window_short, self.poly_order, deriv=1)

        return diff_smooth, diff_deriv

    @staticmethod
    def _require_finite(values: np.ndarray) -> None:
        """Raise ``ValueError`` if ``values`` contains NaN or +/-inf."""
        if not np.isfinite(values).all():
            raise ValueError(
                "Фазовые координаты содержат NaN или бесконечные значения"
            )

    def _phase_space_matrix(self, df: pd.DataFrame) -> np.ndarray:
        """Return the validated ``(n_samples, 2)`` phase-space matrix of ``df['close']``."""
        diff_smooth, diff_deriv = self.prepare_phase_space_data(df['close'].values)
        matrix = np.column_stack([diff_smooth, diff_deriv])
        # Fail early with a clear message instead of crashing later inside
        # the integer conversion of a grid index.
        self._require_finite(matrix)
        return matrix

    def _get_grid_cells(self, points: np.ndarray) -> List[Tuple[int, int]]:
        """Vectorised ``_get_grid_cell``: map every row of ``points`` to a cell."""
        points = np.asarray(points, dtype=float)
        self._require_finite(points)
        top = self.grid_size - 1
        # Values are clipped to [0, top] first, so the integer cast truncates
        # exactly like ``int()`` does in ``_get_grid_cell``.
        indices = np.clip(points * top, 0, top).astype(np.int64)
        return [(i_x, i_y) for i_x, i_y in indices.tolist()]

    def fit(self, df: pd.DataFrame) -> None:
        """
        Estimate the cell-to-cell transition probabilities from history.

        Args:
            df: frame with a ``'close'`` column of prices

        Raises:
            ValueError: if the series is too short or the phase-space
                coordinates contain NaN/inf. In that case neither the scaler
                nor ``self.cache`` is modified.
        """
        phase_space_data = self._phase_space_matrix(df)

        # Scale both coordinates and map every sample to its grid cell
        self.scaler.fit(phase_space_data)
        cells = self._get_grid_cells(self.scaler.transform(phase_space_data))

        # Count transitions "cell at t" -> "cell at t + tau"
        counts = {}
        for t in range(len(cells) - self.tau):
            row = counts.setdefault(cells[t], {})
            future_cell = cells[t + self.tau]
            row[future_cell] = row.get(future_cell, 0) + 1

        # Turn counts into probabilities; publish the cache only when complete
        cache = {}
        for current_cell, row in counts.items():
            total = sum(row.values())
            cache[current_cell] = {
                future_cell: hits / total for future_cell, hits in row.items()
            }
        self.cache = cache

    def predict_probabilities(self, df: pd.DataFrame) -> List[Dict[Tuple[int, int], float]]:
        """
        Look up the future-cell distribution for every row of ``df``.

        Args:
            df: frame with a ``'close'`` column of prices

        Returns:
            One ``{future cell: probability}`` dict per row. Rows whose cell
            was never seen by ``fit`` use the distribution of the nearest
            cached cell; the dict is empty when the cache itself is empty.
            The dicts are the cache's own objects, not copies.

        Raises:
            ValueError: if the series is too short or the phase-space
                coordinates contain NaN/inf.
        """
        phase_space_data = self._phase_space_matrix(df)
        cells = self._get_grid_cells(self.scaler.transform(phase_space_data))

        # Many rows share a cell, so the linear nearest-cell scan is done at
        # most once per distinct unseen cell.
        fallback = {}
        predictions = []
        for current_cell in cells:
            if current_cell in self.cache:
                predictions.append(self.cache[current_cell])
                continue
            if current_cell not in fallback:
                nearest_cell = self._find_nearest_cell(current_cell)
                fallback[current_cell] = self.cache.get(nearest_cell, {})
            predictions.append(fallback[current_cell])

        return predictions

    def _get_grid_cell(self, point: np.ndarray) -> Tuple[int, int]:
        """
        Map one scaled point ``(x, y)`` to its ``(i_x, i_y)`` grid cell.

        Each coordinate is multiplied by ``grid_size - 1``, clipped to
        ``[0, grid_size - 1]`` and truncated to an integer.
        """
        x, y = point
        i_x = int(np.clip(x * (self.grid_size - 1), 0, self.grid_size - 1))
        i_y = int(np.clip(y * (self.grid_size - 1), 0, self.grid_size - 1))
        return (i_x, i_y)

    def _find_nearest_cell(self, cell: Tuple[int, int]) -> Tuple[int, int]:
        """
        Return the cached cell closest to ``cell`` (Euclidean distance).

        Ties go to the cell that was inserted into the cache first. When the
        cache is empty, ``cell`` itself is returned.
        """
        if not self.cache:
            return cell

        # Squared distance orders cells exactly like the distance itself and
        # stays in exact integer arithmetic; min() keeps the first minimum.
        return min(
            self.cache,
            key=lambda cached: (cell[0] - cached[0]) ** 2 + (cell[1] - cached[1]) ** 2,
        )

    def plot_forecast(self, df: pd.DataFrame, num_points: int = 1000) -> None:
        """
        Plot the prices and a heat map of the predicted transition probabilities.

        Args:
            df: frame with a ``'close'`` column of prices
            num_points: number of leading rows shown in the price plot and
                accumulated into the heat map

        Note: predictions are computed for the whole of ``df`` and then
        truncated to ``num_points``.
        """
        predictions = self.predict_probabilities(df)

        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

        # Price chart
        df['close'].iloc[:num_points].plot(ax=ax1, color='black', alpha=0.7)
        ax1.set_title('Цена BTC/USD')

        # Heat map: sum the predicted probability of each future cell
        # (rows are i_y, columns are i_x)
        probabilities = np.zeros((self.grid_size, self.grid_size))
        for pred in predictions[:num_points]:
            for (i_x, i_y), prob in pred.items():
                probabilities[i_y, i_x] += prob

        im = ax2.imshow(probabilities, cmap='coolwarm', aspect='auto')
        plt.colorbar(im, ax=ax2)
        ax2.set_title('Тепловая карта вероятностей переходов')

        plt.tight_layout()
        plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# Load the archive with the BTC/USD candles.
file_path = '../npz/BTCUSD_1T.npz'

# SECURITY NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code — only open archives from a trusted source.
# The context manager closes the archive once the array has been read.
with np.load(file_path, allow_pickle=True) as data:
    # The candles are expected under the 'data' key.
    if 'data' not in data:
        raise ValueError("Файл не содержит ключа 'data'")
    raw_data = data['data']

# Build the DataFrame; assumes six columns in this order — TODO confirm
# against the code that writes the archive.
columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
df = pd.DataFrame(raw_data, columns=columns)

# Parse the timestamps and use them as the index.
# NOTE(review): if the timestamps are stored as integers, check that their
# unit matches the pd.to_datetime default.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')

# Convert the price/volume columns to numbers; values that cannot be parsed
# become NaN because of errors='coerce'.
for col in ['open', 'high', 'low', 'close', 'volume']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

In [None]:
# 1) Chronological split: the first 80% of the rows train the model,
#    the remaining rows are held out for testing.
N = len(df)
train_size = int(0.8 * N)
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
# Build the analyzer and estimate the transition probabilities on the
# training rows.
analyzer = PhaseSpaceAnalyzer(
    window_short=30,
    window_long=60,
    tau=10,
)
analyzer.fit(train_df)

# One transition distribution per held-out row.
predictions = analyzer.predict_probabilities(test_df)

# Price chart and probability heat map for the first 500 held-out rows.
analyzer.plot_forecast(test_df, num_points=500)

In [None]:
# NOTE(review): this backend magic is the last cell, after the cell that calls
# plot_forecast — presumably it has to run before any figure is created for
# the interactive backend to apply; confirm and move it to the top if so.
%matplotlib widget