# Imports

In [5]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
import seaborn as sns

from helper_functions import data_loading

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data loading

In [6]:
# Load the stock features through the project helper `data_loading`, passing 'cwt'.
# The Visualization cell reads the result as stocks_features['adnc']['feature_matrix'].
# NOTE(review): 'cwt' presumably selects the continuous-wavelet-transform data set —
# confirm against helper_functions.data_loading.
stocks_features = data_loading('cwt')

# Visualization

In [56]:
# Work on a copy so the matrix stored inside `stocks_features` is not modified.
stock_feature_matrix = stocks_features['adnc']['feature_matrix'].copy()

# Complex modulus of every sample: columns 1 and 2 are squared, summed and rooted,
# and the result is appended to the feature matrix as a new last column (index 3).
real_power = np.power(stock_feature_matrix[:, 1], 2)
imag_power = np.power(stock_feature_matrix[:, 2], 2)
complex_modulus = np.sqrt(real_power + imag_power)[:, np.newaxis]
stock_feature_matrix = np.hstack((stock_feature_matrix, complex_modulus))

# Build a (frequency x time_window) grid of moduli.
# Rows are labelled 0..n_cols-1 over and over, once per distinct frequency, which
# relies on the matrix being grouped by frequency with the time index running fastest.
abs_frequency_df = pd.DataFrame({
    'frequency': stock_feature_matrix[:, 0],
    'complex_modulus': stock_feature_matrix[:, 3],
})
n_freq = len(abs_frequency_df['frequency'].unique())
n_cols = len(abs_frequency_df) // n_freq
abs_frequency_df = (
    abs_frequency_df
    .assign(time_window=list(range(n_cols)) * n_freq)
    .pivot_table(index='frequency', columns='time_window', values='complex_modulus')
    # 'frequency' is the index after pivoting; highest frequency goes first.
    .sort_index(ascending=False)
)

# Min-max scale over the whole grid to [0, 1], then stretch to [-1, 1].
min_modulus = abs_frequency_df.min().min()
max_modulus = abs_frequency_df.max().max()
abs_frequency_df = (
    abs_frequency_df
    .sub(min_modulus)
    .div(max_modulus - min_modulus)
    .mul(2)
    .sub(1)
)

abs_frequency_df.head()

time_window,0,1,2,3,4,5,6,7,8,9,...,420,421,422,423,424,425,426,427,428,429
frequency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.4341,-0.842465,-0.859528,-0.888782,-0.925683,-0.959062,-0.979286,-0.971599,-0.953815,-0.937484,-0.926716,...,-0.99382,-0.995083,-0.995408,-0.994121,-0.991955,-0.989809,-0.988121,-0.987008,-0.986399,-0.986149
0.4051,-0.821094,-0.84265,-0.868111,-0.902836,-0.930424,-0.946919,-0.950327,-0.943003,-0.930491,-0.920296,...,-0.996943,-0.998543,-0.999432,-0.99662,-0.993629,-0.990857,-0.98855,-0.98681,-0.985658,-0.985085
0.3779,-0.808317,-0.823562,-0.849291,-0.890636,-0.921659,-0.949489,-0.959377,-0.947279,-0.92876,-0.915668,...,-0.99795,-0.997334,-0.996482,-0.995085,-0.993294,-0.99146,-0.989815,-0.988468,-0.987491,-0.986969
0.3526,-0.80172,-0.81594,-0.842465,-0.879212,-0.914635,-0.959647,-0.985356,-0.956165,-0.927564,-0.908047,...,-0.998163,-0.996609,-0.995229,-0.994098,-0.99321,-0.992526,-0.991955,-0.991402,-0.990888,-0.990562
0.329,-0.794197,-0.808317,-0.821094,-0.843208,-0.87012,-0.896116,-0.90554,-0.907099,-0.892794,-0.886443,...,-0.994566,-0.994406,-0.994136,-0.994074,-0.994333,-0.994807,-0.99519,-0.995169,-0.994799,-0.994463


In [109]:
# Turn the raw coefficient table of one stock into a feature matrix with one row per
# (frequency, time) pair holding the frequency, the real part and the imaginary part.
# NOTE(review): `stocks_dict` is not defined in any visible cell; it must already exist
# in the kernel when this cell runs — confirm where it is loaded.
stock_df = stocks_dict['adnc'].copy()

# Get cone of influence: first row, every column except the last one.
cone_of_influence = stock_df.iloc[0, :-1].astype(float)
stock_df = stock_df.drop(index=[0])

# Get frequencies: read from the last column, which is then removed by its name 'freq'.
frequencies = stock_df.iloc[:, -1].astype(float)
stock_df = stock_df.drop(columns=['freq'])

# Check if frequencies are repeated. When they are, the row label is kept next to each
# frequency, so `frequencies` becomes two columns (row label, frequency) instead of one.
if frequencies.unique().shape[0] != frequencies.shape[0]:
    frequencies = frequencies.to_frame().reset_index()

frequencies = frequencies.to_numpy()

# Convert dataframe to array, and convert array into complex.
# The cells are parsed as text: 'i' is rewritten to 'j' and blanks are stripped so
# that NumPy can cast the strings to complex128.
stock_array = stock_df.to_numpy().astype(str)
stock_array = np.char.replace(stock_array, 'i', 'j')
stock_array = np.char.replace(stock_array, ' ', '')
stock_array = stock_array.astype(np.complex128)

# Get real and imaginary coefficients
real_coefficients = np.real(stock_array)
imag_coefficients = np.imag(stock_array)

# Map frequencies to coefficients.
# Vectorised replacement for the former nested loop: each frequency entry is repeated
# once per time step and the coefficient grids are flattened row by row, so rows are
# ordered with the frequency index outermost and the time index innermost.
# With unique frequencies the result has 3 columns (frequency, real, imag); in the
# repeated-frequency case it has 4 (row label, frequency, real, imag) instead of the
# ragged samples the loop used to build.
n_time_steps = real_coefficients.shape[1]
frequency_columns = frequencies[:, np.newaxis] if frequencies.ndim == 1 else frequencies
stock_feature_matrix = np.column_stack((
    np.repeat(frequency_columns, n_time_steps, axis=0),
    real_coefficients.ravel(),
    imag_coefficients.ravel(),
))

In [107]:
# Inspect the shapes of the coefficient grids and of the frequency vector; the feature
# matrix cell indexes both grids by row using positions taken from `frequencies`.
real_coefficients.shape, imag_coefficients.shape, frequencies.shape

((59, 430), (59, 430), (59,))

(25370, 3)

In [5]:
# Display the table stored under the 'stf_spectogram' key for 'adnc'.
# NOTE(review): here stocks_dict['adnc'] is indexed by a string key, while the feature
# matrix cell calls .copy()/.iloc on stocks_dict['adnc'] directly — the two cells appear
# to have run against different versions of `stocks_dict`; confirm which one is current.
stocks_dict['adnc']['stf_spectogram']

Unnamed: 0,coef_1,coef_2,coef_3,coef_4,coef_5,coef_6,coef_7,freq
0,2.0498 + 0.0000i,5.5527 + 0.0000i,4.1448 + 0.0000i,1.7926 + 0.0000i,1.7292 + 0.0000i,1.7463 + 0.0000i,1.8886 + 0.0000i,0.0000
1,0.5915 - 1.6932i,1.4713 - 4.9145i,2.3026 - 3.0329i,0.6548 - 1.4702i,0.6347 - 1.4129i,0.6192 - 1.4353i,0.6918 - 1.5603i,0.0245
2,-0.8871 - 0.7559i,-3.3730 - 2.2295i,-0.6850 - 2.8983i,-0.7721 - 0.8543i,-0.7299 - 0.8218i,-0.7582 - 0.8056i,-0.8420 - 0.9206i,0.0491
3,-0.4736 + 0.1602i,-2.1189 + 1.7002i,-1.7762 - 1.0563i,-0.5975 + 0.2024i,-0.5631 + 0.1765i,-0.5550 + 0.1969i,-0.6710 + 0.2397i,0.0736
4,-0.1729 + 0.0869i,0.4498 + 1.4986i,-1.3776 + 0.2776i,-0.0097 + 0.2409i,-0.0246 + 0.2108i,-0.0228 + 0.2041i,-0.0016 + 0.2916i,0.0982
...,...,...,...,...,...,...,...,...
124,-0.0065 - 0.0104i,0.0431 + 0.0010i,0.0152 + 0.0488i,-0.0017 + 0.0003i,4.4396e-04 - 9.5597e-05i,0.0006 + 0.0013i,-0.0005 + 0.0029i,3.0434
125,-0.0075 + 0.0097i,0.0284 - 0.0283i,0.0557 + 0.0118i,0.0020 + 0.0021i,0.0031 + 0.0005i,0.0035 + 0.0000i,0.0038 + 0.0013i,3.0680
126,0.0097 + 0.0039i,0.0031 - 0.0378i,0.0367 - 0.0415i,0.0023 - 0.0010i,0.0016 - 0.0024i,0.0003 - 0.0022i,0.0008 - 0.0013i,3.0925
127,0.0011 - 0.0092i,-0.0252 - 0.0313i,-0.0180 - 0.0450i,5.2678e-04 - 8.8125e-05i,-2.6696e-04 + 5.1253e-05i,-0.0000 + 0.0020i,0.0010 + 0.0028i,3.1170
