In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
import paramiko

from statistic import Statistic
from statistic import *
from FslAccessor import FslAccessor

import glob
import random
from tqdm import tqdm_notebook as tqdm
import os
import numpy as np

import matplotlib.pyplot as plt

In [2]:
# Standard values in a sliding window
def get_mean_in_steps(curve, interval_size, step):
    """ Takes the mean on a sliding window of the curve.

    Windows start at index 0 and advance by ``step``. A window is used only
    while ``start + interval_size < len(curve)``, so a window that would end
    exactly on the last sample is not included (kept as-is so the output length
    matches the other sliding-window helpers in this notebook).

    :param curve: a 1d signal (numpy array of real numbers)
    :param interval_size: the size of the sliding window (positive integer)
    :param step: the step size in the sliding window (positive integer)
    :returns: a list which contains the mean of each segment
    :raises ValueError: if ``step`` is not positive (the window would never advance)
    """
    if step <= 0:
        # A non-positive step used to leave the loop stuck on the same window forever.
        raise ValueError("step must be a positive integer, got {0!r}".format(step))
    # Exclusive upper bound on window start indices (strict '<' in the original loop).
    start_limit = len(curve) - interval_size
    return [np.mean(curve[start: start + interval_size])
            for start in range(0, start_limit, step)]

def get_var_in_steps(curve, interval_size, step):
    """ Takes the variance on a sliding window of the curve.

    Windows start at index 0 and advance by ``step``. A window is used only
    while ``start + interval_size < len(curve)``, so a window that would end
    exactly on the last sample is not included (kept as-is so the output length
    matches the other sliding-window helpers in this notebook).

    :param curve: a 1d signal (numpy array of real numbers)
    :param interval_size: the size of the sliding window (positive integer)
    :param step: the step size in the sliding window (positive integer)
    :returns: a list which contains the variance (``np.var``) of each segment
    :raises ValueError: if ``step`` is not positive (the window would never advance)
    """
    if step <= 0:
        # A non-positive step used to leave the loop stuck on the same window forever.
        raise ValueError("step must be a positive integer, got {0!r}".format(step))
    # Exclusive upper bound on window start indices (strict '<' in the original loop).
    start_limit = len(curve) - interval_size
    return [np.var(curve[start: start + interval_size])
            for start in range(0, start_limit, step)]

# Now we get some spectral sliding window measurements
def fourier_over_time_overlap_log(curve, interval_size, step):
    """ Takes a curve and does a fourier transform on a sliding window of the curve.

    For every window the result is ``log(fftshift(abs(fft(window))))``, i.e. the
    natural log of the magnitude spectrum with the zero-frequency bin moved to
    the centre. A bin whose magnitude is exactly zero yields ``-inf``.

    Windows start at index 0 and advance by ``step``. A window is used only
    while ``start + interval_size < len(curve)``, so a window that would end
    exactly on the last sample is not included.

    :param curve: a 1d signal (numpy array of real numbers)
    :param interval_size: the size of the sliding window (positive integer)
    :param step: the step size in the sliding window (positive integer)
    :returns: a list with one array of length ``interval_size`` per window,
              holding the log fourier magnitudes of that segment
    :raises ValueError: if ``step`` is not positive (the window would never advance)
    """
    if step <= 0:
        # A non-positive step used to leave the loop stuck on the same window forever.
        raise ValueError("step must be a positive integer, got {0!r}".format(step))
    # Exclusive upper bound on window start indices (strict '<' in the original loop).
    start_limit = len(curve) - interval_size
    spectra = []
    for start in range(0, start_limit, step):
        window = curve[start: start + interval_size]
        magnitude = np.abs(np.fft.fft(window))
        spectra.append(np.log(np.fft.fftshift(magnitude)))
    return spectra

# a single measure (weighted mean) from the spectral data
def mean_log_frequency(my_signals):
    """Turns a list of log fourier signals into a list of the same size holding one weighted mean per signal.

    Each signal is multiplied element-wise by weights that grow linearly with the
    index, from 0 at the first entry to 0.5 at the last, and the mean of the
    weighted values is taken. This is one possible feature we can extract from
    these signals.

    NOTE(review): the weights follow index order. Signals produced by
    fourier_over_time_overlap_log are fftshift-ed (zero frequency in the middle),
    so index order is not the same as |frequency| order - confirm this weighting
    is the intended one.

    :param my_signals: a list which contains the fourier signals (all of the same
                       length as the first one, which sets the weight vector)
    :returns: a list of the same size with the weighted means; an empty input
              gives an empty list
    """
    if len(my_signals) == 0:
        # Nothing to weight; previously my_signals[0] raised IndexError here.
        return []
    weights = np.linspace(0, 0.5, len(my_signals[0]))
    return [np.mean(np.multiply(signal, weights)) for signal in my_signals]

# binning the spectral data and getting the mean and variance
def binned_fourier_means(my_signals, n):
    """
    Turns a list of fourier signals into a list of per-signal bin means.

    Every signal is cut into ``n`` consecutive bins of ``len(signal) // n``
    samples each; samples left over at the end (when the length is not a
    multiple of ``n``) are ignored.

    :param my_signals: a list which contains the fourier signals
    :param n: int, the number of bins (positive)
    :returns: a list of the same length as my_signals with each entry in the list being a list of length n
            (each entry of this list being the bin means)
    :raises ValueError: if ``n`` is not positive or a signal has fewer than ``n`` samples
    """
    if n <= 0:
        raise ValueError("n must be a positive integer, got {0!r}".format(n))
    out = []
    for signal in my_signals:
        values = np.asarray(signal)
        # Integer bin width; plain '/' would give a float and break the slicing.
        length = len(values) // n
        if length == 0:
            raise ValueError(
                "signal of length {0} cannot be split into {1} bins".format(len(values), n))
        # One row per bin, then reduce each row.
        bins = values[:length * n].reshape(n, length)
        out.append(list(bins.mean(axis=1)))
    return out

def binned_fourier_vars(my_signals, n):
    """
    Turns a list of fourier signals into a list of per-signal bin variances.

    Every signal is cut into ``n`` consecutive bins of ``len(signal) // n``
    samples each; samples left over at the end (when the length is not a
    multiple of ``n``) are ignored.

    :param my_signals: a list which contains the fourier signals
    :param n: int, the number of bins (positive)
    :returns: a list of the same length as my_signals with each entry in the list being a list of length n
            (each entry of this list being the bin variances)
    :raises ValueError: if ``n`` is not positive or a signal has fewer than ``n`` samples
    """
    if n <= 0:
        raise ValueError("n must be a positive integer, got {0!r}".format(n))
    out = []
    for signal in my_signals:
        values = np.asarray(signal)
        # Integer bin width; plain '/' would give a float and break the slicing.
        length = len(values) // n
        if length == 0:
            raise ValueError(
                "signal of length {0} cannot be split into {1} bins".format(len(values), n))
        # One row per bin, then reduce each row.
        bins = values[:length * n].reshape(n, length)
        out.append(list(bins.var(axis=1)))
    return out

 Let's get sliding-window values for our three types of data measurements. Our options are to:
 1. sample from each window,
 2. use the first/last/middle value of each window, or
 3. use the mean of each window.
 
Based on the results of the cell below, it looks like they all typically have a small coefficient of variation (the ratio of the standard deviation to the mean), so I think that taking the mean in these windows should be fine.

In [None]:
# Pick one day and build the glob pattern for that day's gzipped CSV files.
day = "110617"
a_day = "fsl_groups/fslg_market_robustness/compute/NASDAQ/ProcessedData/{0}/".format(day)
csv_ptn = a_day + '{0}_*.csv.gz'.format(day)

# FslAccessor is a project-local helper; presumably it fetches the matching files
# over SSH (the notebook output shows paramiko errors) - TODO confirm.
fa = FslAccessor()
# Kept for debugging: list the files that match the pattern.
#ls = fa.connect_apply(fa.lst_files, path=csv_ptn)
#print(ls)
# NOTE(review): f_num=100 looks like the number of files to pull - confirm against pull_obaos.
lst = fa.pull_obaos(path=csv_ptn, f_num=100)

In [None]:
# Sum the delta-depth flux trace of every pulled stock into one array.
# (16 - 9.5) * 3600 / 0.01 = 2,340,000 slots, one per d_t = 0.01 step.
flux_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

for stock in tqdm(lst):
    # Entries of lst may be None; those are skipped.
    if stock is None:
        continue
    stat = Statistic(stock)
    dd = stat.delta_depth_flux_mat(d_t=0.01)
    # Column 0 is kept as `time`, column 1 is the trace that gets accumulated.
    time = dd[:, 0]
    trace = dd[:, 1]
    flux_low += trace

In [None]:
# Sanity check: the accumulated delta-depth trace should still have one entry per slot.
print(np.shape(flux_low))

In [None]:
# Persist the summed delta-depth trace for this day so later cells can reload it.
np.save("delta_depth_flux_{0}_low.npy".format(day), flux_low)

In [None]:
# Sum the depth flux trace of every pulled stock into one array.
# (16 - 9.5) * 3600 / 0.01 = 2,340,000 slots, one per d_t = 0.01 step.
flux_d = np.zeros(int((16 - 9.5) * 3600 / 0.01))

for stock in tqdm(lst):
    # Entries of lst may be None; those are skipped.
    if stock is None:
        continue
    stat = Statistic(stock)

    dd = stat.depth_flux_mat(d_t=0.01)
    # Column 0 is kept as `time`, column 1 is the trace that gets accumulated.
    time = dd[:, 0]
    trace = dd[:, 1]
    flux_d += trace

In [None]:
# Sanity check: the accumulated depth trace should still have one entry per slot.
print(np.shape(flux_d))

In [None]:
# Persist the summed depth trace for this day so later cells can reload it.
np.save("depth_flux_{0}_low.npy".format(day), flux_d)

In [None]:
# Sum the message flux trace of every pulled stock into one array.
# (16 - 9.5) * 3600 / 0.01 = 2,340,000 slots, one per d_t = 0.01 step.
flux_m = np.zeros(int((16 - 9.5) * 3600 / 0.01))

for stock in tqdm(lst):
    # Entries of lst may be None; those are skipped.
    if stock is None:
        continue
    stat = Statistic(stock)

    dd = stat.message_flux_mat(d_t=0.01)
    # Column 0 is kept as `time`, column 1 is the trace that gets accumulated.
    time = dd[:, 0]
    trace = dd[:, 1]
    flux_m += trace

In [None]:
# Sanity check: the accumulated message trace should still have one entry per slot.
print(np.shape(flux_m))

In [None]:
# Persist the summed message trace for this day so later cells can reload it.
np.save("message_flux_{0}_low.npy".format(day), flux_m)

In [None]:
# start over from what we saved
# Re-import numpy so this cell can be the first one run after a kernel restart.
# NOTE: `day` is not set here; it must already hold the day string of the files to load.
import numpy as np

# Reload the three summed traces written by the save cells.
flux_low = np.load("delta_depth_flux_{0}_low.npy".format(day))
flux_d = np.load("depth_flux_{0}_low.npy".format(day))
flux_m = np.load("message_flux_{0}_low.npy".format(day))

In [None]:
# Weighted-mean log spectrum of the delta-depth trace: window of 360000 samples, step of 1000.
mean_log_freq_delta_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_low, 360000, 1000))
# If needed we could go down to steps as small as 100 with this particular data set; it takes a while but it will run.

In [None]:
# Save the delta-depth spectral feature for this day.
np.save("mlf_delta_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_delta_depth)


In [None]:
# Weighted-mean log spectrum of the depth trace: window of 360000 samples, step of 1000.
mean_log_freq_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_d, 360000, 1000))


In [None]:
# Save the depth spectral feature for this day.
np.save("mlf_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_depth)


In [None]:
# Weighted-mean log spectrum of the message trace: window of 360000 samples, step of 1000.
mean_log_freq_message = mean_log_frequency(fourier_over_time_overlap_log(flux_m, 360000, 1000))


In [None]:
# Save the message spectral feature for this day.
np.save("mlf_message_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_message)

In [None]:
# Sliding-window mean of the delta-depth trace: window of 360000 samples, step of 1000.
flux_dd_steps = get_mean_in_steps(flux_low, 360000, 1000)

In [None]:
# Save the windowed delta-depth means for this day.
np.save("avg_delta_depth_{0}_low_hour_window_step1000.npy".format(day), flux_dd_steps)

In [None]:
# Sliding-window mean of the depth trace: window of 360000 samples, step of 1000.
flux_d_steps = get_mean_in_steps(flux_d, 360000, 1000)

In [None]:
# Save the windowed depth means for this day.
np.save("avg_depth_{0}_low_hour_window_step1000.npy".format(day), flux_d_steps)

In [None]:
# Sliding-window mean of the message trace: window of 360000 samples, step of 1000.
flux_m_steps = get_mean_in_steps(flux_m, 360000, 1000)

In [None]:
# Save the windowed message means for this day.
np.save("avg_message_{0}_low_hour_window_step1000.npy".format(day), flux_m_steps)

In [None]:
# Get even more features!!!
# Candidate days to process.
# NOTE(review): '110617' appears twice (positions 6 and 15) - confirm whether that is intended.
days = ['101416', '102116', '102816', '111917', '112617', '120317', '110617', '103117', '111317', '062516', '071016', '091316', '110816', '022817', '042617', '110617', '072517', '121217']
print(len(days), type(days[6]))
# Quick check of the ask flux on the last `stat` left in the kernel.
# The keyword is d_t, matching every other *_flux_mat call in this notebook (it was `dt` here).
stat.ask_flux_mat(d_t=.01)

In [5]:
# Full per-day pipeline: for each day, pull the files, build five summed traces
# (delta depth, depth, message, bid, ask), save them, then derive and save the
# sliding-window features (spectral weighted mean, mean, variance).
days = ['110617', '072517', '121217']

for day in days:
    print("The day is ", day)
    a_day = "fsl_groups/fslg_market_robustness/compute/NASDAQ/ProcessedData/{0}/".format(day)
    csv_ptn = a_day + '{0}_*.csv.gz'.format(day)

    # NOTE: f_num=50 here, while the single-day cell earlier in the notebook uses f_num=100.
    fa = FslAccessor()
    lst = fa.pull_obaos(path=csv_ptn, f_num=50)

    # (16 - 9.5) * 3600 / 0.01 = 2,340,000 slots, one per d_t = 0.01 step.
    flux_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

    # Delta-depth flux, summed over all stocks that were pulled successfully.
    for stock in tqdm(lst):
        if stock is not None:
            stat = Statistic(stock)
            dd = stat.delta_depth_flux_mat(d_t=0.01)
            time, trace = dd[:, 0], dd[:, 1]
            flux_low += trace

    print(np.shape(flux_low))

    np.save("delta_depth_flux_{0}_low.npy".format(day), flux_low)

    flux_d = np.zeros(int((16 - 9.5) * 3600 / 0.01))

    # Depth flux, summed over all stocks that were pulled successfully.
    for stock in tqdm(lst):
        if stock is not None:
            stat = Statistic(stock)

            dd = stat.depth_flux_mat(d_t=0.01)
            time, trace = dd[:, 0], dd[:, 1]
            flux_d += trace

    print(np.shape(flux_d))

    np.save("depth_flux_{0}_low.npy".format(day), flux_d)

    flux_m = np.zeros(int((16 - 9.5) * 3600 / 0.01))

    # Message flux, summed over all stocks that were pulled successfully.
    for stock in tqdm(lst):
        if stock is not None:
            stat = Statistic(stock)

            dd = stat.message_flux_mat(d_t=0.01)
            time, trace = dd[:, 0], dd[:, 1]
            flux_m += trace

    print(np.shape(flux_m))

    np.save("message_flux_{0}_low.npy".format(day), flux_m)

    # Now let's get the bid and the ask (aggregated, I think...
    #   but I am not sure: this was adapted from Humphrey's code without fully understanding it.)

    bid_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

    # NOTE(review): unlike the loops above, Statistic is built before any None check
    # and the guard is on stat.obao - confirm Statistic accepts a None stock.
    for stock in tqdm(lst):
        stat = Statistic(stock)
        if stat.obao is not None:
            dd = stat.bid_flux_mat(d_t=0.01)
            time, trace = dd[:, 0], dd[:, 1]
            bid_low += trace


    np.save("bid_{0}_low.npy".format(day), bid_low)

    ask_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

    # Same pattern as the bid loop, for the ask side.
    for stock in tqdm(lst):
        stat = Statistic(stock)
        if stat.obao is not None:
            dd = stat.ask_flux_mat(d_t=0.01)
            time, trace = dd[:, 0], dd[:, 1]
            ask_low += trace


    np.save("ask_{0}_low.npy".format(day), ask_low)

    # start over from what we saved
    # (numpy is already imported at the top of the notebook; this repeats on every iteration)
    import numpy as np

    # Reload the five traces that were just written for this day.
    flux_low = np.load("delta_depth_flux_{0}_low.npy".format(day))
    flux_d = np.load("depth_flux_{0}_low.npy".format(day))
    flux_m = np.load("message_flux_{0}_low.npy".format(day))
    ask_low = np.load("ask_{0}_low.npy".format(day))
    bid_low = np.load("bid_{0}_low.npy".format(day))

    # Collect the spectral activity (window of 360000 samples, step of 1000)
    mean_log_freq_delta_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_low, 360000, 1000))
    np.save("mlf_delta_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_delta_depth)

    mean_log_freq_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_d, 360000, 1000))
    np.save("mlf_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_depth)

    mean_log_freq_message = mean_log_frequency(fourier_over_time_overlap_log(flux_m, 360000, 1000))
    np.save("mlf_message_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_message)

    mean_log_freq_bid = mean_log_frequency(fourier_over_time_overlap_log(bid_low, 360000, 1000))
    np.save("mlf_bid_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_bid)

    mean_log_freq_ask = mean_log_frequency(fourier_over_time_overlap_log(ask_low, 360000, 1000))
    np.save("mlf_ask_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_ask)


    # Collect the mean (same window and step as above)
    flux_dd_steps = get_mean_in_steps(flux_low, 360000, 1000)
    np.save("avg_delta_depth_{0}_low_hour_window_step1000.npy".format(day), flux_dd_steps)

    flux_d_steps = get_mean_in_steps(flux_d, 360000, 1000)
    np.save("avg_depth_{0}_low_hour_window_step1000.npy".format(day), flux_d_steps)

    flux_m_steps = get_mean_in_steps(flux_m, 360000, 1000)
    np.save("avg_message_{0}_low_hour_window_step1000.npy".format(day), flux_m_steps)

    flux_ask_steps = get_mean_in_steps(ask_low, 360000, 1000)
    np.save("avg_ask_{0}_low_hour_window_step1000.npy".format(day), flux_ask_steps)

    flux_bid_steps = get_mean_in_steps(bid_low, 360000, 1000)
    np.save("avg_bid_{0}_low_hour_window_step1000.npy".format(day), flux_bid_steps)


    # We now collect the variance as a new feature.
    message_var_steps = get_var_in_steps(flux_m, 360000, 1000)
    np.save("message_var_{0}_low_hour_window_step1000.npy".format(day), message_var_steps)

    # NOTE: "delta_var" is the variance of flux_d (the depth trace), not of the delta-depth trace.
    delta_var_steps = get_var_in_steps(flux_d, 360000, 1000)
    np.save("delta_var_{0}_low_hour_window_step1000.npy".format(day), delta_var_steps)

    dd_var_steps = get_var_in_steps(flux_low, 360000, 1000)
    np.save("delta_depth_var_{0}_low_hour_window_step1000.npy".format(day), dd_var_steps)

    ask_var_steps = get_var_in_steps(ask_low, 360000, 1000)
    np.save("ask_var_{0}_low_hour_window_step1000.npy".format(day), ask_var_steps)

    bid_var_steps = get_var_in_steps(bid_low, 360000, 1000)
    np.save("bid_var_{0}_low_hour_window_step1000.npy".format(day), bid_var_steps)

The day is  110617


A Jupyter Widget

Reading CSV...
Finished Reading CSV in 3.7978391647338867s!
Reading CSV...
Finished Reading CSV in 3.891468048095703s!
Reading CSV...
Finished Reading CSV in 3.5459561347961426s!
Reading CSV...
Finished Reading CSV in 3.782581090927124s!
Reading CSV...
Finished Reading CSV in 3.3932769298553467s!
Reading CSV...
Finished Reading CSV in 3.4076311588287354s!
Reading CSV...
Finished Reading CSV in 3.7368531227111816s!
Reading CSV...
Finished Reading CSV in 3.5933139324188232s!


Exception: Error reading SSH protocol banner
Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2138, in _check_banner
    buf = self.packetizer.readline(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 367, in readline
    buf += self._read_timeout(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 576, in _read_timeout
    raise socket.timeout()
socket.timeout

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 1966, in run
    self._check_banner()
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2143, in _check_banner
    "Error reading SSH protocol banner" + str(e)
paramiko.ssh_exception.SSHException: Error reading SSH protocol banner



Error reading SSH protocol banner
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
Reading CSV...
Finished Reading CSV in 3.5240259170532227s!
Reading CSV...
Finished Reading CSV in 3.5288658142089844s!
Reading CSV...
Finished Reading CSV in 3.5180141925811768s!
Reading CSV...
Finished Reading CSV in 3.682436943054199s!
Reading CSV...
Finished Reading CSV in 3.6844491958618164s!
Reading CSV...
Finished Reading CSV in 3.5617079734802246s!
Reading CSV...
Finished Reading CSV in 4.085468053817749s!
Reading CSV...
Finished Reading CSV in 3.8371870517730713s!
Reading CSV...
Finished Reading CSV in 3.7050578594207764s!


Exception: Error reading SSH protocol banner
Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2138, in _check_banner
    buf = self.packetizer.readline(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 367, in readline
    buf += self._read_timeout(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 576, in _read_timeout
    raise socket.timeout()
socket.timeout

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 1966, in run
    self._check_banner()
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2143, in _check_banner
    "Error reading SSH protocol banner" + str(e)
paramiko.ssh_exception.SSHException: Error reading SSH protocol banner



Error reading SSH protocol banner
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out



A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget




A Jupyter Widget




  


The day is  072517


A Jupyter Widget

Reading CSV...
Finished Reading CSV in 3.6616082191467285s!
Reading CSV...
Finished Reading CSV in 3.85624098777771s!
Reading CSV...
Finished Reading CSV in 3.5408358573913574s!
Reading CSV...
Finished Reading CSV in 3.6452252864837646s!
Reading CSV...
Finished Reading CSV in 3.3922410011291504s!
Reading CSV...
Finished Reading CSV in 3.3337371349334717s!
Reading CSV...
Finished Reading CSV in 4.582437753677368s!
Reading CSV...
Finished Reading CSV in 3.5339338779449463s!
Reading CSV...
Finished Reading CSV in 3.5716729164123535s!
Reading CSV...
Finished Reading CSV in 3.579061985015869s!
Reading CSV...
Finished Reading CSV in 3.710576295852661s!
Reading CSV...
Finished Reading CSV in 3.561954975128174s!
Reading CSV...
Finished Reading CSV in 3.605067014694214s!
Reading CSV...
Finished Reading CSV in 3.8046700954437256s!
Reading CSV...
Finished Reading CSV in 3.349731922149658s!
Reading CSV...
Finished Reading CSV in 3.6690919399261475s!
Reading CSV...
Finished Reading CSV in 3.6553049

Exception: Error reading SSH protocol banner
Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2138, in _check_banner
    buf = self.packetizer.readline(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 367, in readline
    buf += self._read_timeout(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 576, in _read_timeout
    raise socket.timeout()
socket.timeout

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 1966, in run
    self._check_banner()
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2143, in _check_banner
    "Error reading SSH protocol banner" + str(e)
paramiko.ssh_exception.SSHException: Error reading SSH protocol banner



Error reading SSH protocol banner
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
Reading CSV...
Finished Reading CSV in 3.346498966217041s!
Reading CSV...
Finished Reading CSV in 3.634530782699585s!
Reading CSV...
Finished Reading CSV in 3.6337931156158447s!
Reading CSV...
Finished Reading CSV in 3.498530864715576s!
Reading CSV...
F

A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget




A Jupyter Widget


The day is  121217


A Jupyter Widget

Reading CSV...
Finished Reading CSV in 3.6583099365234375s!
Reading CSV...
Finished Reading CSV in 3.8337080478668213s!
Reading CSV...
Finished Reading CSV in 3.5006070137023926s!
Reading CSV...
Finished Reading CSV in 3.6585278511047363s!
Reading CSV...
Finished Reading CSV in 3.4634649753570557s!
Reading CSV...
Finished Reading CSV in 3.395301103591919s!
Reading CSV...
Finished Reading CSV in 3.671048879623413s!
Reading CSV...
Finished Reading CSV in 3.549057722091675s!
Reading CSV...
Finished Reading CSV in 3.549743890762329s!
Reading CSV...
Finished Reading CSV in 3.9875099658966064s!
Reading CSV...
Finished Reading CSV in 3.6548023223876953s!
Reading CSV...
Finished Reading CSV in 3.502640962600708s!
Reading CSV...
Finished Reading CSV in 3.512917995452881s!
Reading CSV...
Finished Reading CSV in 3.880220890045166s!
Reading CSV...
Finished Reading CSV in 3.3661019802093506s!
Reading CSV...
Finished Reading CSV in 3.6669881343841553s!


Exception: Error reading SSH protocol banner
Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2138, in _check_banner
    buf = self.packetizer.readline(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 367, in readline
    buf += self._read_timeout(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 576, in _read_timeout
    raise socket.timeout()
socket.timeout

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 1966, in run
    self._check_banner()
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2143, in _check_banner
    "Error reading SSH protocol banner" + str(e)
paramiko.ssh_exception.SSHException: Error reading SSH protocol banner



Error reading SSH protocol banner
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
[Errno 60] Operation timed out
Reading CSV...
Finished Reading CSV in 3.6860220432281494s!
Reading CSV...
Finished Reading CSV in 5.353327035903931s!
Reading CSV...
Finished Reading CSV in 3.5284979343414307s!
Reading CSV...
Finished Reading CSV in 3.3885350227355957s!
Reading CSV...
Finished Reading CSV in 3.704927682876587s!
Reading CSV...
Fi

Exception: Error reading SSH protocol banner
Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2138, in _check_banner
    buf = self.packetizer.readline(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 367, in readline
    buf += self._read_timeout(timeout)
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/packet.py", line 576, in _read_timeout
    raise socket.timeout()
socket.timeout

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 1966, in run
    self._check_banner()
  File "/Users/Charles/anaconda3/lib/python3.6/site-packages/paramiko/transport.py", line 2143, in _check_banner
    "Error reading SSH protocol banner" + str(e)
paramiko.ssh_exception.SSHException: Error reading SSH protocol banner



Error reading SSH protocol banner
[Errno 60] Operation timed out
[Errno 60] Operation timed out



A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget


(2340000,)


A Jupyter Widget




A Jupyter Widget




In [None]:
# Hand-written list of ticker symbols; no other cell in this part of the notebook reads it.
smaller_stocks = ['IGZ', 'IBO', 'CBX', 'CBO', 'ZBZX', 'MTFB', 'FMAX', 'ZVZZC', 'ZXYZ.A', 
                  'EVLMC', 'EVGBC', 'EVSTC', 'ZNWAA', 'GJO', 'SGYPU', 'TROVU', 'MFLA', 
                  'EMSA', 'EMLB', 'PFK', 'IGEM', 'SPE-B', 'OVLC', 'GDL-B', 'CCZ', 'OFG-D', 
                  'CUBI-D', 'GLU-A', 'SOV-C', 'HL-B', 'ITMS', 'ITML', 'BRG-C', 'CBMXW', 
                  'CUBI-E', 'CMS-B', 'NMK-C', 'GPT-A', 'MTB-', 'BCV-A', 'GJV', 'CVB', 
                  'GAB-D', 'XKE', 'HTF', 'STLR', 'GAB-G', 'KYN-F', 'ABRN', 'GGZ-A', 'WYIG', 
                  'DESC', 'PYT', 'PW-A', 'CPAA', 'ASB-C', 'ZAZZT', 'GCV-B', 'WBIR', 'PPS-A',
                  'SUI-A', 'GAB-H', 'PCG-I']
# Show how many symbols are in the list.
print(len(smaller_stocks))

In [None]:
# Single-day version of the bid/ask and feature steps.
# This cell does not define `day` or `lst`; it uses whatever an earlier cell left in the
# kernel, and it expects the delta_depth/depth/message .npy files for `day` to exist on disk.
bid_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

# NOTE(review): Statistic is built before any None check and the guard is on stat.obao -
# confirm Statistic accepts a None stock.
for stock in tqdm(lst):
    stat = Statistic(stock)
    if stat.obao is not None:
        dd = stat.bid_flux_mat(d_t=0.01)
        time, trace = dd[:, 0], dd[:, 1]
        bid_low += trace


np.save("bid_{0}_low.npy".format(day), bid_low)

ask_low = np.zeros(int((16 - 9.5) * 3600 / 0.01))

# Same pattern as the bid loop, for the ask side.
for stock in tqdm(lst):
    stat = Statistic(stock)
    if stat.obao is not None:
        dd = stat.ask_flux_mat(d_t=0.01)
        time, trace = dd[:, 0], dd[:, 1]
        ask_low += trace


np.save("ask_{0}_low.npy".format(day), ask_low)

# start over from what we saved
import numpy as np

# Reload all five summed traces for `day`.
flux_low = np.load("delta_depth_flux_{0}_low.npy".format(day))
flux_d = np.load("depth_flux_{0}_low.npy".format(day))
flux_m = np.load("message_flux_{0}_low.npy".format(day))
ask_low = np.load("ask_{0}_low.npy".format(day))
bid_low = np.load("bid_{0}_low.npy".format(day))

# Collect the spectral activity (window of 360000 samples, step of 1000)
mean_log_freq_delta_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_low, 360000, 1000))
np.save("mlf_delta_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_delta_depth)

mean_log_freq_depth = mean_log_frequency(fourier_over_time_overlap_log(flux_d, 360000, 1000))
np.save("mlf_depth_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_depth)

mean_log_freq_message = mean_log_frequency(fourier_over_time_overlap_log(flux_m, 360000, 1000))
np.save("mlf_message_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_message)

mean_log_freq_bid = mean_log_frequency(fourier_over_time_overlap_log(bid_low, 360000, 1000))
np.save("mlf_bid_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_bid)

mean_log_freq_ask = mean_log_frequency(fourier_over_time_overlap_log(ask_low, 360000, 1000))
np.save("mlf_ask_{0}_low_hour_window_step1000.npy".format(day), mean_log_freq_ask)


# Collect the mean (same window and step as above)
flux_dd_steps = get_mean_in_steps(flux_low, 360000, 1000)
np.save("avg_delta_depth_{0}_low_hour_window_step1000.npy".format(day), flux_dd_steps)

flux_d_steps = get_mean_in_steps(flux_d, 360000, 1000)
np.save("avg_depth_{0}_low_hour_window_step1000.npy".format(day), flux_d_steps)

flux_m_steps = get_mean_in_steps(flux_m, 360000, 1000)
np.save("avg_message_{0}_low_hour_window_step1000.npy".format(day), flux_m_steps)

flux_ask_steps = get_mean_in_steps(ask_low, 360000, 1000)
np.save("avg_ask_{0}_low_hour_window_step1000.npy".format(day), flux_ask_steps)

flux_bid_steps = get_mean_in_steps(bid_low, 360000, 1000)
np.save("avg_bid_{0}_low_hour_window_step1000.npy".format(day), flux_bid_steps)


# We now collect the variance as a new feature.
message_var_steps = get_var_in_steps(flux_m, 360000, 1000)
np.save("message_var_{0}_low_hour_window_step1000.npy".format(day), message_var_steps)

# NOTE: "delta_var" is the variance of flux_d (the depth trace), not of the delta-depth trace.
delta_var_steps = get_var_in_steps(flux_d, 360000, 1000)
np.save("delta_var_{0}_low_hour_window_step1000.npy".format(day), delta_var_steps)

dd_var_steps = get_var_in_steps(flux_low, 360000, 1000)
np.save("delta_depth_var_{0}_low_hour_window_step1000.npy".format(day), dd_var_steps)

ask_var_steps = get_var_in_steps(ask_low, 360000, 1000)
np.save("ask_var_{0}_low_hour_window_step1000.npy".format(day), ask_var_steps)

bid_var_steps = get_var_in_steps(bid_low, 360000, 1000)
np.save("bid_var_{0}_low_hour_window_step1000.npy".format(day), bid_var_steps)

In [None]:
# Overlay the summed ask and bid traces on one axis.
# `time` is column 0 of the last flux matrix left in the kernel by an accumulation loop.
# NOTE(review): dividing by 3600 suggests `time` is in seconds and the axis is in hours - confirm.
plt.plot(time / 3600, ask_low, label="ask")
plt.plot(time / 3600, bid_low, label="bid")
plt.xlabel("time / 3600")
plt.ylabel("summed flux")
# Without a legend the two curves cannot be told apart.
plt.legend()

plt.show()