# Trade Feeds Demoes (FX Spot Orders and Trades)
#### I use real feeds from MOEX, they are over 250M+ and come packed, we have to unpack them first

In [33]:
%%sh
gunzip /opt/app/data/data/Trades/*.gz

gunzip: /opt/app/data/data/Trades/*.gz: No such file or directory


#### You should see these two files in the data/Trades folder now

In [34]:
%%sh
ls -la -h /opt/app/data/data/Trades/

total 260648
drwxr-xr-x    2 root     root        4.0K Apr 12 15:09 .
drwxr-xr-x    1 root     root        4.0K Apr 12 15:02 ..
-rw-r--r--    1 root     root      253.2M Apr 12 15:09 TopofBook20130206.txt
-rw-r--r--    1 root     root        1.3M Apr 12 15:09 TradeLog20130206.txt


#### Let's import the feed into a dataframe
It's 6M+ records and will take some time

In [3]:
import pandas as pd
import numpy as np

_feed = '/opt/app/data/data/Trades/TopofBook20130206.txt'
%time data = pd.read_csv(_feed, low_memory=False)

CPU times: user 5.14 s, sys: 1.25 s, total: 6.39 s
Wall time: 6.79 s


#### Let's inspect the dataframe

In [35]:
data.shape

(6328950, 6)

In [4]:
data.dtypes

SECCODE     object
BUYSELL     object
TIME         int64
TRADENO    float64
PRICE       object
VOLUME      object
dtype: object

#### As you can see above, some of the column types are object and we need to convert them to ones that will allow better manipulation

In [5]:
%time data.TIME = pd.to_datetime(data.TIME, format='%H%M%S%f')

CPU times: user 26.4 s, sys: 530 ms, total: 26.9 s
Wall time: 26.6 s


#### We converted the datetime column to proper format, let's see what we got:

In [6]:
data.dtypes

SECCODE            object
BUYSELL            object
TIME       datetime64[ns]
TRADENO           float64
PRICE              object
VOLUME             object
dtype: object

In [7]:
data.head(5)

Unnamed: 0,SECCODE,BUYSELL,TIME,TRADENO,PRICE,VOLUME
0,USD000UTSTOM,S,1900-01-01 10:00:00.192,,30.099,5000000
1,BKTRUB000TOM,S,1900-01-01 10:00:00.205,,34.9255,2000000
2,USD000000TOD,S,1900-01-01 10:00:00.214,,30.096,3000000
3,USD000UTSTOM,S,1900-01-01 10:00:00.219,,30.049,5000000
4,USD000000TOD,S,1900-01-01 10:00:00.222,,-,-


#### Let's see instruments that were traded in this session

In [9]:
data.SECCODE.unique()

array(['USD000UTSTOM', 'BKTRUB000TOM', 'USD000000TOD', 'EUR_RUB__TOD',
       'EUR_RUB__TOM', 'EUR000TODTOM', 'USD000TODTOM', 'EURUSDTODTOM',
       'EURUSD000TOM', 'EURUSD000TOD', 'CNY000000TOD', 'USDRUB_TOM1M',
       'USDRUB_TOM1W', 'USDRUB_TOM1Y', 'USDRUB_TOM1D', 'USDRUB_TOM2M',
       'USDRUB_TOM2W', 'USDRUB_TOM3M', 'USDRUB_TOM6M', 'USDRUB_TOM9M',
       'BYR_RUB__TOM'], dtype=object)

#### Let's count number of orders per instrument

In [10]:
%time ticker_counts = data.SECCODE.value_counts()

CPU times: user 980 ms, sys: 10 ms, total: 990 ms
Wall time: 983 ms


#### Let's query for a particular instrument put orders

In [9]:
%time data[(data.SECCODE=='USD000UTSTOM') & (data.BUYSELL=='S')].head(5)

CPU times: user 1.58 s, sys: 50 ms, total: 1.63 s
Wall time: 1.62 s


Unnamed: 0,SECCODE,BUYSELL,TIME,TRADENO,PRICE,VOLUME
0,USD000UTSTOM,S,1900-01-01 10:00:00.192,,30.099,5000000
3,USD000UTSTOM,S,1900-01-01 10:00:00.219,,30.049,5000000
100,USD000UTSTOM,S,1900-01-01 10:00:00.575,,30.049,4999000
101,USD000UTSTOM,S,1900-01-01 10:00:00.575,76139178.0,30.049,1000
545,USD000UTSTOM,S,1900-01-01 10:00:00.835,,30.0485,700000


#### Let's plot a little

In [40]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

In [38]:
print(ticker_counts.values)
ticker_counts.index

[2405788 1549432 1513218  536156  159622  117919   38133    3071    2006
     958     754     529     477     316     316     174      26      19
      17      17       2]


Index(['EUR_RUB__TOM', 'BKTRUB000TOM', 'USD000UTSTOM', 'EUR_RUB__TOD',
       'USD000000TOD', 'EURUSD000TOM', 'EURUSD000TOD', 'USD000TODTOM',
       'EUR000TODTOM', 'USDRUB_TOM6M', 'USDRUB_TOM1Y', 'CNY000000TOD',
       'USDRUB_TOM9M', 'USDRUB_TOM1M', 'USDRUB_TOM3M', 'EURUSDTODTOM',
       'USDRUB_TOM1W', 'USDRUB_TOM1D', 'USDRUB_TOM2W', 'USDRUB_TOM2M',
       'BYR_RUB__TOM'],
      dtype='object')

In [39]:
plot

<function plotly.offline.offline.plot(figure_or_data, show_link=True, link_text='Export to plot.ly', validate=True, output_type='file', include_plotlyjs=True, filename='temp-plot.html', auto_open=True, image=None, image_filename='plot_image', image_width=800, image_height=600, config=None)>

In [49]:
import sys

In [50]:
sys.getsizeof(data)

1758767781