# A notebook to build and test the feature calculation functionality
We will build the moving average calculation

## Imports and set up Django environment

In [6]:
from datetime import timedelta
import math
import os
import pandas as pd
import django
from django.db import connection

from django.utils import timezone
import logging
import numpy as np
from IPython.core.display import display, HTML
# Move the working directory up one level before the project imports below.
# NOTE(review): presumably the notebook lives one directory below the Django project root
# so that `rest.settings` and the app packages resolve - confirm.
# This call is relative, so re-running the cell moves up another level each time;
# restart the kernel before re-running.
os.chdir('..')

# Point Django at the project settings (setdefault keeps any value already set in the environment).
# DJANGO_ALLOW_ASYNC_UNSAFE allows calls to the Django ORM in Jupyter. Required.
# django.setup() must run before any project models are imported.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rest.settings')
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

from pricedata import models as pd_models
from feature import models as ft_models
from algobuilder.utils import DatabaseUtility

## Get a single feature execution for moving average and its datasource symbols
It doesn't matter which feature execution we use

In [7]:
# Look up the MovingAverage feature; .first() gives None when no row matches.
moving_average_features = ft_models.Feature.objects.filter(name='MovingAverage')
ft = moving_average_features.first()
ft

Feature(name=MovingAverage, pluginclass=MovingAverage, active=True

In [19]:
# Take the first feature execution configured for the feature found above.
executions_for_feature = ft_models.FeatureExecution.objects.filter(feature=ft)
ftex = executions_for_feature.first()
ftex

8

In [28]:
# All datasource-symbol links attached to this feature execution (reverse relation).
symbol_links = ftex.featureexecutiondatasourcesymbol_set
ftex_symbols = symbol_links.all()
ftex_symbols

<QuerySet [FeatureExecutionSymbol(feature_execution=Feature: MovingAverage candle_period: 10S calculation_period: 1H., datasource_symbol=datasource=MT5, symbol=EURGBP, retrieve_price_data=True, active=True, FeatureExecutionSymbol(feature_execution=Feature: MovingAverage candle_period: 10S calculation_period: 1H., datasource_symbol=datasource=MT5, symbol=EURJPY, retrieve_price_data=True, active=True]>

## Get the time for the next feature calculation.
This will be the first common candle time available across all datasource symbols required for the feature calculation that is after the last feature calculation.
This is easiest to do in raw SQL

In [32]:
# SQL: earliest candle time at which every datasource symbol attached to the
# feature execution has a candle.
# The inner query groups candles by time and keeps only the times where the number of
# distinct symbols present equals the number of symbols the feature execution requires.
# DISTINCT is used so that several candles for one symbol at the same time cannot be
# mistaken for candles from several symbols.
# NOTE(review): candles are not restricted to the feature execution's candle period here -
# confirm whether a period filter is needed.
sql = \
"""
SELECT min(times.time)
FROM
	(
		SELECT cnd.time as time
		FROM pricedata_candle cnd
		WHERE cnd.datasource_symbol_id in
			(
				SELECT datasource_symbol_id
				FROM feature_featureexecutiondatasourcesymbol feds
					INNER JOIN pricedata_datasourcesymbol dss ON feds.datasource_symbol_id = dss.id
				WHERE feds.feature_execution_id = %s
			)
		GROUP BY cnd.time
		HAVING count(DISTINCT cnd.datasource_symbol_id) =
			(
				SELECT count(datasource_symbol_id)
				FROM feature_featureexecutiondatasourcesymbol feds
					INNER JOIN pricedata_datasourcesymbol dss ON feds.datasource_symbol_id = dss.id
				WHERE feds.feature_execution_id = %s
			)
	) as times

"""

# Bind parameters, in the same order as the %s placeholders in the query
sql_params = [ftex.id, ftex.id]

# If we have calculated this feature before, get the last calculation time with a single
# query (None when there are no results yet) and only consider candle times after it.
last_calc = (
    ft_models.FeatureExecutionResult.objects
    .filter(feature_execution=ftex)
    .order_by("-time")
    .first()
)
last_calc_time = last_calc.time if last_calc is not None else None
if last_calc_time is not None:
    # Passed as a bound parameter rather than formatted into the SQL text, so the
    # database driver handles quoting and datetime/timezone conversion.
    sql += " WHERE times.time > %s"
    sql_params.append(last_calc_time)

# Run the SQL
with connection.cursor() as cursor:
    cursor.execute(sql, sql_params)
    row = cursor.fetchone()

# min() over no rows yields NULL, so this is None when no common candle time is available
next_calc_time = row[0]

next_calc_time
    


## Get a dataframe of all candles that haven't been calculated and those that are required for the calculations.
e.g. for a 30 day moving average we will start at next_candle - 30 days

We will be calculating using the time offset, so index will need to be time

Only one symbol is required for the moving average, so we will specify it explicitly.

In [30]:
# Moving average needs a single symbol, so take the first one linked to the feature execution
ds_symbol = ftex_symbols[0].datasource_symbol

# Start far enough back that the first new candle has a full calculation window behind it
calc_window = pd.to_timedelta(ftex.calculation_period)
from_date = next_calc_time - calc_window

# Fetch the candles for that symbol from the start date onwards
# NOTE(review): no filter on candle period here - confirm whether one is needed
candles = pd_models.Candle.objects.filter(datasource_symbol=ds_symbol, time__gte=from_date)

# One row per candle, ordered oldest first and indexed by time for the time-based rolling window
candle_rows = list(candles.values())
df = pd.DataFrame(candle_rows)
df = df.sort_values('time').set_index('time')
df

Unnamed: 0_level_0,id,datasource_symbol_id,period,bid_open,bid_high,bid_low,bid_close,ask_open,ask_high,ask_low,ask_close,volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-08-03 00:00:00+00:00,5351,1390,1H,0.854240,0.854900,0.854200,0.854490,0.854970,0.855630,0.854930,0.855220,169
2021-08-03 01:00:00+00:00,5352,1390,1H,0.854490,0.854880,0.854380,0.854700,0.854670,0.855060,0.854560,0.854880,255
2021-08-03 02:00:00+00:00,5353,1390,1H,0.854700,0.854800,0.854630,0.854690,0.854880,0.854980,0.854810,0.854870,319
2021-08-03 03:00:00+00:00,5354,1390,1H,0.854690,0.854770,0.854150,0.854310,0.854860,0.854940,0.854320,0.854480,659
2021-08-03 04:00:00+00:00,5355,1390,1H,0.854310,0.854550,0.854200,0.854350,0.854480,0.854720,0.854370,0.854520,910
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-18 02:00:00+00:00,379546,1390,1H,0.851890,0.852100,0.851880,0.852070,0.852060,0.852270,0.852050,0.852240,228
2021-08-18 03:00:00+00:00,379659,1390,1H,0.852070,0.852360,0.851970,0.852220,0.852240,0.852530,0.852140,0.852390,563
2021-08-18 04:00:00+00:00,379773,1390,1H,0.852220,0.852280,0.851820,0.851900,0.852390,0.852450,0.851990,0.852070,501
2021-08-18 05:00:00+00:00,379887,1390,1H,0.851900,0.852220,0.851860,0.851960,0.852080,0.852400,0.852040,0.852140,753


## Calculate the moving average
We will use bid_close

In [133]:
# Time-based rolling window over bid_close; the window length is the calculation period
bid_close_window = df['bid_close'].rolling(ftex.calculation_period)
df['moving_average'] = bid_close_window.mean()
df

Unnamed: 0_level_0,id,datasource_symbol_id,period,bid_open,bid_high,bid_low,bid_close,ask_open,ask_high,ask_low,ask_close,volume,moving_average
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-08-03 00:00:00+00:00,5351,1390,1H,0.854240,0.854900,0.854200,0.854490,0.854970,0.855630,0.854930,0.855220,169,0.85449
2021-08-03 01:00:00+00:00,5352,1390,1H,0.854490,0.854880,0.854380,0.854700,0.854670,0.855060,0.854560,0.854880,255,0.85470
2021-08-03 02:00:00+00:00,5353,1390,1H,0.854700,0.854800,0.854630,0.854690,0.854880,0.854980,0.854810,0.854870,319,0.85469
2021-08-03 03:00:00+00:00,5354,1390,1H,0.854690,0.854770,0.854150,0.854310,0.854860,0.854940,0.854320,0.854480,659,0.85431
2021-08-03 04:00:00+00:00,5355,1390,1H,0.854310,0.854550,0.854200,0.854350,0.854480,0.854720,0.854370,0.854520,910,0.85435
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-18 02:00:00+00:00,379546,1390,1H,0.851890,0.852100,0.851880,0.852070,0.852060,0.852270,0.852050,0.852240,228,0.85207
2021-08-18 03:00:00+00:00,379659,1390,1H,0.852070,0.852360,0.851970,0.852220,0.852240,0.852530,0.852140,0.852390,563,0.85222
2021-08-18 04:00:00+00:00,379773,1390,1H,0.852220,0.852280,0.851820,0.851900,0.852390,0.852450,0.851990,0.852070,501,0.85190
2021-08-18 05:00:00+00:00,379887,1390,1H,0.851900,0.852220,0.851860,0.851960,0.852080,0.852400,0.852040,0.852140,753,0.85196


## Reshape the dataframe for upload into the feature_execution_result table.
This will require candle_id, feature_execution_id and result

In [134]:
# Build the upload frame in one chain: time goes back to a column, the candle's id becomes
# candle_id, the feature execution id and result are added, and only those three are kept.
upload_columns = ['candle_id', 'feature_execution_id', 'result']
df = (
    df.reset_index()
    .rename(columns={'id': 'candle_id'})
    .assign(
        feature_execution_id=ftex.id,
        result=lambda frame: frame['moving_average'],
    )
    .filter(items=upload_columns)
)
df

Unnamed: 0,candle_id,feature_execution_id,result
0,5351,8,0.85449
1,5352,8,0.85470
2,5353,8,0.85469
3,5354,8,0.85431
4,5355,8,0.85435
...,...,...,...
663,379546,8,0.85207
664,379659,8,0.85222
665,379773,8,0.85190
666,379887,8,0.85196


## Upload result

In [137]:
# Resolve the database table behind FeatureExecutionResult, then insert/update in batches
results_table = ft_models.FeatureExecutionResult._meta.db_table
DatabaseUtility.bulk_insert_or_update(
    data=df,
    table=results_table,
    batch_size=1000,
)