In [None]:
# Standard library
import ast
import datetime
import http.client
import json
import math
import mimetypes
import os
import threading
import time

# Third-party
import ipywidgets as widgets
import joblib
import nsepython
import numpy as np
import pandas as pd
import pandas_bokeh
import torch
import torch.nn as nn
from dateutil import parser, tz
from matplotlib import pyplot
from rich import  print
from sklearn.preprocessing import MinMaxScaler
from smartapi import SmartConnect
from smartapi import SmartWebSocket
from tqdm import tqdm

# Seed the RNGs only after numpy is imported: calling np.random.seed before
# `import numpy as np` raises NameError on a fresh kernel.
torch.manual_seed(0)
np.random.seed(0)

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        continue

pd.set_option('plotting.backend', 'pandas_bokeh')
pandas_bokeh.output_notebook()

conn = http.client.HTTPSConnection("apiconnect.angelbroking.com")

# --- SmartAPI session setup ---------------------------------------------------
# Credentials are looked up in the environment first so that real secrets never
# have to be typed into the notebook. When a variable is unset, the literal that
# used to be hard-coded here is used instead, so existing behaviour is unchanged.
ANGEL_API_KEY = os.environ.get("ANGEL_HISTORICAL_DATA_API_KEY", "ANGEL_HISTORICAL_DATA_API_KEY")
ANGEL_CLIENT_ID = os.environ.get("ANGEL_CLIENT_ID", "ANGEL_CLIENT_ID")
ANGEL_PASSWORD = os.environ.get("ANGEL_PASSWORD", "ANGEL_PASSWORD")

obj = SmartConnect(api_key=ANGEL_API_KEY,
                   #access_token = "your access token",
                   #refresh_token = "your refresh_token"
                   )

# Log in, then keep the tokens/profile that the session call hands back.
data = obj.generateSession(ANGEL_CLIENT_ID, ANGEL_PASSWORD)
refreshToken = data['data']['refreshToken']
feedToken = obj.getfeedToken()
userProfile = obj.getProfile(refreshToken)

# Header set assembled from attributes of the logged-in client object.
headers = {
  'Authorization': f'Bearer {obj.access_token}',
  'Content-Type': 'application/json',
  'Accept': 'application/json',
  'X-UserType': 'USER',
  'X-SourceID': 'WEB',
  'X-ClientLocalIP': obj.clientLocalIP,
  'X-ClientPublicIP': obj.clientPublicIP,
  'X-MACAddress': obj.clientMacAddress,
  'X-PrivateKey': obj.api_key
}

print (f"LOGIN : {userProfile['data']['name']}")

# When False, the loss helpers below score only the last `output_window` steps.
calculate_loss_over_all_values = False

In [None]:
# Sliding-window geometry read by the sequence, batching and prediction helpers.
input_window, output_window = 300, 5
batch_size = 10  # batch size
# Run on the GPU when PyTorch reports one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class PositionalEncoding(nn.Module):
    """Add a fixed sinusoidal positional table to a sequence-first tensor.

    The table is precomputed once for ``max_len`` positions and stored as the
    buffer ``pe`` with shape ``(max_len, 1, d_model)``: even feature columns
    hold sines, odd feature columns hold cosines.

    Args:
        d_model: size of the feature dimension (even or odd).
        max_len: number of positions to precompute.
    """
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so trim the cosine table to fit instead of failing on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)
        #pe.requires_grad = False
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the first ``x.size(0)`` rows of the table."""
        return x + self.pe[:x.size(0), :]

class TransAm(nn.Module):
    """Transformer encoder followed by a linear layer that emits one value per step.

    Args:
        feature_size: model width passed to the encoder layer and positional encoder.
        num_layers: number of stacked encoder layers.
        dropout: dropout probability of each encoder layer.
        nhead: number of attention heads (defaults to 10, the value that used to
            be hard-coded).
    """
    def __init__(self, feature_size=30, num_layers=2, dropout=0.2, nhead=10):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(feature_size, 1)
        self.init_weights()

    def init_weights(self):
        """Zero the decoder bias and draw its weights uniformly from [-0.1, 0.1]."""
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Encode ``src`` under a causal mask and project every step to one value.

        The mask is cached on the instance and rebuilt whenever the sequence
        length changes or the input lives on a different device than the
        cached mask (the device check is what the earlier version lacked).
        """
        seq_len = len(src)
        mask = self.src_mask
        if mask is None or mask.size(0) != seq_len or mask.device != src.device:
            mask = self._generate_square_subsequent_mask(seq_len).to(src.device)
            self.src_mask = mask
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, mask)
        return self.decoder(output)

    def _generate_square_subsequent_mask(self, sz):
        """Return an ``sz x sz`` float mask: 0 on/below the diagonal, -inf above it."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

def create_inout_sequences(input_data, tw, out_window=None):
    """Slice a 1-D series into overlapping (masked input, label) windows.

    For every start index ``i`` in ``range(len(input_data) - tw)`` the label is
    ``input_data[i:i+tw]`` and the input is the same window with its last
    ``out_window`` entries replaced by zeros.

    Args:
        input_data: 1-D sequence of numbers (list or numpy array).
        tw: window length.
        out_window: number of trailing entries to zero in each input window.
            Defaults to the notebook-level ``output_window``.

    Returns:
        A float32 tensor of shape ``(len(input_data) - tw, 2, tw)``; index 0 of
        the middle axis is the masked input, index 1 the label. When no window
        fits, an empty 1-D float tensor is returned (as before).

    Raises:
        ValueError: if ``out_window`` is not within ``0..tw``.
    """
    if out_window is None:
        out_window = output_window
    if not 0 <= out_window <= tw:
        raise ValueError(f"out_window must be between 0 and tw={tw}, got {out_window}")

    n_windows = len(input_data) - tw
    if n_windows <= 0:
        return torch.FloatTensor([])

    # Fill one preallocated float32 buffer and hand it to torch in a single step;
    # converting a Python list of ndarray tuples is far slower.
    series = np.asarray(input_data, dtype=np.float32)
    kept = tw - out_window
    sequences = np.empty((n_windows, 2, tw), dtype=np.float32)
    for start in range(n_windows):
        window = series[start:start + tw]
        sequences[start, 0] = window
        sequences[start, 0, kept:] = 0
        sequences[start, 1] = window
        #train_label = input_data[i+output_window:i+tw+output_window]
    return torch.from_numpy(sequences)


def get_data2(inst):
    """Download recent 5-minute candles for ``inst`` and turn them into window tensors.

    Steps: request candles for the 50 days up to today, keep the last 905
    ``close`` values, rescale them to [-15, 15] with a freshly fitted
    ``MinMaxScaler`` (published as the global ``scaler``), then build sliding
    windows with ``create_inout_sequences``.

    NOTE(review): ``module.kite`` is not defined anywhere in the visible
    notebook; it presumably is a logged-in broker client -- confirm where it
    comes from.

    Returns:
        ``(train_sequence, test_sequence)``, both moved to ``device``. The train
        split uses a fraction of 0, so it is built from an empty slice.
    """
    global scaler
    candle_interval = '5minute'
    window_end = datetime.date.today()
    window_start = window_end - datetime.timedelta(days=50) #59
    step_back = datetime.timedelta(days=51) #60

    candles = []
    for _ in range(1):
        chunk = module.kite.historical_data(
            inst,
            datetime.date.isoformat(window_start),
            datetime.date.isoformat(window_end),
            candle_interval,
            continuous=False,
        )
        # Older chunks go after newer ones are prepended, keeping time order.
        candles = chunk + candles
        window_end = window_end - step_back
        window_start = window_start - step_back

    closes = pd.DataFrame(candles)['close'].to_numpy()[-905:].reshape(-1)
    scaler = MinMaxScaler(feature_range=(-15, 15))
    scaled = scaler.fit_transform(closes.reshape(-1, 1)).reshape(-1)

    split_at = int(scaled.shape[0] * 0)
    train_sequence = create_inout_sequences(scaled[:split_at], input_window)[:-output_window]
    test_sequence = create_inout_sequences(scaled, input_window)[:-output_window]
    return train_sequence.to(device), test_sequence.to(device)
def get_batch(source, i, batch_size):
    """Cut up to ``batch_size`` (input, target) pairs out of ``source`` starting at ``i``.

    The number of pairs is capped at ``len(source) - 1 - i``. Element 0 of every
    pair feeds the input and element 1 the target; each side is stacked, split
    into ``input_window`` chunks along dimension 1, and the chunks are stacked
    again.

    Returns:
        ``(input, target)`` tensors built as described above.
    """
    count = min(batch_size, len(source) - 1 - i)
    pairs = source[i:i + count]

    def _stack_side(side):
        rows = torch.stack([pair[side] for pair in pairs])
        return torch.stack(rows.chunk(input_window, 1)) # 1 is feature size

    return _stack_side(0), _stack_side(1)
def evaluate(eval_model, data_source):
    """Return the batch-weighted mean loss of ``eval_model`` over ``data_source``.

    The data is walked in batches of up to 1000 items with gradients disabled.
    Each batch loss is weighted by ``len(data[0])`` and the sum is divided by
    ``len(data_source)``. Unless ``calculate_loss_over_all_values`` is set,
    only the last ``output_window`` steps are scored. ``criterion`` is read
    from notebook-level state.
    """
    eval_model.eval() # Turn on the evaluation mode
    eval_batch_size = 1000
    weighted_sum = 0.
    with torch.no_grad():
        for start in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, start, eval_batch_size)
            output = eval_model(data)
            if not calculate_loss_over_all_values:
                # Score only the forecast tail of each window.
                output = output[-output_window:]
                targets = targets[-output_window:]
            batch_loss = criterion(output, targets).to(device).item()
            weighted_sum += len(data[0]) * batch_loss
    return weighted_sum / len(data_source)

plot_counter = 0
def plot_and_loss(eval_model, data_source,epoch,tknip):
    """Return the mean per-window loss of ``eval_model`` over ``data_source``.

    Every window except the last is scored one at a time with gradients
    disabled. Unless ``calculate_loss_over_all_values`` is set, only the last
    ``output_window`` steps of each window contribute. ``criterion`` is read
    from notebook-level state.

    The sum is divided by the number of windows actually scored. The earlier
    version divided by the final loop index, which under-counted by one,
    divided by zero for a single window, and failed with an unbound variable
    when there was nothing to score.

    Args:
        eval_model: model to evaluate; switched to eval mode.
        data_source: indexable collection of (input, target) pairs.
        epoch: unused; kept so existing callers keep working.
        tknip: unused; kept so existing callers keep working.

    Raises:
        ValueError: if ``data_source`` holds fewer than two windows.
    """
    n_windows = len(data_source) - 1
    if n_windows <= 0:
        raise ValueError("plot_and_loss needs at least two windows in data_source")

    eval_model.eval()
    total_loss = 0.
    with torch.no_grad():
        for i in range(n_windows):
            data, target = get_batch(data_source, i,1)
            # look like the model returns static values for the output window
            output = eval_model(data)
            if calculate_loss_over_all_values:
                total_loss += criterion(output, target).item()
            else:
                total_loss += criterion(output[-output_window:], target[-output_window:]).item()
    return total_loss / n_windows


def predict_future_open(eval_model, data_source, steps, tkn):
    """Roll the model forward ``steps`` times and draw the result on the current figure.

    The series starts as the target side of the first item of ``data_source``.
    On every step the model sees the latest ``input_window`` values and its
    final output is appended. The whole series is plotted in red with the
    first ``input_window`` points over-plotted in blue; the figure is left
    open for the caller.

    Args:
        eval_model: model to run; switched to eval mode.
        data_source: collection accepted by ``get_batch``.
        steps: number of values to append.
        tkn: unused.

    Returns:
        The flattened CPU tensor holding the seed window plus the appended values.
    """
    eval_model.eval()
    _, series = get_batch(data_source, 0, 1)
    with torch.no_grad():
        for _step in range(steps):
            # NOTE(review): an earlier revision also built a copy of this window
            # with its last `output_window` entries zeroed, but never passed it
            # to the model. The model is still fed the unmasked window here so
            # predictions are unchanged -- confirm which input was intended.
            prediction = eval_model(series[-input_window:])
            series = torch.cat((series, prediction[-1:]))
    series = series.cpu().view(-1)
    pyplot.plot(series, color="red")
    pyplot.plot(series[:input_window], color="blue")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    return series

def predict_future(eval_model, data_source, steps, tkn):
    """Roll the model forward ``steps`` times and save the plot as a PNG.

    The series starts as the target side of the first item of ``data_source``.
    On every step the model sees the latest ``input_window`` values and its
    final output is appended. The full series is drawn in red, the first
    ``input_window`` points in blue, and the figure is written under
    ``./nmnm/`` with ``plot_counter``, ``steps`` and ``tkn`` in the file name,
    then closed. Nothing is returned.
    """
    eval_model.eval()
    _, series = get_batch(data_source, 0, 1)
    with torch.no_grad():
        for _step in range(steps):
            # NOTE(review): an earlier revision also built a copy of this window
            # with its last `output_window` entries zeroed, but never passed it
            # to the model. The model is still fed the unmasked window here so
            # predictions are unchanged -- confirm which input was intended.
            prediction = eval_model(series[-input_window:])
            series = torch.cat((series, prediction[-1:]))
    series = series.cpu().view(-1)
    pyplot.plot(series, color="red")
    pyplot.plot(series[:input_window], color="blue")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    pyplot.savefig(f'./nmnm/transformer-future_{plot_counter}_{steps}_{tkn}.png')
    pyplot.close()
        
# Load the saved model from './best_model_multi8.pt', mapping its tensors onto the CPU.
# NOTE(review): this presumably unpickles a whole model object, which would need the
# class definitions above to be in scope -- confirm how the file was saved.
model= torch.load('./best_model_multi8.pt',map_location=torch.device('cpu'))

In [None]:
# train_data, val_data = get_data2(2029825)
# predict_future(model,val_data,2000,2029825)

In [None]:
# look_up = 1001

# inst_check_list = [1793,5633,6401,3861249,2995969,25601,325121,6483969,40193,41729,54273,
#                    60417,5436929,70401,1510401,4267265,4268801]

# for one in tqdm(inst_check_list):
#     train_data, val_data = get_data2(one)
#     col_list = []

#     orig_data = np.array([])
#     orig_data

#     for one_part_point in range(15):   #  total_parts
#     #     print(val_data[-(300*(one_part_point+1))::].shape)
#         dpp = predict_future_open(model, val_data[-(300*(one_part_point+1))::],2000,123123)

#         col_list.append(np.append(orig_data,dpp))

#         orig_data = np.append(orig_data,dpp[:input_window])

#     #     col_list.append(dpp)
#     col_list.append(orig_data)
#     pyplot.savefig(f'./nmnm/test_plot.png')
#     pyplot.close()

#     plot_df = pd.DataFrame(col_list)
#     trps = plot_df.transpose()
#     trps.plot()
#     pd.DataFrame(orig_data).plot()
    
    
# #     predict_future(model,val_data,look_up,one)

In [None]:
# Roll the loaded model forward from `test_len` consecutive slices of val_data
# and collect each rollout, prefixed by the history seen so far, as one row.
# NOTE(review): `val_data` is first assigned in a later cell of the visible
# notebook, so on a fresh kernel this cell presumably has to run after it.
test_len = 15
orig_data = np.array([])
col_list = []

for one_part_point in tqdm(range(test_len)):   #  total_parts
    dpp = predict_future_open(
        model,
        val_data[input_window * one_part_point:input_window * (one_part_point + 1)],
        1000,
        123123,
    )
    col_list.append(np.append(orig_data, dpp))
    # Only the seed window (not the forecast) is carried into the history.
    orig_data = np.append(orig_data, dpp[:input_window])

col_list.append(orig_data)
# predict_future_open drew on the current figure; save it and release it.
pyplot.savefig(f'./nmnm/test_plot.png')
pyplot.close()

plot_df = pd.DataFrame(col_list)
trps = plot_df.T
trps.plot()

In [None]:
# for jj in range(8):
#     print(jj+1)
#     model= torch.load(f'./best_model_multi{jj+1}.pt',map_location=torch.device('cpu'))
    
#     col_list = []
#     orig_data = np.array([])
#     test_len = 8

#     for one_part_point in tqdm(range(test_len)):   #  total_parts
#         dpp = predict_future_open(model, val_data[input_window*(one_part_point):input_window*(one_part_point+1)],100,123123)
#         col_list.append(np.append(orig_data,dpp))
#         orig_data = np.append(orig_data,dpp[:input_window])

#     col_list.append(orig_data)
#     pyplot.savefig(f'./nmnm/test_plot.png')
#     pyplot.close()

#     plot_df = pd.DataFrame(col_list)
#     trps = plot_df.transpose()
#     trps.plot()
#     print('*'*60)
    

In [None]:
# Build the window tensors for instrument 3356417; get_data2 returns a
# (train, test) pair and the second element is kept as the validation set.
train_data, val_data = get_data2(3356417)

In [None]:
# Show element 0 (the zero-masked input side) of the last validation window.
val_data[-1][0]

In [None]:
# Rollouts for instrument 3529217 with checkpoint 18. Each rollout after the
# first is shifted so that its first value equals the last value of the
# history collected so far.
train_data, val_data = get_data2(3529217)
model = torch.load(f'./best_model_multi18.pt', map_location=torch.device('cpu'))
test_len = 2
orig_data = np.array([])
col_list = []

for one_part_point in tqdm(range(test_len)):   #  total_parts
    dpp = predict_future_open(
        model,
        val_data[input_window * one_part_point:input_window * (one_part_point + 1)],
        1000,
        123123,
    )
    mod = dpp[0].numpy()
    if orig_data.size > 0: #check not empty
        org = orig_data[-1]
        diff = org - mod
        dpp = dpp + diff

    col_list.append(np.append(orig_data, dpp))
    orig_data = np.append(orig_data, dpp[:input_window])

# predict_future_open drew on the current figure; save it and release it.
pyplot.savefig(f'./nmnm/test_plot.png')
pyplot.close()

plot_df = pd.DataFrame(col_list)
trps = plot_df.T
trps.plot()

In [None]:
# Rollouts for instrument 3356417 with checkpoint 18, offset against the
# history collected so far.
train_data, val_data = get_data2(3356417)  #3529217
model = torch.load(f'./best_model_multi18.pt', map_location=torch.device('cpu'))
test_len = 2
orig_data = np.array([])
col_list = []

for one_part_point in tqdm(range(test_len)):   #  total_parts
    dpp = predict_future_open(
        model,
        val_data[input_window * one_part_point:input_window * (one_part_point + 1)],
        1000,
        123123,
    )
    if orig_data.size > 0: #check not empty
        # NOTE(review): the sibling cells anchor on dpp[0] and ADD the offset;
        # here the anchor is index 301 and the offset is SUBTRACTED. Kept as
        # written -- confirm whether this variant is intentional.
        diff = orig_data[-1] - dpp[301].numpy()
        dpp = dpp - diff

    col_list.append(np.append(orig_data, dpp))
    orig_data = np.append(orig_data, dpp[:input_window])

# predict_future_open drew on the current figure; save it and release it.
pyplot.savefig(f'./nmnm/test_plot.png')
pyplot.close()

plot_df = pd.DataFrame(col_list)
trps = plot_df.T
trps.plot()

In [None]:
# Sweep checkpoints 14..18 over a fixed list of instruments; for every pair,
# stitch six 300-step rollouts together and plot them.
for i in range(14, 19):
    for jj in [1459457, 70401, 261889]:
#     for jj in [3861249,6401,3677697,3669505]:
        print('*' * 50)
        print(i)
        print(jj)

        train_data, val_data = get_data2(jj)
        model = torch.load(f'./best_model_multi{i}.pt', map_location=torch.device('cpu'))
        test_len = 6
        orig_data = np.array([])
        col_list = []
        for one_part_point in tqdm(range(test_len)):   #  total_parts
            dpp = predict_future_open(
                model,
                val_data[input_window * one_part_point:input_window * (one_part_point + 1)],
                300,
                123123,
            )
            mod = dpp[0].numpy()
            if orig_data.size > 0: #check not empty
                # Shift the rollout so it starts where the history ended.
                org = orig_data[-1]
                diff = org - mod
                dpp = dpp + diff

            col_list.append(np.append(orig_data, dpp))
            orig_data = np.append(orig_data, dpp[:input_window])

        # predict_future_open drew on the current figure; save it and release it.
        pyplot.savefig(f'./nmnm/test_plot.png')
        pyplot.close()

        plot_df = pd.DataFrame(col_list)
        trps = plot_df.T
        trps.plot()

In [None]:
# Join the saved per-instrument loss table with the instrument master list,
# keep the first 143 rows, and fill in each symbol's last traded price.
loss_df = pd.read_excel('../valid_loss_map_df_5x (17).xlsx')
loss_df['name'] =''
all_inst = pd.read_excel('./all_inst.xlsx')
df3 = pd.merge(loss_df,all_inst,left_on=['inst'], right_on = ['instrument_token'], how = 'left')
df3['ltp']=0.0
# Take an explicit copy: this frame is assigned into below and in later cells,
# and writing into a bare slice is the chained-assignment hazard pandas warns about.
df3 = df3[0:143].copy()
inedx_counter = 0
# NOTE(review): `module.kite` is not defined in the visible notebook -- confirm
# where the broker client comes from.
for one_symbol in tqdm(df3.tradingsymbol):
    quote_key = f'NSE:{one_symbol}'
    ltp = module.kite.quote([quote_key])[quote_key]['last_price']
    df3.at[inedx_counter, 'ltp'] = ltp
#     print(one_symbol)    
#     print(ltp)
    inedx_counter += 1

In [None]:
# Score checkpoints 1..18 against every instrument in df3 and tabulate the
# summed loss per checkpoint. df3['loss'] is reset for each checkpoint, so it
# ends up holding the per-instrument losses of the last one.
criterion = nn.MSELoss()
model_loss_list = []

for i in tqdm(range(18)):
    this_model = f'./best_model_multi{i+1}.pt'
    model = torch.load(this_model, map_location=torch.device('cpu'))
    this_total_loss = 0.0

    df3['loss'] = 0.0
    inedx_counter = 0
    for one_inst in tqdm(df3.inst.astype(dtype='int32')):
        _, val_data_ip = get_data2(one_inst)
        this_loss = plot_and_loss(model, val_data_ip, 1, one_inst)
        this_total_loss += this_loss
        df3.at[inedx_counter, 'loss'] = this_loss
        inedx_counter += 1
    print(this_model)
    print(this_total_loss)

    model_loss_list.append(dict(model=this_model, this_total_loss=this_total_loss))

model_loss_list_edf = pd.DataFrame(model_loss_list)
model_loss_list_edf

In [None]:
# Score checkpoint 7 on every instrument in df3, then relate loss to last price.
this_model = f'./best_model_multi7.pt'
this_total_loss = 0.0
model = torch.load(this_model, map_location=torch.device('cpu'))

inedx_counter = 0
df3['loss'] = 0.0
for one_inst in tqdm(df3.inst.astype(dtype='int32')):
    _, val_data_ip = get_data2(one_inst)
    this_loss = plot_and_loss(model, val_data_ip, 1, one_inst)
    # Accumulate the total that is printed below (it used to stay at 0.0).
    this_total_loss += this_loss
    df3.at[inedx_counter, 'loss'] = this_loss
    inedx_counter+=1

print(this_model)
print(this_total_loss)

import matplotlib.pyplot as plt

# Correlate numeric columns only; text columns make DataFrame.corr() raise on
# recent pandas versions.
plt.matshow(df3.select_dtypes(include='number').corr())
plt.show()
df3['ltp_by_loss'] = df3['ltp']/df3['loss']
# df3[['ltp_by_loss']]
# NOTE: despite the "x10" suffix, both helper columns are scaled by 20.
df3['ltp_by_lossx10'] = df3['ltp_by_loss']*20
df3['lossx10'] = df3['loss']*20
df3[['ltp','lossx10','ltp_by_lossx10']].plot()
ax = df3[['ltp','lossx10','ltp_by_lossx10']].plot.hist(bins=100, alpha=0.3)
df3[df3.ltp_by_loss > 180]

In [None]:
# Score checkpoint 3 on every instrument in df3, then relate loss to last price.
this_model = f'./best_model_multi3.pt'
this_total_loss = 0.0
model = torch.load(this_model, map_location=torch.device('cpu'))

inedx_counter = 0
df3['loss'] = 0.0
for one_inst in tqdm(df3.inst.astype(dtype='int32')):
    _, val_data_ip = get_data2(one_inst)
    this_loss = plot_and_loss(model, val_data_ip, 1, one_inst)
    # Accumulate the total that is printed below (it used to stay at 0.0).
    this_total_loss += this_loss
    df3.at[inedx_counter, 'loss'] = this_loss
    inedx_counter+=1

print(this_model)
print(this_total_loss)

import matplotlib.pyplot as plt

# Correlate numeric columns only; text columns make DataFrame.corr() raise on
# recent pandas versions.
plt.matshow(df3.select_dtypes(include='number').corr())
plt.show()
df3['ltp_by_loss'] = df3['ltp']/df3['loss']
# df3[['ltp_by_loss']]
# NOTE: despite the "x10" suffix, both helper columns are scaled by 20.
df3['ltp_by_lossx10'] = df3['ltp_by_loss']*20
df3['lossx10'] = df3['loss']*20
df3[['ltp','lossx10','ltp_by_lossx10']].plot()
ax = df3[['ltp','lossx10','ltp_by_lossx10']].plot.hist(bins=100, alpha=0.3)
df3[df3.ltp_by_loss > 180]

In [None]:
# Write the df3 table to './df3.xlsx' without the index column.
df3.to_excel('./df3.xlsx',index=False)

In [None]:
import QuantConnect_Reserved