In [1]:
!pip install timm

Collecting timm
  Downloading timm-0.4.5-py3-none-any.whl (287 kB)
[K     |████████████████████████████████| 287 kB 867 kB/s 
Installing collected packages: timm
Successfully installed timm-0.4.5


In [2]:
import timm
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm

In [3]:
df = pd.read_csv('../input/financial-econometrics-preprocessed/preprocessed_data_non_wavelet.csv',nrows=80000)

In [4]:
df = df[:80000]

In [5]:
class dataset(torch.utils.data.Dataset):


    def __init__(self, image, target=None):
        if target is not None:
            assert image.size(0) == target.size(0)
        self.image = image
        self.target = target

    def __getitem__(self, index):

        image = self.image[index]

        target = self.target[index]

        return image.float(), target.float()

    def __len__(self):
        return len(self.image)

In [6]:
data = df.drop(columns=['output','Unnamed: 0','index']).to_numpy()

In [7]:
data = data[50:-4]

In [8]:
sc = MinMaxScaler()
data = sc.fit_transform(data)
data=np.array([data[i-30:i] for i in range(30,len(data))])

In [9]:
target=df.output[30:].to_numpy()
target=target[50:-4]

In [10]:
data = torch.tensor(data)
target = torch.tensor(target)

In [11]:
data = torch.unsqueeze(data, 1)

In [12]:
train_dataset = dataset(data,target)
train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=False,batch_size=64)

In [13]:
class model(nn.Module):
    def __init__(self):
        super(model, self).__init__()
        self.conv = nn.Sequential(         
            nn.Conv2d(1, 3, 1,1,0), 
            nn.BatchNorm2d(3,affine=True), 
              nn.ReLU(),                      
        )
        self.resnet= timm.create_model('resnext50_32x4d',pretrained=False)
        
        self.resnet._modules["fc"] = nn.Linear(2048,1024)      
        self.l1 = nn.Linear(1024,1024)
        self.bn1 = nn.BatchNorm1d(1024, momentum=0.9)
        self.l2= nn.Linear(1024,256)
        self.bn2 = nn.BatchNorm1d(256, momentum=0.9)
        self.l3= nn.Linear(256,1)




    def forward(self, x):
      
            out = self.conv(x)        
            out = self.resnet(out)
            out = self.l1(out)
            out = self.bn1(out)
            out = torch.relu(out)
            output=self.l2(out)
            output=self.bn2(output)
            output = torch.relu(output)
            output=self.l3(output)
            
  
            return output,out

In [14]:
model = model().to('cuda')

In [15]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [16]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [17]:
for epoch in tqdm(range(100)):
    lgbm_input=torch.zeros(1,1024).float().to('cuda')
    losses = []
    for (images,target) in (train_loader):
        images = images.to('cuda')
        target = target.to('cuda').view(-1,1)
        out,output = model(images)
        loss = criterion(out,target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.data)
        lgbm_input=torch.cat((lgbm_input,output), 0)
    print(sum(losses)/len(losses))

  0%|          | 0/100 [00:00<?, ?it/s]

tensor(7.0191, device='cuda:0')
tensor(6.4850, device='cuda:0')
tensor(6.3427, device='cuda:0')
tensor(6.2390, device='cuda:0')
tensor(6.0891, device='cuda:0')
tensor(5.8973, device='cuda:0')
tensor(5.6914, device='cuda:0')
tensor(5.4658, device='cuda:0')
tensor(5.2895, device='cuda:0')
tensor(5.1194, device='cuda:0')
tensor(4.9750, device='cuda:0')
tensor(4.6632, device='cuda:0')
tensor(4.3393, device='cuda:0')
tensor(4.2871, device='cuda:0')
tensor(4.0177, device='cuda:0')
tensor(3.6426, device='cuda:0')
tensor(3.6523, device='cuda:0')
tensor(3.4016, device='cuda:0')
tensor(3.3400, device='cuda:0')
tensor(3.1425, device='cuda:0')
tensor(3.0330, device='cuda:0')
tensor(2.8198, device='cuda:0')
tensor(2.8422, device='cuda:0')
tensor(2.7463, device='cuda:0')
tensor(2.4292, device='cuda:0')
tensor(2.3251, device='cuda:0')
tensor(2.4120, device='cuda:0')
tensor(2.3568, device='cuda:0')
tensor(2.0916, device='cuda:0')
tensor(1.9604, device='cuda:0')
tensor(1.8692, device='cuda:0')
tensor(1

In [18]:
def DA(predictions,y_test):
    result=predictions*y_test
    result=[1 if item>=0 else 0 for item in result]
    return np.mean(result)

In [19]:
losses = []
mae = 0
mse = 0
i = 0
acc = 0
model.eval()
for (images,target) in (train_loader):
    i += 1
    images = images.to('cuda')
    target = target.to('cuda').view(-1,1)
    out,output = model(images)
    mae += mean_absolute_error(target.detach().cpu().numpy()/1000,out.detach().cpu().numpy()/1000)
    mse += mean_squared_error(target.detach().cpu().numpy()/1000,out.detach().cpu().numpy()/1000)
    acc += DA(target.detach().cpu().numpy()/1000,out.detach().cpu().numpy()/1000) 
print(f"Mean absolute Error is {mae/i}")
print(f"Mean Squared is {mse/i}")
print(f"Accuracy is {acc/i}")

Mean absolute Error is 0.012659736543834534
Mean Squared is 0.0003452917094044295
Accuracy is 0.5947530751874226


In [20]:
import lightgbm as lgb

In [21]:
other_input = df.drop(columns=['output','Unnamed: 0','index']).to_numpy()

In [22]:
other_input = other_input[50:-4]
other_input = np.array([other_input[i-30:i] for i in range(30,len(other_input))])

In [23]:
other_input = other_input.reshape((-1,30*other_input.shape[-1]))

In [24]:
lgbm_input = lgbm_input.cpu().detach().numpy()
lgbm_input = lgbm_input[1:]

In [25]:
print(lgbm_input.shape,other_input.shape)

(79916, 1024) (79916, 900)


In [26]:
inputs = np.concatenate([lgbm_input, other_input], 1)

In [27]:
np.savez_compressed('data.npz', inputs)

In [28]:
targets = df.output[50:].to_numpy()
targets = targets[30:-4]

In [29]:
x_train, x_test, y_train, y_test = train_test_split(inputs , targets, test_size=0.2, random_state=2021) 
params = {'num_leaves': 38,              
          'min_data_in_leaf': 50,
          'objective': 'regression',     
          'max_depth': -1,                
          'learning_rate': 0.1,           
          "min_sum_hessian_in_leaf": 6,
          "boosting": "gbdt",             
          "feature_fraction": 0.9,         
          "bagging_freq": 1,               
          "bagging_fraction": 0.7,          
          "bagging_seed": 11,
          "lambda_l1": 0.1,
          "verbosity": -1,               
          "nthread": 4,               
          'metric': 'mae',               
          "random_state": 2019,
          'device_type':'gpu',
          'gpu_platform_id':1,
          'gpu_device_id':0
          }
trn_data = lgb.Dataset(x_train, y_train)
val_data = lgb.Dataset(x_test, y_test)

In [30]:
clf = lgb.train(params,
                trn_data,
                20000,
                valid_sets=[trn_data, val_data],
                verbose_eval=200,
                early_stopping_rounds=500)
oof = clf.predict(x_train, num_iteration=clf.best_iteration)
predictions = clf.predict(x_test, num_iteration=clf.best_iteration)

Training until validation scores don't improve for 500 rounds
[200]	training's l1: 0.377849	valid_1's l1: 0.449251
[400]	training's l1: 0.323518	valid_1's l1: 0.438025
[600]	training's l1: 0.280191	valid_1's l1: 0.43285
[800]	training's l1: 0.245108	valid_1's l1: 0.430742
[1000]	training's l1: 0.21541	valid_1's l1: 0.428952
[1200]	training's l1: 0.189921	valid_1's l1: 0.42787
[1400]	training's l1: 0.167872	valid_1's l1: 0.42716
[1600]	training's l1: 0.148843	valid_1's l1: 0.426304
[1800]	training's l1: 0.131961	valid_1's l1: 0.425521
[2000]	training's l1: 0.117113	valid_1's l1: 0.425156
[2200]	training's l1: 0.104168	valid_1's l1: 0.424847
[2400]	training's l1: 0.0928784	valid_1's l1: 0.424736
[2600]	training's l1: 0.0828234	valid_1's l1: 0.424634
[2800]	training's l1: 0.0739526	valid_1's l1: 0.424213
[3000]	training's l1: 0.0661039	valid_1's l1: 0.424107
[3200]	training's l1: 0.0591356	valid_1's l1: 0.423707
[3400]	training's l1: 0.0529361	valid_1's l1: 0.423488
[3600]	training's l1: 

In [31]:
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import mean_absolute_error

In [32]:
def mean_absolute_percentage_error(y_pred, y_true):
    mask=y_true!=0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / (y_true[mask]))) * 100

def normalize_function(data):
    min = np.amin(data,axis=0)
    max = np.amax(data,axis=0)
    return (data - min)/(max-min)
def labeled(x):
    if x>=0:
        return 0
    if x<=-0:
        return 1  



In [33]:
MSE=mean_squared_error(predictions/1000,y_test/1000)
MAE=mean_absolute_error(predictions/1000,y_test/1000)
acc=DA(predictions,y_test)

print('MAE %.6f*10-3\t MSE: %.6f*10-3\t ACC: %.6f' %
                (1000*MAE,1000*MSE,acc ))

MAE 0.422190*10-3	 MSE: 0.000569*10-3	 ACC: 0.929930


In [34]:
torch.save(model.state_dict(), 'model.pth')