In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [2]:
# Load the full training table from the local parquet file into a DataFrame.
all_data = pd.read_parquet('train_low_mem.parquet')

In [3]:
# Drop the identifier column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a no-op
# rather than a KeyError on the already-removed column.
all_data = all_data.drop(columns='row_id', errors='ignore')

In [None]:
class Bagging:
    """Bootstrap-aggregating ensemble.

    Every base model is fitted on its own bootstrap resample (rows drawn with
    replacement) of the training data; predictions are the element-wise mean
    of the base models' predictions.

    Parameters
    ----------
    models : sequence
        Objects exposing ``fit(X, y)`` and ``predict(X)``.
    """

    def __init__(self, models):
        self.models = models
        self.n_models = len(models)

    @staticmethod
    def _take_rows(data, rows):
        """Select rows by position from a pandas object or any array-like."""
        # Plain ``data[rows]`` on a DataFrame would look up *columns*, and on a
        # Series it would look up index *labels*; ``iloc`` is always positional.
        if hasattr(data, 'iloc'):
            return data.iloc[rows]
        return data[rows]

    def fit(self, X, y):
        """Fit each base model on an independent bootstrap resample of (X, y).

        X and y may be NumPy arrays or pandas objects; they must have the same
        number of rows.
        """
        n = X.shape[0]
        for model in self.models:
            sample = np.random.choice(n, size=n, replace=True)
            model.fit(self._take_rows(X, sample), self._take_rows(y, sample))

    def predict(self, X):
        """Return the per-sample mean of the base models' predictions."""
        res = [model.predict(X) for model in self.models]
        # axis=0 averages across models only; without it every sample would be
        # collapsed into a single scalar.
        return np.mean(np.asarray(res), axis=0)


class Stacking:
    """Ensemble that fits every base model on the full data and averages them.

    Note that, despite the name, no meta-learner is trained here: ``predict``
    is the unweighted element-wise mean of the base models' predictions.

    Parameters
    ----------
    models : sequence
        Objects exposing ``fit(X, y)`` and ``predict(X)``.
    """

    def __init__(self, models):
        self.models = models
        self.n_models = len(models)

    def fit(self, X, y):
        """Fit each base model on the complete (X, y)."""
        for model in self.models:
            model.fit(X, y)

    def predict(self, X):
        """Return the per-sample mean of the base models' predictions."""
        res = [model.predict(X) for model in self.models]
        # axis=0 averages across models only, keeping one value per sample.
        return np.mean(np.asarray(res), axis=0)

class NetModel:
    """Adapter that gives a torch module a ``fit`` / ``predict`` interface.

    Parameters
    ----------
    model : nn.Module
        Network to train; it is moved to ``device``.
    optim : callable
        Optimizer class or factory, called as ``optim(model.parameters(), **args)``.
    criterion : callable
        Zero-argument factory for the loss (e.g. ``nn.MSELoss``).
    device : torch.device or str
        Device on which the forward and backward passes run.
    **args
        Keyword arguments forwarded to ``optim`` (e.g. ``lr``).
    """

    def __init__(self, model, optim, criterion, device, **args):
        self.device = device
        # Move first so the optimizer is built over the already-placed parameters.
        self.model = model.to(device)
        self.optim = optim(self.model.parameters(), **args)
        self.criterion = criterion()

    @staticmethod
    def _as_tensor(data):
        """Convert a pandas object, array-like or tensor to a float32 CPU tensor."""
        if isinstance(data, torch.Tensor):
            return data.detach().to(device='cpu', dtype=torch.float32)
        return torch.tensor(np.asarray(data), dtype=torch.float32)

    def fit(self, X, y, epoch=5, batch_size=4096):
        """Train the wrapped model for ``epoch`` passes over (X, y).

        The data stays on the CPU and is moved to the device one batch at a
        time; batches are reshuffled on every pass.
        """
        features = self._as_tensor(X)
        targets = self._as_tensor(y)
        loader = DataLoader(
            TensorDataset(features, targets),
            batch_size=batch_size,
            shuffle=True,
        )

        self.model.train()
        for _ in range(epoch):
            for train_X, train_y in loader:
                y_hat = self.model(train_X.to(self.device))
                loss = self.criterion(y_hat, train_y.to(self.device))
                self.optim.zero_grad()
                loss.backward()
                self.optim.step()

    def predict(self, X, batch_size=4096):
        """Return the model's outputs for X as a NumPy array.

        Runs in eval mode without gradient tracking, batch by batch, and leaves
        the model in eval mode (``fit`` switches it back to train mode).
        """
        features = self._as_tensor(X)
        self.model.eval()
        outputs = []
        with torch.no_grad():
            for (batch,) in DataLoader(TensorDataset(features), batch_size=batch_size):
                out = self.model(batch.to(self.device))
                # atleast_1d guards against models that squeeze a batch of one to 0-d.
                outputs.append(torch.atleast_1d(out).cpu())
        if not outputs:
            return np.empty(0, dtype=np.float32)
        return torch.cat(outputs).numpy()
    

In [4]:
class Dense(nn.Module):
    """Fully connected block: bias-free linear map -> BatchNorm1d -> activation.

    Parameters
    ----------
    in_features : int
        Width of the input.
    out_features : int
        Width of the output.
    activation : callable
        Zero-argument factory for the activation module (e.g. ``nn.ReLU``).
    """

    def __init__(self, in_features, out_features, activation):
        super().__init__()
        # No bias on the linear map: the BatchNorm1d that follows has its own
        # learnable shift.
        self.linear = nn.Linear(in_features, out_features, bias=False)
        self.normal = nn.BatchNorm1d(out_features)
        self.activation = activation()

    def forward(self, X):
        """Apply the three stages in order and return the activated output."""
        hidden = X
        for stage in (self.linear, self.normal, self.activation):
            hidden = stage(hidden)
        return hidden

class Dnn(nn.Module):
    """Five ``Dense`` blocks followed by a linear head giving one value per row.

    Parameters
    ----------
    activation : callable
        Zero-argument factory for the activation used in every hidden block.
    in_features : int, default 300
        Number of input features per row.
    """

    def __init__(self, activation, in_features=300):
        super().__init__()
        self.layer = nn.Sequential(
            Dense(in_features, 512, activation),
            Dense(512, 256, activation),
            Dense(256, 256, activation),
            Dense(256, 64, activation),
            Dense(64, 32, activation),
        )
        self.out = nn.Linear(32, 1, bias=True)

    def forward(self, X):
        """Map a (batch, in_features) input to a (batch,) output."""
        # squeeze(-1) removes only the trailing size-1 output axis; a bare
        # squeeze() would also drop the batch axis when the batch has one row.
        return self.out(self.layer(X)).squeeze(-1)

In [None]:
# The first 80% of rows (in file order) form the training slice.
train_ind = int(0.8 * len(all_data))
train_data = all_data.iloc[:train_ind]

# Features f_0 .. f_299 and the target, as float32 tensors.
X = torch.tensor(
    train_data[[f'f_{i}' for i in range(300)]].to_numpy(),
    dtype=torch.float32,
)
y = torch.tensor(train_data['target'].to_numpy(), dtype=torch.float32)

# Hold-out split over the remaining rows (currently disabled):
# val_data = all_data.iloc[train_ind:]
# val_X = torch.tensor(val_data[[f'f_{i}' for i in range(300)]].to_numpy(), dtype=torch.float32)
# val_y = torch.tensor(val_data['target'].to_numpy(), dtype=torch.float32)

In [None]:
# Wrap the training tensors and serve them in mini-batches of 4096 rows.
# shuffle=True redraws the row order every epoch so that batches are not tied
# to the order in which rows appear in the file.
torch_set = TensorDataset(X, y)
torch_loader = DataLoader(torch_set, batch_size=4096, shuffle=True)

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Place the model on its device before the optimizer is built over its parameters.
model = Dnn(nn.ReLU).to(device)
optim = torch.optim.Adam(
    model.parameters(),
    lr=0.0005,
    betas=(0.9, 0.999),
    eps=1e-05,
    weight_decay=0.001,
    amsgrad=False
)
criterion = nn.MSELoss()
losses = []      # filled only by the optional logging below
val_losses = []  # filled only by the optional validation pass below
for epoch in range(20):
    model.train()
    for ind, (train_X, train_y) in enumerate(torch_loader):
        y_hat = model(train_X.to(device))
        loss = criterion(y_hat, train_y.to(device))
        optim.zero_grad()
        loss.backward()
        optim.step()
        # Optional: record the training RMSE every 100 batches.
        # if ind % 100 == 0:
        #     losses.append(np.sqrt(loss.item()))

    # Optional validation pass. It needs val_X / val_y, which the data-split
    # cell currently leaves commented out.
    # model.eval()
    # with torch.no_grad():
    #     val_yhat = model(val_X.to(device))
    # y1 = val_yhat.cpu().numpy().squeeze()
    # y2 = val_y.numpy()
    # val_losses.append(np.sqrt(np.mean((y1 - y2) ** 2)))
    # print(np.corrcoef(y1, y2)[0, 1])