### Generate dataset

In [None]:
import locale
import numpy as np
from tqdm import tqdm
import datetime
import calendar

# Fix the global NumPy seed so repeated runs draw the same random numbers.
np.random.seed(0)

# Number of date pairs to generate.
N = 10 ** 6
# First and last calendar dates of the range the samples are drawn from.
min_time = datetime.date(year=1970, month=1, day=1)
max_time = datetime.date(year=2050, month=12, day=31)

class Dataset(object):
    """Plain container for the generated data.

    Every attribute starts out as ``None`` and is filled in by the
    notebook cells that follow.
    """

    def __init__(self):
        # Raw date pairs and their 0/1 labels.
        self.date_pairs = self.labels = None
        # Formatted strings of the pairs and the locales used to format them.
        self.datestring_pairs = self.locale_pairs = None
        # Index -> character code table.
        self.char_mapping = None


dataset = Dataset()

First, we generate pairs of dates together with their labels (1 if the first date is on or before the second, 0 otherwise).

In [None]:
# Thresholds applied to a uniform draw in [0, 1):
#   draw >  same_year_month_perc            -> both dates share year and month
#   same_year_perc < draw <= same_year_month_perc -> both dates share the year only
#   draw <= same_year_perc                   -> the second date is drawn independently
same_year_perc = 0.6
same_year_month_perc = 0.8

dataset.date_pairs = []
dataset.labels = []

# randint's upper bound is exclusive, so offsets range over [0, span_days).
span_days = (max_time - min_time).days

# Keep this draw order (selectors first, then offsets): it determines the
# random stream consumed under the fixed seed.
branch_draws = np.random.random(N)
first_offsets = np.random.randint(span_days, size=N)

for chance, daysdelta in tqdm(zip(branch_draws, first_offsets), total=N):
    first_date = min_time + datetime.timedelta(days=int(daysdelta))
    if chance <= same_year_perc:
        # Unrelated second date anywhere in the range.
        second_offset = int(np.random.randint(span_days))
        second_date = min_time + datetime.timedelta(days=second_offset)
    else:
        # Same year; the month is either kept or re-drawn, the day is always re-drawn.
        if chance > same_year_month_perc:
            month = first_date.month
        else:
            month = np.random.randint(1, 12 + 1)
        days_in_month = calendar.monthrange(first_date.year, month)[1]
        day = np.random.randint(1, days_in_month + 1)
        second_date = datetime.date(first_date.year, month, day)
    dataset.date_pairs.append((first_date, second_date))
    # Label is 1 when the first date is not later than the second.
    dataset.labels.append(int(first_date <= second_date))

Convert each date to a string, formatting it in a randomly chosen locale.

In [None]:
import babel
from babel.dates import format_date

# Draw one locale identifier (with replacement) for each side of every pair.
available_locales = babel.localedata.locale_identifiers()
dataset.locale_pairs = np.random.choice(available_locales, (N, 2))

# Format both dates of each pair in the locale drawn for that position.
dataset.datestring_pairs = [
    (format_date(date_a, locale=locale_a), format_date(date_b, locale=locale_b))
    for (date_a, date_b), (locale_a, locale_b)
    in tqdm(zip(dataset.date_pairs, dataset.locale_pairs), total=N)
]
dataset.datestring_pairs[:5]

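Build a compact character mapping: collect every distinct character that occurs in the date strings and index them in sorted code-point order.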

In [None]:
# Gather the Unicode code point of every character seen in any date string.
allchars = set()
for da, dp in tqdm(dataset.datestring_pairs):
    allchars.update(map(ord, da))
    allchars.update(map(ord, dp))

# char_mapping[i] is the code point assigned to compact index i (ascending order).
# Code points are integers, so store them in an integer array rather than the
# float64 array np.zeros would give; the values can then be passed to chr()
# or used for exact lookups directly.
dataset.char_mapping = np.array(sorted(allchars), dtype=np.int64)

In [None]:
import pickle

# Write through a context manager so the file is flushed and closed
# deterministically instead of whenever the handle is garbage-collected.
with open('dataset.pkl', 'wb') as dataset_file:
    pickle.dump(dataset, dataset_file)

### Load generated dataset

In [None]:
import pickle
import numpy as np
from tqdm import tqdm

class Dataset(object):
    """Plain container for the generated data.

    pickle stores instances by class reference, so a class with this name has
    to exist before the saved dataset is loaded in a fresh kernel.
    """

    def __init__(self):
        # Raw date pairs and their 0/1 labels.
        self.date_pairs = self.labels = None
        # Formatted strings of the pairs and the locales used to format them.
        self.datestring_pairs = self.locale_pairs = None
        # Index -> character code table.
        self.char_mapping = None

# NOTE: unpickling can execute arbitrary code -- only load a file you generated yourself.
# The context manager closes the file handle as soon as loading finishes.
with open('dataset.pkl', 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)

First, let's train a model that receives each date as a (y, m, d) integer triplet.

### Integer model

In [None]:
# One row per pair: (year, month, day) of the first date followed by the second.
X = np.array([
    [first.year, first.month, first.day, second.year, second.month, second.day]
    for first, second in dataset.date_pairs
])
# Labels as an integer vector aligned with the rows of X.
y = np.asarray(dataset.labels, dtype=int)

In [None]:
import torch
import torch.utils.data # this is needed for some reason
import torch.nn as nn
import torch.nn.functional as F

# Model
class Net(torch.nn.Module):
    """Binary classifier over a pair of 3-feature inputs packed into 6 columns.

    The same small MLP encodes each half of the input; the two encodings are
    concatenated, passed through a hidden layer, and reduced to one sigmoid
    output per row.
    """

    def __init__(self):
        super().__init__()
        # Shared encoder, built with nn.Sequential (3 -> 20 -> 10).
        encoder_layers = [
            nn.Linear(3, 20),
            nn.ReLU(inplace=True),
            nn.Linear(20, 10),
            nn.ReLU(inplace=True),
        ]
        self.per_input_layers = nn.Sequential(*encoder_layers)
        # Layer that is invoked explicitly in forward() (2 * 10 -> 100).
        self.joint_layer_1 = nn.Linear(20, 100)
        # Output layer (100 -> 1) whose weight and bias are registered by hand.
        output_weight = torch.randn(1, 100, dtype=torch.float, requires_grad=True)
        self.joint_layer_2_w = nn.Parameter(output_weight)
        output_bias = torch.zeros(1, dtype=torch.float, requires_grad=True)
        self.joint_layer_2_b = nn.Parameter(output_bias)

    def forward(self, x):
        """Return a (batch, 1) tensor of sigmoid outputs for a (batch, 6) input."""
        first_half, second_half = torch.chunk(x, 2, dim=1)
        encoded_first = self.per_input_layers(first_half)
        encoded_second = self.per_input_layers(second_half)
        joint = torch.cat([encoded_first, encoded_second], dim=1)
        hidden = self.joint_layer_1(joint).relu()
        # addmm computes bias + hidden @ weight^T in a single call.
        pre_activation = torch.addmm(self.joint_layer_2_b, hidden, self.joint_layer_2_w.t())
        return pre_activation.sigmoid()

# Sanity check: list the shape of every parameter a freshly built Net registers.
for param in Net().parameters():
    print(param.shape)

In [None]:
from sklearn.model_selection import StratifiedKFold

# Parameters
lr = 0.0001
momentum = 0.5  # NOTE(review): not referenced by the visible cells (the optimizer below is Adam) -- confirm before removing
batch_size = 1000
epochs = 20

n_splits = 8
kfold = StratifiedKFold(n_splits=n_splits)
# Per-fold lists of per-epoch (loss, accuracy) tuples, filled by the training loop.
train_stats = []
val_stats = []

# Use the GPU when one is available and fall back to the CPU otherwise, so
# this cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Offset and scale applied to the (year, month, day, year, month, day) columns
# as (x - date_bias) / date_dev before they are fed to the model.
date_bias = torch.tensor([1970, 1, 1, 1970, 1, 1], dtype=torch.float, device=device)
date_dev = torch.tensor([80, 12, 30, 80, 12, 30], dtype=torch.float, device=device)

def train(model, data, optimizer):
    model.train() # This enables some training-only effects, such as dropout
    avg_loss = 0
    avg_acc = 0
    for i, (x, y) in enumerate(data):
        x, y = x.float().cuda(), y.float().cuda()
        x = (x - date_bias) / date_dev
        output = model(x)
        loss = F.binary_cross_entropy(output, y.view_as(output), reduction='sum')
        
        avg_loss += loss.sum().item()
        pred = (output >= 0.5).float()
        avg_acc += (y.view_as(pred) == pred).sum().item()
        
        # This is the quitessential gradient update step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    return avg_loss / len(data.dataset), avg_acc / len(data.dataset)

def test(model, data):
    model.eval() # Opposite of model.train() above
    avg_loss = 0
    avg_acc = 0
    with torch.no_grad(): # Disable the operation history tracking needed for gradient computation.
        for i, (x, y) in enumerate(data):
            x, y = x.float().cuda(), y.float().cuda()
            x = (x - date_bias) / date_dev
            output = model(x)
            loss = F.binary_cross_entropy(output, y.view_as(output), reduction='sum')

            avg_loss += loss.sum().item()
            pred = (output >= 0.5).float()
            avg_acc += (y.view_as(pred) == pred).sum().item()

    return avg_loss / len(data.dataset), avg_acc / len(data.dataset)

# Train a fresh model on every stratified fold and record per-epoch statistics.
for train_idx, val_idx in kfold.split(X, y):
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    # Run on whichever device holds the normalisation constants instead of
    # unconditionally requiring CUDA.
    run_device = date_bias.device
    # Page-locked host memory only speeds up host-to-GPU copies.
    use_pinned_memory = date_bias.is_cuda

    # You can also extend torch.utils.data.Dataset, with __getitem__ and __len__ overrides.
    # __getitem__ may be slow-ish, because DataLoader can assemble batches in
    # parallel worker processes (num_workers).
    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   pin_memory=use_pinned_memory, num_workers=1,
                                                   shuffle=True)
    val_dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 pin_memory=use_pinned_memory, num_workers=1)
    # Basic linear model should get good performance for this task
    # model = nn.Sequential(nn.Linear(6, 1), nn.Sigmoid()).to(run_device)
    model = Net().to(run_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    # Per-epoch (loss, accuracy) for this fold.
    tstat = []
    vstat = []
    for epoch in tqdm(range(epochs)):
        tstat.append(train(model, train_dataloader, optimizer))
        vstat.append(test(model, val_dataloader))
    train_stats.append(tstat)
    val_stats.append(vstat)
    # Validation (loss, accuracy) after the final epoch of this fold.
    print(vstat[-1])

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

f, ax = plt.subplots(1, 2, figsize=(18, 10))

# One curve per fold: training in blue, validation in orange.
# Left panel shows the loss, right panel the accuracy.
for split_name, split_stats, colour in (('train', train_stats, 'b'),
                                        ('validation', val_stats, 'orange')):
    for fold, stats in enumerate(split_stats):
        epoch_axis = np.arange(len(stats))
        loss = [x[0] for x in stats]
        acc = [x[1] for x in stats]
        # Label only the first fold so the legend shows one entry per split.
        label = split_name if fold == 0 else None
        ax[0].plot(epoch_axis, loss, c=colour, label=label)
        ax[1].plot(epoch_axis, acc, c=colour, label=label)

ax[0].set(title='Loss per fold', xlabel='epoch', ylabel='mean binary cross-entropy per sample')
ax[1].set(title='Accuracy per fold', xlabel='epoch', ylabel='accuracy')
for panel in ax:
    panel.legend()

We can print out trained parameters, or full states (which are serialized when saving the model).

In [None]:
# Dump every trained parameter tensor of the current model.
for trained_param in model.parameters():
    print(trained_param)

In [None]:
# Model state: entry name and the size of the tensor stored under it.
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

# Optimizer state: each top-level key with its full value.
for state_key, state_value in optimizer.state_dict().items():
    print(state_key, "\t", state_value)

Save/Load

In [None]:
# Persist only the weights (the state dict), not the module object itself.
checkpoint_path = 'model.torch'
torch.save(model.state_dict(), checkpoint_path)

# Rebuild the architecture, restore the saved weights into it, and switch
# to evaluation mode.
model = Net().cuda()
restored_state = torch.load(checkpoint_path)
model.load_state_dict(restored_state)
model.eval()

In [None]:
with torch.no_grad():
    # Feed the data on whatever device the model lives on.
    model_device = next(model.parameters()).device
    inputs = torch.from_numpy(X).float().to(model_device)
    # Apply the same normalisation used in train()/test(); the model was
    # never trained on raw year/month/day values.
    inputs = (inputs - date_bias.to(model_device)) / date_dev.to(model_device)
    outputs = model(inputs)
    # Threshold the sigmoid outputs at 0.5 to get a boolean prediction per row.
    preds = (outputs >= 0.5).flatten().cpu().numpy()

In [None]:
# List every misclassified example as (date-string pair, integer feature row).
is_wrong = preds != y
[(strings, features)
 for strings, features, wrong in zip(dataset.datestring_pairs, X, is_wrong)
 if wrong]