---
title: Oracle
---

Humans are not very good at producing random 0/1 sequences: we can guess their next number correctly more than 50% of the time!

```{attention}
- [ ] Describe problem we are trying to solve and the basic "trick" that we intend to use (learn based on the last few produced values)
- [ ] Describe how to build the database (CSV file, 1 column, title "Values" first row, then 0 or 1, typically ~100 values)
- [ ] Make custom Train + Test Dataset from database
- [ ] model I/O interpretation : N numbers (0 or 1) -> normalized log p of 1 output
- [ ] implement some baseline models as models with fixed parameters: pick heads, pick tails, pick last one, pick opposite last one, restore the balance, etc. Test their performance on the test dataset.
- [ ] Baseline accuracy vs best accuracy (via stats) -> algo score
- [ ] make some additional data analysis to try to find some pattern in the dataset and guess some better performing "fixed" algorithm?
- [ ] Try to learn the parameters of a linear model, see what the performance is.
- [ ] Logistic regression?
- [ ] Try with 1 or 2 hidden layers, tweak the params (sizes), etc, see how it goes.
```

In [1]:
import torch

In [2]:
from torch.utils.data import Dataset

First, no options at construction (but later `transform`?), no support for tensors of indices (but required for later ?)

In [3]:
import pathlib
import pandas as pd

In [4]:
# Nota: ATM, a simple list would do. Can a simple list be given to a data loader? Test!
# Do with and without transform and target_transform

class HTDataset(Dataset):
    """Dataset of (previous values, next value) pairs read from CSV files.

    Every ``*.csv`` file found in ``db_dir`` must have a ``Values`` column.
    Each sample is a tuple ``(inputs, output)`` where ``inputs`` is a list of
    ``chunk_size`` consecutive values and ``output`` is the value that
    immediately follows them in the same file.

    Args:
        chunk_size: number of consecutive values used to predict the next one.
        overlap: if true, windows slide by one value at a time; otherwise
            consecutive windows start ``chunk_size`` values apart.
        transform: optional callable applied to ``inputs`` in ``__getitem__``.
        target_transform: optional callable applied to ``output`` in
            ``__getitem__``.
        db_dir: directory that is scanned for CSV files.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """

    def __init__(self, chunk_size=3, overlap=False, transform=None, target_transform=None, db_dir="./db"):
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
        self.data = []
        # A stride of 1 gives sliding (overlapping) windows; a stride of
        # chunk_size gives windows whose inputs do not overlap.
        stride = 1 if overlap else chunk_size
        # Sort the files so that the sample order does not depend on the
        # directory listing order of the file system.
        for csv_file in sorted(pathlib.Path(db_dir).glob("*.csv")):
            values = pd.read_csv(csv_file)["Values"].tolist()
            # The value right after the window must exist, hence the bound
            # len(values) - chunk_size (exclusive) on the window start.
            for start in range(0, len(values) - chunk_size, stride):
                stop = start + chunk_size
                self.data.append((values[start:stop], values[stop]))
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of (inputs, output) samples."""
        return len(self.data)

    def __getitem__(self, i):  # not vectorized
        """Return sample ``i`` with the optional transforms applied."""
        inputs, output = self.data[i]
        if self.transform is not None:
            inputs = self.transform(inputs)
        if self.target_transform is not None:
            output = self.target_transform(output)
        return inputs, output

In [5]:
# Build the dataset with sliding (overlapping) windows and the default chunk_size of 3.
data = HTDataset(overlap=True)

In [6]:
def heads(input):
    """Baseline predictor: ignore *input* and always answer 0."""
    return 0

def tails(input):
    """Baseline predictor: ignore *input* and always answer 1."""
    return 1

In [7]:
def accuracy(algo, data):
    """Return the fraction of samples in *data* that *algo* predicts correctly.

    *data* is an iterable of ``(input, output)`` pairs; a sample counts as a
    success when ``algo(input) == output``. An empty *data* raises
    ``ZeroDivisionError``.
    """
    outcomes = [bool(algo(window) == target) for window, target in data]
    return sum(outcomes) / len(outcomes)

In [8]:
# Baseline: accuracy obtained by always predicting 0.
accuracy(heads, data)

0.5257731958762887

In [9]:
# Baseline: accuracy obtained by always predicting 1.
accuracy(tails, data)

0.4742268041237113

In [10]:
def keep(input):
    """Baseline predictor: repeat the most recent value of *input*."""
    most_recent = input[-1]
    return most_recent

def switch(input):
    """Baseline predictor: answer the opposite (``1 - v``) of the last value ``v``."""
    most_recent = input[-1]
    return 1 - most_recent

In [11]:
# Baseline: accuracy obtained by repeating the last value.
accuracy(keep, data)

0.31958762886597936

In [12]:
# Baseline: accuracy obtained by flipping the last value.
accuracy(switch, data)

0.6804123711340206

In [13]:
# TODO: Linear regression

In [14]:
from torch.nn import Module, Linear

def T(x):
    """Build a new ``torch.float32`` tensor from *x*."""
    as_float = torch.tensor(x, dtype=torch.float32)
    return as_float

class LR(Module):
    """Single affine layer mapping ``chunk_size`` input values to one output."""

    def __init__(self, chunk_size=3):
        super().__init__()
        self.chunk_size = chunk_size
        self.linear = Linear(in_features=chunk_size, out_features=1)

    def forward(self, input):
        """Return the raw output of the linear layer for *input*."""
        output = self.linear(input)
        return output
        

In [15]:
# Fresh linear model (default chunk_size of 3) with its initial, untrained parameters.
model = LR()

In [16]:
# Sanity check: evaluate the untrained model on a single window of three values.
model(torch.tensor([0.0, 1.0, 0.0]))

tensor([-0.0975], grad_fn=<ViewBackward0>)

In [17]:
def model_wrapper(input):
    """Turn the global ``model`` into a 0/1 predictor usable by ``accuracy``.

    The model is switched to eval mode, *input* is converted to a float32
    tensor, and the scalar model output is thresholded: 1 if it is at least
    0.5, otherwise 0.
    """
    model.eval()
    features = torch.tensor(input, dtype=torch.float32)
    score = model(features).item()
    return 1 if score >= 0.5 else 0

In [18]:
# Sanity check: 0/1 prediction of the current model for one window.
model_wrapper([0, 1, 0])

0

In [19]:
# Accuracy of the current (still untrained) model on the dataset.
accuracy(model_wrapper, data)

0.5257731958762887

In [20]:
from torch.utils.data import DataLoader

In [21]:
def loss_fn(y1, y2):
    """Return the mean absolute error between *y1* and *y2*.

    Both tensors are squeezed (size-1 dimensions removed) before they are
    subtracted element-wise.

    NOTE(review): this is an L1 loss, identical to the one defined again in
    In [30], whereas the notes in In [29] discuss MSE -- confirm which loss
    was intended for this first experiment.
    """
    residual = y1.squeeze() - y2.squeeze()
    return residual.abs().mean()

In [22]:
# Plain SGD bound to the parameters of the current `model`; it must be re-created whenever `model` is replaced.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [23]:
# Stack the whole dataset into one float32 tensor of inputs (X) and one of targets (y).
# These globals are what train() below reads.
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). The model is put in training mode, the loss is computed and
    back-propagated, the optimizer takes one step, and the gradients are
    cleared afterwards so the next call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    # Backpropagation, parameter update, then gradient reset.
    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [24]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 10_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[-0.4520, -0.0116, -0.5101]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([-0.0859], requires_grad=True))]
loss: 1.0265779495239258
accuracy: 0.5257731958762887
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-1.7782e-04,  2.2004e-01, -7.7982e-01]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.7798], requires_grad=True))]
loss: 0.34006598591804504
accuracy: 0.6804123711340206


In [25]:
# Escape the local minimum?

In [26]:
# Test a better strategy (switch)
# Overwrite the parameters by hand so that output = 1 - (last input value).
sd = model.state_dict()
sd["linear.weight"] = torch.tensor([[0.0, 0.0, -1.0]])
sd["linear.bias"] = torch.tensor([1.0])
model.load_state_dict(sd)

<All keys matched successfully>

In [27]:
# Check that the hand-set parameters were loaded.
model.state_dict()

OrderedDict([('linear.weight', tensor([[ 0.,  0., -1.]])),
             ('linear.bias', tensor([1.]))])

In [28]:
# Loss and accuracy of the hand-set "switch" parameters, for comparison with the trained ones.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[ 0.,  0., -1.]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.], requires_grad=True))]
loss: 0.3195876181125641
accuracy: 0.6804123711340206


In [29]:
# Mmm, there is an issue here: the loss appears to be larger (although the accuracy is better).
# How come? Is it "normal" (is the MSE simply a poor proxy)? A mistake?
# Actually, "hedging the bets" and returning the middle value 0.5 almost all the time
# is probably strategic, given that an error of 1.0 is punished much more than an error of 0.5
# (4x more costly!). So that kind of makes sense... we need to "shape" the loss function
# much better. Would the L1 error work here? Mmm

In [30]:
def loss_fn(y1, y2):
    """Return the L1 loss: the mean absolute difference between *y1* and *y2*.

    Both tensors are squeezed (size-1 dimensions removed) before they are
    subtracted element-wise.
    """
    residual = y1.squeeze() - y2.squeeze()
    return residual.abs().mean()

In [31]:
# Start again from a fresh model with new initial parameters.
model = LR()

In [32]:
# New optimizer bound to the new model's parameters, with a larger learning rate (1e-2).
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

In [33]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 10_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[ 0.4350, -0.2285,  0.4200]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([-0.1959], requires_grad=True))]
loss: 0.6889473795890808
accuracy: 0.3917525773195876
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-9.2302e-04, -6.7032e-04, -9.9001e-01]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.0005], requires_grad=True))]
loss: 0.3228142559528351
accuracy: 0.6804123711340206


In [34]:
# Load the exact "switch" parameters again: output = 1 - (last input value).
sd = model.state_dict()
sd["linear.weight"] = torch.tensor([[0.0, 0.0, -1.0]])
sd["linear.bias"] = torch.tensor([1.0])
model.load_state_dict(sd)

<All keys matched successfully>

In [35]:
# Loss and accuracy of the exact "switch" parameters under the current loss.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[ 0.,  0., -1.]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.], requires_grad=True))]
loss: 0.3195876181125641
accuracy: 0.6804123711340206


In [36]:
# Mmm this is also a poor proxy for accuracy (even if it seems less shitty?)

Here we could actually display all cases (inputs: 2**3 = 8) and see what the likeliest outcome is in each case.

In [37]:
# Also, are things different if we start with overlapping data?

In [38]:
# Count how often each (input window + next value) pattern occurs in the dataset.
# The loop variables are deliberately not named X / y: those globals hold the
# training tensors that train() reads, and iterating with the same names would
# silently replace them with the last sample.
l = {}
for window, target in data:
    d = tuple(window) + (target,)
    l[d] = l.get(d, 0) + 1
# Re-build the dict in sorted key order so the display is easy to scan.
l = dict(sorted(l.items()))
l

{(0, 0, 0, 0): 1,
 (0, 0, 0, 1): 6,
 (0, 0, 1, 0): 5,
 (0, 0, 1, 1): 5,
 (0, 1, 0, 0): 8,
 (0, 1, 0, 1): 15,
 (0, 1, 1, 0): 8,
 (0, 1, 1, 1): 2,
 (1, 0, 0, 0): 7,
 (1, 0, 0, 1): 4,
 (1, 0, 1, 0): 18,
 (1, 0, 1, 1): 5,
 (1, 1, 0, 0): 2,
 (1, 1, 0, 1): 8,
 (1, 1, 1, 0): 2,
 (1, 1, 1, 1): 1}

In [39]:
# Rebuild the dataset with overlapping (sliding) windows.
# NOTE(review): In [5] already uses overlap=True, so this rebuilds the same dataset -- confirm this is intended.
data = HTDataset(overlap=True)

In [40]:
# Baseline on the rebuilt dataset: always predict 0.
accuracy(heads, data)

0.5257731958762887

In [41]:
# Count how often each (input window + next value) pattern occurs in the dataset.
# The loop variables are deliberately not named X / y: those globals hold the
# training tensors that train() reads, and iterating with the same names would
# silently replace them with the last sample.
l = {}
for window, target in data:
    d = tuple(window) + (target,)
    l[d] = l.get(d, 0) + 1
# Re-build the dict in sorted key order so the display is easy to scan.
l = dict(sorted(l.items()))
l

{(0, 0, 0, 0): 1,
 (0, 0, 0, 1): 6,
 (0, 0, 1, 0): 5,
 (0, 0, 1, 1): 5,
 (0, 1, 0, 0): 8,
 (0, 1, 0, 1): 15,
 (0, 1, 1, 0): 8,
 (0, 1, 1, 1): 2,
 (1, 0, 0, 0): 7,
 (1, 0, 0, 1): 4,
 (1, 0, 1, 0): 18,
 (1, 0, 1, 1): 5,
 (1, 1, 0, 0): 2,
 (1, 1, 0, 1): 8,
 (1, 1, 1, 0): 2,
 (1, 1, 1, 1): 1}

In [42]:
# Rebuild the training tensors read by train() from the current dataset.
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 

In [43]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 10_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[ 0.,  0., -1.]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.], requires_grad=True))]
loss: 0.3195876181125641
accuracy: 0.6804123711340206
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-0.0012,  0.0024, -0.9935]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.0015], requires_grad=True))]
loss: 0.32053428888320923
accuracy: 0.6804123711340206


In [44]:
# Simplest setting: predict the next value from the single previous value.
chunk_size = 1

# Windows whose inputs do not overlap.
data = HTDataset(chunk_size=chunk_size, overlap=False)

# Training tensors read by train().
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 

model = LR(chunk_size=chunk_size)

def loss_fn(y1, y2):
    """Return the L1 loss: the mean absolute difference between *y1* and *y2*.

    Both tensors are squeezed (size-1 dimensions removed) before they are
    subtracted element-wise.
    """
    residual = y1.squeeze() - y2.squeeze()
    return residual.abs().mean()

# SGD bound to the parameters of the model created just above.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). Gradients are cleared after the optimizer step so the next
    call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [45]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 10_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[0.6998]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.2212], requires_grad=True))]
loss: 0.6287241578102112
accuracy: 0.32323232323232326
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-0.9950]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.9964], requires_grad=True))]
loss: 0.32752057909965515
accuracy: 0.6767676767676768


In [46]:
# Reference: accuracy of the fixed "flip the last value" baseline on this dataset.
accuracy(switch, data)

0.6767676767676768

In [47]:
# Reference: accuracy of always predicting 0 on this dataset.
accuracy(heads, data)

0.5353535353535354

In [48]:
# Prediction of the trained model when the previous value is 1.
model_wrapper([1])

0

In [49]:
# Prediction of the trained model when the previous value is 0.
# NOTE(review): heads() above returns 0 -- confirm what "actually heads" refers to here.
model_wrapper([0]) # actually heads ...

1

In [50]:
# Switch the trained model to evaluation mode (the call also displays the module).
model.eval()

LR(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

In [51]:
# Hand-built exact "switch" model: output = 1 - input.
model2 = LR(chunk_size=1)
with torch.no_grad():
    model2.linear.weight[0,0] = -1.0
    model2.linear.bias[0] = 1.0

In [52]:
model.eval()
# Hand-built "hedging" model: output = 0.51 - 0.02 * input, i.e. 0.51 for an
# input of 0 and 0.49 for an input of 1 (same side of the 0.5 threshold as the switch model).
model3 = LR(chunk_size=1)
model3.eval()
with torch.no_grad():
    model3.linear.weight[0,0] = -0.02
    model3.linear.bias[0] = 0.51

In [53]:
# Per-sample comparison of the trained model (1), the exact switch model (2)
# and the hedging model (3): raw outputs and squared errors.
rows = []
# The loop variables are deliberately not named X / y: those globals hold the
# training tensors that train() reads, and iterating with the same names would
# silently replace them with the last sample.
for window, target in data:
    # Build the input tensor once and share it between the three models.
    features = torch.tensor(window, dtype=torch.float32)
    out1 = model(features).item()
    out2 = model2(features).item()
    out3 = model3(features).item()
    rows.append({"in": window[0],
                 "out": target,
                 "pred 1": out1, "pred 2": out2, "pred 3": out3,
                 "err 1": (target-out1)**2, "err 2": (target-out2)**2, "err 3": (target-out3)**2,
                })

# Show every row instead of pandas' truncated view.
pd.set_option('display.max_rows', None)
df = pd.DataFrame(rows)
df

Unnamed: 0,in,out,pred 1,pred 2,pred 3,err 1,err 2,err 3
0,1,0,0.001386,0.0,0.49,2e-06,0.0,0.2401
1,0,0,0.996396,1.0,0.51,0.992805,1.0,0.2601
2,0,0,0.996396,1.0,0.51,0.992805,1.0,0.2601
3,0,1,0.996396,1.0,0.51,1.3e-05,0.0,0.2401
4,1,1,0.001386,0.0,0.49,0.997229,1.0,0.2601
5,1,0,0.001386,0.0,0.49,2e-06,0.0,0.2401
6,0,1,0.996396,1.0,0.51,1.3e-05,0.0,0.2401
7,1,0,0.001386,0.0,0.49,2e-06,0.0,0.2401
8,0,1,0.996396,1.0,0.51,1.3e-05,0.0,0.2401
9,1,1,0.001386,0.0,0.49,0.997229,1.0,0.2601


In [54]:
# Mean squared error of each of the three models over the dataset.
df["err 1"].mean(), df["err 2"].mean(), df["err 3"].mean()
# So we can have a model with a better accuracy (switch) with a cost that is actually lower.
# So why is our algorithm not able to find it? Try to initialize in the vicinity and see what happens?

(0.3214926769293237, 0.32323232323232326, 0.2465646436067545)

In [55]:
def model_wrapper_ho(model):
    """Return a 0/1 predictor built around *model*.

    The returned function switches *model* to eval mode, converts its
    argument to a float32 tensor, and answers 1 when the scalar model output
    is at least 0.5, otherwise 0.
    """
    def _model_wrapper(input):
        model.eval()
        features = torch.tensor(input, dtype=torch.float32)
        score = model(features).item()
        return 1 if score >= 0.5 else 0

    return _model_wrapper

In [56]:
# Accuracy of the trained model.
accuracy(model_wrapper_ho(model), data)

0.6767676767676768

In [57]:
# Accuracy of the hand-built exact switch model.
accuracy(model_wrapper_ho(model2), data)

0.6767676767676768

In [58]:
# Accuracy of the hand-built hedging model.
accuracy(model_wrapper_ho(model3), data)

0.6767676767676768

In [59]:
# Back to windows of three values, with inputs that do not overlap.
chunk_size = 3

data = HTDataset(chunk_size=chunk_size, overlap=False)

# Training tensors read by train().
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 

model = LR(chunk_size=chunk_size)
# Only the first weight and the bias are overridden here; the other two
# weights keep the values they were initialised with.
with torch.no_grad():
    model.linear.weight[0,0] = -0.02
    model.linear.bias[0] = 0.51

def loss_fn(y1, y2):
    """Return the mean squared error between *y1* and *y2*.

    Both tensors are squeezed (size-1 dimensions removed) before they are
    subtracted element-wise.
    """
    residual = y1.squeeze() - y2.squeeze()
    squared = residual * residual
    return squared.mean()

# SGD bound to the parameters of the model created just above.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). Gradients are cleared after the optimizer step so the next
    call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [60]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(model_wrapper, data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 10_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(model_wrapper, data))

[('linear.weight', Parameter containing:
tensor([[-0.0200, -0.5234,  0.2696]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.5100], requires_grad=True))]
loss: 0.4935130476951599
accuracy: 0.42424242424242425
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-0.4428,  0.0664, -0.2080]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.7670], requires_grad=True))]
loss: 0.1695430725812912
accuracy: 0.7575757575757576


## Logistic (differential score) output

Now we interpret the model output $\ell$ as the difference between the score of 1 and the score of 0, that is

$$
\ell = (\log p + c) - (\log (1 - p) + c) = \log \frac{p}{1-p}
$$

where $p$ is the probability of having 1 (this is the convention used by the loss and the decision rule below: a non-negative $\ell$ predicts 1). Consequently, 

$$
\exp \ell = \frac{p}{1-p}
$$

$$
(1 - p)\exp \ell - p = 0
$$

$$
(1+\exp \ell) p = \exp \ell
$$

and

$$
p = \frac{\exp \ell}{1 + \exp \ell} = \frac{1}{1 + \exp(-\ell)}.
$$

Note that in this context we have:
$$
-\log p =  \log (1+\exp(-\ell))
$$
and
$$
-\log (1-p) = \log (1+\exp \ell).
$$

In [61]:
class Model(Module):
    """Single affine layer mapping ``chunk_size`` input values to one output."""

    def __init__(self, chunk_size=3):
        super().__init__()
        self.chunk_size = chunk_size
        self.linear = Linear(in_features=chunk_size, out_features=1)

    def forward(self, input):
        """Return the raw output of the linear layer for *input*."""
        output = self.linear(input)
        return output

In [62]:
def loss_fn(pred, y):
    """Return the mean binary cross-entropy of logits *pred* against 0/1 targets *y*.

    ``pred`` is treated as the logit of the probability of a 1: a target of 1
    costs ``log(1 + exp(-pred))`` and a target of 0 costs
    ``log(1 + exp(pred))``. Both tensors are squeezed (size-1 dimensions
    removed) before the terms are combined element-wise.

    The terms are evaluated with ``softplus`` rather than a literal
    ``log(1 + exp(.))`` so that large-magnitude logits do not overflow to
    ``inf`` in ``exp``.
    """
    pred = pred.squeeze()
    y = y.squeeze()
    softplus = torch.nn.functional.softplus
    cross_entropy_terms = y * softplus(-pred) + (1 - y) * softplus(pred)
    return cross_entropy_terms.mean()

In [63]:
# Logistic experiment, simplest setting: one previous value, inputs that do not overlap.
chunk_size = 1
data = HTDataset(overlap=False, chunk_size=chunk_size)
# Training tensors read by train().
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 
model = Model(chunk_size=chunk_size)

In [64]:
# Cross-entropy of the untrained model on the whole dataset.
model.eval()
pred = model(X)
loss_fn(pred, y)

tensor(0.6985, grad_fn=<MeanBackward0>)

In [65]:
# SGD bound to the parameters of the current `model`.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). Gradients are cleared after the optimizer step so the next
    call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [66]:
def logistic_model_wrapper_ho(model):
    """Return a 0/1 predictor built around a *model* whose output is a logit.

    The returned function switches *model* to eval mode, converts its
    argument to a float32 tensor, and answers 1 when the scalar model output
    is non-negative, otherwise 0.
    """
    def _model_wrapper(input):
        model.eval()
        features = torch.tensor(input, dtype=torch.float32)
        logit = model(features).item()
        return 1 if logit >= 0.0 else 0

    return _model_wrapper

In [67]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 100_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

[('linear.weight', Parameter containing:
tensor([[0.0451]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.0107], requires_grad=True))]
loss: 0.6985028982162476
accuracy: 0.46464646464646464
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-1.4960]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.5425], requires_grad=True))]
loss: 0.6247754693031311
accuracy: 0.6767676767676768


In [68]:
# Raw model output (logit) when the previous value is 0.
model.eval()
model(torch.tensor([0.0]))

tensor([0.5425], grad_fn=<ViewBackward0>)

In [69]:
# Raw model output (logit) when the previous value is 1.
model(torch.tensor([1.0]))

tensor([-0.9535], grad_fn=<ViewBackward0>)

In [70]:
# Same two inputs evaluated as one batch of two rows.
model(torch.tensor([[0.0], [1.0]]))

tensor([[ 0.5425],
        [-0.9535]], grad_fn=<AddmmBackward0>)

In [71]:
# Logistic experiment with windows of three values (inputs that do not overlap).
chunk_size = 3
data = HTDataset(overlap=False, chunk_size=chunk_size)
# Training tensors read by train().
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 
model = Model(chunk_size=chunk_size)

# SGD bound to the parameters of the model created just above.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). Gradients are cleared after the optimizer step so the next
    call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [74]:
# Report parameters, loss and accuracy before this training run.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 100_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

[('linear.weight', Parameter containing:
tensor([[-2.1248,  0.2879, -1.1662]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.3609], requires_grad=True))]
loss: 0.5157646536827087
accuracy: 0.7575757575757576
------------------------------------------------------------
[('linear.weight', Parameter containing:
tensor([[-2.1486,  0.2489, -1.2050]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([1.4113], requires_grad=True))]
loss: 0.5156917572021484
accuracy: 0.7575757575757576


In [79]:
class NN(Module):
    """Network with one hidden layer: Linear(chunk_size, n) -> ReLU -> Linear(n, 1)."""

    def __init__(self, chunk_size=3, n=8):
        super().__init__()
        self.chunk_size = chunk_size
        self.linear1 = Linear(in_features=chunk_size, out_features=n)
        self.relu = torch.nn.ReLU()
        self.linear2 = Linear(in_features=n, out_features=1)

    def forward(self, input):
        """Return the raw output of the last linear layer for *input*."""
        hidden = self.relu(self.linear1(input))
        return self.linear2(hidden)

In [84]:
# Neural-network experiment: windows of three values, hidden layer of width n.
chunk_size = 3
n = 8

data = HTDataset(overlap=False, chunk_size=chunk_size)
# Training tensors read by train().
X = torch.tensor([input for input, _ in data], dtype=torch.float32) 
y = torch.tensor([output for _, output in data], dtype=torch.float32) 
model = NN(chunk_size=chunk_size, n=n)

# SGD bound to the parameters of the model created just above.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(model, loss_fn, optimizer):
    """Run one full-batch optimisation step and return the loss as a float.

    The batch is taken from the notebook globals ``X`` (inputs) and ``y``
    (targets). Gradients are cleared after the optimizer step so the next
    call starts from zero.
    """
    model.train()

    predictions = model(X)
    batch_loss = loss_fn(predictions, y)

    batch_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return batch_loss.item()

In [85]:
# Report parameters, loss and accuracy before training.
pred = model(X)
loss = loss_fn(pred, y)
print(list(model.named_parameters()))
print("loss:", loss.item())
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

# Each call to train() performs one optimizer step on the full (X, y) batch.
epochs = 100_000
for t in range(epochs):
    loss = train(model, loss_fn, optimizer)
# Report the same quantities after training (train() returns the loss as a float).
print("------------------------------------------------------------")
print(list(model.named_parameters()))
print("loss:", loss)
print("accuracy:", accuracy(logistic_model_wrapper_ho(model), data))

[('linear1.weight', Parameter containing:
tensor([[-0.2992, -0.1142, -0.0998],
        [-0.0440, -0.3238,  0.2676],
        [ 0.5563, -0.0875,  0.1623],
        [-0.4545, -0.5577, -0.0578],
        [-0.0668,  0.0722,  0.4203],
        [ 0.2073, -0.2592,  0.4398],
        [ 0.2173, -0.5751,  0.4275],
        [ 0.0763,  0.3588, -0.4070]], requires_grad=True)), ('linear1.bias', Parameter containing:
tensor([-0.5157, -0.1978,  0.5552,  0.1567, -0.4367,  0.2838, -0.1767,  0.0427],
       requires_grad=True)), ('linear2.weight', Parameter containing:
tensor([[-0.3499,  0.3290,  0.0847,  0.0150, -0.0962,  0.0499, -0.1699, -0.0081]],
       requires_grad=True)), ('linear2.bias', Parameter containing:
tensor([0.3352], requires_grad=True))]
loss: 0.7277809977531433
accuracy: 0.48484848484848486
------------------------------------------------------------
[('linear1.weight', Parameter containing:
tensor([[-0.2992, -0.1142, -0.0998],
        [-0.1539, -0.9003,  0.9004],
        [ 1.1779,  0.1940, 