# "Cross-validation" des modèles

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Créer des données synthétiques

In [2]:
def get_data(N=500, p0=0.15, theta0=0, wt=np.pi/20, theta_std=np.pi/6):
    """Draw a synthetic set of binary trials from a logistic curve with lapses.

    Args:
        N: number of trials to draw.
        p0: lapse rate; the probabilities are squeezed into [p0/2, 1 - p0/2].
        theta0: value of theta at which the logistic curve is centered.
        wt: width (slope scale) of the logistic curve.
        theta_std: standard deviation of the Gaussian used to draw theta.

    Returns:
        (theta, p, y): the drawn theta values, the probability of a positive
        response for each of them, and the boolean responses sampled from
        these probabilities. All three are arrays of length N.
    """
    # The Gaussian draw comes before the uniform one: the order matters
    # for anyone seeding the global numpy generator.
    angles = theta_std * np.random.randn(N)

    # Logistic function of the centered, rescaled angle
    scaled = (angles - theta0) / wt
    sigmoid = 1 / (1 + np.exp(-scaled))

    # Mix in the lapse rate, then sample one Bernoulli outcome per trial
    proba = p0/2 + (1-p0) * sigmoid
    uniform_draws = np.random.rand(N)
    responses = uniform_draws < proba

    return angles, proba, responses


In [3]:
import torch
criterion = torch.nn.BCELoss()


class LogisticRegressionModel(torch.nn.Module):
    """Logistic curve with a learnable lapse rate.

    The predicted probability is ``p0/2 + (1 - p0) * sigmoid(linear(x))``
    where ``p0 = sigmoid(logit0(0))``, which keeps the lapse rate in (0, 1).
    """

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1)
        # This layer is always fed zeros, so only its bias contributes to p0
        self.logit0 = torch.nn.Linear(1, 1)

    def forward(self, x):
        p0 = torch.sigmoid(self.logit0(torch.zeros(1)))
        return p0/2 + (1-p0)*torch.sigmoid(self.linear(x))


def fit_data(theta, y,
                learning_rate = 0.005,
                num_epochs = 1e4,
                verbose=False):
    """Fit the lapse-rate logistic model to the given trials.

    The model is trained with full-batch Adam steps on the binary
    cross-entropy between its predictions and the observed responses.

    Args:
        theta: 1-D array-like with one stimulus value per trial.
        y: 1-D array-like of binary responses (bool or 0/1), one per trial.
        learning_rate: step size given to the Adam optimizer.
        num_epochs: number of gradient steps; converted with ``int()``.
        verbose: when True, print the loss roughly 32 times during training.

    Returns:
        (model, loss): the trained model, switched to eval mode, and the
        training loss as a float, measured at the last step before the
        final parameter update.

    Raises:
        ValueError: if ``num_epochs`` is smaller than 1.
    """
    num_epochs = int(num_epochs)
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    # Global side effect kept on purpose: code outside this function builds
    # its tensors with torch.Tensor(...) and needs them to be float64 to
    # match the parameters of the returned model.
    torch.set_default_dtype(torch.float64)

    logistic_model = LogisticRegressionModel()
    optimizer = torch.optim.Adam(logistic_model.parameters(), lr=learning_rate)

    # Train on the data handed in by the caller, as column vectors
    Theta = torch.as_tensor(theta, dtype=torch.float64).reshape(-1, 1)
    labels = torch.as_tensor(y, dtype=torch.float64).reshape(-1, 1)

    # Never let the logging period drop to zero for short runs
    log_every = max(1, num_epochs // 32)

    for epoch in range(num_epochs):
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass: predicted probabilities
        outputs = logistic_model(Theta)

        # Binary cross-entropy against the observed responses
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        if verbose and epoch % log_every == 0:
            print(f"Iteration: {epoch}. Loss: {loss.item():.5f}.")

    logistic_model.eval()
    return logistic_model, loss.item()

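Qu'est-ce que ça donne ? Ajustons le modèle sur des données synthétiques et suivons l'évolution de la perte.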

In [None]:
# Seed both random generators so that "Restart & Run All" reproduces the
# same synthetic data and the same initial model weights.
np.random.seed(42)
torch.manual_seed(42)

theta, p, y = get_data()

logistic_model, loss = fit_data(theta, y, verbose=True)

Iteration: 0. Loss: 0.64768.
Iteration: 312. Loss: 0.53423.
Iteration: 624. Loss: 0.50019.
Iteration: 936. Loss: 0.49302.
Iteration: 1248. Loss: 0.49049.
Iteration: 1560. Loss: 0.48839.
Iteration: 1872. Loss: 0.48651.
Iteration: 2184. Loss: 0.48497.


In [None]:
# Overlay the hidden probabilities, the raw trials and the fitted curve
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(theta, p, s=4, color='r', label='proba cachées')
ax.scatter(theta, y, s=1, alpha=.1, color='b', label='données')

# Evaluate the fitted model on a regular grid of theta values
x_values = np.linspace(-1.5, 1.50, 100)[:, None]
y_values = logistic_model(torch.Tensor(x_values)).detach().numpy()
ax.plot(x_values, y_values, 'g', alpha=.7, lw=3, label='proba prédites')

ax.set_xlabel(r'$\theta$')
ax.set_yticks([0., 1.])
ax.set_yticklabels(['Left', 'Right'])
ax.legend();


## validation

In [None]:
theta, p, y = get_data() # new data, not used for the fit

labels = torch.Tensor(y[:, None])
Theta = torch.Tensor(theta[:, None])
x_values = np.linspace(-1.5, 1.50, 100)[:, None]

# Pure evaluation: no autograd graph is needed
with torch.no_grad():
    outputs = logistic_model(Theta)
    loss = criterion(outputs, labels)
    y_values = logistic_model(torch.Tensor(x_values)).numpy()

# Print the scalar value rather than the tensor representation
print('loss=', loss.item())

plt.figure(figsize = (8,6))
plt.scatter(theta, p, s=4, color = 'r', label='proba cachées')
plt.scatter(theta, y, s=1, alpha=.1, color = 'b', label='données')
plt.plot(x_values, y_values, 'g', alpha=.7, lw=3, label='proba prédites')
plt.xlabel(r'$\theta$')
plt.yticks([0.,1.],['Left', 'Right']);
plt.legend();

## influence du nombre de trials

In [None]:
Ns = np.logspace(1, 4, 15, base=10)
losses = []
for N_ in Ns:
    # Fit on a training set of N_ trials
    theta, p, y = get_data(N=int(N_))
    logistic_model, loss = fit_data(theta, y, verbose=False)

    # Score the fit on a fresh validation set of fixed size
    theta, p, y = get_data(N=1000) # new data
    labels = torch.Tensor(y[:, None])
    Theta = torch.Tensor(theta[:, None])
    with torch.no_grad():
        outputs = logistic_model(Theta)
        # Store a plain float: tensors that require grad cannot be
        # converted to numpy by matplotlib
        loss = criterion(outputs, labels).item()
    print(f"N: {int(N_)}. Loss: {loss:.5f}.")
    losses.append(loss)

plt.figure(figsize = (8,8))
plt.scatter(Ns, losses)
# The trial counts are log-spaced, so use a log axis to spread them evenly
plt.xscale('log')
plt.xlabel(' # trials')
plt.ylabel(' Loss ');


## influence du learning rate


## influence du nombre d'epochs