<a href="https://colab.research.google.com/github/arnav39/d2el-en/blob/main/3_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 3.3 Synthetic Regression Data

In [None]:
!pip install matplotlib_inline
!pip install --upgrade d2l==1.0.0a0

In [3]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

In [None]:
# Warm-up: get a feel for the `save_hyperparameters` helper
class First(d2l.HyperParameters):
  """Toy class demonstrating `save_hyperparameters`.

  The constructor arguments are stored as attributes of the instance,
  except for the names listed in `ignore` (`x4` and `x5`).
  """
  def __init__(self, x1, x2, x3, x4, x5, x6):
    self.save_hyperparameters(ignore=['x4', 'x5'])

In [None]:
a1 = First(1, 2, 3, 4, 5, 6)

In [None]:
print(a1.x1, a1.x2, a1.x3, a1.x6)

1 2 3 6


In [None]:
# `x4` was listed in `ignore`, so it was never stored on the instance.
# Catch the error and display it so the notebook still runs top to bottom.
try:
  print(a1.x4)
except AttributeError as err:
  print(f"AttributeError: {err}")

AttributeError: ignored

In [None]:
class SyntheticRegressionData(d2l.DataModule):
  """Synthetic linear-regression data: y = Xw + b + Gaussian noise.

  Args:
    w: 1-D weight tensor; its length is the number of features.
    b: scalar bias added to every label.
    noise: scale applied to the standard-normal label noise.
    num_train: number of training examples (the first rows of X / y).
    num_val: number of validation examples (the rows after the training ones).
    batch_size: minibatch size, stored as an attribute for the data loaders.

  Attributes:
    X: features of shape (num_train + num_val, len(w)), drawn from N(0, 1).
    y: labels of shape (num_train + num_val, 1).
  """
  def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000, batch_size=32):
    super().__init__()
    # Stores the constructor arguments as attributes (self.w, self.b, ...)
    self.save_hyperparameters()
    total = num_train + num_val
    self.X = torch.randn(total, len(w))
    eps = noise * torch.randn(total, 1)
    # w is reshaped to a column so the product has shape (total, 1)
    self.y = self.X @ w.reshape(-1, 1) + b + eps

In [None]:
data = SyntheticRegressionData(w=torch.tensor([2, -3.4]),
                               b = 4.2)

In [None]:
print(data.__dict__) # Gives a dictionary containing all the attributes of 'data' and their values

{'hparams': {'w': tensor([ 2.0000, -3.4000]), 'b': 4.2, 'noise': 0.01, 'num_train': 1000, 'num_val': 1000, 'batch_size': 32}, 'root': '../data', 'num_workers': 4, 'w': tensor([ 2.0000, -3.4000]), 'b': 4.2, 'noise': 0.01, 'num_train': 1000, 'num_val': 1000, 'batch_size': 32, 'X': tensor([[ 0.2826, -0.6326],
        [ 2.1191,  0.7354],
        [-0.2452,  0.0101],
        ...,
        [-0.1252, -0.8679],
        [-1.3451,  0.5519],
        [ 1.0368,  1.7343]]), 'y': tensor([[ 6.9221],
        [ 5.9274],
        [ 3.6788],
        ...,
        [ 6.9044],
        [-0.3621],
        [ 0.3878]])}


In [None]:
data.X

tensor([[ 0.2826, -0.6326],
        [ 2.1191,  0.7354],
        [-0.2452,  0.0101],
        ...,
        [-0.1252, -0.8679],
        [-1.3451,  0.5519],
        [ 1.0368,  1.7343]])

In [None]:
print(data.X.shape)

torch.Size([2000, 2])


In [None]:
data.y

tensor([[ 6.9221],
        [ 5.9274],
        [ 3.6788],
        ...,
        [ 6.9044],
        [-0.3621],
        [ 0.3878]])

In [None]:
print(data.y.shape)

torch.Size([2000, 1])


In [None]:
# First entry of our dataset
print(f'features : {data.X[0]}')
print(f'label : {data.y[0]}')

features : tensor([ 0.2826, -0.6326])
label : tensor([6.9221])


In [None]:
def add_to_class(Class):
  """Return a decorator that attaches the decorated object to `Class`.

  The object is registered under its own `__name__`, so a function defined
  after the class body becomes a method of that class.

  Args:
    Class: the class that receives the new attribute.

  Returns:
    A decorator that registers its argument on `Class` and returns it.
  """
  def wrapper(obj):
    setattr(Class, obj.__name__, obj)
    # Return the object so the decorated name stays bound to it instead of None
    return obj
  return wrapper

In [None]:
a = list(range(0, 10))
print("Before shuffling", a)
random.shuffle(a)
print("after shuffling", a)

Before shuffling [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
after shuffling [0, 9, 7, 3, 4, 6, 8, 2, 1, 5]


In [None]:
a = torch.tensor([1, 2, 5, 6])
data.X[a]

tensor([[ 2.1191,  0.7354],
        [-0.2452,  0.0101],
        [-2.4540, -0.8380],
        [ 0.2287,  0.8777]])

In [None]:
@add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
  """Yield `(X, y)` minibatches of at most `self.batch_size` examples.

  Args:
    train: if true, iterate over the first `num_train` examples in a freshly
      shuffled order; otherwise iterate over the following `num_val`
      examples in their stored order.

  Yields:
    Tuples `(features, labels)` indexed out of `self.X` and `self.y`.
  """
  if train:
    indices = list(range(0, self.num_train))
    random.shuffle(indices)  # training examples are read in random order
  else:
    # Bug fix: this was assigned to the misspelled name `indice`, which left
    # `indices` undefined and made the validation path fail.
    indices = list(range(self.num_train, self.num_train + self.num_val))

  for i in range(0, len(indices), self.batch_size):
    # The last slice may be shorter than batch_size
    batch_indices = torch.tensor(indices[i : i + self.batch_size])
    yield self.X[batch_indices], self.y[batch_indices]

### Iterators

In [None]:
string = 'ARNAV'
ch_iterator = iter(string)

print(next(ch_iterator))
print(next(ch_iterator))
print(next(ch_iterator))
print(next(ch_iterator))

A
R
N
A


In [None]:
X, y = next(iter(data.get_dataloader(True)))
print(f"X.shape : {X.shape}")
print(f"y.shape : {y.shape}")

X.shape : torch.Size([32, 2])
y.shape : torch.Size([32, 1])


In [None]:
a = "Hello world"
obj = slice(0, None)
print(a[obj])

Hello world


In [None]:
t = torch.tensor([1, 2, 3, 4])
b = tuple(a for a in t)
b

(tensor(1), tensor(2), tensor(3), tensor(4))

In [None]:
torch.utils.data??

In [None]:
@add_to_class(d2l.DataModule)
def get_tensorloader(self, tensors, train, indice=slice(0, None)):
  """Wrap a selection of rows of `tensors` in a `DataLoader`.

  Args:
    tensors: iterable of tensors to bundle together.
    train: whether the loader should shuffle the examples.
    indice: index applied to every tensor before bundling; the default
      slice keeps all rows.

  Returns:
    A `torch.utils.data.DataLoader` with batches of size `self.batch_size`.
  """
  selected = [tensor[indice] for tensor in tensors]
  return torch.utils.data.DataLoader(
      torch.utils.data.TensorDataset(*selected),
      batch_size=self.batch_size,
      shuffle=train)
  
@add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
  """Return a loader over the training rows or the validation rows.

  Training uses rows `[0, num_train)`; validation uses every row from
  `num_train` onward. `train` is forwarded so that it also controls shuffling.
  """
  if train:
    rows = slice(0, self.num_train)
  else:
    rows = slice(self.num_train, None)
  return self.get_tensorloader((self.X, self.y), train, rows)

In [None]:
X, y = next(iter(data.train_dataloader()))
print(f"X.shape : {X.shape}")
print(f"y.shape : {y.shape}")

X.shape : torch.Size([32, 2])
y.shape : torch.Size([32, 1])


In [None]:
# Number of minibatches per epoch: with num_train=1000 and batch_size=32
# this is ceil(1000 / 32) = 32 (the last batch is smaller than the rest)
len(data.train_dataloader())

32

# Ex 3.3

## Q1:

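Use `math.ceil(num_examples / batch_size)` for the number of iterations in each epoch, so that the final, smaller batch is still counted when the batch size does not divide the number of examples evenly.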

## Q3:

In [None]:
class GiveData(d2l.DataModule):
  """Generate a new synthetic linear-regression minibatch on every call.

  Nothing is stored up front: each call to `data_loader` draws fresh
  features and noise, so successive batches differ.

  Args:
    w: 1-D weight tensor; kept as a column vector in `self.w`.
    b: scalar bias added to every label.
    noise: scale applied to the standard-normal label noise.
    batch_size: number of examples produced per call.
  """
  def __init__(self, w, b, noise=0.01, batch_size=25):
    # Initialise the base DataModule too, as SyntheticRegressionData does,
    # so base-class attributes such as `root` and `num_workers` are set.
    super().__init__()
    # `w` is excluded here because it is stored in reshaped form just below
    self.save_hyperparameters(ignore = ['w'])
    self.w = w.reshape(-1,1)

  def data_loader(self):
    """Return a freshly sampled `(X, y)` pair with `batch_size` rows."""
    X = torch.randn(self.batch_size, len(self.w))
    noise = torch.randn(self.batch_size, 1) * self.noise
    y = torch.mm(X, self.w) + self.b + noise
    return X, y

In [None]:
inst = GiveData(w = torch.tensor([1., -2.]),
               b = 0.76)

In [None]:
X1, y1 = inst.data_loader()

In [None]:
print(f"X.shape : {X1.shape}")
print(f"y.shape : {y1.shape}")

X.shape : torch.Size([25, 2])
y.shape : torch.Size([25, 1])


In [None]:
X2, y2 = inst.data_loader()
# Per column, count the entries that coincide with the previous draw
print((X2 == X1).sum(dim=0), (y2 == y1).sum(dim=0))

tensor([0, 0]) tensor([0])


## Q4:

In [4]:
class GiveFixedData(d2l.DataModule):
  """Generate the same synthetic linear-regression minibatch on every call.

  Args:
    w: 1-D weight tensor; kept as a column vector in `self.w`.
    b: scalar bias added to every label.
    noise: scale applied to the standard-normal label noise.
    batch_size: number of examples produced per call.
  """
  def __init__(self, w, b, noise=0.01, batch_size=25):
    # Initialise the base DataModule too, as SyntheticRegressionData does,
    # so base-class attributes such as `root` and `num_workers` are set.
    super().__init__()
    # `w` is excluded here because it is stored in reshaped form just below
    self.save_hyperparameters(ignore = ['w'])
    self.w = w.reshape(-1,1)

  def data_loader(self):
    """Return an `(X, y)` pair that is identical across calls."""
    # A private generator, re-seeded on each call, makes the draw repeatable
    # without resetting the global RNG that other cells depend on.
    gen = torch.Generator().manual_seed(0)
    X = torch.randn(self.batch_size, len(self.w), generator=gen)
    noise = torch.randn(self.batch_size, 1, generator=gen) * self.noise
    y = torch.mm(X, self.w) + self.b + noise
    return X, y

In [None]:
inst = GiveFixedData(w = torch.tensor([1.78, 3.98]),
                     b = -9.67)

X1, y1 = inst.data_loader()
X2, y2 = inst.data_loader()

In [None]:
# Fraction of matching entries per feature column (1.0 means identical)
print((X1 == X2).sum(dim=0) / X1.shape[0])

tensor([1., 1.])


In [None]:
# Fraction of matching labels (1.0 means identical)
print((y1 == y2).sum(dim=0) / y1.shape[0])

tensor([1.])


In [5]:
len(torch.ones(2, 3)) # len() of a tensor gives its size along the first axis (dimension 0)

2

In [7]:
torch.randn??

In [12]:
torch.manual_seed(0)
class DataGen(d2l.DataModule):
  """Generate synthetic linear-regression minibatches from the global RNG.

  No seed is set inside the class, so each call to `give_data` continues
  the global random stream and returns a different batch.

  Args:
    w: 1-D weight tensor; kept as a column vector in `self.w`.
    b: scalar bias added to every label.
    noise: scale applied to the standard-normal label noise.
    batch_size: number of examples produced per call.
  """
  def __init__(self, w, b, noise=0.01, batch_size=25):
    # Initialise the base DataModule too, as SyntheticRegressionData does,
    # so base-class attributes such as `root` and `num_workers` are set.
    super().__init__()
    # `w` is excluded here because it is stored in reshaped form just below
    self.save_hyperparameters(ignore=['w'])
    self.w = w.reshape(-1, 1)

  def give_data(self):
    """Return a freshly sampled `(X, y)` pair with `batch_size` rows."""
    X = torch.randn(self.batch_size, len(self.w))
    noise = torch.randn(self.batch_size, 1) * self.noise
    y = torch.mm(X, self.w) + self.b + noise
    return X, y

In [13]:
inst = DataGen(w = torch.tensor([1.78, 3.98]),
                     b = -9.67)

X1, y1 = inst.give_data()
X2, y2 = inst.give_data()

In [14]:
# Fraction of matching entries between the two draws (0.0 means none coincide)
n_rows = X1.shape[0]
print((X1 == X2).sum(dim=0) / n_rows)
print((y1 == y2).sum(dim=0) / y1.shape[0])

tensor([0., 0.])
tensor([0.])
