In [1]:
%matplotlib inline
import random
import tensorflow as tf
from d2l import tensorflow as d2l

2024-02-15 11:07:12.691247: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-15 11:07:12.693041: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-15 11:07:12.717092: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-15 11:07:12.717118: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-15 11:07:12.718040: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

### 3.3.1. Generating the Dataset

In [12]:
class SyntheticRegressionData(d2l.DataModule): #@save
    """Toy linear-regression dataset: ``y = X w + b + noise``.

    Args:
        w: 1-D weight tensor; its length fixes the number of feature columns.
        b: Bias added to every label.
        noise: Scale applied to the standard-normal noise term.
        num_train: Number of training examples to generate.
        num_val: Number of validation examples to generate.
        batch_size: Mini-batch size (only stored here, not used for generation).

    Attributes:
        X: Standard-normal features, ``num_train + num_val`` rows.
        y: Labels as a column tensor with one row per example.
    """
    def __init__(self, w, b, noise=0.01, num_train=1000, num_val=1000,
                 batch_size=32):
        super().__init__()
        # Stores the constructor arguments on the instance (self.num_train,
        # self.batch_size, ...). NOTE(review): the d2l helper appears to look
        # at the caller's locals, so keep this call ahead of any new local.
        self.save_hyperparameters()
        total = num_train + num_val
        num_features = w.shape[0]
        # Features are sampled first, then the noise, one draw per example.
        self.X = tf.random.normal((total, num_features))
        epsilon = tf.random.normal((total, 1)) * noise
        # Reshape w into a column so the matmul yields one label per row.
        w_column = tf.reshape(w, (-1, 1))
        self.y = tf.matmul(self.X, w_column) + b + epsilon

In [13]:
# Build the dataset with ground-truth parameters w = [2, -3.4] and b = 4.2;
# noise scale, split sizes and batch size keep their constructor defaults.
data = SyntheticRegressionData(w=tf.constant([2, -3.4]), b=4.2)

In [14]:
# Inspect the first example: its feature row and the matching label.
print("features:", data.X[0], "\nlabel:", data.y[0])

features: tf.Tensor([0.07825372 2.3389554 ], shape=(2,), dtype=float32) 
label: tf.Tensor([-3.5949047], shape=(1,), dtype=float32)


In [15]:
# Show the first ten feature rows.
print(data.X[:10])

tf.Tensor(
[[ 0.07825372  2.3389554 ]
 [-0.2758705   0.6148885 ]
 [-2.131023    0.03754052]
 [-0.92132515  1.5194995 ]
 [ 0.6690425   0.27936545]
 [-1.2986526   0.07693888]
 [-0.11273739  0.2510018 ]
 [-2.3880966  -1.8251023 ]
 [-1.1890905   0.33080885]
 [-1.6707363  -0.18904477]], shape=(10, 2), dtype=float32)


### 3.3.2. Reading the Dataset

In [16]:
@d2l.add_to_class(SyntheticRegressionData)
def get_dataloader(self, train):
    """Yield ``(features, labels)`` mini-batches of up to ``batch_size`` rows.

    Args:
        train: If truthy, iterate over rows ``[0, num_train)`` in a freshly
            shuffled order; otherwise iterate over rows
            ``[num_train, num_train + num_val)`` in their stored order.

    Yields:
        A pair of tensors gathered from ``self.X`` and ``self.y`` for the same
        row indices. The last batch may hold fewer than ``batch_size`` rows.
    """
    first = 0 if train else self.num_train
    last = self.num_train if train else self.num_train + self.num_val
    order = list(range(first, last))
    if train:
        # The examples are read in random order
        random.shuffle(order)
    for start in range(0, len(order), self.batch_size):
        batch_rows = tf.constant(order[start:start + self.batch_size])
        yield tf.gather(self.X, batch_rows), tf.gather(self.y, batch_rows)

In [17]:
# Pull a single mini-batch from the training loader and report the shapes of
# its feature and label tensors.
X,y = next(iter(data.train_dataloader()))
print(f'X shape: {X.shape} \ny shape: {y.shape}')

X shape: (32, 2) 
y shape: (32, 1)


### 3.3.3. Concise Implementation of the Data Loader

In [18]:
# Attached to d2l.DataModule (not d2l.Module): the method reads
# self.batch_size and is invoked on data modules such as
# SyntheticRegressionData, which derives from d2l.DataModule.
@d2l.add_to_class(d2l.DataModule) #@save
def get_tensorloader(self, tensors, train, indices=slice(0,None)):
    """Wrap in-memory tensors in a batched ``tf.data.Dataset``.

    Args:
        tensors: Iterable of tensors sharing the same first dimension
            (e.g. features and labels).
        train: If truthy, shuffle with a buffer as large as the selected
            number of rows; otherwise use a buffer of size 1, which keeps
            the rows in their original order.
        indices: Index (by default the full slice) applied to every tensor
            before the dataset is built.

    Returns:
        A ``tf.data.Dataset`` yielding tuples of batches of ``self.batch_size``
        rows (the final batch may be smaller).
    """
    tensors = tuple(a[indices] for a in tensors)
    # Buffer size == number of rows gives a full shuffle of the selection.
    shuffle_buffer = tensors[0].shape[0] if train else 1
    return (tf.data.Dataset.from_tensor_slices(tensors)
            .shuffle(buffer_size=shuffle_buffer)
            .batch(self.batch_size))

@d2l.add_to_class(SyntheticRegressionData) #@save
def get_dataloader(self, train):
    """Return a tensor loader over the training or the validation rows.

    Args:
        train: If truthy, select rows ``[0, num_train)``; otherwise select
            every row from ``num_train`` onwards.

    Returns:
        Whatever ``self.get_tensorloader`` builds from ``(self.X, self.y)``
        for the selected rows; ``train`` is forwarded unchanged.
    """
    if train:
        rows = slice(0, self.num_train)
    else:
        rows = slice(self.num_train, None)
    return self.get_tensorloader((self.X, self.y), train, rows)

In [19]:
# Fetch one mini-batch from the training loader again and print the shapes of
# its feature and label tensors.
X, y = next(iter(data.train_dataloader()))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: (32, 2) 
y shape: (32, 1)


In [20]:
# Number of mini-batches in one pass over the training loader
# (1000 training examples at batch size 32, so the last batch is partial).
len(data.train_dataloader())

32