## PyTorch: Iris Classification with a Feedforward Network

In [1]:
!pip install torch



In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from torch.utils.data import DataLoader, TensorDataset


In [3]:
# Load the Iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data    # four numeric features per sample
y = iris.target  # integer class label per sample (three classes)

In [11]:
# Peek at the first 15 class labels.
y[:15]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
# Peek at the first 15 feature rows.
# NOTE(review): the output recorded with this cell contains standardized
# (negative) values, so it appears to have been executed after the scaling cell;
# on a fresh top-to-bottom run it would show the raw measurements instead.
X[:15, :]

array([[-0.90068117,  1.01900435, -1.34022653, -1.3154443 ],
       [-1.14301691, -0.13197948, -1.34022653, -1.3154443 ],
       [-1.38535265,  0.32841405, -1.39706395, -1.3154443 ],
       [-1.50652052,  0.09821729, -1.2833891 , -1.3154443 ],
       [-1.02184904,  1.24920112, -1.34022653, -1.3154443 ],
       [-0.53717756,  1.93979142, -1.16971425, -1.05217993],
       [-1.50652052,  0.78880759, -1.34022653, -1.18381211],
       [-1.02184904,  0.78880759, -1.2833891 , -1.3154443 ],
       [-1.74885626, -0.36217625, -1.34022653, -1.3154443 ],
       [-1.14301691,  0.09821729, -1.2833891 , -1.44707648],
       [-0.53717756,  1.47939788, -1.2833891 , -1.3154443 ],
       [-1.26418478,  0.78880759, -1.22655167, -1.3154443 ],
       [-1.26418478, -0.13197948, -1.34022653, -1.44707648],
       [-1.87002413, -0.13197948, -1.51073881, -1.44707648],
       [-0.05250608,  2.16998818, -1.45390138, -1.3154443 ]])

In [4]:
# Scaler that standardizes each feature by removing the mean and scaling to
# unit variance (default settings).
scaler = StandardScaler()

In [5]:
# Exploratory: print the built-in documentation for StandardScaler.
help(StandardScaler)

Help on class StandardScaler in module sklearn.preprocessing._data:

class StandardScaler(sklearn.base.OneToOneFeatureMixin, sklearn.base.TransformerMixin, sklearn.base.BaseEstimator)
 |  StandardScaler(*, copy=True, with_mean=True, with_std=True)
 |  
 |  Standardize features by removing the mean and scaling to unit variance.
 |  
 |  The standard score of a sample `x` is calculated as:
 |  
 |      z = (x - u) / s
 |  
 |  where `u` is the mean of the training samples or zero if `with_mean=False`,
 |  and `s` is the standard deviation of the training samples or one if
 |  `with_std=False`.
 |  
 |  Centering and scaling happen independently on each feature by computing
 |  the relevant statistics on the samples in the training set. Mean and
 |  standard deviation are then stored to be used on later data using
 |  :meth:`transform`.
 |  
 |  Standardization of a dataset is a common requirement for many
 |  machine learning estimators: they might behave badly if the
 |  individual feat

In [6]:
# Fit the scaler on the features and replace X with its standardized version.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split below, so test-set statistics leak into the scaling. Consider splitting
# first and fitting the scaler on the training portion only.
X = scaler.fit_transform(X)

In [7]:
# Exploratory: print the built-in documentation for fit_transform.
help(scaler.fit_transform)

Help on method fit_transform in module sklearn.base:

fit_transform(X, y=None, **fit_params) method of sklearn.preprocessing._data.StandardScaler instance
    Fit to data, then transform it.
    
    Fits transformer to `X` and `y` with optional parameters `fit_params`
    and returns a transformed version of `X`.
    
    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input samples.
    
    y :  array-like of shape (n_samples,) or (n_samples, n_outputs),                 default=None
        Target values (None for unsupervised transformations).
    
    **fit_params : dict
        Additional fit parameters.
    
    Returns
    -------
    X_new : ndarray array of shape (n_samples, n_features_new)
        Transformed array.



In [8]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

**Converting Data to PyTorch Tensors**

In [9]:
# Features are converted to float32 tensors (the dtype of the model's weights).
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
# Labels are converted to int64 (long) class indices, as used by the
# cross-entropy loss further down.
y_train_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.long) for targets in (y_train, y_test)
)

**Creating DataLoader**

In [13]:
# Exploratory: print the built-in documentation for TensorDataset.
help(TensorDataset)

Help on class TensorDataset in module torch.utils.data.dataset:

class TensorDataset(Dataset)
 |  TensorDataset(*tensors: torch.Tensor) -> None
 |  
 |  Dataset wrapping tensors.
 |  
 |  Each sample will be retrieved by indexing tensors along the first dimension.
 |  
 |  Args:
 |      *tensors (Tensor): tensors that have the same size of the first dimension.
 |  
 |  Method resolution order:
 |      TensorDataset
 |      Dataset
 |      typing.Generic
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __getitem__(self, index)
 |  
 |  __init__(self, *tensors: torch.Tensor) -> None
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __len__(self)
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __annotations__ = {'tensors': typing.Tuple[torch.Tensor, ...]}
 |  
 |  __orig_bases__ = (torch.utils.data.dataset.Dataset[typing.Tuple[torch....
 |  
 |  __parameters__ 

In [14]:
# Exploratory: print the built-in documentation for DataLoader.
help(DataLoader)

Help on class DataLoader in module torch.utils.data.dataloader:

class DataLoader(typing.Generic)
 |  DataLoader(dataset: torch.utils.data.dataset.Dataset[+T_co], batch_size: Optional[int] = 1, shuffle: Optional[bool] = None, sampler: Union[torch.utils.data.sampler.Sampler, Iterable, NoneType] = None, batch_sampler: Union[torch.utils.data.sampler.Sampler[List], Iterable[List], NoneType] = None, num_workers: int = 0, collate_fn: Optional[Callable[[List[~T]], Any]] = None, pin_memory: bool = False, drop_last: bool = False, timeout: float = 0, worker_init_fn: Optional[Callable[[int], NoneType]] = None, multiprocessing_context=None, generator=None, *, prefetch_factor: Optional[int] = None, persistent_workers: bool = False, pin_memory_device: str = '')
 |  
 |  Data loader. Combines a dataset and a sampler, and provides an iterable over
 |  the given dataset.
 |  
 |  The :class:`~torch.utils.data.DataLoader` supports both map-style and
 |  iterable-style datasets with single- or multi-proc

TensorDataset is used to combine the input features and labels into a single dataset. DataLoader is then created to handle batches during training.

In [12]:
# Pair every training feature row with its label so they are retrieved together.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

# Serve the training set in shuffled mini-batches of 32. A dedicated, seeded
# generator makes the shuffle order reproducible from run to run instead of
# depending on the state of PyTorch's global random number generator.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)

**Defining the Model**

Let's break down the `__init__` method of the `IrisClassifier` class:

```python
def __init__(self):
    super(IrisClassifier, self).__init__()
    self.fc1 = nn.Linear(4, 10)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(10, 3)
```

1. **`super(IrisClassifier, self).__init__():`**
   - This line calls the constructor of the parent class (`nn.Module`). It's a standard practice to include this line when defining a custom neural network class in PyTorch.

2. **`self.fc1 = nn.Linear(4, 10)`**
   - `self.fc1` is an instance variable representing the first fully connected (linear) layer of the neural network.
   - `nn.Linear(4, 10)` specifies that this layer has an input size of 4 and an output size of 10. In the context of the Iris dataset, which has 4 features, this corresponds to mapping the input features to a hidden layer with 10 neurons.

3. **`self.relu = nn.ReLU()`**
   - `self.relu` represents the Rectified Linear Unit (ReLU) activation function, which is applied element-wise to the output of `self.fc1`. ReLU introduces non-linearity to the model by replacing all negative values in the tensor with zero.

4. **`self.fc2 = nn.Linear(10, 3)`**
   - `self.fc2` is the second fully connected layer, representing the output layer of the neural network.
   - `nn.Linear(10, 3)` specifies that this layer has an input size of 10 (output size of the previous layer `self.fc1`) and an output size of 3. This is suitable for the Iris dataset, where we want to classify into three classes.

In summary, the architecture of this neural network is as follows:
- Input layer: 4 neurons (features of the Iris dataset).
- Hidden layer (`self.fc1`): 10 neurons with ReLU activation.
- Output layer (`self.fc2`): 3 neurons representing the three classes of the Iris dataset.

This architecture is a simple feedforward neural network suitable for the classification task on the Iris dataset.

In [17]:
class IrisClassifier(nn.Module):
    """Feedforward classifier with one hidden layer for the Iris dataset.

    Architecture: Linear(4 -> 10) -> ReLU -> Linear(10 -> 3). The three outputs
    are raw, unnormalized class scores (logits); no softmax is applied here.
    """

    def __init__(self):
        super(IrisClassifier, self).__init__()
        self.fc1 = nn.Linear(4, 10)   # 4 input features -> 10 hidden units
        self.relu = nn.ReLU()         # element-wise non-linearity
        self.fc2 = nn.Linear(10, 3)   # 10 hidden units -> 3 class logits

    def forward(self, x):
        """Return the class logits for a batch of inputs.

        Args:
            x: Tensor whose last dimension holds the 4 input features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and 3 logits in
            the last dimension.
        """
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


# Seed PyTorch's global RNG immediately before the model is built so the random
# weight initialisation (and therefore the training result) is reproducible.
torch.manual_seed(42)
model = IrisClassifier()


**Defining Loss Function and Optimizer**

In [19]:
# Exploratory: print the built-in documentation for CrossEntropyLoss.
help(nn.CrossEntropyLoss)

Help on class CrossEntropyLoss in module torch.nn.modules.loss:

class CrossEntropyLoss(_WeightedLoss)
 |  CrossEntropyLoss(weight: Optional[torch.Tensor] = None, size_average=None, ignore_index: int = -100, reduce=None, reduction: str = 'mean', label_smoothing: float = 0.0) -> None
 |  
 |  This criterion computes the cross entropy loss between input logits
 |  and target.
 |  
 |  It is useful when training a classification problem with `C` classes.
 |  If provided, the optional argument :attr:`weight` should be a 1D `Tensor`
 |  assigning weight to each of the classes.
 |  This is particularly useful when you have an unbalanced training set.
 |  
 |  The `input` is expected to contain the unnormalized logits for each class (which do `not` need
 |  to be positive or sum to 1, in general).
 |  `input` has to be a Tensor of size :math:`(C)` for unbatched input,
 |  :math:`(minibatch, C)` or :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1` for the
 |  `K`-dimensional cas

In [20]:
# Cross-entropy loss computed between the model's raw logits and the targets.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [21]:
# Exploratory: print the built-in documentation for the Adam optimizer.
help(optim.Adam)

Help on class Adam in module torch.optim.adam:

class Adam(torch.optim.optimizer.Optimizer)
 |  Adam(params: Union[Iterable[torch.Tensor], Iterable[Dict[str, Any]]], lr: Union[float, torch.Tensor] = 0.001, betas: Tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0, amsgrad: bool = False, *, foreach: Optional[bool] = None, maximize: bool = False, capturable: bool = False, differentiable: bool = False, fused: Optional[bool] = None)
 |  
 |  Implements Adam algorithm.
 |  
 |  .. math::
 |     \begin{aligned}
 |          &\rule{110mm}{0.4pt}                                                                 \\
 |          &\textbf{input}      : \gamma \text{ (lr)}, \beta_1, \beta_2
 |              \text{ (betas)},\theta_0 \text{ (params)},f(\theta) \text{ (objective)}          \\
 |          &\hspace{13mm}      \lambda \text{ (weight decay)},  \: \textit{amsgrad},
 |              \:\textit{maximize}                                                              \\
 

In [34]:
# Inspect a single mini-batch instead of printing every batch of the training
# set; one batch is enough to see what the loader yields.
inputs, labels = next(iter(train_loader))
print(inputs)
print(labels)

tensor([[ 7.9567e-01, -5.9237e-01,  4.7857e-01,  3.9577e-01],
        [-5.2506e-02, -8.2257e-01,  1.9438e-01, -2.6239e-01],
        [-9.0068e-01,  1.7096e+00, -1.2266e+00, -1.3154e+00],
        [-1.0218e+00,  1.0190e+00, -1.3971e+00, -1.1838e+00],
        [ 1.8983e-01, -1.9736e+00,  1.3755e-01, -2.6239e-01],
        [ 1.6438e+00,  3.2841e-01,  1.2743e+00,  7.9067e-01],
        [-2.9484e-01, -8.2257e-01,  2.5122e-01,  1.3251e-01],
        [-1.7489e+00, -1.3198e-01, -1.3971e+00, -1.3154e+00],
        [-1.7367e-01, -3.6218e-01,  2.5122e-01,  1.3251e-01],
        [-7.7951e-01, -8.2257e-01,  8.0709e-02,  2.6414e-01],
        [-5.3718e-01, -1.3198e-01,  4.2173e-01,  3.9577e-01],
        [-2.9484e-01, -1.3198e-01,  1.9438e-01,  1.3251e-01],
        [ 9.1684e-01, -3.6218e-01,  4.7857e-01,  1.3251e-01],
        [-5.2506e-02,  2.1700e+00, -1.4539e+00, -1.3154e+00],
        [ 3.1100e-01, -1.3198e-01,  4.7857e-01,  2.6414e-01],
        [-1.0218e+00, -2.4339e+00, -1.4664e-01, -2.6239e-01],
        

In [35]:
# Exploratory: print the built-in documentation for optimizer.zero_grad.
help(optimizer.zero_grad)

Help on method zero_grad in module torch.optim.optimizer:

zero_grad(set_to_none: bool = True) -> None method of torch.optim.adam.Adam instance
    Resets the gradients of all optimized :class:`torch.Tensor` s.
    
    Args:
        set_to_none (bool): instead of setting to zero, set the grads to None.
            This will in general have lower memory footprint, and can modestly improve performance.
            However, it changes certain behaviors. For example:
            1. When the user tries to access a gradient and perform manual ops on it,
            a None attribute or a Tensor full of 0s will behave differently.
            2. If the user requests ``zero_grad(set_to_none=True)`` followed by a backward pass, ``.grad``\ s
            are guaranteed to be None for params that did not receive a gradient.
            3. ``torch.optim`` optimizers have a different behavior if the gradient is 0 or None
            (in one case it does the step with a gradient of 0 and in the other

In [37]:
# Look the method up on the Tensor class rather than on `loss`: `loss` is only
# created by the training loop further down, so `help(loss.backward)` raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
help(torch.Tensor.backward)

Help on method backward in module torch._tensor:

backward(gradient=None, retain_graph=None, create_graph=False, inputs=None) method of torch.Tensor instance
    Computes the gradient of current tensor wrt graph leaves.
    
    The graph is differentiated using the chain rule. If the tensor is
    non-scalar (i.e. its data has more than one element) and requires
    gradient, the function additionally requires specifying ``gradient``.
    It should be a tensor of matching type and location, that contains
    the gradient of the differentiated function w.r.t. ``self``.
    
    This function accumulates gradients in the leaves - you might need to zero
    ``.grad`` attributes or set them to ``None`` before calling it.
    See :ref:`Default gradient layouts<default-grad-layouts>`
    for details on the memory layout of accumulated gradients.
    
    .. note::
    
        If you run any forward ops, create ``gradient``, and/or call ``backward``
        in a user-specified CUDA stream c

In [38]:
# Exploratory: print the built-in documentation for optimizer.step.
help(optimizer.step)

Help on method step in module torch.optim.adam:

step(closure=None) method of torch.optim.adam.Adam instance
    Performs a single optimization step.
    
    Args:
        closure (Callable, optional): A closure that reevaluates the model
            and returns the loss.



In [36]:
epochs = 50

# Enter training mode explicitly: the evaluation cell calls model.eval(), so
# re-running this cell afterwards would otherwise keep training in eval mode.
model.train()

for epoch in range(epochs):
    epoch_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()              # clear gradients from the previous step
        outputs = model(inputs)            # forward pass -> raw logits
        loss = criterion(outputs, labels)  # mean cross-entropy over the batch
        loss.backward()                    # back-propagate
        optimizer.step()                   # update the weights
        # Weight by batch size so the epoch figure is a true per-sample mean
        # even when the last batch is smaller.
        epoch_loss += loss.item() * inputs.size(0)

    # Report progress every 10 epochs so convergence is visible.
    if (epoch + 1) % 10 == 0:
        mean_loss = epoch_loss / len(train_loader.dataset)
        print(f"Epoch {epoch + 1:>2}/{epochs} - mean train loss: {mean_loss:.4f}")


In [39]:
# Switch to evaluation mode before scoring the held-out test set.
model.eval()

# No gradients are needed for inference.
with torch.no_grad():
    outputs = model(X_test_tensor)
    # The predicted class is the index of the largest logit. Using argmax
    # (instead of unpacking torch.max into `_`) avoids overwriting IPython's
    # special `_` "last output" variable.
    predicted = outputs.argmax(dim=1)
    accuracy = (predicted == y_test_tensor).sum().item() / len(y_test_tensor)
    print(f'Test Accuracy: {accuracy}')


Test Accuracy: 0.8444444444444444


In [50]:
from torchsummary import summary

In [57]:

# Exploratory: print the documentation (signature) of torchsummary's summary().
help(summary)

Help on function summary in module torchsummary.torchsummary:

summary(model, input_size, batch_size=-1, device='cuda')



In [67]:
# Print a per-layer summary for a single 4-feature input. summary() defaults to
# device='cuda'; the model is never moved off the CPU in this notebook, so
# request the CPU explicitly to avoid a device mismatch on machines with a GPU.
summary(model, (4,), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 10]              50
              ReLU-2                   [-1, 10]               0
            Linear-3                    [-1, 3]              33
Total params: 83
Trainable params: 83
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [66]:
# Display the model's layer structure.
model


IrisClassifier(
  (fc1): Linear(in_features=4, out_features=10, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=10, out_features=3, bias=True)
)

In [73]:
!pip install torchviz graphviz


Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4131 sha256=5ba99b8903957ae8ef7d81bf709a932cfa65e9bf712372fa0afc3bfee4c400ee
  Stored in directory: /root/.cache/pip/wheels/4c/97/88/a02973217949e0db0c9f4346d154085f4725f99c4f15a87094
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


In [80]:
from torchviz import make_dot

# Dummy input with the correct shape: one sample with 4 features.
dummy_input = torch.randn((1, 4))

# Build the computation graph of a single forward pass.
output_file_path = "model_graph"
dot = make_dot(model(dummy_input), params=dict(model.named_parameters()))

# Render the graph once as a PNG. The previous version rendered the same file
# twice and then called dot.view(), which tries to open an external viewer.
# The render call's return value (the image path) is the cell output.
dot.format = 'png'
dot.render(filename=output_file_path, cleanup=True)


'model_graph.png'