In [None]:
import torch
import numpy as np
import pandas as pd

In [None]:
# IMPORT DATA INTO TENSORS

# From Python lists
x = torch.tensor([1, 2, 3])
print("TENSOR FROM PYTHON LISTS:", x)

# From NumPy array
numpy_array = np.array([[1, 2, 3], [4, 5, 6]])
torch_tensor_from_numpy = torch.from_numpy(numpy_array)
print("TENSOR FROM NUMPY:\n\n", torch_tensor_from_numpy)

# From Pandas DataFrame
df = pd.read_csv('./data.csv')
all_values = df.values # Extract the data as a NumPy array from the DataFrame
tensor_from_df = torch.tensor(all_values)

# INITIALIZE TENSORS
zeros = torch.zeros(2, 3)
ones = torch.ones(2, 3)
random = torch.rand(2, 3)
range_tensor = torch.arange(0, 10, step=1)


In [None]:
# RESHAPE TENSORS
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
print("ORIGINAL TENSOR:\n\n", x)
print("\nTENSOR SHAPE:", x.shape)

expanded = x.unsqueeze(0)  # Add dimension at index 0
squeezed = expanded.squeeze()   # Remove dimension

reshaped = x.reshape(3, 2)

transposed = x.transpose(0, 1)


In [None]:
# COMBINE TENSORS
tensor_a = torch.tensor([[1, 2],
                         [3, 4]])
tensor_b = torch.tensor([[5, 6],
                         [7, 8]])

# Concatenate along columns (dim=1)
concatenated_tensors = torch.cat((tensor_a, tensor_b), dim=1)


In [None]:
# INDEX AND SLICE TENSORS

# Create a 3x4 tensor
x = torch.tensor([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12]
])
print("ORIGINAL TENSOR:\n\n", x)

# Get a single element at row 1, column 2
single_element_tensor = x[1, 2]

# Get the entire second row (index 1)
second_row = x[1]

# Last row
last_row = x[-1]


# Get the first two rows
first_two_rows = x[0:2]

# Get the third column (index 2) of all rows
third_column = x[:, 2]

# Every other column
every_other_col = x[:, ::2]

# Last column
last_col = x[:, -1]

In [None]:
# ADVANCED INDEXING 
# BOOLEAN MASKS

# For more complex data selection, such as filtering your dataset based on one or more conditions, you can use advanced indexing techniques.

# Boolean Masking**: Using a boolean tensor to select elements that meet a certain condition (e.g.,Â `x[x > 5]`).

x = torch.tensor([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12]
])

# Boolean indexing using logical conditions
mask = x > 6
print("MASK (VALUES > 6):\n\n", mask, "\n")


In [None]:
# TENSOR MATH & LOGICAL OPERATIONS: BASIC

a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
print("TENSOR A:", a)
print("TENSOR B", b)
print("-" * 60)

# Element-wise addition
element_add = a + b

# Element-wise multiplication
element_mul = a * b

# Dot product
dot_product = torch.matmul(a, b)

# Broadcasting
# The automatic expansion of smaller tensors to match the shape of larger tensors during arithmetic operations.
# Broadcasting allows operations between tensors with compatible shapes, even if they don't have the exact same dimensions.

c = torch.tensor([1, 2, 3])
d = torch.tensor([[1],
                 [2],
                 [3]])

# Apply broadcasting
e = c + d  # c is broadcasted to match the shape of d


TENSOR A: tensor([1, 2, 3])
TENSOR B tensor([4, 5, 6])
------------------------------------------------------------
TENSOR A: tensor([1, 2, 3])
SHAPE: torch.Size([3])

TENSOR B

 tensor([[1],
        [2],
        [3]])

SHAPE: torch.Size([3, 1])


In [None]:
# TENSOR MATH & LOGICAL OPERATIONS: COMPARISON

temperatures = torch.tensor([20, 35, 19, 35, 42])
print("TEMPERATURES:", temperatures)
print("-" * 50)

### Comparison Operators (>, <, ==)

# Use '>' (greater than) to find temperatures above 30
is_hot = temperatures > 30

# Use '<=' (less than or equal to) to find temperatures 20 or below
is_cool = temperatures <= 20

# Use '==' (equal to) to find temperatures exactly equal to 35
is_35_degrees = temperatures == 35

print("\nHOT (> 30 DEGREES):", is_hot)
print("COOL (<= 20 DEGREES):", is_cool)
print("EXACTLY 35 DEGREES:", is_35_degrees, "\n")

In [None]:
# TENSOR MATH & LOGICAL OPERATIONS: OPERATIONS

is_morning = torch.tensor([True, False, False, True])
is_raining = torch.tensor([False, False, True, True])
print("IS MORNING:", is_morning)
print("IS RAINING:", is_raining)

### Logical Operators (&, |)

# Use '&' (AND) to find when it's both morning and raining
morning_and_raining = (is_morning & is_raining)

# Use '|' (OR) to find when it's either morning or raining
morning_or_raining = is_morning | is_raining

print("\nMORNING & (AND) RAINING:", morning_and_raining)
print("MORNING | (OR) RAINING:", morning_or_raining)

In [None]:
# TENSOR MATH & LOGICAL OPERATIONS: STATISTICS
data = torch.tensor([10.0, 20.0, 30.0, 40.0, 50.0])
print("DATA:", data)

# mean
data_mean = data.mean()

# standard deviation
data_std = data.std()

## 5 - Optional Exercises

You've now covered the essential tools for working with tensors in PyTorch. Theory provides the map, but hands-on practice is what builds true confidence and skill. The following optional exercises are your opportunity to apply what you have learned to practical scenarios, from analyzing sales data to engineering new features for a machine learning model. This is where the concepts truly come to life, so dive in and put your new knowledge to the test!

### Exercise 1: Analyzing Monthly Sales Data

You're a data analyst at an e-commerce company. You've been given a tensor representing the monthly sales of three different products over a period of four months. Your task is to extract meaningful insights from this data.

The tensor `sales_data` is structured as follows:

* **Rows** represent the **products** (Product A, Product B, Product C).

* **Columns** represent the **months** (Jan, Feb, Mar, Apr).

**Your goals are**:

1. Calculate the total sales for **Product B** (the second row).
2. Identify which months had sales **greater than 130** for **Product C** (the third row) using boolean masking.
3. Extract the sales data for all products for the months of **Feb and Mar** (the middle two columns).

<br>

<details>
<summary><span style="color:green;"><strong>Solution (Click here to expand)</strong></span></summary>

```python

In [7]:
# Sales data for 3 products over 4 months
sales_data = torch.tensor([[100, 120, 130, 110],   # Product A
                           [ 90,  95, 105, 125],   # Product B
                           [140, 115, 120, 150]    # Product C
                          ], dtype=torch.float32)

print("ORIGINAL SALES DATA:\n\n", sales_data)
print("-" * 45)

# 1. Calculate total sales for Product B.
total_sales_product_b = torch.sum(sales_data[1])
print("TOTAL SALES FOR PRODUCT B:", total_sales_product_b.item())

# 2. Find months where sales for Product C were > 130.
high_sales_mask_product_c = sales_data[2] > 130
sales_data[2,high_sales_mask_product_c]

# 3. Get sales for Feb and Mar for all products.
sales_data[:, 1:3]

ORIGINAL SALES DATA:

 tensor([[100., 120., 130., 110.],
        [ 90.,  95., 105., 125.],
        [140., 115., 120., 150.]])
---------------------------------------------
TOTAL SALES FOR PRODUCT B: 415.0


tensor([[120., 130.],
        [ 95., 105.],
        [115., 120.]])

### Exercise 2: Image Batch Transformation

You're working on a computer vision model and have a batch of 4 grayscale images, each of size 3x3 pixels. The data is currently in a tensor with the shape `[4, 3, 3]`, which represents `[batch_size, height, width]`.

For processing with certain deep learning frameworks, you need to transform this data into the `[batch_size, channels, height, width]` format. Since the images are grayscale, **you'll need to**:

1. Add a new dimension of size 1 at index 1 to represent the color channel.
2. After adding the channel, you realize the model expects the shape `[batch_size, height, width, channels]`. Transpose the tensor to swap the channel dimension with the last dimension.

<br>

<details>
<summary><span style="color:green;"><strong>Solution (Click here to expand)</strong></span></summary>

```python
### START CODE HERE ###

# 1. Add a channel dimension at index 1.
image_batch_with_channel = image_batch.unsqueeze(1)

# 2. Transpose the tensor to move the channel dimension to the end.
# Swap dimension 1 (channels) with dimension 3 (the last one).
image_batch_transposed = image_batch_with_channel.transpose(1, 3)

### END CODE HERE ###
```

In [12]:
# A batch of 4 grayscale images, each 3x3
image_batch = torch.rand(4, 3, 3)

print("\nORIGINAL IMAGE BATCH:\n\n", image_batch)
print("-" * 45)

# 1. Add a channel dimension to the image batch at index 1
image_batch_with_channel = image_batch.unsqueeze(1)
print("\nIMAGE BATCH WITH CHANNEL DIMENSION ADDED:\n\n", image_batch_with_channel)

#2. Transpose to move channel dimension to the end
image_batch_transposed = image_batch_with_channel.transpose(1,3)
print("\nIMAGE BATCH WITH CHANNEL DIMENSION TRANSPOSED TO THE END:\n\n", image_batch_transposed)


ORIGINAL IMAGE BATCH:

 tensor([[[0.3273, 0.7054, 0.2246],
         [0.9866, 0.0020, 0.8316],
         [0.1281, 0.5903, 0.4588]],

        [[0.5584, 0.1568, 0.8563],
         [0.9530, 0.0887, 0.2067],
         [0.2157, 0.3699, 0.5252]],

        [[0.0239, 0.3083, 0.9069],
         [0.7045, 0.2185, 0.3443],
         [0.1641, 0.1923, 0.3517]],

        [[0.8429, 0.9279, 0.8975],
         [0.1355, 0.3286, 0.9350],
         [0.5923, 0.2866, 0.9759]]])
---------------------------------------------

IMAGE BATCH WITH CHANNEL DIMENSION ADDED:

 tensor([[[[0.3273, 0.7054, 0.2246],
          [0.9866, 0.0020, 0.8316],
          [0.1281, 0.5903, 0.4588]]],


        [[[0.5584, 0.1568, 0.8563],
          [0.9530, 0.0887, 0.2067],
          [0.2157, 0.3699, 0.5252]]],


        [[[0.0239, 0.3083, 0.9069],
          [0.7045, 0.2185, 0.3443],
          [0.1641, 0.1923, 0.3517]]],


        [[[0.8429, 0.9279, 0.8975],
          [0.1355, 0.3286, 0.9350],
          [0.5923, 0.2866, 0.9759]]]])

IMAGE BA

### Exercise 3: Combining and Weighting Sensor Data

You're building an environment monitoring system that uses two sensors: one for temperature and one for humidity. You receive data from these sensors as two separate 1D tensors.

**Your task is to**:

1. **Concatenate** the two tensors into a single `2x5` tensor, where the first row is temperature data and the second is humidity data.
2. Create a `weights` tensor `torch.tensor([0.6, 0.4])`.
3. Use **broadcasting and element-wise multiplication** to apply these weights to the combined sensor data. The temperature data should be multiplied by 0.6 and the humidity data by 0.4.
4. Finally, calculate the **weighted average** for each time step by **summing** the weighted values along `dim=0` and **dividing** by the sum of the weights.

<br>

<details>
<summary><span style="color:green;"><strong>Solution (Click here to expand)</strong></span></summary>

In [22]:
# Sensor readings (5 time steps)
temperature = torch.tensor([22.5, 23.1, 21.9, 22.8, 23.5])
humidity = torch.tensor([55.2, 56.4, 54.8, 57.1, 56.8])

print("TEMPERATURE DATA: ", temperature)
print("HUMIDITY DATA:    ", humidity)
print("-" * 45)

### START CODE HERE ###

# 1. Concatenate the two tensors.
# Note: You need to unsqueeze them first to stack them vertically.
combined_data = temperature.unsqueeze(0) + humidity.unsqueeze(0)
print("combined_data:\n", combined_data)

# 2. Create the weights tensor.
weights = torch.tensor([0.6, 0.4])


# 3. Apply weights using broadcasting.
# You need to reshape weights to [2, 1] to broadcast across columns.
weighted_data = combined_data * weights.unsqueeze(1)
print("weighted_data:\n", weighted_data)

# 4. Calculate the weighted average for each time step.
#    (A true average = weighted sum / sum of weights)
weighted_sum = torch.sum(weighted_data, dim=0)
weighted_average = weighted_sum / torch.sum(weights)
print("weighted_sum:\n", weighted_sum)
print("sum of weights:\n", torch.sum(weights))
print("weighted_average:\n", weighted_average)

### END CODE HERE ###

print("\nCOMBINED DATA (2x5):\n\n", combined_data)
print("\nWEIGHTED DATA:\n\n", weighted_data)
print("\nWEIGHTED AVERAGE:", weighted_average)

TEMPERATURE DATA:  tensor([22.5000, 23.1000, 21.9000, 22.8000, 23.5000])
HUMIDITY DATA:     tensor([55.2000, 56.4000, 54.8000, 57.1000, 56.8000])
---------------------------------------------
combined_data:
 tensor([[77.7000, 79.5000, 76.7000, 79.9000, 80.3000]])
weighted_data:
 tensor([[46.6200, 47.7000, 46.0200, 47.9400, 48.1800],
        [31.0800, 31.8000, 30.6800, 31.9600, 32.1200]])
weighted_sum:
 tensor([77.7000, 79.5000, 76.7000, 79.9000, 80.3000])
sum of weights:
 tensor(1.)
weighted_average:
 tensor([77.7000, 79.5000, 76.7000, 79.9000, 80.3000])

COMBINED DATA (2x5):

 tensor([[77.7000, 79.5000, 76.7000, 79.9000, 80.3000]])

WEIGHTED DATA:

 tensor([[46.6200, 47.7000, 46.0200, 47.9400, 48.1800],
        [31.0800, 31.8000, 30.6800, 31.9600, 32.1200]])

WEIGHTED AVERAGE: tensor([77.7000, 79.5000, 76.7000, 79.9000, 80.3000])


### Exercise 4: Feature Engineering for Taxi Fares

You are working with a dataset of taxi trips. You have a tensor, `trip_data`, where each row is a trip and the columns represent **[distance (km), hour_of_day (24h)]**.

**Your goal** is to engineer a new binary feature called `is_rush_hour_long_trip`. This feature should be `True` (or `1`) only if a trip meets **both** of the following criteria:

* It's a **long trip** (distance > 10 km).
* It occurs during a **rush hour** (8-10 AM or 5-7 PM, i.e., `[8, 10)` or `[17, 19)`).

To achieve this, you will need to:

1. **Slice** the `trip_data` tensor to isolate the `distance` and `hour` columns.
2. Use **logical and comparison operators** to create boolean masks for each condition (long trip, morning rush, evening rush).
3. Combine these masks to create the final `is_rush_hour_long_trip` feature.
4. **Reshape** this new 1D feature tensor into a 2D column vector and convert its data type to float so it can be combined with the original data.

<br>

<details>
<summary><span style="color:green;"><strong>Solution (Click here to expand)</strong></span></summary>

```python
### START CODE HERE ###

# 1. Slice the main tensor to get 1D tensors for each feature.
distances = trip_data[:, 0]
hours = trip_data[:, 1]

# 2. Create boolean masks for each condition.
is_long_trip = distances > 10.0
is_morning_rush = (hours >= 8.0) & (hours < 10.0)
is_evening_rush = (hours >= 17.0) & (hours < 19.0)

# 3. Combine masks to identify rush hour long trips.
# A trip is a rush hour long trip if it's (a morning OR evening rush) AND a long trip.
is_rush_hour_long_trip_mask = (is_morning_rush | is_evening_rush) & is_long_trip

# 4. Reshape the new feature into a column vector and cast to float.
new_feature_col = is_rush_hour_long_trip_mask.float().unsqueeze(1)

### END CODE HERE ###
```

In [32]:
# Data for 8 taxi trips: [distance, hour_of_day]
trip_data = torch.tensor([
    [5.3, 7],   # Not rush hour, not long
    [12.1, 9],  # Morning rush, long trip -> RUSH HOUR LONG
    [15.5, 13], # Not rush hour, long trip
    [6.7, 18],  # Evening rush, not long
    [2.4, 20],  # Not rush hour, not long
    [11.8, 17], # Evening rush, long trip -> RUSH HOUR LONG
    [9.0, 9],   # Morning rush, not long
    [14.2, 8]   # Morning rush, long trip -> RUSH HOUR LONG
], dtype=torch.float32)


print("ORIGINAL TRIP DATA (Distance, Hour):\n\n", trip_data)
print("-" * 55)


### START CODE HERE ###

# 1. Slice the main tensor to get 1D tensors for each feature.
distances = trip_data[:, 0]
hours = trip_data[:, 1]
print("distances:\n", distances)
print("hours:\n", hours)

# 2. Create boolean masks for each condition.
is_long_trip = distances > 10.0
is_morning_rush = (hours >= 8.0) & (hours < 10.0)
is_evening_rush = (hours >= 17.0) & (hours < 19.0)

# 3. Combine masks to identify rush hour long trips.
# A trip is a rush hour long trip if it's (a morning OR evening rush) AND a long trip.
is_rush_hour_long_trip_mask = is_long_trip & (is_morning_rush | is_evening_rush)
print("is_rush_hour_long_trip_mask:\n", is_rush_hour_long_trip_mask)

# 4. Reshape the new feature into a column vector and cast to float.
new_feature_col = is_rush_hour_long_trip_mask.float().unsqueeze(1)
print("new_feature_col:\n", new_feature_col)


### END CODE HERE ###

print("\n'IS RUSH HOUR LONG TRIP' MASK: ", is_rush_hour_long_trip_mask)
print("\nNEW FEATURE COLUMN (Reshaped):\n\n", new_feature_col)

# You can now concatenate this new feature to the original data
enhanced_trip_data = torch.cat((trip_data, new_feature_col), dim=1)
print("\nENHANCED DATA (with new feature at the end):\n\n", enhanced_trip_data)

ORIGINAL TRIP DATA (Distance, Hour):

 tensor([[ 5.3000,  7.0000],
        [12.1000,  9.0000],
        [15.5000, 13.0000],
        [ 6.7000, 18.0000],
        [ 2.4000, 20.0000],
        [11.8000, 17.0000],
        [ 9.0000,  9.0000],
        [14.2000,  8.0000]])
-------------------------------------------------------
distances:
 tensor([ 5.3000, 12.1000, 15.5000,  6.7000,  2.4000, 11.8000,  9.0000, 14.2000])
hours:
 tensor([ 7.,  9., 13., 18., 20., 17.,  9.,  8.])
is_rush_hour_long_trip_mask:
 tensor([False,  True, False, False, False,  True, False,  True])
new_feature_col:
 tensor([[0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.]])

'IS RUSH HOUR LONG TRIP' MASK:  tensor([False,  True, False, False, False,  True, False,  True])

NEW FEATURE COLUMN (Reshaped):

 tensor([[0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.]])

ENHANCED DATA (with new feature at the end):

 tensor([[ 5