In [2]:
import os
import torch
import numpy as np

In [2]:
# Load the city-level population table from the course repository
import pandas as pd

population_data = pd.read_csv(
    'https://raw.githubusercontent.com/anandmishra22/PRML-Spring-2023/main/programmingAssignment/PA1/DATA/population.csv'
)

# Strip every comma from 'Population' so the column can be cast to float
population_data['Population'] = (
    population_data['Population']
    .replace(',', '', regex=True)
    .astype(float)
)

# Mean city population per country, returned as a flat two-column frame
average_population = (
    population_data
    .groupby('Country')['Population']
    .mean()
    .reset_index()
)
print(average_population)

# # Rounding of the population as it can't be a decimal value
# average_population['Population'] = average_population['Population'].round()

# # adding commas in form of thousand seperators
# average_population['Population'] = average_population['Population'].apply('{:,}'.format)

# # printing the average population
# print(average_population)

   Country  Population
0   Brazil   5536847.2
1    China  17815724.0
2  Germany   1795776.6
3    India   9170929.4
4    Japan   4830745.4
5       UK   2418711.6
6      USA   3787755.2


## **Introduction**

In PyTorch, tensors are used to represent the inputs and outputs of a model, as well as the model's parameters.

Similar to NumPy’s ndarrays, tensors offer powerful multidimensional array capabilities, but with the added advantage of being able to run on GPUs and other hardware accelerators. Additionally, tensors and NumPy arrays can frequently share the same underlying memory, which removes the need for data duplication.



### Initialization

In [9]:
# --- 1. Build a tensor directly from a nested Python list ---
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)
print(f"Data type of x_data: {type(x_data)} \n")

# --- 2. Build a tensor from a NumPy array ---
np_array = np.array(data)
print(f"Data type of np_array: {type(np_array)} \n")
x_np = torch.from_numpy(np_array)
print(f"Data type of x_np: {type(x_np)} \n")

# --- 3. Build a tensor from another tensor ---
# ones_like keeps the properties (shape, dtype) of x_data
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor: \n {x_ones} \n")

# rand_like keeps the shape of x_data, but the dtype is overridden to float
x_rand = torch.rand_like(x_data, dtype=torch.float)
print(f"Random Tensor: \n {x_rand} \n")

# --- 4. Build tensors from a target shape ---
shape = (2, 3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")


Data type of x_data: <class 'torch.Tensor'> 

Data type of np_array: <class 'numpy.ndarray'> 

Data type of x_np: <class 'torch.Tensor'> 

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.9841, 0.5495],
        [0.2924, 0.8301]]) 

Random Tensor: 
 tensor([[0.2538, 0.2664, 0.5072],
        [0.5894, 0.0625, 0.1299]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


`[[...]]`: the number of inner `[...]` lists sets the size of the first dimension (dim 0, rows), and the number of values inside each inner list sets the size of the second dimension (dim 1, columns).

`[[[...]]]`: the number of 2-D arrays sets the size of the outermost dimension (dim 0); the sizes of the remaining two dimensions are determined exactly as in the 2-D case.

## Accessing and Manipulating

In [11]:
# Basic attributes: shape, element dtype and the device holding the data
tensor = torch.rand(3, 4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

# Indexing and slicing on a fresh 4x4 tensor
tensor = torch.randn(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:,0]}")
print(f"Last column: {tensor[...,-1]}")
tensor[:, 1] = 0  # in-place write: zero out the second column
print(tensor)

# Concatenate three copies of the tensor along dim 1 (columns): (4, 4) -> (4, 12)
t1 = torch.cat([tensor] * 3, dim=1)
print(t1)

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu
First row: tensor([-1.0009, -0.5641,  0.7430, -1.6229])
First column: tensor([-1.0009,  0.4289, -1.5366, -0.9612])
Last column: tensor([-1.6229, -0.4759,  0.8513,  1.7465])
tensor([[-1.0009,  0.0000,  0.7430, -1.6229],
        [ 0.4289,  0.0000,  0.6219, -0.4759],
        [-1.5366,  0.0000,  0.9039,  0.8513],
        [-0.9612,  0.0000, -0.1983,  1.7465]])
tensor([[-1.0009,  0.0000,  0.7430, -1.6229, -1.0009,  0.0000,  0.7430, -1.6229,
         -1.0009,  0.0000,  0.7430, -1.6229],
        [ 0.4289,  0.0000,  0.6219, -0.4759,  0.4289,  0.0000,  0.6219, -0.4759,
          0.4289,  0.0000,  0.6219, -0.4759],
        [-1.5366,  0.0000,  0.9039,  0.8513, -1.5366,  0.0000,  0.9039,  0.8513,
         -1.5366,  0.0000,  0.9039,  0.8513],
        [-0.9612,  0.0000, -0.1983,  1.7465, -0.9612,  0.0000, -0.1983,  1.7465,
         -0.9612,  0.0000, -0.1983,  1.7465]])


## Linear vs Embedding

In [None]:
import torch.nn as nn

# Three multi-hot rows over 4 input features
X_train = np.array([[1, 0, 1, 0], [0, 0, 1, 1], [1, 1, 1, 0]])

row = X_train[0]
print(row.shape)

# Bias-free linear layer mapping 4 inputs to 3 outputs; weight has shape (3, 4)
w_linear = nn.Linear(4, 3, bias=False)
print(w_linear.weight)

# Dense path: feed the multi-hot row (cast to float32) through the linear layer
print(w_linear(torch.tensor(row, dtype=torch.float32)))

# Embedding path: reuse the transposed linear weight, shape (4, 3), as the lookup table.
# from_pretrained is a classmethod, so it is called on the class itself rather than on a
# throwaway nn.Embedding(4, 3) instance; detach() hands it a plain tensor without autograd history.
w_embedding = nn.Embedding.from_pretrained(w_linear.weight.T.detach())
print(w_embedding.weight)

sparse_row = torch.tensor(row, dtype=torch.long)

# Positions of the active (non-zero) features in the row
non_zero_indicies = torch.nonzero(sparse_row).squeeze(1)
print(non_zero_indicies)

# One embedding vector per active feature; summing these rows reproduces the linear output above
print(w_embedding(non_zero_indicies))



## Unsqueeze and Squeeze

In [3]:
tensor = torch.tensor([1, 2, 3])  # shape [3]
unsqueezed_tensor = tensor.unsqueeze(0)  # insert a new axis at index 0 -> shape [1, 3]
print(unsqueezed_tensor)
print(unsqueezed_tensor.shape)


# squeeze removes dimensions whose size is 1
tensor = torch.rand(1, 3, 256, 256)

# With an explicit dim, that dimension is removed only if its size is 1;
# otherwise the tensor is returned as it is (very important)
tensor.squeeze(0)




tensor([[1, 2, 3]])
torch.Size([1, 3])


## Transpose

In [5]:

shape = (2, 3)
t = torch.rand(shape)
print(t)

# Swap the last two axes: (2, 3) -> (3, 2)
print(t.transpose(-2, -1))

tensor([[0.5445, 0.0376, 0.6943],
        [0.7321, 0.2462, 0.0233]])
tensor([[0.5445, 0.7321],
        [0.0376, 0.2462],
        [0.6943, 0.0233]])


## View
`view` returns a new tensor that shares the same underlying data as the original (`self`) tensor but has a different shape.

In [17]:
a = torch.randn(1, 2, 3, 4)
print("Original data's stride", a.stride())

# view reinterprets the same memory under a new shape; the strides are
# recomputed for that shape (last two go from (4, 1) to (3, 1))
c = a.view(1, 2, 4, 3)
print("stride after view", c.stride())

test = torch.randn(2, 3)
print(test)
print(test.view(-1))  # a lone -1 flattens the tensor to 1-D


t1 = torch.randn(4, 4)
z = t1.view(-1, 8)  # the -1 size is inferred from the other dimension: 16 / 8 = 2
print(t1)
print(z)



Original data's stride (24, 12, 4, 1)
stride after view (24, 12, 3, 1)
tensor([[ 1.1863, -1.0208, -0.3183],
        [-0.0614, -0.6214,  0.2219]])
tensor([ 1.1863, -1.0208, -0.3183, -0.0614, -0.6214,  0.2219])
tensor([[-0.3429,  0.8406,  0.7283, -0.7830],
        [ 0.0914, -0.8639, -1.7109, -1.3927],
        [ 0.5615,  1.3659, -0.8780, -0.0956],
        [ 1.8997, -0.2655, -0.8522,  2.1720]])
tensor([[-0.3429,  0.8406,  0.7283, -0.7830,  0.0914, -0.8639, -1.7109, -1.3927],
        [ 0.5615,  1.3659, -0.8780, -0.0956,  1.8997, -0.2655, -0.8522,  2.1720]])


In [31]:
a = torch.randn(1, 2, 3, 4)
print(a.size())
print(a)

# transpose swaps the last two axes: the actual mathematical transpose
b = a.transpose(-2, -1)
print(b.size())
print(b)

# view keeps the elements in their contiguous memory order and only regroups
# them into the new shape
c = a.view(1, 2, 4, 3)
print(c.size())
print(c)

# b and c have the same shape but arrange the elements differently.
# The comparison is printed because a bare expression in the middle of a cell
# is evaluated and silently discarded.
print(torch.equal(b, c))

# A contiguous tensor has a straightforward mapping between its shape and how
# its data is stored in memory; a non-contiguous one reorders dimensions
# without physically rearranging the data.
print(b.is_contiguous())
print(c.is_contiguous())

# Flattening (n-D -> 1-D) a and c gives the same sequence, matching the order
# in which the data is stored. Flattening b does not: it is the actual
# transpose, so its elements are no longer read in storage order.
print(a.flatten())
print(b.flatten())
print(c.flatten())

torch.Size([1, 2, 3, 4])
tensor([[[[-1.4143, -0.1577, -0.1069, -1.0023],
          [ 1.0648, -0.6918, -0.0435,  0.6764],
          [ 1.0746,  0.1258, -0.1263, -0.9120]],

         [[ 0.4684, -2.3826, -1.5607,  1.5103],
          [ 0.3578, -0.6260,  1.2659,  0.6924],
          [-1.0835,  1.6167, -0.0412,  0.5165]]]])
Original data's stride (24, 12, 4, 1)
stride after transpose (24, 12, 1, 4)
torch.Size([1, 2, 4, 3])
tensor([[[[-1.4143,  1.0648,  1.0746],
          [-0.1577, -0.6918,  0.1258],
          [-0.1069, -0.0435, -0.1263],
          [-1.0023,  0.6764, -0.9120]],

         [[ 0.4684,  0.3578, -1.0835],
          [-2.3826, -0.6260,  1.6167],
          [-1.5607,  1.2659, -0.0412],
          [ 1.5103,  0.6924,  0.5165]]]])
stride after view (24, 12, 3, 1)
torch.Size([1, 2, 4, 3])
tensor([[[[-1.4143, -0.1577, -0.1069],
          [-1.0023,  1.0648, -0.6918],
          [-0.0435,  0.6764,  1.0746],
          [ 0.1258, -0.1263, -0.9120]],

         [[ 0.4684, -2.3826, -1.5607],
         

## Clamp
Clamps all elements in input into the range [ min, max ]. Letting min_value and max_value be min and max, respectively, this returns:

$$y_i = \min(\max(x_i, \text{min\_value}_i), \text{max\_value}_i)$$


## Repeat

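- repeats (tiles) the tensor a given number of times along each dimension
- `repeat` takes one count per dimension, and needs at least as many counts as the tensor has dimensions; e.g. `.repeat(1, 2, 3)` on a tensor of shape (3, 2, 4) repeats it once along dim 0, 2 times along dim 1 and 3 times along dim 2, so the shape becomes (3, 4, 12)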

In [4]:
import torch

x = torch.tensor([1, 2, 3, 4])

# repeat(3, 1): x (shape [4]) is tiled 3 times along a new leading dimension
# and once along its existing one, giving shape [3, 4].
# Alternative to try: x.repeat(3, 1, 2) gives shape [3, 1, 8].
y = x.repeat(3, 1)

print(x.shape)
print(y)
print(y.shape)

torch.Size([4])
tensor([[1, 2, 3, 4],
        [1, 2, 3, 4],
        [1, 2, 3, 4]])
torch.Size([3, 4])
