## **2.1 Images**

In [None]:
import torchvision.datasets as datasets
import matplotlib.pyplot as plt 
import torchvision.transforms as transforms
import torch

##### Download the MNIST dataset using the torchvision library and create a subset of the dataset containing 1,000 samples. Each sample will include a 28 × 28 pixel image, along with a single integer value denoting the sample’s respective class (number).

In [None]:
# Download (if needed) the MNIST training split; every image goes through
# ToTensor when a sample is fetched, so samples come back as (image, label).
to_tensor = transforms.ToTensor()
mnistdataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=to_tensor,
)

# Restrict the dataset to its first 1,000 samples.
first_thousand = range(1000)
subset = torch.utils.data.Subset(mnistdataset, first_thousand)

# Show which dataset class was loaded.
type(mnistdataset)

##### Plot a histogram of classes in your subset of MNIST.

In [None]:
# test function
# count = 0
# for i in range(len(subset)):
#     print(subset[i][1])
#     if (subset[i][1] == 0):
#         count += 1
# print("count: ", count)

In [None]:
# Gather the class label (second element of each sample) for the whole subset.
labels = []
for sample_idx in range(len(subset)):
    labels.append(subset[sample_idx][1])

# Bin edges 0..10 give one bin per digit class.
digit_edges = range(11)
plt.hist(labels, bins=digit_edges)
plt.title('Histogram of Classes in the MNIST Subset')
plt.ylabel('Count')
plt.xlabel('Class Label')
plt.show()

##### Use the einops python package to ’batch’ the subset of MNIST images. Each batch should contain 25 different samples. Hint: The subset of MNIST image data should change dimensions from [1000, 28, 28] → [Number of Batches, Batch Size, 1, 28, 28] 
##### Context: Deep learning models operate over batches of samples. This rapidly speeds up computation, as the forward pass runs in parallel across the batch dimension. Batching can also improve optimization and provides the S in SGD (Stochastic Gradient Descent). Additionally, neural networks expect a channel dimension for image data. MNIST is grayscale so it only contains one channel, but other images can contain color, in which case they will have three channels, RGB.

In [None]:
import numpy as np
from einops import rearrange 

# Number of samples per batch.
batchSize = 25

# Batch exactly the samples that make up `subset` (no random re-draw, so there
# are no duplicate images and the result is reproducible across runs).
index_arr = np.asarray(subset.indices)
# `.data` is the raw image tensor; `train_data` is its deprecated alias.
sub_images = mnistdataset.data[index_arr]

# The '(b1 b2)' split below only works when the sample count divides evenly.
assert len(sub_images) % batchSize == 0, "subset size must be a multiple of batchSize"

# [N, H, W] -> [number of batches, batch size, 1 channel, H, W]
new_batch = rearrange(sub_images, '(b1 b2) h w -> b1 b2 1 h w', b2=batchSize)
print(new_batch.shape)

##### Randomly select an MNIST image sample and, using matplotlib, plot it in three dimensions. The x- and y-axes should be the respective pixel locations, and the z-axis should be the pixel intensity.

In [None]:
import random
# (images, labels) pair taken from the dataset's public attributes instead of
# the private `_load_data()` helper, which would also re-read the files on disk.
mnist = (mnistdataset.data, mnistdataset.targets)

    

In [None]:
image, label = mnist

# Seed the generator that actually draws the index, so the pick is reproducible.
random.seed(0)
# Choose among all images; the bound is the dataset length, not the image width.
index = random.randrange(len(image))

# Flatten the chosen image into parallel per-pixel lists in row-major order:
# x = row position, y = column position, z = pixel intensity.
picked = image[index]
n_rows, n_cols = picked.shape
x = [r for r in range(n_rows) for c in range(n_cols)]
y = [c for r in range(n_rows) for c in range(n_cols)]
z = [int(v) for v in picked.flatten().tolist()]


In [None]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# x, y, z are flat per-pixel lists, but the 3-D surface/contour plotters need
# 2-D grids (passing 1-D z raises "Input z must be 2D"), so restore the image shape.
grid_shape = tuple(image[index].shape)
x_grid = np.asarray(x).reshape(grid_shape)
y_grid = np.asarray(y).reshape(grid_shape)
z_grid = np.asarray(z).reshape(grid_shape)

ax.plot_surface(x_grid, y_grid, z_grid, cmap='gray')
ax.set_xlabel('Pixel row (x)')
ax.set_ylabel('Pixel column (y)')
ax.set_zlabel('Pixel intensity')
ax.set_title('MNIST sample as an intensity surface')
plt.show()



In [None]:
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import cm

# Reference example: contour curves of matplotlib's built-in 3-D test surface.
demo_fig = plt.figure()
ax = demo_fig.add_subplot(projection='3d')

X, Y, Z = axes3d.get_test_data(0.05)
ax.contour(X, Y, Z, cmap=cm.coolwarm)

plt.show()

## 2.2 Time Series

##### Task: Load the data (`energydata_complete.csv`) and perform the following analysis.

In [None]:
import pandas as pd 
# Read the energy dataset from the working directory and show the frame.
energy_csv = 'energydata_complete.csv'
data = pd.read_csv(energy_csv)
data

##### Plot the appliances energy consumption for the whole period, and take a closer look at any one week of consumption.

In [None]:
# Array copy of the frame; later cells index it positionally
# (column 0 is read as the timestamp, column 1 as the appliance consumption).
timedata_np = data.to_numpy()
print(timedata_np[:,1])

# Plot against real timestamps rather than the sample index so the x axis is
# readable, and label the figure so it stands on its own.
timestamps = pd.to_datetime(data['date'])
plt.figure(figsize=(12, 4))
plt.plot(timestamps, data['Appliances'])
plt.xlabel('Date')
plt.ylabel('Energy Consumption (Wh)')
plt.title('Appliances energy consumption, whole period')
plt.show()

In [None]:
# Timestamps (column 0) at the first row of the chosen week and 7*24*6 rows later.
week_start_row = 42
week_stop_row = 7 * 24 * 6 + week_start_row
print(timedata_np[week_start_row, 0])
print(timedata_np[week_stop_row, 0])

In [None]:
# First row of the week to inspect.
# NOTE(review): presumably the first midnight in the data; verify with timedata_np[begin, 0].
begin = 42
# 7 days * 24 hours * 6 samples per hour (assumes 10-minute sampling; TODO confirm).
end = 7*24*6 + begin
sub = timedata_np[begin:end, 1]

# Use the matching timestamps on the x axis and label the figure.
week_times = pd.to_datetime(timedata_np[begin:end, 0])
plt.figure(figsize=(12, 4))
plt.plot(week_times, sub.astype(float))
plt.xlabel('Date')
plt.ylabel('Energy Consumption (Wh)')
plt.title('Appliances energy consumption, one week')
plt.show()


In [None]:
# Inspect the raw `date` column before it is parsed in the cells below.
data['date']

In [None]:
# Display the full energy frame again for reference.
data

##### Plot heatmap of hourly consumption of appliances for a week. An example heatmap looks like Figure 1.

In [None]:
date = data["date"]
# Index the frame by parsed timestamps so it can be resampled by time.
to = data.set_index(pd.DatetimeIndex(pd.to_datetime(date)))

plt.figure(figsize=(5,10))
# Hourly totals. The text `date` column is dropped first so that only numeric
# columns are summed.
to_day = to.drop(columns=["date"]).resample('H').sum()
# Seven full days of hourly rows (7 * 24 = 168 values).
week_frame = to_day.loc['2016-01-12 00:00:00': '2016-01-18 23:50:00']
hourly = week_frame['Appliances'].to_numpy()
# The hourly values are ordered day by day, so reshape to (day, hour) first and
# then transpose to (hour, day). Reshaping straight to (24, 7) would mix hours
# of different days within a row.
week_data = hourly.reshape(7, 24).T
heatmap = plt.pcolormesh(week_data)
plt.yticks(np.arange(0,25))
# One tick per day column, centred on the cell and labelled with the weekday.
plt.xticks(np.arange(7) + 0.5, week_frame.index[::24].strftime('%a'))
plt.xlabel("Day of week")
plt.ylabel("Hours of day")
plt.colorbar(heatmap)
plt.show()

##### Plot the histogram of energy consumption of appliances.

In [None]:
# Distribution of the appliance consumption readings over 50 bins.
appliance_wh = data['Appliances']
plt.hist(appliance_wh, bins=50)
plt.xlabel('Energy Consumption (Wh)')


##### Construct a feature variable NSM (no. of seconds from midnight) and plot energy consumption vs. NSM.

In [None]:

# Scratch check of the slicing logic on one sample timestamp (row label 6):
# the last eight characters are taken as the clock part.
time = data["date"][6][-8:]
print(time)
hh_part, mm_part, ss_part = time[0:2], time[-5:-3], time[-2:]
second = int(ss_part) + int(mm_part)*60 + int(hh_part)*3600

# Convert a timestamp string to NSM (number of seconds from midnight).
def to_NSM(date):
    """Return the seconds elapsed since midnight for a timestamp string.

    The last eight characters of `date` are read as a clock value with
    two-digit hours, minutes and seconds at fixed positions
    (e.g. '17:00:00' -> 61200).
    """
    clock = date[-8:]
    hours = int(clock[0:2])
    minutes = int(clock[-5:-3])
    seconds = int(clock[-2:])
    return seconds + minutes*60 + hours*3600
#print(to_NSM(data["date"][6]))


In [None]:
# Derive the NSM feature from every timestamp and store it as a new column.
nsm_values = data["date"].apply(to_NSM)
data["NSM"] = nsm_values
data

In [None]:
# Scatter of appliance consumption against seconds from midnight.
nsm_axis = data['NSM']
energy_axis = data['Appliances']
plt.plot(nsm_axis, energy_axis, "o")
plt.xlabel('NSM')
plt.ylabel('Energy Consumption')
plt.show()

##### Plot appliances energy consumption vs. pressure (`Press_mm_hg`).

In [None]:
# Scatter of appliance consumption against the pressure readings.
pressure_axis = data['Press_mm_hg']
consumption_axis = data['Appliances']
plt.plot(pressure_axis, consumption_axis, 'o')
plt.xlabel('Pressure mm Hg')
plt.ylabel('Energy Consumption')
plt.show()

##### It is observed that the major contributing factors for the energy consumption, among all the features, are NSM and Press mm Hg. Comment on it.

##### Ans: When the pressure is between 740 and 770, the average energy consumption is higher. Comparing the energy consumption vs. NSM plot with the appliances energy consumption vs. Press mm Hg plot, the patterns are somewhat similar: most points fall at the same positions in both plots. This is because the pressure value increases correspondingly with time.

##### Task: Load the data and Compute the following descriptive statistics of the data:
##### 1. Mean
##### 2. Variance (or Standard Deviation)
##### 3. Median
##### 4. Kurtosis
##### 5. Skewness
##### 6. Range

In [None]:
# The airfoil file is tab-separated with no header row, so the column names
# are assigned after loading.
airfoil_columns = [
    "Frequency Hz",
    "Angle of attack degrees",
    "Chord length meters",
    "Free-stream velocity meters per second",
    "Suction side displacement thickness meters",
    "Pressure Level",
]
data2 = pd.read_csv('airfoil_self_noise.dat', delimiter="\t", header=None)
data2.columns = airfoil_columns
data2

In [None]:
# 1. Mean of each airfoil column.
data2.mean()

In [None]:
# 2. Variance of each column, using pandas' default estimator
# (documented default ddof=1, i.e. the sample variance).
data2.var()

In [None]:
# 3. Median of each column.
data2.median()

In [None]:
# 4. Kurtosis of each column, using pandas' default estimator.
data2.kurtosis()

In [None]:
# 5. Skewness of each column, using pandas' default estimator.
data2.skew()