In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate
from PIL import Image

# Question 1 : Matrix Manipulation and Operations



# Question 2 : Advanced Plotting

In [None]:
x = np.linspace(-10, 10, 500)
y = np.exp(np.sqrt(x)) * np.cos(x)

plt.figure(figsize=(8, 6))
plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.show()

In [None]:
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
ax.plot(y, x)
ax.set_rmax(2)
ax.set_rticks([0.5, 1, 1.5, 2])  # Less radial ticks
ax.set_rlabel_position(-22.5)  # Move radial labels away from plotted line
ax.grid(True)

ax.set_title("A line plot on a polar axis", va='bottom')
plt.show()

# Question 3 : Data Handling and Visualisation Tools

In [32]:
raw_df = pd.read_csv('./laptops.csv')
raw_df.head()

Unnamed: 0,CompanyName,TypeOfLaptop,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,MSI,Business Laptop,17.04068,IPS Panel Retina Display 2560x1600,Intel Core i7,12GB,512GB SSD,Intel Iris Xe Graphics,Linux,2.064834,35844.099371
1,Chuwi,2 in 1 Convertible,16.542395,Full HD,Intel Core i5,12GB,128GB PCIe SSD,Intel Iris Xe Graphics,No OS,4.060656,37019.059051
2,hp,WorkStation,17.295294,Full HD,Intel Xeon E3-1505M,8GB,1TB HDD,Intel Iris Xe Graphics,Linux,2.901689,33329.360341
3,MSI,2 in 1 Convertible,11.526203,2K,Intel Core i7,16GB,512GB NVMe SSD,Intel Iris Xe Graphics,Windows 10,2.914843,68631.102486
4,Microsoft,Gaming,12.649634,Full HD,Intel Core i5,8GB,512GB SSD,AMD Radeon RX 5600M,Windows 10,4.341995,33842.479566


In [37]:
df = raw_df.copy()

df.rename(str.lower, axis='columns', inplace=True)

df.rename(columns={
    "companyname": "brand",
    "typeoflaptop": "category",
    "inches": "screen_size",
    "screenresolution": "screen_resolution",
    "memory": "storage",
    "opsys": "os"
}, inplace=True)
df['screen_size'] = df['screen_size'].astype(int)
df.insert(list(df.columns).index('storage'), 'storage_capacity', df['storage'].str.split(' ').str[0])
df['storage'] = df['storage'].str.replace(r'\b(\d+(?:TB|GB))\b', '', regex=True)
df.insert(list(df.columns).index('screen_resolution'), 'touchscreen', df['screen_resolution'].apply(lambda x: 'IPS Panel Retina Display 2560x1600' in x))
# Discard Panel Type and Aspect Ratio
# Extract "Touchscreen" feature
# Rename Full HD & HD 1920x1080 to 1080p
df.replace({"screen_resolution": {
    "IPS Panel Retina Display 2560x1600": '2K',
    'HD 1920x1080 ': 'Full HD',
    'IPS Panel Full HD / Touchscreen 1920x1080': 'Full HD'
    
}}, inplace=True)
df.replace({"cpu": {
    "Intel Xeon E3-1505M ": 'Intel Xeon',
    'Intel Atom x5-Z8550': 'Intel Atom',
    'Intel Celeron Dual Core 3855U ': 'Intel Celeron',
    'Intel Pentium Quad Core N4200': 'Intel Pentium',
    'AMD A9-Series 9420': 'AMD A9-Series'
    
}}, inplace=True)
df

Unnamed: 0,brand,category,screen_size,touchscreen,screen_resolution,cpu,ram,storage_capacity,storage,gpu,os,weight,price
0,MSI,Business Laptop,17,True,2K,Intel Core i7,12GB,512GB,SSD,Intel Iris Xe Graphics,Linux,2.064834,35844.099371
1,Chuwi,2 in 1 Convertible,16,False,Full HD,Intel Core i5,12GB,128GB,PCIe SSD,Intel Iris Xe Graphics,No OS,4.060656,37019.059051
2,hp,WorkStation,17,False,Full HD,Intel Xeon,8GB,1TB,HDD,Intel Iris Xe Graphics,Linux,2.901689,33329.360341
3,MSI,2 in 1 Convertible,11,False,2K,Intel Core i7,16GB,512GB,NVMe SSD,Intel Iris Xe Graphics,Windows 10,2.914843,68631.102486
4,Microsoft,Gaming,12,False,Full HD,Intel Core i5,8GB,512GB,SSD,AMD Radeon RX 5600M,Windows 10,4.341995,33842.479566
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,hp,Gaming,16,False,Full HD,AMD A9-Series,8GB,128GB,SSD,AMD Radeon RX 5600M,Windows 10,4.685053,40254.533272
996,Microsoft,WorkStation,17,False,4K,Intel Celeron,8GB,2TB,SATA SSD,NVIDIA GeForce GTX 1650,macOS,3.934182,57272.697780
997,lenevo,NoteBook,13,False,Full HD,AMD Ryzen 7,16GB,128GB,PCIe SSD,NVIDIA GeForce GTX 1650,Windows 10,4.047468,48214.606894
998,Asus,UltraBook,11,True,2K,Intel Core i9,12GB,1TB,Fusion Drive,Intel Iris Xe Graphics,No OS,3.669825,58926.553683


# Question 4 : Matrix Manipulation and Polynomial Operations

# Question 5 : Advanced Data Manipulation, Plotting and Visualisation

In [None]:
M = np.matrix('2, 3, 4; 5, 6, 7; 8, 9, 10')

V, W = M[:, 2], M[0, :]


print(f"""
Matrix M:
{M}

Vector V: \n{V}
Vector W: {W}
""")

print(f"""
Minimum values of each row: \n{M.min(1)}
Minimum values of each column: {M.min(0)}
Maxmimum values of each row: \n{M.max(1)}
Maxmimum values of each column: {M.max(0)}
""")

In [None]:
x = np.arange(0, 121)
y = x

plt.figure(figsize=(8, 6))
plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.show()

In [None]:
x = np.linspace(0, 2 * np.pi, 1000)
y = 2j * np.cos(x)

plt.figure(figsize=(8, 6))
plt.plot(x, y.real, label='Real')
plt.plot(x, y.imag, label='Imaginary')
plt.xlabel('x')
plt.ylabel('y')
plt.title('2j * cos(x)')
plt.legend()
plt.grid(True)
plt.show()

# Question 6 : Advanced Plotting and Code Efficiency

In [None]:
x = np.arange(-7, 7, 0.02)
p = [4, 3, -120, 3, -8, 110]
y = np.polyval(p, x)

plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.title('Polynomial Coefficients p=4x^5 + 3x^4 - 120x^3 + 3x^2 - 8x + 110')


y = np.sinh(x)
plt.subplot(1, 2, 2)

plt.plot(x, y, color="green")
plt.xlabel('x')
plt.ylabel('sinh(x)')
plt.title('sinh(x)')


plt.tight_layout()
plt.show()

# Question 7 : Vectorisation

In [None]:
X = np.arange(1, 1001)
Y = np.cumsum(X)
Z = np.where(Y % 2 == 0, 'Even', 'Odd')

Code vectorization is a fundamental optimization technique in computer science that involves restructuring code to perform operations on entire arrays or data sets simultaneously, rather than processing individual elements sequentially. It leverages hardware capabilities and specialized libraries to execute computations in a parallel and efficient manner.

At its core, vectorization harnesses the concept of SIMD (Single Instruction, Multiple Data) processing, where a single instruction operates on multiple data elements simultaneously. This approach contrasts with traditional scalar operations that handle one piece of data at a time. By employing vectorized operations, computations can exploit the inherent parallelism within modern processors, executing tasks across multiple data elements concurrently.

The benefits of code vectorization are multifaceted. Firstly, it significantly enhances computational efficiency. Vectorized operations tap into specialized processor instructions optimized for parallel data processing, leading to faster execution times compared to iterative, element-wise computations. This efficiency becomes especially pronounced when dealing with large datasets or complex mathematical operations.

Moreover, vectorization streamlines code implementation. By abstracting away explicit loops and allowing operations to be applied directly to entire arrays or matrices, the code becomes more concise, readable, and maintainable. This not only reduces development time but also facilitates easier debugging and modification of algorithms.

Another advantage lies in hardware utilization. Vectorized code aligns well with modern processor architectures designed to handle bulk data operations efficiently. Utilizing these hardware capabilities not only speeds up computations but also optimizes resource utilization.

Additionally, many high-level programming languages and libraries, such as NumPy in Python or MATLAB, provide robust support for vectorized operations. This availability of powerful tools encourages developers to employ vectorization techniques, further enhancing productivity and code performance.

In summary, code vectorization involves restructuring code to process data in parallel, exploiting hardware capabilities and specialized libraries. Its benefits encompass improved computational efficiency, streamlined code implementation, optimized hardware utilization, and support from high-level programming tools, collectively enhancing the speed, readability, and scalability of software applications.

# Question 8 : Working with Equations

# Question 9 : Processing Data

In [None]:
df = pd.read_excel('./weather_data.xls', index_col=0, skiprows=7)
pd.options.display.max_columns = df.shape[1]
df

In [None]:
df.describe()

In [None]:
# Ensure that all values are positive
df = df.abs()

# Mark abnormal values as null 
df[df > 900] = np.nan

# Correct misspelled month
df.rename(index={'Fenruary': 'February'}, inplace=True)
df

In [None]:
year = df.index.name
months = df.index
days = df.columns
precipitation_per_month = list(zip(months, df.sum(axis=1)))
precipitation_per_day = list(zip(days, df.sum(axis=0)))
total_precipitation = sum(total for _, total in precipitation_per_month)
average_precipitation = total_precipitation / len(months)

print(f'\na)\tTotal precipitation in each month:\n')
print(tabulate(precipitation_per_month, headers=['Month', 'Total Precipitation (mm)']))
print(f'\nb)\tTotal precipitation recorded for year {year}: {total_precipitation} mm')
print(f'\nc)\tThe month that record the maximum precipitation during the year: {max(precipitation_per_month, key=lambda x: x[1])}')
print(f'\n\tThe day that record the maximum precipitation during the year: {max(precipitation_per_day, key=lambda x: x[1])}')
print(f'\nd)\tThe average precipitation for year {year}: {average_precipitation}')

# Question 10 : FFT Applied to Images

In [None]:
# Import image
image = plt.imread('./python.png')
# Transform image to grayscale
grayscale_image = np.mean(image, axis=2)

fft_image = np.fft.fft2(grayscale_image)
fft_shifted = np.fft.fftshift(fft_image)
log_abs_fft = np.log(np.abs(fft_shifted))

plt.figure(figsize=(10, 5))

# Plot grayscale image
plt.subplot(1, 2, 1)
plt.imshow(grayscale_image, cmap='gray')
plt.title('Original Image')
plt.axis('off')

# Plot transformed image
plt.subplot(1, 2, 2)
plt.imshow(log_abs_fft, cmap='gray')
plt.title('Transformed Image')
plt.axis('off')

plt.tight_layout()
plt.show()