In [2]:
import numpy as np
# Build a structured NumPy array: every element is a record with named, typed
# fields, so one array can hold heterogeneous data (much like a row-oriented
# table in SQL or a pandas DataFrame).
data = np.array([(1, 90), (2, 75), (3, 85)], dtype=[('id', float), ('score', int)])
print()  # emits an empty line ahead of the column output printed below

# ---------------- Heterogeneous data ----------------------
# Heterogeneous data is data made up of different data types, structures,
# formats, or sources.
# Such data can be ambiguous (the same value may carry different meanings) and
# of low quality because of missing values, high redundancy, and variability
# in types and formats.
# Managing it raises challenges such as data integration, privacy, quality,
# and computational complexity.
# --------------------------------------------------------

# B. dtype explanation
#
# dtype=[('id', float), ('score', int)] defines:
#
#   * a field named "id" that stores float values
#     (which is why the ids print as 1., 2., 3.);
#   * a field named "score" that stores int values.
#
# Declare the field as ('id', int) instead if the ids should stay whole numbers.
# With this dtype NumPy treats the array like a structured table.

# Indexing with a field name returns that field for every record.
print(data['id'])     # Extracts only the 'id' column
print(data['score'])  # Extracts only the 'score' column



[1. 2. 3.]
[90 75 85]


In [3]:
# One more example: three fields of different kinds in a single record.
# 'U10' is a fixed-width Unicode string field holding at most 10 characters.
data2 = np.array(
    [(1, 'Alice', 90.5), (2, 'Bob', 75.0), (3, 'Charlie', 85.3)],
    dtype=[
        ('id', int),
        ('name', 'U10'),
        ('score', float),
    ],
)

# Selecting by field name yields the 'name' column for all records.
print(data2['name'])


['Alice' 'Bob' 'Charlie']


In [None]:
# A structured array can also be filled field by field from ordinary NumPy arrays.
random = np.random.default_rng(7)  # seeded generator, so the draws below are repeatable

# Field layout of the structured array: an integer 'id' and a float 'score'.
dtype = [('id', int), ('score', float)]

# Source columns. `integers` excludes the upper bound, so ids fall in 1..99.
ids = random.integers(1, 100, size=(2, 3))
print('ids:\n',ids,'\n')
scores = 100 * random.random((2, 3))  # uniform floats scaled to [0, 100)
print('scores:\n',scores)

# Zero-filled 2x3 structured array: because of the compound dtype, every
# element is a record holding both fields and is displayed like a tuple, e.g. (0, 0.).
data22 = np.zeros(ids.shape, dtype=dtype)
print(f'\ninitial data22:\n {data22}\n')

# Assigning through a field name fills that field for all elements at once.
data22['score'] = scores
data22['id'] = ids
print(f'\ndata22 after assignments:\n {data22}\n')

ids:
 [[94 62 68]
 [89 58 77]] 

scores:
 [[22.520719   30.01662849 87.35534454]
 [ 0.52653046 82.12284184 79.70694288]]

initial data22:
 [[(0, 0.) (0, 0.) (0, 0.)]
 [(0, 0.) (0, 0.) (0, 0.)]]


data22 after assignments:
 [[(94, 22.520719  ) (62, 30.01662849) (68, 87.35534454)]
 [(89,  0.52653046) (58, 82.12284184) (77, 79.70694288)]]

