# Build a Minimal DataFrame-like Structure Using NumPy

# Import the basic libraries 

In [8]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plotting configuration: select seaborn's "darkgrid" style.
sns.set_style(style="darkgrid")

from miniframe import Miniframe

In [9]:
# Build a small synthetic dataset: 10 records of (ID, Name, Height, Weight).
# NOTE: no random seed is set in this cell, so each run yields different records.
names = ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Hannah', 'Ian', 'Julia']

# One tuple per record. Tuple elements are evaluated left to right, so the
# random draws for each record happen in the order: name, height, weight.
data = [
    (
        record_id,
        # Each record draws a single name independently, so names can repeat.
        str(np.random.choice(names, replace=False)),
        # Height in cm: normal distribution, mean 165, std dev 10, 1 decimal.
        round(np.random.normal(loc=165, scale=10), 1),
        # Weight in kg: normal distribution, mean 65, std dev 12, 1 decimal.
        round(np.random.normal(loc=65, scale=12), 1),
    )
    for record_id in range(1, 11)
]

# Field layout for the structured array: one (field name, type code) pair per column.
dtype = [('ID', 'i4'), ('Name', 'U10'), ('Height', 'f4'), ('Weight', 'f4')]

data

[(1, 'Alice', 176.5, 58.0),
 (2, 'David', 157.4, 57.3),
 (3, 'Alice', 151.8, 60.5),
 (4, 'Julia', 154.4, 70.8),
 (5, 'David', 160.4, 65.4),
 (6, 'Alice', 164.9, 49.1),
 (7, 'Julia', 156.5, 64.9),
 (8, 'Ian', 167.2, 56.4),
 (9, 'Ian', 165.0, 29.0),
 (10, 'Alice', 154.5, 70.7)]

- `'i4'` means “4-byte (i.e., 32-bit) signed integer.”
- `'U10'` means “Unicode string of maximum length 10.”
- `'f4'` means “4-byte (i.e., 32-bit) float.”

In [10]:
# Convert the list of record tuples into a structured array by wrapping it
# in a Miniframe, using the field layout given by `dtype`.
df = Miniframe(data=data, dtype=dtype)
df

[( 1, 'Alice', 176.5, 58. ) ( 2, 'David', 157.4, 57.3)
 ( 3, 'Alice', 151.8, 60.5) ( 4, 'Julia', 154.4, 70.8)
 ( 5, 'David', 160.4, 65.4) ( 6, 'Alice', 164.9, 49.1)
 ( 7, 'Julia', 156.5, 64.9) ( 8, 'Ian', 167.2, 56.4)
 ( 9, 'Ian', 165. , 29. ) (10, 'Alice', 154.5, 70.7)]

In [11]:
# Show the first 3 records with head()
df.head(n=3)

array([(1, 'Alice', 176.5, 58. ), (2, 'David', 157.4, 57.3),
       (3, 'Alice', 151.8, 60.5)],
      dtype=[('ID', '<i4'), ('Name', '<U10'), ('Height', '<f4'), ('Weight', '<f4')])

In [12]:
# Show the last 3 records with tail()
df.tail(n=3)

array([( 8, 'Ian', 167.2, 56.4), ( 9, 'Ian', 165. , 29. ),
       (10, 'Alice', 154.5, 70.7)],
      dtype=[('ID', '<i4'), ('Name', '<U10'), ('Height', '<f4'), ('Weight', '<f4')])

In [13]:
# Inspect the shape. Note that shape is called as a method on Miniframe here;
# the result below is a 1-D shape with one entry per record.
df.shape()

(10,)

In [14]:
# The size: total number of values in the structured array.
# For this dataset the result is 10 records x 4 fields = 40.
df.size()

40