# Preprocessing with NumPy
Preprocessing with NumPy is an essential step in preparing data for analysis, machine learning, or statistical modeling. It involves cleaning, transforming, and organizing data to ensure it's in a format that's ready for further use. NumPy, with its array-based operations, is a powerful tool for performing data preprocessing tasks efficiently.



In [2]:
import numpy as np

## Loading Data
You can load data from various sources using NumPy.

In [8]:
data = np.genfromtxt("Lending-Company-Numeric-Data-NAN-short.csv",
                                            delimiter=';', skip_header=1)
print(data)

[[ 4. nan  6.]
 [ 7.  8. nan]
 [nan 10. 11.]]


## Checking for Missing Values

In [9]:
import numpy as np

# Small 3x3 sample with two missing entries.
data = np.array([
    [1, 2, np.nan],
    [4, np.nan, 6],
    [7, 8, 9],
])

# np.isnan() yields a boolean mask; summing it counts the True (NaN) cells.
nan_mask = np.isnan(data)
num_nans = nan_mask.sum()

print(f"Number of NaN values: {num_nans}")

Number of NaN values: 2


In [15]:
data = np.genfromtxt("Lending-Company-Numeric-Data-NAN-short.csv",
                                            delimiter = ';',
                                            filling_values = 0)
print(data)

## Filling_values substitutes every nan with the value we're passing (0 in this case)

[[ 1.  2.  3.]
 [ 4.  0.  6.]
 [ 7.  8.  0.]
 [ 0. 10. 11.]]


In [14]:
np.isnan(data).sum()

## All the previously missing values are now 0s.

np.int64(0)

In [16]:
data = np.genfromtxt("Lending-Company-Numeric-Data-NAN-short.csv",
                              
                                            delimiter = ';') 
data
# We need to reimport the dataset since all the missing values are filled up. 

array([[ 1.,  2.,  3.],
       [ 4., nan,  6.],
       [ 7.,  8., nan],
       [nan, 10., 11.]])

In [17]:
temporary_fill = np.nanmax(data).round(2) + 1
# np.nanmax() calculates the maximum value of the array while ignoring any NaN values.
# This means that if `data` contains missing values (NaN),
# they are not considered when calculating the maximum value. Only the valid numbers are used.

# round(2): this step rounds the maximum value to two decimal places,
#  making it easier to handle in further calculations.

# We use nanmax(), since max() returns nan when the array contains missing values.
# We want a value greater than the max, since we have to be certain it does not already occur in the dataset.

In [18]:
temporary_fill

np.float64(12.0)

In [22]:
data_NAN = np.genfromtxt("Lending-Company-Numeric-Data-NAN-short.csv",
                                            delimiter = ';',
                                            filling_values = temporary_fill) 
data_NAN
# Filling up all the missing values with the temporary filler. 

array([[ 1.,  2.,  3.],
       [ 4., 12.,  6.],
       [ 7.,  8., 12.],
       [12., 10., 11.]])

In [23]:
np.isnan(data_NAN)

array([[False, False, False],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [24]:
np.isnan(data_NAN).sum()

np.int64(0)

In [25]:
data_NAN

array([[ 1.,  2.,  3.],
       [ 4., 12.,  6.],
       [ 7.,  8., 12.],
       [12., 10., 11.]])

## Substituting Missing Values

np.where() is a conditional function in NumPy that returns elements chosen from two options:

If the condition is True, it selects the value x.

If the condition is False, it selects the value y.

In [37]:
# 1D sample containing a single missing entry.
data = np.array([1, 2, np.nan, 4, 5])
print(data)  # Output: [ 1.  2. nan  4.  5.]

# Build the NaN mask first, then pick 0 where it is True and the
# original value everywhere else.
missing = np.isnan(data)
data_filled = np.where(missing, 0, data)
print(data_filled)  # Output: [1. 2. 0. 4. 5.]


[ 1.  2. nan  4.  5.]
[1. 2. 0. 4. 5.]


In [38]:
mean_value = np.nanmean(data)  # Calculate mean excluding NaN
data_filled = np.where(np.isnan(data), mean_value, data)
print(data_filled)  # Output: [1. 2. 3. 4. 5.]


[1. 2. 3. 4. 5.]


In [39]:
median_value = np.nanmedian(data)  # Calculate median excluding NaN
data_filled = np.where(np.isnan(data), median_value, data)
print(data_filled)  # Output: [1. 2. 3. 4. 5.]


[1. 2. 3. 4. 5.]


## Reshaping
In reshaping, NumPy simply flattens the original array and fills the new shape (6 rows and 1043 columns) in row-major order.

Reshaping an array means changing the shape of the array without modifying its data or the order of its elements: the same values, read in row-major order, are simply regrouped into the new shape.
In reshaping, the elements keep the order in which they appear in the flattened array; the underlying memory layout normally stays the same (NumPy returns a view whenever possible), and only the way the values are split into rows and columns changes.
Reshaping can change the number of rows and columns (or more generally, the shape of the array), so a value generally ends up at a different row/column position than before.


In [40]:
lending_co_data_numeric = np.loadtxt("Lending-company-Numeric.csv", delimiter = ',')

In [41]:
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]], shape=(1043, 6))

In [42]:
lending_co_data_numeric.shape

(1043, 6)

In [43]:
np.reshape(lending_co_data_numeric, (6,1043))

# Reshaping (1043,6) to (6,1043) is not the same as transposing.

array([[ 2000.,    40.,   365., ...,   365.,  1581.,  3041.],
       [12277.,  2000.,    40., ...,    50.,   365.,  5350.],
       [ 6850., 15150.,  1000., ...,  2000.,    40.,   365.],
       [ 3101.,  4351., 16600., ..., 16600.,  2000.,    40.],
       [  365.,  3441.,  4661., ...,  8450., 22250.,  2000.],
       [   40.,   365.,  3701., ...,  4601.,  4601., 16600.]],
      shape=(6, 1043))

In [44]:
np.transpose(lending_co_data_numeric)

array([[ 2000.,  2000.,  1000., ...,  2000.,  1000.,  2000.],
       [   40.,    40.,    40., ...,    40.,    40.,    40.],
       [  365.,   365.,   365., ...,   365.,   365.,   365.],
       [ 3121.,  3061.,  2160., ...,  4201.,  2080.,  4601.],
       [ 4241.,  4171.,  3280., ...,  5001.,  3320.,  4601.],
       [13621., 15041., 15340., ..., 16600., 15600., 16600.]],
      shape=(6, 1043))

# **Reshaping vs Transposing in NumPy**

Reshaping and transposing a NumPy array are not the same operations. While both modify the structure of the array, they do so in different ways.

---

## **Key Differences**

1. **Reshaping**:
   - Changes the shape of the array by regrouping its elements, read in row-major order, into the new shape.
   - Does not preserve the row/column position of elements: values from one original row can end up in different rows.

2. **Transposing**:
   - Flips the rows and columns of the array (or swaps axes for higher dimensions).
   - Preserves the row/column relationships: the element at position (i, j) moves to position (j, i).

---

## **Example**



In [46]:
import numpy as np
# A 2x6 sample: 12 values, so it can be regrouped as 3x4.
arr = np.array([
    [1, 2, 3, 4, 5, 500],
    [4, 5, 6, 7, 8, 800],
])
print(arr.shape)

# Reshape: same row-major element order, regrouped four per row.
reshaped = np.reshape(arr, (3, 4))
print("reshaped")
print(reshaped)

# Transpose: rows and columns swap, so element (i, j) moves to (j, i).
transposed = np.transpose(arr)
print("transposed")
print(transposed)

(2, 6)
reshaped
[[  1   2   3   4]
 [  5 500   4   5]
 [  6   7   8 800]]
transposed
[[  1   4]
 [  2   5]
 [  3   6]
 [  4   7]
 [  5   8]
 [500 800]]


## Removing Values

In [47]:
import numpy as np

# Five-element 1D sample.
data = np.array([1, 2, 3, 4, 5])

# np.delete() returns a new array without the element at index 2;
# `data` itself is left untouched.
result = np.delete(data, obj=2)
print(result)  # Output: [1 2 4 5]


[1 2 4 5]


In [48]:
# 3x3 sample used by the deletion examples.
data_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# axis=0 addresses rows, so index 1 removes the middle row.
row_to_drop = 1
result = np.delete(data_2d, row_to_drop, axis=0)
print(result)
# Output:
# [[1 2 3]
#  [7 8 9]]


[[1 2 3]
 [7 8 9]]


In [None]:
# Delete the third column (index 2)
result = np.delete(data_2d, 2, axis=1)
print(result)
# Output:
# [[1 2]
#  [4 5]
#  [7 8]]


In [None]:
# Delete elements from index 1 to 4 (exclusive)
result = np.delete(data, slice(1, 4))
print(result)  # Output: [1 5]


In [None]:
# Delete the element at index 4 in the flattened array
result = np.delete(data_2d, 4)
print(result)  # Output: [1 2 3 4 6 7 8 9]


In [65]:
lending_co_data_numeric1=np.delete(lending_co_data_numeric, 0)
print(lending_co_data_numeric1)
print(lending_co_data_numeric1.shape)



# If you do not specify the axis parameter,
#  np.delete() will flatten the array before performing the deletion.
#  This means that the entire array is considered as a 1D array, 
# and the deletion will remove an element from this 1D version of the array.



# As a result, the array is flattened, and the first element (index 0) is removed, 
# which will affect the total number of elements in the array.
#  The total number of elements decreases by one.



# Removes the first value of the flattened array. 

[   40.   365.  3121. ...  4601.  4601. 16600.]
(6257,)


In [70]:
# 4x6 sample array.
arr = np.array([
    [1, 2, 3, 4, 5, 500],
    [4, 5, 6, 7, 8, 800],
    [400, 59, 26, 7, 8, 800],
    [34, 533, 6, 7, 8, 800],
])

# By setting an axis, we can delete several entire rows or columns at once.
# [0, 2] is a list of row indices (axis=0): rows 0 and 2 are removed,
# leaving the original rows 1 and 3.
rows_to_drop = [0, 2]
arr1 = np.delete(arr, rows_to_drop, axis=0)

print(arr1)

[[  4   5   6   7   8 800]
 [ 34 533   6   7   8 800]]


In [66]:
np.delete(np.delete(lending_co_data_numeric, [0,2,4] , axis = 1), [0,2,-1] , axis = 0)

# We can simultaneously delete rows AND columns. 

array([[   40.,  3061., 15041.],
       [   40.,  3041., 15321.],
       [   50.,  3470., 13720.],
       ...,
       [   40.,  4240., 16600.],
       [   40.,  4201., 16600.],
       [   40.,  2080., 15600.]])

## Sorting Data

In [49]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x)

array([1, 2, 3, 4, 5])

### Sorting along rows or columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multidimensional array using the ``axis`` argument. For example:

In [3]:
# np.random.RandomState(42) constructs a seeded random number generator.
# It does not have any effect on the freestanding functions in np.random,
# so it must be used explicitly through its own methods:
rand = np.random.RandomState(seed=42)

# Integers drawn from [0, 10) -- low is inclusive, high is exclusive --
# arranged as a 4x6 array.
X = rand.randint(low=0, high=10, size=(4, 6))
print(X)

[[6 3 7 4 6 9]
 [2 6 7 4 3 7]
 [7 2 5 4 1 7]
 [5 1 4 0 9 5]]


In [4]:
# sort each column of X
np.sort(X, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [5]:
# sort each row of X
np.sort(X, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

In [6]:
np.sort(X, axis=None) #axis works differently
# Flatten the array and sort

array([0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7,
       9, 9])

## Partial Sorts: Partitioning

Sometimes we're not interested in sorting the entire array, but simply want to find the *k* smallest values in the array. NumPy provides this in the ``np.partition`` function. ``np.partition`` takes an array and a number *K*; the result is a new array with the smallest *K* values to the left of the partition, and the remaining values to the right, in arbitrary order:

np.partition(x, k) ensures that the element at the k-th index is in its correct position as it would be in the sorted array.

In [53]:
x = np.array([7, 200, 300, 11, 16, 51, 4])

# After partitioning at kth=5, position 5 holds the value it would have in
# the fully sorted array; no larger value sits before it and no smaller
# value after it, but neither side is guaranteed to be sorted.
result = np.partition(x, kth=5)
print(result)


[  4   7  11  16  51 200 300]


In [55]:
import numpy as np

X = np.array([[7, 2, 3, 1, 6, 5, 4],
              [9, 8, 7, 6, 5, 4, 3]])

# Partition the 2D array along rows (axis=1) at index 2
result = np.partition(X, 2, axis=1)
print(result)


[[1 2 3 4 5 6 7]
 [3 4 5 6 7 8 9]]


In [56]:
import numpy as np

X = np.array([
    [7, 2, 3, 1, 6, 5, 4],
    [9, 8, 7, 6, 5, 4, 3],
    [5, 6, 2, 9, 8, 7, 1],
])

# Partition every column (axis=0) at kth=2: row index 2 -- the third
# position -- receives the value it would hold if the column were sorted.
# With only three rows that is each column's maximum; the two rows above
# it hold the smaller values.
result = np.partition(X, kth=2, axis=0)
print(result)

[[5 2 2 1 5 4 1]
 [7 6 3 6 6 5 3]
 [9 8 7 9 8 7 4]]


## Argument Functions

### np.argsort()
# **`np.argsort()`**:

The **`np.argsort()`** function in NumPy is used to get the indices that would sort an array. Instead of returning the sorted values, it provides the positions of the elements in the sorted order.

---

## **Syntax**
```python
numpy.argsort(a, axis=-1, kind=None, order=None)
```

In [96]:
lending_co_data_numeric =  np.array([[1, 2, 3, 4, 5, 500],
                                          [4, 5, 6, 7, 8, 800],
                                          [400, 59, 26, 7, 8, 800],
                                          [34, 533, 6, 7, 8, 800]])

lending_co_data_numeric

array([[  1,   2,   3,   4,   5, 500],
       [  4,   5,   6,   7,   8, 800],
       [400,  59,  26,   7,   8, 800],
       [ 34, 533,   6,   7,   8, 800]])

In [97]:
np.argsort(lending_co_data_numeric) # rows

# np.argsort() is a function in NumPy that returns the indices that would sort an array.
#  In other words, it does not sort the array itself, but instead, 
# it provides the indices that can be used to sort the array.

# The result of np.argsort() has the same shape as the input. With the default
#  axis=-1, each row holds the column indices that would arrange that row's
# elements in ascending order.

# Returns the order which will sort the array. 

array([[0, 1, 2, 3, 4, 5],
       [0, 1, 2, 3, 4, 5],
       [3, 4, 2, 1, 0, 5],
       [2, 3, 4, 0, 1, 5]])

In [98]:
np.sort(lending_co_data_numeric, axis = 0)

array([[  1,   2,   3,   4,   5, 500],
       [  4,   5,   6,   7,   8, 800],
       [ 34,  59,   6,   7,   8, 800],
       [400, 533,  26,   7,   8, 800]])

In [99]:
np.argsort(lending_co_data_numeric, axis = 0)
#  For each column, np.argsort(axis=0) returns the row indices
#  that would arrange the column elements in ascending order.

array([[0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1],
       [3, 2, 3, 2, 2, 2],
       [2, 3, 2, 3, 3, 3]])

### np.argwhere()
# **`np.argwhere()`**: Explanation

The **`np.argwhere()`** function in NumPy is used to find the indices of elements in an array that satisfy a given condition. It returns the indices of the matching elements in a structured format.

---



In [57]:
lending_co_data_numeric = np.array([[1, 2, 3, 4, 5, 500],
                                          [4, 5, 6, 7, 8, 800],
                                          [400, np.nan, 0, 7, 8, 800],
                                          [34, 533, 6, 7, 8, 800]])

lending_co_data_numeric

array([[  1.,   2.,   3.,   4.,   5., 500.],
       [  4.,   5.,   6.,   7.,   8., 800.],
       [400.,  nan,   0.,   7.,   8., 800.],
       [ 34., 533.,   6.,   7.,   8., 800.]])

In [58]:
# Compare against 0 explicitly: this is a float array, and a comparison with
# False would only match the zeros because False compares equal to 0.
np.argwhere(lending_co_data_numeric == 0)

# Returns the [row, column] index of every element equal to 0.
# NaN is not equal to 0, so the missing value is not reported here.

array([[2, 2]])

In [63]:
lending_co_data_numeric

array([[  1.,   2.,   3.,   4.,   5., 500.],
       [  4.,   5.,   6.,   7.,   8., 800.],
       [400.,  nan,   0.,   7.,   8., 800.],
       [ 34., 533.,   6.,   7.,   8., 800.]])

In [62]:
np.argwhere(lending_co_data_numeric %2 == 0)
# Returns the indices of the even values (NaN is excluded, since nan % 2 is nan).
# The condition can be any boolean expression.

array([[0, 1],
       [0, 3],
       [0, 5],
       [1, 0],
       [1, 2],
       [1, 4],
       [1, 5],
       [2, 0],
       [2, 2],
       [2, 4],
       [2, 5],
       [3, 0],
       [3, 2],
       [3, 4],
       [3, 5]])

In [64]:
np.isnan(lending_co_data_numeric).sum()

np.int64(1)

In [65]:
lending_co_data_numeric = np.array([[1, 2, 3, 4, 5, 500],
                                    [4, 5, 6, 7, 8, 800],
                                    [400, np.nan, 0, 7, 8, 800],
                                    [34, 533, 6, 7, 8, 800]])
np.argwhere(np.isnan(lending_co_data_numeric))



# Returns the coordinates of all the missing values within the array. 

array([[2, 1]])

## Shuffling Data

Shuffling data is a common step in data preprocessing to ensure that the order of the data does not influence the outcome of your model or analysis. NumPy provides tools for shuffling arrays easily.

---

## **Shuffling with `np.random.shuffle()`**

### **Syntax**
```python
np.random.shuffle(arr)
```

In [7]:
import numpy as np

# Create a 1D array
arr = np.array([1, 2, 3, 4, 5])

# Shuffle the array
np.random.shuffle(arr)

print(arr)

[1 3 4 2 5]


In [8]:
import numpy as np

# Create a 2D array
arr = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

# Shuffle the array
np.random.shuffle(arr)

print(arr) #Output (rows shuffled, columns intact):

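# np.random.shuffle() only shuffles along the first axis (rows); it has no option for shuffling columns
# (the newer np.random.Generator.shuffle() does accept axis=1).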


[[4 5 6]
 [7 8 9]
 [1 2 3]]


You can transpose the array, shuffle the rows of the transposed array (which correspond to the columns of the original array), and then transpose it back.


In [66]:
import numpy as np

# Create a 2D array
arr = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

print(arr.T)
# Shuffle the columns
np.random.shuffle(arr.T)

print(arr)

[[1 4 7]
 [2 5 8]
 [3 6 9]]
[[3 2 1]
 [6 5 4]
 [9 8 7]]


## Casting

In [111]:
lending_co_data_numeric = np.loadtxt("Lending-company-Numeric.csv", delimiter = ',') 
lending_co_data_numeric

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [112]:
lending_co_data_numeric.astype(dtype = np.int32)

# Creates an integer version of the array. 

array([[ 2000,    40,   365,  3121,  4241, 13621],
       [ 2000,    40,   365,  3061,  4171, 15041],
       [ 1000,    40,   365,  2160,  3280, 15340],
       ...,
       [ 2000,    40,   365,  4201,  5001, 16600],
       [ 1000,    40,   365,  2080,  3320, 15600],
       [ 2000,    40,   365,  4601,  4601, 16600]], dtype=int32)

In [113]:
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = str)

# We need to overwrite the variable in order to work with strings. 

In [114]:
lending_co_data_numeric

array([['2000.0', '40.0', '365.0', '3121.0', '4241.0', '13621.0'],
       ['2000.0', '40.0', '365.0', '3061.0', '4171.0', '15041.0'],
       ['1000.0', '40.0', '365.0', '2160.0', '3280.0', '15340.0'],
       ...,
       ['2000.0', '40.0', '365.0', '4201.0', '5001.0', '16600.0'],
       ['1000.0', '40.0', '365.0', '2080.0', '3320.0', '15600.0'],
       ['2000.0', '40.0', '365.0', '4601.0', '4601.0', '16600.0']],
      dtype='<U32')

In [None]:
type(lending_co_data_numeric)

In [115]:
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = np.float32)
lending_co_data_numeric.astype(dtype = np.int32)

## Strings that contain a decimal point (e.g. '2000.0') can't be cast directly to integers. We can go through floats (string -> float -> integer).

array([[ 2000,    40,   365,  3121,  4241, 13621],
       [ 2000,    40,   365,  3061,  4171, 15041],
       [ 1000,    40,   365,  2160,  3280, 15340],
       ...,
       [ 2000,    40,   365,  4201,  5001, 16600],
       [ 1000,    40,   365,  2080,  3320, 15600],
       [ 2000,    40,   365,  4601,  4601, 16600]], dtype=int32)

In [117]:
lending_co_data_numeric = np.loadtxt("Lending-company-Numeric.csv", delimiter = ',')
lending_co_data_numeric = lending_co_data_numeric.astype(dtype = str)
lending_co_data_numeric

# To showcase the other way to go from strings to integers,
#  we need to get the strings version of the array once again. 

array([['2000.0', '40.0', '365.0', '3121.0', '4241.0', '13621.0'],
       ['2000.0', '40.0', '365.0', '3061.0', '4171.0', '15041.0'],
       ['1000.0', '40.0', '365.0', '2160.0', '3280.0', '15340.0'],
       ...,
       ['2000.0', '40.0', '365.0', '4201.0', '5001.0', '16600.0'],
       ['1000.0', '40.0', '365.0', '2080.0', '3320.0', '15600.0'],
       ['2000.0', '40.0', '365.0', '4601.0', '4601.0', '16600.0']],
      dtype='<U32')

In [None]:
# astype() returns a new array instead of modifying the original, and a
# notebook cell only displays its last expression -- so the chained cast
# itself must be the final line for its result to be shown.
lending_co_data_numeric.astype(dtype=np.float32).astype(dtype=np.int32)

## We can chain methods in NumPy (string -> float -> integer in one line).

## Stripping Data

In [70]:
import numpy as np

# Sample identifiers that all carry an 'id_' prefix.
arr = np.array(['id_12345', 'id_67890', 'id_54321'])

# np.char.strip() treats its second argument as a set of characters: any
# leading or trailing 'i', 'd' or '_' is removed, which drops the prefix here.
chars_to_strip = 'id_'
stripped_array = np.char.strip(arr, chars_to_strip)

print("Stripped array:", stripped_array, sep="\n")


Stripped array:
['12345' '67890' '54321']


In [118]:
# np.char.strip() is the vectorised string function to use here; the
# np.chararray class exists only for backwards compatibility.
# The second argument is a set of characters removed from both ends of each
# string, not a literal prefix.
lending_co_total_price[:, 0] = np.char.strip(lending_co_total_price[:, 0], "id_")
lending_co_total_price[:, 1] = np.char.strip(lending_co_total_price[:, 1], "Product ")
lending_co_total_price[:, 2] = np.char.strip(lending_co_total_price[:, 2], "Location ")
lending_co_total_price

# Remove "id_" from the 1st column, as well as "Product " from the second and "Location " from the third one.


array([['1', 'B', '2'],
       ['2', 'B', '3'],
       ['3', 'C', '5'],
       ...,
       ['413', 'B', '135'],
       ['414', 'C', '200'],
       ['415', 'A', '8']], dtype='<U12')

## Unique 

In [128]:
import numpy as np

lending_co_data_numeric = np.array([
    [1, 2, 3],
    [4, 2, 6],
    [7, 8, 9],
    [10, 8, 12],
    [13, 2, 15]
])

In [129]:
unique_values, indices, counts = np.unique(
    lending_co_data_numeric[:, 1], return_counts=True, return_index=True)
print("Unique Values:", unique_values)
print("Indices:", indices)
print("Counts:", counts)


# Unique -> returns the unique values within the array in increasing order

# return_counts -> returns how many times each unique value appears in the array

# return_index -> returns the index of the first encounter with each unique value

Unique Values: [2 8]
Indices: [0 2]
Counts: [3 2]


In [130]:
array_example = np.array(["a1", "a3","A1","A3","A3","AA1","B1","A2","B1","A2","B2","B2", "B3","a2","a3","B3","B3","a3" ])
np.unique(array_example)

# If the values of the array are text, the unique function sorts them in "alphabetical" order by their ASCII codes. 

array(['A1', 'A2', 'A3', 'AA1', 'B1', 'B2', 'B3', 'a1', 'a2', 'a3'],
      dtype='<U3')