# 6 Importing and Saving Data with NumPy
## 6_5 np.savez()
- numpy.savez(file, *args, **kwds)

- Save several arrays into a single file in uncompressed .npz format.
- Provide arrays as keyword arguments to store them under the corresponding name in the output file: savez(fn, x=x, y=y).
- If arrays are specified as positional arguments, i.e., savez(fn, x, y), their names will be arr_0, arr_1, etc.

In [1]:
import numpy as np
np.__version__

'1.26.2'

In [2]:
# Functions

def show_attr(arrnm: str) -> str:
    """Return a one-line summary of an array's basic attributes.

    ``arrnm`` is the name of an array visible at module level (or any
    expression yielding one), given as a string.  The same string is used
    as the label of the summary, which has the form
    ``' <arrnm>: | shape: ... | ndim: ... | size: ... | dtype: ... | itemsize: ... '``.

    WARNING: the argument is passed to ``eval``.  Only call this with
    trusted, hard-coded strings -- never with user-supplied input.
    """
    # Evaluate the expression exactly once, against module globals only, so
    # that (a) a compound expression such as 'a + b' is summarised as a whole
    # instead of being spliced into 'a + b.shape', and (b) this function's
    # own local names cannot shadow a global array of the same name.
    arr = eval(arrnm, globals())

    fields = ('shape', 'ndim', 'size', 'dtype', 'itemsize')
    summary = ''.join(f'| {name}: {getattr(arr, name)} ' for name in fields)

    return f' {arrnm}: {summary}'


In [9]:
# Read the CSV as text: comma-delimited, every field kept as a string
# (dtype=str), so the header row is loaded as ordinary data (it is the
# first row of the printed array).
lend_co_LT = np.genfromtxt('lending-co-LT.csv',
                           delimiter=',',
                           dtype=str)

# One-line summary: shape, ndim, size, dtype and itemsize of the array.
print(show_attr('lend_co_LT') + '\n')

print(lend_co_LT)

 lend_co_LT: | shape: (1044, 7) | ndim: 2 | size: 7308 | dtype: <U14 | itemsize: 56 

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [4]:
# Read the numeric dataset; this file is semicolon-delimited.  No dtype is
# passed here, and the loaded array is float64 with nan entries (see the
# recorded output).
lending_co_NAN = np.genfromtxt('Lending-Company-Numeric-Data-NAN.csv',
                               delimiter=';')

# One-line summary: shape, ndim, size, dtype and itemsize of the array.
print(show_attr('lending_co_NAN') + '\n')

# Bare expression as the last line of the cell: the notebook echoes its repr.
lending_co_NAN

 lending_co_NAN: | shape: (1043, 6) | ndim: 2 | size: 6258 | dtype: float64 | itemsize: 8 



array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [5]:
# Read the same CSV a second time under a different name, with the same
# delimiter and dtype, to obtain a second (duplicate) string array.
lend_co_LTb = np.genfromtxt('lending-co-LT.csv',
                            delimiter=',',
                            dtype=str)

# One-line summary: shape, ndim, size, dtype and itemsize of the array.
print(show_attr('lend_co_LTb') + '\n')

print(lend_co_LTb)

 lend_co_LTb: | shape: (1044, 7) | ndim: 2 | size: 7308 | dtype: <U14 | itemsize: 56 

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [6]:
# Sanity check: both arrays were imported from the same file, so they
# should compare equal.
np.array_equal(lend_co_LT, lend_co_LTb)

True

In [10]:
# Store the three ndarrays in a single file.  They are passed as positional
# arguments, so they are saved under the default names arr_0, arr_1, arr_2.
np.savez('ThreeArr', lend_co_LT, lending_co_NAN, lend_co_LTb)


In [11]:
# Load the archive written by np.savez(); it was saved as 'ThreeArr' and is
# read back here as 'ThreeArr.npz'.
# NOTE(review): the object returned by np.load for an .npz presumably keeps
# the file open until closed -- consider `with np.load(...) as data:` or an
# explicit .close(); confirm against the NumPy documentation.
three_arr_loaded = np.load('ThreeArr.npz')

In [16]:
# Retrieve each stored array by its default name and compare them.
array_1 = three_arr_loaded['arr_0']
array_2 = three_arr_loaded['arr_1']
array_3 = three_arr_loaded['arr_2']

# `display` is not imported in this notebook; presumably the IPython/Jupyter
# built-in that renders each argument in turn.
display(array_1, array_2, array_3)

# The first and third arrays came from the same CSV, so they should be equal.
np.array_equal(array_1, array_3)

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

True

In [17]:
# np.savez() with keyword arguments: each array is stored under the name of
# its keyword (ltori, w_nan, ltb) instead of arr_0, arr_1, ...
# The same file name 'ThreeArr' is used as in the positional example.
np.savez('ThreeArr', ltori = lend_co_LT,
         w_nan = lending_co_NAN, ltb = lend_co_LTb)

In [18]:
# Reload the archive and list the names under which the arrays are stored.
three_arr_loaded = np.load('ThreeArr.npz')
three_arr_loaded.files

['ltori', 'w_nan', 'ltb']

In [19]:
# Retrieve each array by the keyword name it was saved under and compare them.
array_1 = three_arr_loaded['ltori']
array_2 = three_arr_loaded['w_nan']
array_3 = three_arr_loaded['ltb']

# `display` is not imported in this notebook; presumably the IPython/Jupyter
# built-in that renders each argument in turn.
display(array_1, array_2, array_3)

# 'ltori' and 'ltb' came from the same CSV, so they should be equal.
np.array_equal(array_1, array_3)

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

True