In [1]:
import numpy as np

## Text and CSV Files

In [25]:
# Sequential integer ids 0..9, one per record.
id_ = np.arange(0, 10)

In [26]:
# Ten uniform samples from [0, 1), one per record.
value = np.random.random(10)

In [27]:
# Ten random dates in 2022: add a vectorized day offset in [0, 365) to 1 January
# rather than building the array one scalar at a time in a Python loop.
date = np.datetime64('2022-01-01') + np.random.randint(365, size=10)

In [53]:
### using numpy records array
# np.rec is the public namespace for the record-array helpers; np.core is a
# private module whose direct use is deprecated as of NumPy 2.0.
data = np.rec.fromarrays(
    [id_, value, date],
    names='id, value, date',
    formats='i4, f4, U10')
data

rec.array([(0, 0.1931487 , '2022-11-09'), (1, 0.5871547 , '2022-09-12'),
           (2, 0.93555206, '2022-06-27'), (3, 0.87388146, '2022-03-09'),
           (4, 0.37168115, '2022-04-08'), (5, 0.98898107, '2022-05-28'),
           (6, 0.06707367, '2022-10-04'), (7, 0.8648645 , '2022-11-06'),
           (8, 0.5264596 , '2022-10-11'), (9, 0.8824468 , '2022-10-08')],
          dtype=[('id', '<i4'), ('value', '<f4'), ('date', '<U10')])

In [54]:
### using numpy structured array
# Pair the columns up row-wise, then let the structured dtype name and type each field.
data2 = np.array(
    [*zip(id_, value, date)],
    dtype=np.dtype({
        'names': ['id', 'value', 'date'],
        'formats': ['i4', 'f4', 'U10'],
    }),
)
data2

array([(0, 0.1931487 , '2022-11-09'), (1, 0.5871547 , '2022-09-12'),
       (2, 0.93555206, '2022-06-27'), (3, 0.87388146, '2022-03-09'),
       (4, 0.37168115, '2022-04-08'), (5, 0.98898107, '2022-05-28'),
       (6, 0.06707367, '2022-10-04'), (7, 0.8648645 , '2022-11-06'),
       (8, 0.5264596 , '2022-10-11'), (9, 0.8824468 , '2022-10-08')],
      dtype=[('id', '<i4'), ('value', '<f4'), ('date', '<U10')])

Now we save the data to a file:
### Saving a NumPy record array as a CSV file using np.savetxt

In [80]:
# Write the records as comma-separated text, one formatted row per record,
# with the column names supplied as the file header.
np.savetxt(
    './data.csv',
    data,
    fmt='%i, %.4f, %s',
    header='id, value, date',
)

### reading from a file using numpy.genfromtxt

In [81]:
# Read the CSV back into a structured array using the same field types it was
# written with. skip_header is a count of lines to skip, so pass the integer 1
# rather than a boolean.
read_data = np.genfromtxt(
    './data.csv',
    dtype='i4, f4, U10',
    delimiter=', ',
    skip_header=1,
)
read_data

array([(0, 0.1931, '2022-11-09'), (1, 0.5872, '2022-09-12'),
       (2, 0.9356, '2022-06-27'), (3, 0.8739, '2022-03-09'),
       (4, 0.3717, '2022-04-08'), (5, 0.989 , '2022-05-28'),
       (6, 0.0671, '2022-10-04'), (7, 0.8649, '2022-11-06'),
       (8, 0.5265, '2022-10-11'), (9, 0.8824, '2022-10-08')],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', '<U10')])

In [82]:
# Show the original record array again for comparison with read_data.
data

rec.array([(0, 0.1931487 , '2022-11-09'), (1, 0.5871547 , '2022-09-12'),
           (2, 0.93555206, '2022-06-27'), (3, 0.87388146, '2022-03-09'),
           (4, 0.37168115, '2022-04-08'), (5, 0.98898107, '2022-05-28'),
           (6, 0.06707367, '2022-10-04'), (7, 0.8648645 , '2022-11-06'),
           (8, 0.5264596 , '2022-10-11'), (9, 0.8824468 , '2022-10-08')],
          dtype=[('id', '<i4'), ('value', '<f4'), ('date', '<U10')])

Now that the data has been read back into a structured array, you may find that the second field has more than four digits after the decimal point, even though we specified four digits (`%.4f`) when exporting the CSV. The reason is that <ins>we use f4 as its data type when we read and write it</ins>: a 32-bit float cannot represent most four-digit decimals exactly, so NumPy fills in the remaining digits, but the four valid digits remain the same as in the file. You may also notice that we lost the field names, so let's specify them:

In [84]:
# Restore the field names; the array read from the file only has f0, f1, f2.
read_data.dtype.names = ('id', 'value', 'date')

In [85]:
# Display read_data again to inspect the field names in its dtype.
read_data

array([(0, 0.1931, '2022-11-09'), (1, 0.5872, '2022-09-12'),
       (2, 0.9356, '2022-06-27'), (3, 0.8739, '2022-03-09'),
       (4, 0.3717, '2022-04-08'), (5, 0.989 , '2022-05-28'),
       (6, 0.0671, '2022-10-04'), (7, 0.8649, '2022-11-06'),
       (8, 0.5265, '2022-10-11'), (9, 0.8824, '2022-10-08')],
      dtype=[('id', '<i4'), ('value', '<f4'), ('date', '<U10')])

## .npy or .npz
These formats save the data in binary form.

In [87]:
# A 3x3 matrix of uniform samples from [0, 1).
array = np.random.random((3, 3))
array

array([[0.81907766, 0.84366377, 0.45966812],
       [0.26602213, 0.0448599 , 0.40899434],
       [0.9477299 , 0.32208344, 0.12493178]])

In [88]:
# Remember to pass allow_pickle=False explicitly when saving.
np.save(file='array.npy', arr=array, allow_pickle=False)

In [89]:
# allow_pickle already defaults to False when loading, so it is omitted here.
load_array = np.load(file='array.npy')

In [91]:
# Confirm the save/load round trip preserved the data. np.array_equal also
# requires the shapes to match (no broadcasting) and returns a plain bool.
np.array_equal(load_array, array)

True

Similarly, you can use the savez() function to save several arrays into a single file. If you want to save your files as compressed NumPy binary files, you can use savez_compressed() in the same way. The following example uses savez():

In [104]:
# A 1-D array of 0..4 and a 2x5 array of 0..9, to be stored together in one file.
arr1 = np.arange(0, 5)
arr2 = np.arange(0, 10).reshape(2, 5)

In [105]:
# Store both arrays in one .npz file. They are passed positionally, so they are
# saved under the default names arr_0 and arr_1.
np.savez('arr1_arr2.npz', arr1, arr2)

In [107]:
# Open the .npz archive; individual arrays are looked up by name on the returned object.
load_arr1_2 = np.load('arr1_arr2.npz')

In [116]:
# List the names of the arrays stored in the archive.
load_arr1_2.files

['arr_0', 'arr_1']

In [118]:
# Pull each array back out of the archive by its default name.
arr1_load = load_arr1_2['arr_0']
arr2_load = load_arr1_2['arr_1']

In [119]:
# Display the first array read from the archive.
arr1_load

array([0, 1, 2, 3, 4])

In [120]:
# Display the second array read from the archive.
arr2_load

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

When you save several arrays in a single file, if you give a keyword argument such as first_array=x, your array will be saved under that name. Otherwise, arrays are given default names by position: arr_0 for the first, arr_1 for the second, and so on.