# NumPy Data I/O Helper Functions

In [1]:
import numpy as np

- np.load(), np.save(): serializing objects
- np.loadtxt(), np.savetxt(): commonly used for text files
- np.genfromtxt() : load from file, handle missing data items
- np.fromfile(), np.tofile(): read/write efficiently in binary format

## Serializing Objects: np.load() and np.save()
- np.load() loads serialized objects from a binary format file
- np.save() saves serialized objects to a binary format file
- .npy is added to the filename automatically when saving but not when loading
- serializing objects is often called pickling

In [3]:
a = np.random.rand(2)

In [4]:
a

array([0.59199783, 0.61890382])

In [6]:
np.save?

In [8]:
np.save('a.npy', a)

In [9]:
a

array([0.59199783, 0.61890382])

In [10]:
del a

In [12]:
a = np.load('a.npy')

In [13]:
a

array([0.59199783, 0.61890382])

## Text Files: np.loadtxt() and np.savetxt()

In [14]:
a = np.arange(0,21)

In [15]:
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [16]:
# The reshaped 3x7 result is only displayed, not assigned, so `a` itself stays 1-D.
a.reshape(3,7)

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20]])

In [17]:
# `a` is still the 1-D array here (the reshape was never assigned back),
# so the 1-D data is what gets written to a.txt.
np.savetxt('a.txt', a)

In [18]:
ls

01.NumPy.ipynb   03.NumPy_IO_Helper_Functions.ipynb  a.txt
02.Pandas.ipynb  a.npy                               data_raw/


In [19]:
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [20]:
del a

In [22]:
# No dtype is passed, so the integers saved earlier are read back as floats.
a = np.loadtxt('a.txt')

In [23]:
a

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20.])

## Formatted Text Saving and Loading

In [24]:
# Three parallel columns describing the same two people:
# names are kept as Python objects, ages as integers, salaries as floats.
names = np.array(['jon', 'ned'], dtype=object)
ages = np.array([20, 40])
salaries = np.array([10000.00, 20000.00])

In [25]:
# Stack arrays as columns in a 2D matrix
np.column_stack?

In [27]:
# Place the three 1-D arrays side by side as columns; because `names` has
# dtype=object, the combined 2-D array is object-typed as well.
a = np.column_stack((names, ages, salaries))

In [28]:
a

array([['jon', 20, 10000.0],
       ['ned', 40, 20000.0]], dtype=object)

In [30]:
# One format spec per column of `a`: name as a string, age as an integer,
# salary as a float with one decimal place. The '; ' between the specs is
# written literally into each row and serves as the field separator.
format1 = '%s; %d; %.1f'
np.savetxt('b.txt', a, fmt=format1)

In [31]:
ls

01.NumPy.ipynb   03.NumPy_IO_Helper_Functions.ipynb  a.txt  data_raw/
02.Pandas.ipynb  a.npy                               b.txt


In [33]:
a

array([['jon', 20, 10000.0],
       ['ned', 40, 20000.0]], dtype=object)

In [34]:
del a

In [40]:
# Load the first column of b.txt back as byte strings (up to 50 bytes each),
# then convert them to regular str values.
names = np.loadtxt(
    fname='b.txt',
    delimiter=';',
    usecols=[0],
    dtype='|S50',
).astype(str)

In [41]:
names

array(['jon', 'ned'], dtype='<U50')

In [42]:
# Read the second column of b.txt. No dtype is passed, so the ages come back
# as floats (20., 40.) rather than the integers that were originally saved.
ages = np.loadtxt(fname= 'b.txt',
                  delimiter = ';',
                  usecols = [1])
                  

In [43]:
ages

array([20., 40.])

In [44]:
# Read the third column of b.txt as floats.
# NOTE(review): the result is stored as `salary`, whereas the array created
# earlier is named `salaries`; the earlier variable is therefore not replaced.
salary = np.loadtxt(fname= 'b.txt',
                  delimiter = ';',
                  usecols = [2])
                  

In [45]:
salary

array([10000., 20000.])

## Text Files with Missing Data: np.genfromtxt()
- Loads data from a text file
- Handles missing data as specified
- Preferred NumPy method for retrieving text data
- Acts like np.loadtxt() but handles missing data values
- Returns a structured array when per-column names/dtypes are given (as in the example below)

In [49]:
# Semicolon-separated text file that has missing entries.
file_name = 'data_raw/formatted_text_missing.txt'
data = np.genfromtxt(
    file_name,
    delimiter=';',
    # Field names used to address the columns of the result.
    names=['names', 'age', 'salary'],
    # Per-column placeholder substituted when a field is missing.
    filling_values=['NONAME', '-1', '-999'],
    # Per-column types: 50-byte string, 8-byte integer, 8-byte float.
    dtype=['|S50', '<i8', '<f8'],
)

In [50]:
data

array([(b'Ann', 33, 150000.), (b'Tim', -1,  35000.),
       (b'Jon', 25,   -999.)],
      dtype=[('names', 'S50'), ('age', '<i8'), ('salary', '<f8')])

In [51]:
data['names']

array([b'Ann', b'Tim', b'Jon'], dtype='|S50')

In [52]:
data['age']

array([33, -1, 25])

In [53]:
data['salary']

array([150000.,  35000.,   -999.])