# Table of Contents
 <p><div class="lev1"><a href="#Reading-and-Writing-Files"><span class="toc-item-num">1&nbsp;&nbsp;</span>Reading and Writing Files</a></div><div class="lev2"><a href="#CSV-files"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>CSV files</a></div><div class="lev2"><a href="#CSV-with-header"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>CSV with header</a></div><div class="lev2"><a href="#CSV-with-header-and-comments"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>CSV with header and comments</a></div><div class="lev2"><a href="#CSV-with-missing-values"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>CSV with missing values</a></div><div class="lev2"><a href="#Save-data-as-csv"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>Save data as csv</a></div><div class="lev3"><a href="#Using-binary"><span class="toc-item-num">1.5.1&nbsp;&nbsp;</span>Using binary</a></div><div class="lev3"><a href="#Reading-any-data-from-binary"><span class="toc-item-num">1.5.2&nbsp;&nbsp;</span>Reading any data from binary</a></div><div class="lev2"><a href="#memory-mapped:-Cached-Access"><span class="toc-item-num">1.6&nbsp;&nbsp;</span>memory-mapped: Cached Access</a></div><div class="lev2"><a href="#Reading-and-writing-Matlab-data-format"><span class="toc-item-num">1.7&nbsp;&nbsp;</span>Reading and writing Matlab data format</a></div><div class="lev1"><a href="#Exercises"><span class="toc-item-num">2&nbsp;&nbsp;</span>Exercises</a></div>

# Reading and Writing Files
## CSV files

In [None]:
import numpy as np

import tempfile

from os.path import join

# System temp directory; later cells build their output file paths from it
# via join(temp, ...).
temp = tempfile.gettempdir()

# show the file via the shell (`cat` prints the raw text, so we can see what
# the parsing calls below have to deal with)
!cat 'files/testfile_1.csv'

In [None]:
# Parse the comma-separated file into an ndarray and show it as cell output.
csv_data = np.loadtxt(
    join('files', 'testfile_1.csv'),
    delimiter=',',
)
csv_data

In [None]:
# read csv with given datatypes: the dtype string lists one type per column
csv_data = np.loadtxt(
    join('files', 'testfile_1.csv'),
    dtype="int, float, int",
    delimiter=',',
)
csv_data  # attention: result is (structured) record-ndarray

## CSV with header

In [None]:
# show the raw file (with its header line) via the shell
!cat 'files/testfile_2.csv'

In [None]:
# Tab-separated file: skiprows=1 makes loadtxt ignore the first line
# (the header) so that only the numeric rows are parsed.
np.loadtxt(
    join('files', 'testfile_2.csv'),
    skiprows=1,
    delimiter='\t',
)

## CSV with header and comments

In [None]:
# show the raw file (header plus comment lines) via the shell
!cat files/testfile_3.csv

In [None]:
# genfromtxt has even more options than loadtxt: names=True asks it to take
# the field names from the file itself, and comments='#' marks comment text.
data = np.genfromtxt(
    join('files', 'testfile_3.csv'),
    comments='#',
    names=True,
    delimiter='\t',
)
data

In [None]:
# fields of the array loaded with names=True are selected by field name
data['foo']

## CSV with missing values

In [None]:
# show the raw file (including its missing-value placeholders) via the shell
!cat files/testfile_4.csv

In [None]:
# Parse a tab-separated file that contains placeholder entries.
# Both dicts are keyed by column index: `missing_values` gives the marker
# that counts as "missing" in that column, `filling_values` the value that
# is substituted for it.
np.genfromtxt(
    join('files', 'testfile_4.csv'),
    names='foo,bar,names',
    dtype='int, float, S10',
    delimiter='\t',
    autostrip=True,
    filling_values={0: -9999, 1: np.nan, 2: '???'},
    missing_values={0: 'N/A', 1: 'N/A', 2: '???'},
)

## Save data as csv

In [None]:
testfile = join(temp, 'test.csv')

np.savetxt(testfile, np.random.rand(7, 3) * 10, fmt='%i, %2.3f, %i')

!cat $testfile

### Using binary

In [None]:
# Round-trip a random 7x3 array through a binary .npy file.
testfile = join(temp, 'test.npy')
np.save(testfile, np.random.random((7, 3)))

# read it back; the loaded array is the cell's output
np.load(testfile)

In [None]:
# saving several arrays in one file: every keyword argument is one array,
# stored under the keyword's name
testfile = join(temp, 'test.npz')
np.savez(
    testfile,
    my_arr1=np.random.rand(5, 2),
    my_second_array=np.random.rand(10),
)

In [None]:
# load several arrays: np.load on the file stored in `testfile` gives one
# object from which the individual arrays are fetched by name
data = np.load(testfile)
data

In [None]:
# list the names under which arrays are available in the loaded object
data.keys()

In [None]:
# fetch a single array by its name
data['my_second_array']

### Reading any data from binary

In [None]:
testfile = join(temp, 'test.bin.npy')

np.save(testfile, np.arange(10).reshape(2, 5))

# Opening the data: binary mode is required because the payload is raw bytes,
# not text; the `with` block closes the file again when we are done.
with open(testfile, 'rb') as infile:
    # skip the header by parsing it instead of seeking to a hard-coded offset:
    # the length of the .npy header is not a fixed number (it depends on the
    # format version, the array description and alignment padding)
    version = np.lib.format.read_magic(infile)
    if version == (1, 0):
        read_header = np.lib.format.read_array_header_1_0
    else:
        read_header = np.lib.format.read_array_header_2_0
    shape, fortran_order, stored_dtype = read_header(infile)
    # the file position is now at the first byte of the array data

    # define the data type: 2 x the integer type that was actually stored
    # (a fixed 'l' would have a different size on some platforms)
    numdtype = np.dtype((stored_dtype, 2))

    # read the data!
    raw_pairs = np.fromfile(infile, dtype=numdtype)

raw_pairs

## memory-mapped: Cached Access

In [None]:
testfile = join(temp, 'big_test_file.npy')


np.save(testfile, np.empty(shape=(32 * 1024 ** 2)))
!ls -ahl $testfile

In [None]:
%timeit -n 1 nur_von_interesse = np.load(testfile)[10000:20000]

In [None]:
%timeit -n 1 nur_von_interesse = np.load(testfile, mmap_mode='r')[10000:20000]

## Reading and writing Matlab data format

In [None]:
from scipy.io import loadmat, savemat

testfile = join(temp, 'test.mat')

# two example arrays to store in the .mat file
foo = np.random.rand(10)
bar = np.arange(100)

# write both arrays under their variable names; oned_as='row' selects how
# the one-dimensional arrays are laid out in the file
savemat(testfile, dict(foo=foo, bar=bar), oned_as='row')

# read the file back; the result is the cell's output
my_mat = loadmat(testfile)
my_mat

---

# Exercises
<div class="alert alert-success">
<li>Save the arrays foo and bar to a compressed file with <code>np.savez_compressed</code> and load them again.</li>
</div>

foo = np.random.rand(8, 8)

bar = np.arange(256, dtype=np.uint8)

In [None]:
# Exercise inputs: an 8x8 array of random floats and the 256 values
# 0..255 stored as unsigned 8-bit integers.
foo = np.random.random((8, 8))
bar = np.arange(0, 256, dtype=np.uint8)

In [None]:
# Save both arrays into one compressed .npz archive, stored under the
# names 'foo' and 'bar'.
np.savez_compressed(join(temp, 'compressed_numpy_file.npz'), foo=foo, bar=bar)

# Load them back; the `with` block closes the archive once both arrays
# have been read out of it.
with np.load(join(temp, 'compressed_numpy_file.npz')) as data:
    foo_loaded = data['foo']
    bar_loaded = data['bar']

# The round trip must reproduce the original arrays exactly.
np.testing.assert_array_equal(foo_loaded, foo)
np.testing.assert_array_equal(bar_loaded, bar)

# Only the last expression of a cell is displayed, so show both arrays
# together instead of two bare expressions (the first would be discarded).
foo_loaded, bar_loaded

<div class="alert alert-success">
<li> Load the file 'files/testfile.bin' as a memmap</li>
<li> The header (a byte string) is 24 bytes long.</li>
<li> The data is stored as int8; the header can be written as a bytes literal, e.g. b'byte-string'.</li>
<li> the shape is 8 x 8 </li>

</div>

In [None]:
# create file
with open(join('files', 'testfile.bin'), 'wb') as myfile:
    header = b'write a header! :-)'
    myfile.write(header)
    myfile.close()

# create memmep and save data
data = np.memmap(join('files', 'testfile.bin'), offset=24, shape=(8,8), dtype=np.int8)
data[:] = np.arange(64).reshape(8, 8)
data

In [None]:
# read data
neu = np.memmap(join('files', 'testfile.bin'),
                mode='r',
                offset=24,
                shape=(8, 8),
                dtype=np.int8)
neu