# Data Loading, Storage, and File Formats

In [None]:
# Third-party libraries used throughout this notebook.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fix the seed of NumPy's global random generator so output is repeatable.
np.random.seed(12345)

# Display defaults: figure size (10, 6); arrays printed with 4 digits of
# precision and without scientific notation for small values.
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(suppress=True, precision=4)

## Reading and Writing Data in Text Format

In [None]:
!cat examples/ex1.csv

In [None]:
# Parse the comma-separated file into a DataFrame using read_csv's defaults.
df = pd.read_csv('examples/ex1.csv')
# A bare name as the cell's last expression makes the notebook display it.
df

In [None]:
# Same file read through read_table; sep=',' tells it to split on commas.
pd.read_table('examples/ex1.csv', sep=',')

In [None]:
!cat examples/ex2.csv

In [None]:
# header=None: no line of the file is used for column names, so pandas
# assigns default integer column labels and the first line becomes data.
pd.read_csv('examples/ex1.csv', header=None)

In [None]:
# header=None: every line is parsed as data and the columns receive
# default integer labels.
pd.read_csv('examples/ex2.csv', header=None)

In [None]:
# Supply the column labels explicitly through names= instead of reading
# them from the file.
pd.read_csv('examples/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])

In [None]:
# Column labels to assign while parsing.
names = ['a', 'b', 'c', 'd', 'message']
# index_col='message' makes the 'message' column the index of the result.
pd.read_csv('examples/ex2.csv', names=names, index_col='message')

In [None]:
!cat examples/csv_mindex.csv


In [None]:
!cat examples/ex4.csv

In [None]:

# skiprows takes 0-indexed line numbers: lines 0, 2 and 3 of the file are
# dropped before parsing.
pd.read_csv('examples/ex4.csv', skiprows=[0, 2, 3])

In [None]:
!cat examples/ex5.csv

In [None]:

# Parse with the defaults; pandas' built-in NA markers are converted to
# missing values during parsing.
result = pd.read_csv('examples/ex5.csv')
result


In [None]:
# Element-wise boolean mask: True wherever result holds a missing value.
pd.isnull(result)

In [None]:
# na_values adds the string 'NULL' to the set of markers that are parsed as
# missing (the default markers still apply).
result = pd.read_csv('examples/ex5.csv', na_values=['NULL'])
result

In [None]:
# Per-column NA markers: each key is a column name, each value the list of
# strings to treat as missing in that column only.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('examples/ex5.csv', na_values=sentinels)

### Reading Text Files in Pieces

In [None]:
# Load the whole file into memory at once.
result = pd.read_csv('examples/ex6.csv')
result

In [None]:
# nrows=5 stops parsing after the first 5 data rows.
pd.read_csv('examples/ex6.csv', nrows=5)

In [None]:
# With chunksize set, read_csv returns an iterable reader that yields the
# file as DataFrames of up to 1000 rows each, not a single DataFrame.
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000)
# Displays the reader object itself; no rows are read by this line.
chunker

In [None]:
# Open the file as a chunked reader yielding DataFrames of up to 1000 rows.
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000)

# Running tally of how often each value of the 'key' column occurs.
# The dtype is explicit because an empty Series built without one emits a
# warning in pandas 1.x and defaults to object dtype in pandas 2.x.
tot = pd.Series([], dtype='int64')
for piece in chunker:
    # fill_value=0 treats a key absent from either side as a zero count,
    # so new keys are added instead of producing NaN.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)

# Most frequent keys first.
tot = tot.sort_values(ascending=False)

In [None]:
# Show the first ten entries of tot.
tot[:10]

### Writing Data to Text Format

In [None]:
# Load the frame that the following cells write back out.
data = pd.read_csv('examples/ex5.csv')
data

In [None]:
# Write data as comma-separated text; by default the index and the column
# labels are written as well.
data.to_csv('examples/out.csv')


In [None]:
!cat examples/out.csv

### Working with Delimited Formats

In [None]:
!cat examples/ex7.csv

In [None]:
import csv

# The csv module documents that files handed to csv.reader should be opened
# with newline='' so line breaks inside quoted fields are parsed correctly.
# The handle is not wrapped in a with-block because the reader is consumed
# outside this cell; call f.close() once the reader has been exhausted.
f = open('examples/ex7.csv', newline='')

reader = csv.reader(f)

In [None]:
# Each row produced by the reader is a list of strings.
for line in reader:
    print(line)

# The reader is exhausted now, so release the underlying file handle
# instead of leaving it open for the rest of the session.
f.close()

In [None]:
# newline='' is how the csv module documents its input files should be
# opened; without it, newlines embedded in quoted fields can be misparsed.
with open('examples/ex7.csv', newline='') as f:
    # Materialize all rows before the with-block closes the file.
    lines = list(csv.reader(f))

In [None]:
# Split the parsed rows: the first row is kept as the header, every
# remaining row as the values.
header = lines[0]
values = lines[1:]

In [None]:
# zip(*values) transposes the rows into one tuple per column; pairing those
# tuples with the header entries gives a mapping of column name -> values.
data_dict = dict(zip(header, zip(*values)))
data_dict

### Reading Microsoft Excel Files

In [None]:
# Open the workbook once so that sheets can be read from the same object.
# NOTE(review): no visible cell closes this handle; call xlsx.close() when
# the workbook is no longer needed.
xlsx = pd.ExcelFile('examples/ex1.xlsx')


In [None]:
# Read the sheet named 'Sheet1' from the already-opened ExcelFile.
pd.read_excel(xlsx, 'Sheet1')

In [None]:
# read_excel also accepts a file path directly; the second argument selects
# the sheet named 'Sheet1'.
frame = pd.read_excel('examples/ex1.xlsx', 'Sheet1')
frame

In [None]:
# Use ExcelWriter as a context manager: the workbook is saved and closed on
# exit. This replaces the explicit writer.save() call, which was removed in
# pandas 2.0. sheet_name is passed by keyword because passing it
# positionally to to_excel is deprecated.
with pd.ExcelWriter('examples/ex2.xlsx') as writer:
    frame.to_excel(writer, sheet_name='Sheet1')

In [None]:
# to_excel also accepts a file path directly, so no explicit ExcelWriter is
# needed for a single-sheet write.
frame.to_excel('examples/ex2.xlsx')

In [None]:
!rm examples/ex2.xlsx