# Lecture 05 - Data IO

Built-in Python tools:  https://docs.python.org/2/tutorial/inputoutput.html#reading-and-writing-files


### Text files

When opening a text file:
* r - read (assumed value if omitted)
* w - write (erasing any old version of that file)
* a - append (new data is added to the end of the file)
* r+ - reading and writing

In [None]:
# Open the book in read mode ('r'); open() returns a file object, not the text itself.
f = open("Alice's Adventures in Wonderland by Lewis Carroll.txt", 'r')
# Show the type of the object that open() returned.
print type(f)

In [None]:
# read() with no argument returns everything from the current position to the end as one string.
print f.read()

In [None]:
# Release the file handle; the file object cannot be read from after this.
f.close()

In [None]:
# Deliberate error demo: when the cells are run in order f is already closed,
# so this read raises ValueError instead of printing anything.
print f.read()

In [None]:
# No mode argument is given, so the file is opened for reading ('r' is the default).
f = open("Alice's Adventures in Wonderland by Lewis Carroll.txt")

In [None]:
# readline() returns just the next line and advances the file position past it.
print f.readline()

In [None]:
# Calling readline() again continues from where the previous call stopped.
print f.readline()

In [None]:
# The file object remembers its position, so this prints the next line in sequence.
print f.readline()

In [None]:
# The returned line keeps its trailing newline and print adds another one,
# which is why the output is followed by a blank line.
print f.readline()

In [None]:
# Iterating over the file object yields the remaining lines one at a time,
# starting from the current position rather than from the top of the file.
for line in f:
    print line

In [None]:
# Done with the file: close it to release the handle.
f.close()

In [None]:
# Load the whole file into memory as a list with one string per line.
f = open("Alice's Adventures in Wonderland by Lewis Carroll.txt")
# list() consumes the file iterator, so each element is one line (newline included).
f_list = list(f)
# Number of lines in the file.
print len(f_list)
# The first line of the file.
print f_list[0]

f.close()

In [None]:
# Open a second file for reading (presumably an empty one, judging by its name).
f = open('empty file.txt', 'r')

In [None]:
# Print whatever the file currently contains; for an empty file read() returns ''.
print f.read()

In [None]:
# Close the handle and open the file again, still in read-only mode.
f.close()
f = open('empty file.txt', 'r')

In [None]:
# Deliberate error demo: when the cells are run in order f is open in read-only
# mode ('r'), so this write is expected to fail with an IOError.
f.write('test string\n')

In [None]:
# Open in write mode ('w'), which erases any existing content of the file.
f = open('empty file.txt', 'w')
f.write('test string\n')
# Closing flushes the buffered text to disk.
f.close()

In [None]:
# Deliberate error demo. Opening with 'w' truncates the file straight away,
# erasing the line written in the previous cell.
f = open('empty file.txt', 'w')
# The handle is write-only, so this read is expected to fail with an IOError.
print f.read()

In [None]:
f = open('empty file.txt', 'r')
print f.read()

In [None]:
# Write the test line again in write mode ('w' replaces whatever the file held).
f = open('empty file.txt', 'w')
f.write('test string\n')
# Closing flushes the buffered text to disk.
f.close()

In [None]:
f = open('empty file.txt', 'r')
print f.read()
f.close

##### Using with

It is common in Python to use a `with` statement when working with files.  This ensures that the file is closed after you are done with it, even if an error occurs inside the block.  To do this, write code like this:

In [None]:
# The with statement closes my_file automatically when the block ends.
with open('empty file.txt', 'r') as my_file:
    for line in my_file:
        # line still ends with '\n', so print emits an extra blank line after it.
        print line

### CSV (Comma-Separated Values)

https://docs.python.org/2/library/csv.html

In [None]:
import csv

In [None]:
# csv.reader splits each line of the file on the delimiter and yields it as a list of strings.
with open('Titanic.csv') as csvfile:
    my_reader = csv.reader(csvfile, delimiter=',')
    for row in my_reader:
        # Re-join the fields with ' - ' to make the column boundaries visible.
        print ' - '.join(row)

In [None]:
# Inspect a single parsed row: its type and its contents.
with open('Titanic.csv') as csvfile:
    my_reader = csv.reader(csvfile, delimiter=',')
    for row in my_reader:
        print type(row)
        print row
        # Stop after the first row; one example is enough.
        break

### TSV (Tab-Separated Values)

In [None]:
import csv

In [None]:
file_list = list()

with open('Iris.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        file_list.append(row)
        
print file_list[5]

In [None]:
# Write the rows held in file_list back out with tabs as the field separator.
# 'wb' is the mode the Python 2 csv module expects for output files.
with open('Iris.tsv', 'wb') as tsvfile:
    writer = csv.writer(tsvfile, delimiter='\t')
    writer.writerows(file_list)

In [None]:
file_list = list()

with open('Iris.tsv', 'r') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    for row in reader:
        file_list.append(row)
        
print file_list[5]

### JSON (JavaScript Object Notation)

https://docs.python.org/2/library/json.html

https://pypi.python.org/pypi/PyYAML

In [None]:
file_list = list()
keys = list()

with open('Iris.tsv', 'r') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    keys = reader.next()
    for row in reader:
        file_list.append(row)
        
print keys
print

all_flowers = dict()

for flower in file_list:
    flower_number = flower[0]
    while len(flower_number) < 3:
        flower_number = '0' + flower_number
    all_flowers[flower_number] = dict()
    for idx,key in enumerate(keys[1:]):
        all_flowers[flower_number][key] = flower[idx+1]
        
print all_flowers

In [None]:
import json
# Serialise the nested dict to disk; 'wb' opens the file for (binary) writing.
with open('flower.json', 'wb') as jsonfile:
    # sort_keys=True writes keys in sorted order; indent=4 pretty-prints the nesting.
    json.dump(all_flowers, jsonfile, sort_keys=True, indent=4)

In [None]:
# Placeholder so `data` is defined before the file has been parsed.
data = None

# Parse the JSON file back into Python objects.
with open('flower.json', 'r') as jsonfile:
    data = json.load(jsonfile)

print data

In [None]:
# PyYAML is a third-party package (linked above), not part of the standard library.
import yaml

data = None

# Load the same JSON file with the YAML parser. safe_load only constructs
# plain Python objects.
# NOTE(review): presumably shown because JSON documents are generally also
# valid YAML; confirm the intent.
with open('flower.json', 'r') as jsonfile:
    data = yaml.safe_load(jsonfile)

print data

### Pandas

https://pandas.pydata.org/pandas-docs/stable/index.html

https://www.kaggle.com/c/expedia-hotel-recommendations/data

In [None]:
import pandas as pd

In [None]:
# Load the CSV file into a pandas DataFrame.
titanic = pd.read_csv("Titanic.csv")

In [None]:
# shape is a (number of rows, number of columns) tuple.
titanic.shape

In [None]:
# Display the first five rows of the DataFrame.
titanic.head(5)