Reading and writing various file types
============================

In this notebook we cover:
- Text files
- URLs
- XML
- JSON
- CSVs (csv module and pandas)
- Pickles


Read data from text file
---------------------

In [25]:
# Load the whole file as a list of lines; line endings are kept on each entry.
with open('sample/example.txt', 'r') as text_file:
    text = list(text_file)

Read a newline-separated list from a text file
-----------------

In [None]:
# Read every line first, then drop the single trailing newline (if present)
# from each one so that `text` holds just the list items.
with open('sample/example.txt', 'r') as f:
    raw_text = f.readlines()

text = [entry[:-1] if entry.endswith('\n') else entry for entry in raw_text]

Write data to text file
----------------

In [26]:
# Write one entry per line. `text` may come from either read cell above: the
# readlines() version keeps the trailing '\n' on each entry, while the list
# version has it stripped. Only add the terminator when it is missing so that
# no blank lines are introduced into the file.
with open('sample/example.txt', 'w') as f:
    for line in text:
        f.write(line if line.endswith('\n') else line + '\n')

Read data from URL
---------------------

Grab a page of projects (as JSON) from the GtR website's API as an example

In [1]:
import urllib.request as request
# Use the response as a context manager so the HTTP connection is always
# closed, and keep the response object and the decoded text in separate names.
with request.urlopen('http://gtr.rcuk.ac.uk/gtr/api/projects.json?p=1') as response:
    page = response.read().decode('utf-8')

Read from XML string
--------------

In [29]:
import xml.etree.ElementTree as ET

# Inline XML document: a <data> root holding two <country> records,
# each with a `name` attribute and a <rank> child.
xml_string = """
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
    </country>
    <country name="Singapore">
        <rank>4</rank>
    </country>
</data>
"""

# fromstring() parses the text and hands back the root element directly.
root = ET.fromstring(xml_string)

Read from XML file
---------------------

In [30]:
import xml.etree.ElementTree as ET
# Parse the XML document from disk into an ElementTree.
tree = ET.parse('sample/example.xml')
# The root element is the entry point for navigating the parsed document.
root = tree.getroot()

Write to XML file
--------------

In [31]:
# Serialise the in-memory tree to disk, overwriting the file at this path.
tree.write('sample/example.xml')


Read from json file
---------------------

In [19]:
import json
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default text encoding.
with open('sample/example.json', 'r', encoding='utf-8') as json_file:
    project = json.load(json_file)

Write to json file
-----------------


In [21]:
import json
# Serialise `project` back to disk, pretty-printed with a two-space indent.
with open('sample/example.json', 'w') as json_file:
    json.dump(project, json_file, indent=2)

Read from csv file
------------------

In [4]:
import csv
# The csv module expects its files to be opened with newline='' so that line
# breaks embedded inside quoted fields are parsed correctly.
with open('sample/example.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # Each row comes back as a list of strings (no type conversion is done).
    for row in reader:
        print(row)

['id', 'Name', 'Type', 'Gender', 'Age']
['1', 'Alice', 'Cat', 'F', '2']
['2', 'Betty', 'Dog', 'F', '2']
['3', 'Charlie', 'Cat', 'F', '8']
['4', 'David', 'Rabbit', 'M', '3']
['5', 'Ed', 'Cat', 'M', '4']
['6', 'Fiona', 'Dog', 'F', '10']
['7', 'Gerry', 'Dog', 'M', '6']
['8', 'Haley', 'Hamster', 'F', '2']
['9', 'Ian', 'Dog', 'M', '11']
['10', 'Jacob', 'Cat', 'M', '17']


Write to csv file
---------------

In [None]:
import csv
# Rows to write: a header row followed by one list of strings per record.
to_write = [['id', 'Name', 'Type', 'Gender', 'Age'],
            ['1', 'Alice', 'Cat', 'F', '2'],
            ['2', 'Betty', 'Dog', 'F', '2'],
            ['3', 'Charlie', 'Cat', 'F', '8'],
            ['4', 'David', 'Rabbit', 'M', '3'],
            ['5', 'Ed', 'Cat', 'M', '4'],
            ['6', 'Fiona', 'Dog', 'F', '10'],
            ['7', 'Gerry', 'Dog', 'M', '6'],
            ['8', 'Haley', 'Hamster', 'F', '2'],
            ['9', 'Ian', 'Dog', 'M', '11'],
            ['10', 'Jacob', 'Cat', 'M', '17']]

# newline='' lets the csv writer control line endings itself; without it,
# platforms that translate '\n' on output (e.g. Windows) end up with a blank
# line after every row. (In Python 2 the same effect was achieved by opening
# the file in 'wb' mode.) Mode 'w' truncates an existing file; it does not
# append to it.
with open('sample/example.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # writerows() writes every row in one call.
    writer.writerows(to_write)

Read from csv file with pandas
-----------------------

In [10]:
import pandas as pd
# Load the CSV into a DataFrame, then display a single column as a Series.
df = pd.read_csv('sample/example.csv')
df['Gender']

0    F
1    F
2    F
3    M
4    M
5    F
6    M
7    F
8    M
9    M
Name: Gender, dtype: object

Read from Pickle file
----------------

In [8]:
import pickle
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a source you trust.
# The with-block guarantees the file handle is closed once loading finishes.
with open("sample/example.p", "rb") as pickle_file:
    example = pickle.load(pickle_file)
print(example)

['spam', 'eggs', 'ham']


Write to Pickle file
-----------------

In [6]:
example = ['spam', 'eggs', 'ham']

# Pickle files are binary, hence mode "wb". The with-block closes the file
# deterministically so the pickled bytes are flushed to disk.
with open("sample/example.p", "wb") as pickle_file:
    pickle.dump(example, pickle_file)