In [2]:
## Data Has to Go Somewhere

In [3]:
## Write a text file with write()

poem = '''There was a young lady named Bright,
Whose speed was far faster than light;
She started one day,
In a relative way,
And returned on the previous night.
'''

In [4]:
len(poem)

152

In [5]:
## Write poem to the file 'relativity'

fout = open('relativity', 'wt')
fout.write(poem)
fout.close() ## Closing the files forces any remaining writes to complete

In [6]:
## print() adds a space after each argument, and a newline at the end; write() does not.
## -w will automatically overwrite the file if it exists.

fout = open('relativity', 'wt')
print(poem, file=fout)
fout.close()

In [7]:
## Suppress addition of spaces and newline with print()

fout = open('relativity', 'wt')
print(poem, file=fout, sep='', end='')
fout.close()

In [8]:
## For a large source string, write file in chunks; will write 100 characters on first pass and ~50 on second

fout = open('relativity', 'wt')
size = len(poem)
offset = 0
chunk = 100

while True:
    if offset > size:
        break
    fout.write(poem[offset:offset+chunk])
    offset += chunk

fout.close()

In [9]:
## Use -x to protect against overwrite

fout = open('relativity', 'xt')


FileExistsError: [Errno 17] File exists: 'relativity'

In [10]:
## Use -x with an exception handler

try:
    fout = open('relativity', 'xt')
    fout.write('stomp stomp stomp')
except:
    print('This file already exists!')

This file already exists!


In [11]:
## Read a text file with read(), readline(), or readlines()
## read() with no arguments will read the entire file at once; use caution with large files

fin = open('relativity', 'rt')
poem = fin.read()
fin.close()
len(poem)

152

In [12]:
## Read in only a subset of characters, as if relativity were a large file; append each chunk to a poem string.

poem = '' ## Initiate empty string
fin = open('relativity', 'rt')
chunk = 100

while True:
    fragment = fin.read(chunk) ## At end of source, calls to read will return False
    if not fragment: ## False breaks out of the while True loop.
        break
    poem += fragment

fin.close()
len(poem)

152

In [13]:
## Read file one line at a time with readline(), and append to poem string.

poem = ''
fin = open('relativity', 'rt')

while True:
    line = fin.readline()
    if not line:
        break
    poem += line

fin.close()
len(poem)

152

In [14]:
## Use an iterator to read a text file one line at a time.

poem = ''
fin = open('relativity', 'rt')

for line in fin:
    poem += line

fin.close()
len(poem)

152

In [15]:
fin = open('relativity', 'rt')
lines = fin.readlines()
fin.close()

print(len(lines), 'lines read')

5 lines read


In [16]:
for line in lines:
    print(line, end='') ## Suppress automatic newlines

There was a young lady named Bright,
Whose speed was far faster than light;
She started one day,
In a relative way,
And returned on the previous night.


In [17]:
## Write a binary file with write()

bdata = bytes(range(0,256)) ## Generate 256 byte values, from 0 to 255
len(bdata)

256

In [18]:
## Open file for writing in binary mode, and write all at once

fout = open('bfile', 'wb')
fout.write(bdata)
fout.close()

In [19]:
## Write binary data in chunks

fout = open('bfile', 'wb')
size = len(bdata)
offset = 0
chunk = 100

while True:
    if offset > size:
        break
    fout.write(bdata[offset:offset + chunk])
    offset += chunk

fout.close()

In [20]:
## Read a binary file

fin = open('bfile', 'rb')
bdata = fin.read()
len(bdata)
fin.close()

In [21]:
## Use a context manager to automatically close a file 

with open('relativity', 'wt') as fout:
    fout.write(poem)

In [22]:
## Use seek() to jump to a particular byte offset in a file without reading in the whole thing.

fin = open('bfile', 'rb')
fin.tell() ## Return current offset from beginning of file, in bytes

0

In [23]:
fin.seek(255)

255

In [24]:
bdata = fin.read() ## Reads from seek() point until the end of the file
len(bdata)

1

In [25]:
bdata[0]

255

In [26]:
## Add a second argument (origin) to seek: 
## - If 0, go offset from start
## - If 1, go offset from current position
## - If 2, go offset from end

## Values defined in standard os module, too
import os

os.SEEK_SET

0

In [27]:
os.SEEK_CUR

1

In [28]:
os.SEEK_END

2

In [29]:
fin = open('bfile', 'rb')
fin.seek(-1,2)

255

In [30]:
fin.tell()

255

In [31]:
bdata = fin.read()
len(bdata)

1

In [32]:
bdata[0]

255

In [33]:
fin = open('bfile', 'rb')

In [34]:
fin.seek(254,0)

254

In [35]:
fin.tell()

254

In [36]:
fin.seek(1,1)

255

In [37]:
fin.tell()

255

In [38]:
bdata = fin.read()
len(bdata)

1

In [39]:
bdata[0]

255

In [40]:
## Structured text files: delimited, tagged (XML/HTML), punctuated (JSON), indented (YAML), misc

In [41]:
## Delimited (CSV)
import csv

In [42]:
## Read and write a list of rows, that each contains a list of columns

villains = [
    ['Doctor', 'No'],
    ['Rosa', 'Klebb'],
    ['Mister', 'Big'],
    ['Auric', 'Goldfinger'],
    ['Ernst', 'Blofeld'],
]

with open('villains', 'wt') as fout: ## Creates CSV file with a line for each list
    csvout = csv.writer(fout)
    csvout.writerows(villains)

In [43]:
## Read CSV back in

with open('villains', 'rt') as fin:
    cin = csv.reader(fin)
    villains = [row for row in cin] # List comprehension

print(villains)

[['Doctor', 'No'], ['Rosa', 'Klebb'], ['Mister', 'Big'], ['Auric', 'Goldfinger'], ['Ernst', 'Blofeld']]


In [44]:
## Read file in as a list of dictionaries, instead of a list of lists

with open('villains', 'rt') as fin:
    cin = csv.DictReader(fin, fieldnames=['first', 'last'])
    villains = [row for row in cin]

print(villains)

[{'first': 'Doctor', 'last': 'No'}, {'first': 'Rosa', 'last': 'Klebb'}, {'first': 'Mister', 'last': 'Big'}, {'first': 'Auric', 'last': 'Goldfinger'}, {'first': 'Ernst', 'last': 'Blofeld'}]


In [45]:
## Add a header

villains = [
    {'first':'Doctor', 'last':'No'},
    {'first':'Rosa', 'last':'Klebb'},
    {'first':'Mister', 'last':'Big'},
    {'first':'Auric', 'last':'Goldfinger'},
    {'first':'Ernst', 'last':'Blofeld'}, # Note that there is always a terminating comma
]

In [46]:
with open('villains', 'wt') as fout:
    cout = csv.DictWriter(fout, ['first', 'last'])
    cout.writeheader()
    cout.writerows(villains)

In [47]:
with open('villains', 'rt') as fin:
    cin = csv.DictReader(fin) ## Omitting fieldnames argument tells DictReader to use values in first line as headers
    villains = [row for row in cin]

In [48]:
print(villains)

[{'first': 'Doctor', 'last': 'No'}, {'first': 'Rosa', 'last': 'Klebb'}, {'first': 'Mister', 'last': 'Big'}, {'first': 'Auric', 'last': 'Goldfinger'}, {'first': 'Ernst', 'last': 'Blofeld'}]


In [49]:
## Tagged (XML)

In [50]:
import xml.etree.ElementTree as et

tree = et.ElementTree(file='menu.xml')
root = tree.getroot()
root.tag

'menu'

In [51]:
for child in root:
    print('tag:', child.tag, 'attributes:', child.attrib)
    for grandchild in child:
        print('\ttag:', grandchild.tag, 'attributes:', grandchild.attrib)

tag: breakfast attributes: {'hours': '7-11'}
	tag: item attributes: {'price': '$6.00'}
	tag: item attributes: {'price': '$4.00'}
tag: lunch attributes: {'hours': '11-3'}
	tag: item attributes: {'price': '$5.00'}
tag: dinner attributes: {'hours': '3-10'}
	tag: item attributes: {'price': '$8.00'}


In [52]:
len(root) # Number of menu sections

3

In [53]:
len(root[0]) # Number of breakfast items

2

In [117]:
## Use Defused XML to safely load unknown XML

import defusedxml

from defusedxml.ElementTree import parse

et = parse('menu.xml')

In [120]:
root.tag

'menu'

In [None]:
## JSON

In [83]:
## Build a Python data structure with the data from the XML example

menu = \
{
"breakfast": {
        "hours": "7-11",
        "items": {
            "breakfast burritos": "$6.00",
            "pancakes": "$4.00"
        }
    },
"lunch": {
        "hours": "11-3",
        "items": {
            "hamburger": "$5.00"
        }
    },
"dinner": {
        "hours": "3-10",
        "items": {
            "spaghetti": "$8.00"
        }
    }
}

In [84]:
## Encode menu data structure to JSON

import json

menu_json = json.dumps(menu)
menu_json

'{"lunch": {"hours": "11-3", "items": {"hamburger": "$5.00"}}, "breakfast": {"hours": "7-11", "items": {"pancakes": "$4.00", "breakfast burritos": "$6.00"}}, "dinner": {"hours": "3-10", "items": {"spaghetti": "$8.00"}}}'

In [85]:
## Decode menu_json back to Python data structure

menu2 = json.loads(menu_json)
menu2

{'breakfast': {'hours': '7-11',
  'items': {'breakfast burritos': '$6.00', 'pancakes': '$4.00'}},
 'dinner': {'hours': '3-10', 'items': {'spaghetti': '$8.00'}},
 'lunch': {'hours': '11-3', 'items': {'hamburger': '$5.00'}}}

In [86]:
## Convert datetime (not JSON serializable) to JSON-serializable string and epoch value.

import datetime

now = datetime.datetime.utcnow()
now

datetime.datetime(2017, 11, 7, 21, 1, 16, 231614)

In [87]:
now_str = str(now)
json.dumps(now_str)

'"2017-11-07 21:01:16.231614"'

In [88]:
from time import mktime

now_epoch = int(mktime(now.timetuple()))
json.dumps(now_epoch)

'1510106476'

In [89]:
## Modify JSON encoding by creating subclass of JSONEncoder for datetime handling (returns epoch)

class DTEncoder(json.JSONEncoder):
    def default(self,obj):
        if isinstance(obj, datetime.datetime):
            return int(mktime(obj.timetuple()))
        return json.JSONEncoder.default(self,obj)

In [90]:
json.dumps(now, cls=DTEncoder)

'1510106476'

In [91]:
## Checking data types

In [92]:
type(now)

datetime.datetime

In [93]:
isinstance(now, datetime.datetime)

True

In [94]:
type(234)

int

In [95]:
isinstance(234, int)

True

In [96]:
type('hey')

str

In [97]:
isinstance('hey', str)

True

In [None]:
## YAML

## Use safe_load to import YAML

In [109]:
import yaml

with open('mcintyre.yml', 'rt') as fin:
    text = fin.read()

In [110]:
data = yaml.load(text)

In [111]:
data['details']

{'bearded': True, 'themes': ['cheese', 'Canada']}

In [112]:
len(data['poems'])

2

In [113]:
data['poems'][1]['title']

'Canadian Charms'

In [None]:
## Using configparser with Windows config files

In [121]:
import configparser

In [122]:
cfg = configparser.ConfigParser()

In [123]:
cfg.read('settings.cfg')

['settings.cfg']

In [124]:
cfg

<configparser.ConfigParser at 0x1050a3668>

In [125]:
cfg['french']

<Section: french>

In [126]:
cfg['french']['greeting']

'Bonjour'

In [127]:
cfg['files']['bin']

'/usr/local/bin'

In [129]:
## Pickle

## Serializing: Saving data structures to a file

import pickle

now1 = datetime.datetime.utcnow()
pickled = pickle.dumps(now1)
now2 = pickle.loads(pickled)

In [130]:
now1

datetime.datetime(2017, 11, 8, 0, 28, 35, 7367)

In [131]:
now2

datetime.datetime(2017, 11, 8, 0, 28, 35, 7367)

In [132]:
## Using pickle with self-created classes and objects

class Tiny():
    def __str__(self):
        return 'tiny'

obj1 = Tiny()
obj1

<__main__.Tiny at 0x104f62b70>

In [133]:
str(obj1)

'tiny'

In [135]:
pickled = pickle.dumps(obj1)
pickled

b'\x80\x03c__main__\nTiny\nq\x00)\x81q\x01.'

In [136]:
obj2 = pickle.loads(pickled)
obj2

<__main__.Tiny at 0x1047b1f98>

In [137]:
str(obj2)

'tiny'

In [54]:
## Things to do

In [75]:
## 8.1 "Assign the string 'This is a test of the emergency text system' to the variable test1,
## and write test1 to a file called test.txt."

test1 = 'This is a test of the emergency text system.'
file = open('test.txt', 'wt')
print(test1, file=file, end='')
file.close()

In [76]:
## 8.2 "Open the file test.txt and read its contents into the string test2.
## Are test1 and test2 the same?"

test2 = ''
file_in = open('test.txt', 'rt')

for line in file_in:
    test2 += line

file_in.close()

In [77]:
test1 == test2

True

In [80]:
# Book answer

with open('test.txt', 'rt') as infile:
    test2 = infile.read()

In [81]:
test1 == test2

True

In [99]:
## 8.3 "Save [specified] text lines to a file called books.csv. Notice that if the fields are separated by commas,
## you need to surround a field with quotes if it contains a comma."

books = [
    {'author':'J R R Tolkien', 'book':'The Hobbit'},
    {'author':'Lynne Truss', 'book':"Eats, Shoots & Leaves"}
]

with open('books', 'wt') as file_out:
    csv_out = csv.DictWriter(file_out, ['author', 'book'])
    csv_out.writeheader()
    csv_out.writerows(books)

In [100]:
## 8.4 "Use the csv module and its DictReader method to read books.csv to the variable books.
## Print the values in books. Did DictReader handle the quotes and commas in the second book's title?"

with open('books', 'rt') as file_in:
    csv_in = csv.DictReader(file_in)
    books = [row for row in csv_in]

books

[{'author': 'J R R Tolkien', 'book': 'The Hobbit'},
 {'author': 'Lynne Truss', 'book': 'Eats, Shoots & Leaves'}]

In [101]:
## 8.5 "Create a CSV file called books.csv by using [specified] lines."

books2 = [
    ['title', 'author', 'year'],
    ['The Weirdstone of Brisingamen', 'Alan Garner', '1960'],
    ['Perdido Street Station', 'China Mi'+'\u00e9'+'ville', '2000'],
    ['Thud!', 'Terry Pratchett', '2005'],
    ['The Spellman Files', 'Lisa Lutz', '2007'],
    ['Small Gods', 'Terry Pratchett', '1992']
]

In [103]:
with open('books2', 'wt') as file2_out:
    csv2_out = csv.writer(file2_out)
    csv2_out.writerows(books2)

In [104]:
with open('books2', 'rt') as file2_in:
    csv2_in = csv.DictReader(file2_in)
    books2 = [row for row in file2_in]

In [105]:
print(books2)

['title,author,year\n', 'The Weirdstone of Brisingamen,Alan Garner,1960\n', 'Perdido Street Station,China Miéville,2000\n', 'Thud!,Terry Pratchett,2005\n', 'The Spellman Files,Lisa Lutz,2007\n', 'Small Gods,Terry Pratchett,1992\n']


In [106]:
books2

['title,author,year\n',
 'The Weirdstone of Brisingamen,Alan Garner,1960\n',
 'Perdido Street Station,China Miéville,2000\n',
 'Thud!,Terry Pratchett,2005\n',
 'The Spellman Files,Lisa Lutz,2007\n',
 'Small Gods,Terry Pratchett,1992\n']

In [107]:
## Book answer

text = '''title,author,year
The Weirdstone of Brisingamen, Alan Garner, 1960
Perdido Street Station, China Miéville, 2000
Thud!, Terry Pratchett, 2005
The Spellman Files, Lisa Lutz, 2007
Small Gods, Terry Pratchett, 1992
'''

In [108]:
with open('books.csv', 'wt') as outfile:
    outfile.write(text)