# Storing and Retrieving Data

In [21]:
nobel_winners = [
    {'category': 'Physics',
    'name': 'Albert Einstein',
    'nationality': 'Swiss',
    'sex': 'male',
    'year': 1921},
    {'category': 'Physics',
    'name': 'Paul Dirac',
    'nationality': 'British',
    'sex': 'male',
    'year': 1933},
    {'category': 'Chemistry',
    'name': 'Marie Curie',
    'nationality': 'Polish',
    'sex': 'female',
    'year': 1911},
]

### Working with files manually (without csv module)

In [22]:
cols = nobel_winners[0].keys()
cols.sort()
with open('data/nobel_winners.csv', 'w') as f:
    f.write(','.join(cols) + '\n')
    for o in nobel_winners:
        row = [str(o[col]) for col in cols]
        f.write(','.join(row) + '\n')

### Now with csv

In [23]:
import csv

#Write
with open('data/nobel_winners.csv', 'wb') as f:
    fieldnames = nobel_winners[0].keys()
    fieldnames.sort()
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    for w in nobel_winners:
        writer.writerow(w)
        
#Read using csv.reader which returns strings
with open('data/nobel_winners.csv') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
        
#Read in more convenient python dict form
with open('data/nobel_winners.csv') as f:
    reader = csv.DictReader(f)
    nobel_winners = list(reader)
for w in nobel_winners:
    w['year'] = int(w['year']) #convert string to int
print(nobel_winners)



['category', 'name', 'nationality', 'sex', 'year']
['Physics', 'Albert Einstein', 'Swiss', 'male', '1921']
['Physics', 'Paul Dirac', 'British', 'male', '1933']
['Chemistry', 'Marie Curie', 'Polish', 'female', '1911']
[{'category': 'Physics', 'nationality': 'Swiss', 'year': 1921, 'name': 'Albert Einstein', 'sex': 'male'}, {'category': 'Physics', 'nationality': 'British', 'year': 1933, 'name': 'Paul Dirac', 'sex': 'male'}, {'category': 'Chemistry', 'nationality': 'Polish', 'year': 1911, 'name': 'Marie Curie', 'sex': 'female'}]


### Now save in JSON instead of csv

In [24]:
import json

with open('data/nobel_winners.json', 'w') as f:
    json.dump(nobel_winners, f)
    
with open('data/nobel_winners.json') as f:
    nobel_winners = json.load(f)
    #print(f.read()) #note that we don't need to convert year to ints
print(nobel_winners)

[{u'category': u'Physics', u'nationality': u'Swiss', u'sex': u'male', u'name': u'Albert Einstein', u'year': 1921}, {u'category': u'Physics', u'nationality': u'British', u'sex': u'male', u'name': u'Paul Dirac', u'year': 1933}, {u'category': u'Chemistry', u'nationality': u'Polish', u'sex': u'female', u'name': u'Marie Curie', u'year': 1911}]


### Dealing with Dates and Times


In [25]:
from datetime import datetime
json.dumps(datetime.now()) # will trigger an error

TypeError: datetime.datetime(2017, 8, 22, 5, 41, 28, 626179) is not JSON serializable

In [26]:
import datetime
from dateutil import parser
import json

class JSONDateTimeEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (datetime.date, datetime.datetime)):
            return obj.isoformat()
        else:
            return json.JSONEncoder.default(self, obj)
        
def mydumps(obj):
    return json.dumps(obj, cls=JSONDateTimeEncoder)
now_str = mydumps({'time': datetime.datetime.now()})
now_str


'{"time": "2017-08-22T05:41:40.151104"}'

In [27]:
from datetime import datetime
import dateutil

d = datetime.now()
d_iso = d.isoformat()  # convert to text
print(d_iso)

d = dateutil.parser.parse(d_iso) # back to a datetime object
print(d)

2017-08-22T05:41:42.691117
2017-08-22 05:41:42.691117


# Databases

## SQL Databases (all using SQLAlchemy as go-between interface)

Using SQLAlchemy -- first with SQLite

Begin by creating the engine.

In [28]:
import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Enum

engine = sqlalchemy.create_engine('sqlite:///data/nobel_prize.db', echo=True)

#Now define the database tables using the new declarative style (recommended)

Base = declarative_base()

# now use Base to define the tables

class Winner(Base):
    __tablename__ = 'winners'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    category = Column(String)
    year = Column(Integer)
    nationality = Column(String)
    sex = Column(Enum('male', 'female'))
    
    def __repr__(self):
        return("<Winner(name={}, category={}, year={})>".format(self.name,
                                    self.category, self.year))
# Having defined Base subclass, we supply is metadata.create_all method
# with our database engine to create our database.  Because we had set
# echo=True, it will tell us the SQL instructions it generates.
Base.metadata.create_all(engine)

2017-08-22 05:41:45,392 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-08-22 05:41:45,395 INFO sqlalchemy.engine.base.Engine ()
2017-08-22 05:41:45,397 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-08-22 05:41:45,399 INFO sqlalchemy.engine.base.Engine ()
2017-08-22 05:41:45,401 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("winners")
2017-08-22 05:41:45,402 INFO sqlalchemy.engine.base.Engine ()


In [30]:
# Now start adding winner instances to this new database
from sqlalchemy.orm import sessionmaker

Session = sessionmaker(bind=engine)
session = Session()

albert = Winner(**nobel_winners[0])
session.add(albert)
session.new #new is the set of items queued up this session
# these aren't committed to the DB until we call .commit
session.expunge(albert) #take this out of the queue
winner_rows = [Winner(**w) for w in nobel_winners]
session.add_all(winner_rows)
session.commit()

2017-08-22 05:44:16,524 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-08-22 05:44:16,527 INFO sqlalchemy.engine.base.Engine INSERT INTO winners (name, category, year, nationality, sex) VALUES (?, ?, ?, ?, ?)
2017-08-22 05:44:16,529 INFO sqlalchemy.engine.base.Engine (u'Albert Einstein', u'Physics', 1921, u'Swiss', u'male')
2017-08-22 05:44:16,532 INFO sqlalchemy.engine.base.Engine INSERT INTO winners (name, category, year, nationality, sex) VALUES (?, ?, ?, ?, ?)
2017-08-22 05:44:16,534 INFO sqlalchemy.engine.base.Engine (u'Paul Dirac', u'Physics', 1933, u'British', u'male')
2017-08-22 05:44:16,535 INFO sqlalchemy.engine.base.Engine INSERT INTO winners (name, category, year, nationality, sex) VALUES (?, ?, ?, ?, ?)
2017-08-22 05:44:16,537 INFO sqlalchemy.engine.base.Engine (u'Marie Curie', u'Chemistry', 1911, u'Polish', u'female')
2017-08-22 05:44:16,538 INFO sqlalchemy.engine.base.Engine COMMIT


### Querying the Database

In [32]:
session.query(Winner).count()  # output: 3

result = session.query(Winner).filter_by(nationality='Swiss') #filter_by uses keyword expressions
list(result)

result = session.query(Winner).filter(Winner.category == 'Physics', #filter uses SQL expressions
                                     Winner.nationality != 'Swiss')
list(result)

2017-08-22 05:49:20,496 INFO sqlalchemy.engine.base.Engine SELECT count(*) AS count_1 
FROM (SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners) AS anon_1
2017-08-22 05:49:20,499 INFO sqlalchemy.engine.base.Engine ()
2017-08-22 05:49:20,502 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners 
WHERE winners.nationality = ?
2017-08-22 05:49:20,504 INFO sqlalchemy.engine.base.Engine ('Swiss',)
2017-08-22 05:49:20,506 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.

[<Winner(name=Paul Dirac, category=Physics, year=1933)>]

In [33]:
session.query(Winner).get(3) # get the 3rd record

res = session.query(Winner).order_by('year')
list(res)

2017-08-22 05:51:01,113 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners 
WHERE winners.id = ?
2017-08-22 05:51:01,115 INFO sqlalchemy.engine.base.Engine (3,)
2017-08-22 05:51:01,120 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners ORDER BY winners.year
2017-08-22 05:51:01,121 INFO sqlalchemy.engine.base.Engine ()


[<Winner(name=Marie Curie, category=Chemistry, year=1911)>,
 <Winner(name=Albert Einstein, category=Physics, year=1921)>,
 <Winner(name=Paul Dirac, category=Physics, year=1933)>]

### Reconstruct our original python dict from out of the DB

In [34]:
def inst_to_dict(inst, delete_id=True):
    dat = {}
    for column in inst.__table__.columns: #access instance's table class to get list of column objects
        dat[column.name] = getattr(inst, column.name)
    if delete_id:
        dat.pop('id')
    return dat

winner_rows = session.query(Winner)
nobel_winners = [inst_to_dict(w) for w in winner_rows]
nobel_winners

2017-08-22 05:53:53,462 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners
2017-08-22 05:53:53,466 INFO sqlalchemy.engine.base.Engine ()


[{'category': u'Physics',
  'name': u'Albert Einstein',
  'nationality': u'Swiss',
  'sex': u'male',
  'year': 1921},
 {'category': u'Physics',
  'name': u'Paul Dirac',
  'nationality': u'British',
  'sex': u'male',
  'year': 1933},
 {'category': u'Chemistry',
  'name': u'Marie Curie',
  'nationality': u'Polish',
  'sex': u'female',
  'year': 1911}]

### Updating info in the DB

In [37]:
marie = session.query(Winner).get(3)
marie.nationality = 'French'
session.dirty #shows pending changes

session.commit()

# Deleting a result of query
session.query(Winner).filter_by(name='Albert Einstein',).delete()

# Drop the whole table
Winner.__table__.drop(engine)

2017-08-22 05:58:52,848 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-08-22 05:58:52,852 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners 
WHERE winners.id = ?
2017-08-22 05:58:52,855 INFO sqlalchemy.engine.base.Engine (3,)
2017-08-22 05:58:52,857 INFO sqlalchemy.engine.base.Engine COMMIT
2017-08-22 05:58:52,860 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-08-22 05:58:52,862 INFO sqlalchemy.engine.base.Engine SELECT winners.id AS winners_id, winners.name AS winners_name, winners.category AS winners_category, winners.year AS winners_year, winners.nationality AS winners_nationality, winners.sex AS winners_sex 
FROM winners 
WHERE winners.id = ?
2017-08-22 05:58:52,864 INFO sqlalchemy.engine.base.Engine (2,)
2017-08-22 05:58:52,866 INFO sqlalchemy.engine.base.Engine S

OperationalError: (sqlite3.OperationalError) database is locked [SQL: u'\nDROP TABLE winners']

## NoSQL Databases (MongoDB)