In [34]:
import json
import os
import sqlite3
import struct

import pandas as pd

## Files are just 0s and 1s

In [3]:
# Create file 1 - ASCII http://www.asciitable.com/

architectstring = 'Your life is the sum of a remainder of an unbalanced equation inherent to the programming of the matrix.\n'

# The with-block closes (and flushes) the file even if the write fails.
# encoding='ascii' and newline='\n' pin the bytes written to disk: one byte per
# character, and no '\n' -> '\r\n' translation on Windows, so the file size
# matches the string length on every platform.
with open('file1.txt', 'w', encoding='ascii', newline='\n') as f:
    f.write(architectstring)

In [7]:
# Inspect file 1

# Read the whole file back as text; the with-block closes the handle as soon
# as the read is done instead of leaving it open for the rest of the session.
with open('file1.txt', encoding='ascii') as f:
    contents = f.read()
print("ASCII text of file 1:")
print(contents)


# Length (in characters) of the string that was written to the file.
print("\nString length of the original string: ", len(architectstring))



ASCII text of file 1:
Your life is the sum of a remainder of an unbalanced equation inherent to the programming of the matrix.


String length of the original string:  105


**Q1. How many bytes should the file contain?**

**Q2. What should the first 5 bytes of the file be?**

In [33]:
# Answers

# Q1: size of the file on disk, in bytes.
print(os.path.getsize('file1.txt'))

# Q2: read the first 5 bytes in binary mode; the with-block closes the file
# once they have been read.
with open('file1.txt', 'rb') as f:
    first_bytes = f.read(5)

# 'B' is an unsigned char, so each of the 5 bytes is shown as its integer code.
struct.unpack('BBBBB', first_bytes)

105


(89, 111, 117, 114, 32)

## There are many simple readable file formats

In [29]:
# json: javascript object notation

# NOTE(review): 'mean_abp' is a list holding ONE comma-separated string rather
# than four numbers -- confirm this is intended.
data = {
    'name': 'amy',
    'dob': '1990/01/01',
    'mean_abp': ['60, 70, 70, 50'],
}

# Serialise the dict to JSON text, then write that text to the file.
with open('file2.json', 'w') as f:
    f.write(json.dumps(data))

In [30]:
# Read the raw JSON text back; the with-block closes the file after reading.
with open('file2.json') as f:
    json_text = f.read()

# Last expression of the cell: shows the file contents as a plain string.
json_text

'{"name": "amy", "mean_abp": ["60, 70, 70, 50"], "dob": "1990/01/01"}'

In [35]:
# csv: comma separated values

# Load the CSV into a DataFrame and preview the first five rows.
csv_path = 'data/PhysionetChallenge2012-set-a.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,recordid,SAPS-I,SOFA,Length_of_stay,Survival,In-hospital_death,Age,Gender,Height,Weight,...,SysABP_last,TroponinI_last,TroponinT_last,WBC_last,Weight_last,pH_last,MechVentStartTime,MechVentDuration,MechVentLast8Hour,UrineOutputSum
0,132539,6,1,5,-1,0,54.0,0.0,,,...,,,,9.4,,,,,,
1,132540,16,8,8,-1,0,76.0,1.0,175.3,76.0,...,103.0,,,13.3,81.6,7.37,71.0,360.0,0.0,5.0
2,132541,21,11,19,-1,0,44.0,0.0,,56.7,...,126.0,,,6.2,56.7,7.47,617.0,2160.0,1.0,14.0
3,132543,7,1,9,575,0,68.0,1.0,180.3,84.6,...,,,,7.9,84.6,,,,,
4,132545,17,2,4,918,0,88.0,0.0,,,...,,,,4.8,,,,,,


## Relational Databases

https://docs.python.org/3/library/sqlite3.html

Relational table-like structure

In [101]:
# The data: three small tables that share the employee_id column.

# Employees
employee_rows = [
    (1001, 'Erwin', '2001-01-01', 'ceo'),
    (1002, 'Annie', '2002-02-02', 'accountant'),
    (1003, 'Reiner', '2003-03-03', 'marketer'),
    (1004, 'Mikasa', '2004-04-04', 'engineer'),
]
df_employees = pd.DataFrame(
    employee_rows,
    columns=['employee_id', 'name', 'hire_date', 'job_title'],
)

# Performance Reviews
review_rows = [
    (8, 1002, 1001, '2005-12-01'),
    (7, 1003, 1001, '2005-12-01'),
    (9, 1004, 1001, '2005-12-01'),
    (7, 1002, 1001, '2006-12-01'),
    (9, 1003, 1001, '2006-12-01'),
    (10, 1004, 1001, '2006-12-01'),
]
df_reviews = pd.DataFrame(
    review_rows,
    columns=['rating', 'employee_id', 'reviewer_id', 'review_date'],
)

# Bonuses
bonus_rows = [
    (10000, 1002, '2005-12-01'),
    (20000, 1004, '2005-12-01'),
    (20000, 1003, '2006-12-01'),
    (30000, 1004, '2006-12-01'),
]
df_bonuses = pd.DataFrame(
    bonus_rows,
    columns=['amount', 'employee_id', 'date'],
)

In [102]:
# Render the three tables one after another with IPython's rich display.
display(df_employees, df_reviews, df_bonuses)

Unnamed: 0,employee_id,name,hire_date,job_title
0,1001,Erwin,2001-01-01,ceo
1,1002,Annie,2002-02-02,accountant
2,1003,Reiner,2003-03-03,marketer
3,1004,Mikasa,2004-04-04,engineer


Unnamed: 0,rating,employee_id,reviewer_id,review_date
0,8,1002,1001,2005-12-01
1,7,1003,1001,2005-12-01
2,9,1004,1001,2005-12-01
3,7,1002,1001,2006-12-01
4,9,1003,1001,2006-12-01
5,10,1004,1001,2006-12-01


Unnamed: 0,amount,employee_id,date
0,10000,1002,2005-12-01
1,20000,1004,2005-12-01
2,20000,1003,2006-12-01
3,30000,1004,2006-12-01


In [106]:
# The connection object represents the database.
# connect() creates the file 'staff.db' if it does not exist yet.
conn = sqlite3.connect('staff.db')

try:
    # Used as a context manager, the connection commits the open transaction
    # when the block succeeds and rolls it back if an exception is raised.
    with conn:
        c = conn.cursor()

        # Drop any tables left over from a previous run so this cell can be
        # re-executed without "table ... already exists" errors.
        c.execute('DROP TABLE IF EXISTS employees;')
        c.execute('DROP TABLE IF EXISTS reviews;')
        c.execute('DROP TABLE IF EXISTS bonuses;')

        # Create tables. Column names are plain identifiers (single quotes
        # denote string literals in SQL); "date" is double-quoted because it
        # is also the name of a built-in function.
        c.execute('''CREATE TABLE employees
                     (employee_id integer primary key, name text, hire_date date, job_title text);''')
        c.execute('''CREATE TABLE reviews
                     (rating integer, employee_id integer, reviewer_id integer, review_date date);''')
        c.execute('''CREATE TABLE bonuses
                     (amount integer, employee_id integer, "date" date);''')

        # Insert data: one parameter tuple per DataFrame row; the ? placeholders
        # let sqlite3 bind the values instead of building SQL strings by hand.
        c.executemany("INSERT INTO employees VALUES (?, ?, ?, ?)", df_employees.values.tolist())
        c.executemany("INSERT INTO reviews VALUES (?, ?, ?, ?)", df_reviews.values.tolist())
        c.executemany("INSERT INTO bonuses VALUES (?, ?, ?)", df_bonuses.values.tolist())
finally:
    # Always release the database file, even if a statement above failed.
    # Changes were already committed (or rolled back) by the with-block.
    conn.close()

In [110]:
# Open the database file again and grab a cursor for running queries.
database_file = 'staff.db'
conn = sqlite3.connect(database_file)
c = conn.cursor()

In [127]:
# Employees whose hire_date sorts before 2003-04-04; fetchall() collects every
# matching row into a list of tuples.
c.execute("select * from employees where hire_date < date('2003-04-04');")
result = c.fetchall()
result

[(1001, 'Erwin', '2001-01-01', 'ceo'),
 (1002, 'Annie', '2002-02-02', 'accountant'),
 (1003, 'Reiner', '2003-03-03', 'marketer')]

In [None]:
# Bonuses paid before 2006. The filter is on the bonus date column: comparing
# the integer employee_id with the text produced by date(...) is not a
# meaningful condition (SQLite sorts every integer before any text value, so
# it would match all rows).
# NOTE(review): the cutoff date is an assumption -- adjust to the intended one.
result = c.execute("select * from bonuses where bonuses.date < date('2006-01-01');").fetchall()
result

Q. Design the relations (tables and columns) to hold the following data for patients:
- Name
- Gender
- Postal code
- DOB
- Patient ID
- Visit datetime
- Visit final bill
- Blood Sugar 
- Creatinine
- BNP
- Sodium
- Potassium

Hint: Add more columns/variables if you need to make things more efficient.