___

<a href='https://github.com/sonlinux/'><img src='../sonlinux@devsbranch.png'/></a>
___
<center><em>Coursework  delivered by: Alison Mukoma</em></center>
<center><em>Copyright: Evelyn Hone College cc DevsBranch.</em></center>

# Sqlite3 Database with python 
(Working with other databases will be demonstrated in a separate class and shared in a separate notebook.)
## Before We Start: Importing the Libraries and Packages and Checking the Versions

A Quick Introduction to SQLite with Python

- **First**, connect to the database using the database library's `connect` method. 
- **Second**, get a `cursor` which will let us execute SQL commands
- **Third**, We can now execute any SQL commands that we want in the database using the cursor's `execute` method. Querying the database simply involves writing the appropriate SQL and placing it inside a string in the `execute` method call.
- **Fourth**, if you saved the cursor in a variable, close it and then close the database connection as well.


In [None]:
# Modules used throughout this notebook: sqlite3 (aliased to sql3) for the
# database access and sys for the sys.exit() calls in the error handlers.
import sqlite3 as sql3
import sys

# Open (or create) the database file; the cells below reuse this connection.
db = sql3.connect('data.db')

# Used as a context manager, the connection wraps the statements in a
# transaction (commit on success, rollback on error); it does not close it.
with db:
    c = db.cursor()
    c.execute('SELECT SQLITE_VERSION()')
    data = c.fetchone()  # one-element row holding the version string
    print("SQLite version: %s" % data)

In [None]:
# Take a cursor on the open connection; the following cells run their SQL through it.
c = db.cursor()

In [None]:
# Drop any table left over from an earlier run first: data.db persists on
# disk and CREATE TABLE fails if the table already exists.
c.execute('DROP TABLE IF EXISTS test')
c.execute('CREATE TABLE test (i INTEGER, j TEXT)')

In [None]:
# Values to store; they are bound to the "?" placeholders by position
# instead of being formatted into the SQL text.
n, m = 5, 'some text'

c.execute('INSERT INTO test(i,j) VALUES (?,?)', (n, m))

In [None]:
# Same statement as before with a different pair of bound values.
n, m = 100, 'more text'

c.execute('INSERT INTO test(i,j) VALUES (?,?)', (n, m))

In [None]:
# Run the query; the rows are retrieved from the cursor in the next cell.
c.execute('SELECT * FROM test')

In [None]:
# fetchall() returns every remaining row of the last query as a list.
results = c.fetchall()
print(results)

In [None]:
# Each row unpacks into its two columns.
for i, j in results:
    print("%s %s" % (i, j))

In [None]:
# execute() returns the cursor itself, so the fetch can be chained onto it.
print(c.execute('SELECT * FROM test WHERE i=5').fetchall())

In [None]:
# Double-quoting the Python string avoids escaping the SQL quotes, and the
# space before WHERE keeps the statement readable.
c.execute("UPDATE test SET j='yet more test' WHERE i=5")

# Show the table after the update.
c.execute('SELECT * FROM test')
print(c.fetchall())

In [None]:
# Remove every row whose i column equals 5.
c.execute('DELETE FROM test WHERE i=5')

In [None]:
# Show what is left in the table.
print(c.execute('SELECT * FROM test').fetchall())

### 1. Inserting and Querying Data

In [None]:
# To start from an empty database file, delete it first: os.unlink('test.db')
con = sql3.connect('test.db')

# Run in order: rebuild the Cars table, then add one row per car.
setup_statements = (
    "DROP TABLE IF EXISTS Cars",
    "CREATE TABLE Cars(Id INT, Name TEXT, Price INT)",
    "INSERT INTO Cars VALUES(1,'Audi',52642)",
    "INSERT INTO Cars VALUES(2,'Mercedes',57127)",
    "INSERT INTO Cars VALUES(3,'Skoda',9000)",
    "INSERT INTO Cars VALUES(4,'Volvo',29000)",
    "INSERT INTO Cars VALUES(5,'Bentley',350000)",
    "INSERT INTO Cars VALUES(6,'Citroen',21000)",
    "INSERT INTO Cars VALUES(7,'Hummer',41400)",
    "INSERT INTO Cars VALUES(8,'Volkswagen',21600)",
)

# Used as a context manager, the connection commits when the block
# succeeds and rolls back if a statement raises.
with con:
    cur = con.cursor()
    for statement in setup_statements:
        cur.execute(statement)

In Python, we can use the `fetchall()` method to fetch all the records in the table:

In [None]:
con = sql3.connect('test.db')
cur = con.cursor()

# execute() returns the cursor, so the whole result can be fetched in one expression.
rows = cur.execute('SELECT * FROM Cars').fetchall()

for row in rows:
    print(row)

# Shorter alternative: print the list returned by cur.fetchall() directly.

Alternatively, to get the results into Python we can use the `fetchone()` method to fetch one record at a time (it returns `None` when there are no more records to fetch, so you know when to stop).

In [None]:
con = sql3.connect('test.db')
cur = con.cursor()

# Fetch the first row straight from the executed query, then keep asking
# for the next one until fetchone() has nothing left to return.
record = cur.execute('SELECT * FROM Cars').fetchone()
while record:
    print(record)
    record = cur.fetchone()
    
    

Another possibility ...

In [None]:
con = sql3.connect('test.db')

with con:
    cur = con.cursor()
    rows = cur.execute("SELECT * FROM Cars").fetchall()

    # Pick the three columns out of each row by position.
    for row in rows:
        print("%s %s %s" % (row[0], row[1], row[2]))
        


A technically better version of the previous code to retrieve the data is:

In [None]:
con = sql3.connect('test.db')

with con:
    cur = con.cursor()
    cur.execute("SELECT * FROM Cars")

    # Pull one row at a time instead of loading the whole result set with
    # fetchall(); fetchone() returns None once the rows are exhausted.
    while True:
        row = cur.fetchone()

        # None is a singleton, so test identity rather than equality.
        if row is None:
            break

        print("%s %s %s" % (row[0], row[1], row[2]))

We are going to create the same table. This time using the convenience `executemany()` method.

In [None]:
# The same eight rows as in the previous examples, one tuple per car:
# (Id, Name, Price).
cars = (
    (1, 'Audi', 52642),
    (2, 'Mercedes', 57127),
    (3, 'Skoda', 9000),
    (4, 'Volvo', 29000),
    (5, 'Bentley', 350000),
    (6, 'Citroen', 21000),
    (7, 'Hummer', 41400),
    (8, 'Volkswagen', 21600),
)

con = sql3.connect('test.db')

with con:
    cur = con.cursor()

    # Drop the Cars table if it exists, then (re)create it.
    cur.execute("DROP TABLE IF EXISTS Cars")
    cur.execute("CREATE TABLE Cars(Id INT, Name TEXT, Price INT)")

    # executemany() runs the INSERT once per tuple in `cars`, binding the
    # three values of each tuple to the three placeholders.
    cur.executemany("INSERT INTO Cars VALUES(?, ?, ?)", cars)

Another way to create our Cars table: We commit the changes manually and provide our own **error handling**.
In the script below we re-create the Cars table using the `executescript()` method

In [None]:
import sys

# Bound before the try so the except/finally clauses can tell whether
# connect() got far enough to produce a connection.
con = None

try:
    con = sql3.connect('test.db')
    cur = con.cursor()

    # executescript() runs several ';'-separated SQL statements in one call.
    cur.executescript("""
        DROP TABLE IF EXISTS Cars;
        CREATE TABLE Cars(Id INT, Name TEXT, Price INT);
        INSERT INTO Cars VALUES(1,'Audi',52642);
        INSERT INTO Cars VALUES(2,'Mercedes',57127);
        INSERT INTO Cars VALUES(3,'Skoda',9000);
        INSERT INTO Cars VALUES(4,'Volvo',29000);
        INSERT INTO Cars VALUES(5,'Bentley',350000);
        INSERT INTO Cars VALUES(6,'Citroen',21000);
        INSERT INTO Cars VALUES(7,'Hummer',41400);
        INSERT INTO Cars VALUES(8,'Volkswagen',21600);
        """)

    con.commit()

except sql3.Error as e:
    # Undo whatever is still pending, report the problem and stop.
    if con is not None:
        con.rollback()

    print("Error %s:" % e.args[0])
    sys.exit(1)

finally:
    # Release the connection whether or not the script succeeded.
    if con is not None:
        con.close()

## 2. Parameterized queries

When we use parameterized queries, we use placeholders instead of directly writing the values into the statements. Parameterized queries increase security and performance.

The Python **SQLite3** module supports two types of placeholders. Question marks and named placeholders.

In [None]:
uId = 1
uPrice = 62300

con = sql3.connect('test.db')

with con:
    cur = con.cursor()

    # The "?" placeholders are filled from the tuple, left to right.
    update_sql = "UPDATE Cars SET Price=? WHERE Id=?"
    cur.execute(update_sql, (uPrice, uId))
    con.commit()

    # rowcount reports how many rows the UPDATE touched.
    print("Number of rows updated: %d" % cur.rowcount)

The second example uses parameterized statements with named placeholders:

In [None]:
uId = 4

con = sql3.connect('test.db')

with con:
    cur = con.cursor()

    # Named placeholders (":Id") are filled from the dictionary by key.
    cur.execute("SELECT Name, Price FROM Cars WHERE Id=:Id", {"Id": uId})

    # A SELECT changes nothing, so no commit is needed before fetching.
    row = cur.fetchone()

    # fetchone() returns None when no row matches the requested Id.
    if row is None:
        print("No car with Id %s" % uId)
    else:
        print("%s %s" % (row[0], row[1]))

## 3. Metadata

Metadata is information about the data in the database. Metadata in SQLite contains information about the tables and columns in which we store data. The number of rows affected by an SQL statement is metadata. The number of rows and columns returned in a result set is metadata as well.

Metadata in SQLite can be obtained using the PRAGMA command. SQLite objects may have attributes, which are metadata. Finally, we can also obtain specific metadata by querying the SQLite system table `sqlite_master`.

In [None]:
con = sql3.connect('test.db')

with con:
    cur = con.cursor()

    # PRAGMA table_info returns one row per column of the table.
    data = cur.execute('PRAGMA table_info(Cars)').fetchall()

    # Print the first three fields of each of those rows.
    for d in data:
        print("%s %s %s" % (d[0], d[1], d[2]))

Next we will print all rows from the Cars table with their column names.

In [None]:
con = sql3.connect('test.db')

with con:
    cur = con.cursor()
    cur.execute('SELECT * FROM Cars')

    # cur.description holds one entry per result column; the first item
    # of each entry is the column name.
    col_names = [entry[0] for entry in cur.description]
    rows = cur.fetchall()

    # Header and data rows share one layout so the columns line up.
    layout = "%-5s %-15s %s"
    print(layout % (col_names[0], col_names[1], col_names[2]))

    for row in rows:
        print(layout % row)

Another example related to the metadata, we list all tables in the test.db database.

In [None]:
con = sql3.connect('test.db')

with con:
    cur = con.cursor()

    # sqlite_master has one row per schema object; keep only the tables.
    rows = cur.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()

    # Every row carries a single value: the table name.
    for row in rows:
        print(row[0])

## 4. Export and Import of Data

We can dump data in an SQL format to create a simple backup of our database tables

In [None]:
cars = (
    (1, 'Audi', 52643),
    (2, 'Mercedes', 57642),
    (3, 'Skoda', 9000),
    (4, 'Volvo', 29000),
    (5, 'Bentley', 350000),
    (6, 'Hummer', 41400),
    (7, 'Volkswagen', 21600)
)


def writeData(data):
    """Write *data* to the file cars.sql, replacing its previous contents."""
    with open('cars.sql', 'w') as f:
        f.write(data)


# ':memory:' asks SQLite for a temporary database held in memory.
con = sql3.connect(':memory:')

with con:
    cur = con.cursor()

    # Build the Cars table, load the rows, then delete every car whose
    # Price is below 30000.
    cur.execute("DROP TABLE IF EXISTS Cars")
    cur.execute("CREATE TABLE Cars(Id INT, Name TEXT, Price INT)")
    cur.executemany("INSERT INTO Cars VALUES(?, ?, ?)", cars)
    cur.execute("DELETE FROM Cars WHERE Price < 30000")

    # con.iterdump() yields the database as SQL text, one string at a
    # time; the string method join() puts a newline between them. The
    # resulting text is handed to writeData(), which saves it to cars.sql.
    dump_lines = con.iterdump()
    data = '\n'.join(dump_lines)

    writeData(data)

In [None]:
# Show the SQL text that was written to cars.sql.
print(data)

Now we are going to perform a reverse operation. We will import the dumped table back into memory.

In [None]:
def readData():
    """Return the complete text of the file cars.sql."""
    with open('cars.sql', 'r') as f:
        return f.read()
        

con = sql3.connect(':memory:')

with con:
    cur = con.cursor()

    # Replay the dumped SQL statements to rebuild the table in the new
    # in-memory database.
    sql_query = readData()
    cur.executescript(sql_query)

    rows = cur.execute("SELECT * FROM Cars").fetchall()

    for row in rows:
        print(row)

## 5. Transactions

A transaction is an atomic unit of database operations against the data in one or more databases. The effects of all the **SQL** statements in a transaction can be either all committed to the database or all rolled back.

In **SQLite**, any command other than the `SELECT` will start an implicit transaction. Also, within a transaction a command like `CREATE TABLE` ..., `VACUUM`, `PRAGMA`, will commit previous changes before executing.

Manual transactions are started with the `BEGIN TRANSACTION` statement and finished with the `COMMIT` or `ROLLBACK` statements.

**SQLite** supports three non-standard transaction levels. `DEFERRED`, `IMMEDIATE` and `EXCLUSIVE`. SQLite Python module also supports an autocommit mode, where all changes to the tables are immediately effective.

In [None]:
# We create a Friends table and try to fill it with data. However, the
# rows are not committed, because the commit() call is commented out.
# If we uncomment that line, the rows will be written to the table.

import sys

# Bound before the try so the except/finally clauses can tell whether
# connect() got far enough to produce a connection.
con = None

try:
    con = sql3.connect('test.db')
    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS Friends")
    cur.execute("CREATE TABLE Friends(Id INTEGER PRIMARY KEY, Name TEXT)")
    cur.execute("INSERT INTO Friends(Name) VALUES ('Tom')")
    cur.execute("INSERT INTO Friends(Name) VALUES ('Rebecca')")
    cur.execute("INSERT INTO Friends(Name) VALUES ('Jim')")
    cur.execute("INSERT INTO Friends(Name) VALUES ('Robert')")

#---> con.commit()

except sql3.Error as e:
    # Undo whatever is still pending, report the problem and stop.
    if con is not None:
        con.rollback()

    print("Error %s:" % e.args[0])
    sys.exit(1)

finally:
    # Closing without a commit discards the pending INSERTs.
    if con is not None:
        con.close()

## Code Example 04: A Database of Movies

The `pandas.io.sql` module provides a collection of query wrappers to both facilitate data retrieval and to reduce dependency on DB-specific API. These wrappers only support the Python database adapters which respect the Python DB-API.

In the following example, let us use a list of the 10,000 movies made since 1950 with the most IMDb user ratings. Download the data at http://bit.ly/cs109_imdb and save it as a text file named `imdb_top_10000.txt` in your working directory.

In [None]:
import pandas as pd

# The file has no header row, so the column names are supplied here.
names = ['imdbID', 'title', 'year', 'score', 'votes', 'runtime', 'genres']
movies = pd.read_csv('imdb_top_10000.txt', delimiter='\t', names=names).dropna()

print(movies.head())

# Keep only the number in front of the first space of each runtime string.
clean_runtime = [float(r.split(' ')[0]) for r in movies.runtime]
movies['runtime'] = clean_runtime

# Determine the unique genres: each movie lists its genres separated by '|'.
genres = set()

for m in movies.genres:
    genres.update(m.split('|'))

genres = sorted(genres)

# Make a boolean column for each genre.
for genre in genres:
    movies[genre] = [genre in movie.split('|') for movie in movies.genres]

# Drop the last 7 characters of every title
# (presumably a " (YYYY)" year suffix -- confirm against the data file).
movies['title'] = [t[0:-7] for t in movies.title]

print(movies.head())  # head() shows the first 5 rows by default

In [None]:

# pandas.io.sql provides the functions that move DataFrames into and out of
# an SQL database.
from pandas.io import sql


# Create your connection.
cnx = sql3.connect('movies.db')

# Remove a table left over from an earlier run so this cell can be
# re-executed (same approach as the DROP TABLE IF EXISTS used for prices).
cnx.cursor().execute("DROP TABLE IF EXISTS movies")

# Load the DataFrame into SQLite3.
sql.write_frame(movies, name='movies', con=cnx)

# Retrieve data from SQLite3. The whole table would be:
# p1 = sql.read_frame('SELECT * FROM movies', cnx)

p3 = sql.read_frame('SELECT * FROM movies WHERE year=2001', cnx)

p3.shape

In [None]:
cnx = sql3.connect('movies.db')

# Use the connection opened just above (cnx). The name `con` refers to a
# connection to test.db that an earlier cell has already closed.
with cnx:
    cur = cnx.cursor()

    # One row per column of the movies table.
    cur.execute('PRAGMA table_info(Movies)')

    data = cur.fetchall()

    # Print the first three fields of each of those rows.
    for d in data:
        print("%s %s %s" % (d[0], d[1], d[2]))

## Code Example 05: Time Series Storage in a Database

Functions from `pandas.io.data` extract data from various Internet sources into a DataFrame. Currently the following sources are supported:
   
   - **Yahoo! Finance** with `web.DataReader(ticker,'yahoo', start, end)`
   - **Google Finance** with `web.DataReader(ticker,'google', start, end)`
   - **St. Louis FED (FRED)** with `web.DataReader('GDP', 'fred', start, end)`
   - **Kenneth French’s** data library with `web.DataReader("5_Industry_Portfolios", "famafrench")`

It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.

In [None]:
# Download daily quotes from Yahoo! Finance.
# NOTE(review): pandas.io.data appears to have been removed from later pandas
# releases in favour of the separate pandas-datareader package -- confirm
# against the pandas version in use.
import pandas.io.data as web

start = pd.datetime(2013, 1, 1)
end = pd.datetime(2013, 12, 1)

# Single-ticker variant: f = web.DataReader("F", 'yahoo', start, end)

tickers = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC']

# One DataReader result per ticker symbol, keyed by the symbol.
all_data = {}

for ticker in tickers:
    all_data[ticker] = web.DataReader(ticker, 'yahoo', start, end)

# create a data frame
# prices = pd.DataFrame({tic: data['Adj Close'] for tic, data in all_data.iteritems()}).dropna()


In [None]:
# Build one frame with a column of 'Adj Close' values per ticker.
prices = pd.DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})

# DataFrame.index is an attribute, not a method, so it must not be called.
prices['Dates'] = prices.index

print(prices.describe())
print('=' * 100)
print(prices.head())

In [None]:
# Notice that writing your DataFrame into a database works only with SQLite.
# Moreover, the index is currently dropped when the frame is written, so we
# first have to copy it into an ordinary column.

prices['Dates']=prices.index[:]
prices.head()

In [None]:
# pandas.io.sql provides the functions that turn DataFrames into database
# tables and back.

from pandas.io import sql

# Create your connection.
cnx = sql3.connect('prices.db')

# Start from a clean slate: remove the table if an earlier run created it.
# execute() returns the cursor it was called on, so `cur` is that cursor.
cur = cnx.cursor().execute("DROP TABLE IF EXISTS prices")

# Load the DataFrame into SQLite3.
sql.write_frame(prices, name='prices', con=cnx)
       
# Retriving the data from SQLite3
# apple = sql.read_frame("SELECT AAPL FROM prices", cnx)


In [None]:
cnx = sql3.connect('prices.db')

with cnx:
    cur = cnx.cursor()

    # PRAGMA table_info returns one row per column of the prices table.
    table = cur.execute('PRAGMA table_info(prices)').fetchall()

    # Print the first two fields of each of those rows.
    for d in table:
        print("%s %s" % (d[0], d[1]))

In [None]:
# Retrieving the data from SQLite3

from pandas.io import sql
from pandas.lib import Timestamp

cnx = sql3.connect('prices.db')

# Read back the whole table, and separately the AAPL column with its dates.
allp = sql.read_frame("SELECT * FROM prices", cnx)
apple2 = sql.read_frame("SELECT AAPL,Dates FROM prices", cnx)

# Turn the stored date values into Timestamps, then index the frame by them.
apple2['Dates'] = apple2['Dates'].apply(Timestamp)
apple = apple2.set_index('Dates')

start = pd.datetime(2013, 1, 1)
end = pd.datetime(2013, 12, 1)

# Business-day range over the same period; not used further in this cell.
rng = pd.bdate_range(start, end)

print(allp.describe())
apple.head(15)

In [None]:
# Show the first rows of the frame read back from the prices table.
allp.head()

In [None]:
# `start` and `end` are already datetime objects (set in the cells above), so
# no conversion is needed; date_range lives in the pandas namespace.
rng = pd.date_range(start, end)


In [None]:
# Read the AAPL column through a plain cursor instead of pandas.
cur = cnx.cursor()    
cur.execute("SELECT AAPL FROM prices")
# NOTE(review): the fetched rows are not assigned or printed, and this is not
# the last expression of the cell, so they are probably never shown -- confirm.
cur.fetchall()
# Drop the name so the cursor is no longer referenced from this namespace.
del cur

In [None]:
# IPython magic command (not plain Python): lists the variables defined in the interactive namespace.
whos