# PostgreSQL

In [None]:
from decimal import Decimal

import postgresql

# Open a connection from a "pq://" locator (the credentials, host, port and
# database here are placeholders). `db` is reused by later cells, so it is
# intentionally left open.
db = postgresql.open('pq://user:password@host:port/database')
db.execute("CREATE TABLE emp (emp_first_name text, emp_last_name text, emp_salary numeric)")

# Prepared statement; $1..$3 are bound positionally on every call.
make_emp = db.prepare("INSERT INTO emp VALUES ($1, $2, $3)")

# emp_salary is a numeric column, so pass Decimal values: a string with a
# thousands separator such as "75,322" is not valid numeric input.
make_emp("John", "Doe", Decimal("75322"))

# Insert the remaining rows inside a single db.xact() block.
with db.xact():
    make_emp("Jane", "Doe", Decimal("75322"))
    make_emp("Edward", "Johnson", Decimal("82744"))

In [None]:
# Add a NOT NULL varchar(50) column named "mycolumn" to emp, with the
# default value 'whatever'.
db.execute(
    "ALTER TABLE emp "
    "ADD COLUMN mycolumn character varying(50) "
    "DEFAULT 'whatever' NOT NULL;"
)
# Check alter performance
# Check write/read performance
# Check query performance
# It is faster to create a new table from scratch than to update every single row. Sequential writes are faster than sparse updates and you don’t get dead rows at the end.
# Table constraints and indexes heavily delay every write. If possible, you should drop all the indexes, triggers and foreign keys while the update runs and recreate them at the end.
# Adding a nullable column without a default value is a cheap operation. Writing the actual data of the column is the expensive part.
# Data stored in TOAST is not rewritten when the row is updated, as long as the TOASTed value itself is unchanged.
# Converting between some data types does not require a full table rewrite since Postgres 9.2. Ex: conversion from VARCHAR(32) to VARCHAR(64).

# Hive! 

In [None]:
import sys
 
from hive import ThriftHive
from hive.ttypes import HiveServerException
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
 
# Connect to a Hive Thrift server on localhost:10000, create and load table
# "r", then read it back twice: once row by row and once in a single call.
# Written to parse on both Python 2.6+ and Python 3.
try:
    transport = TSocket.TSocket('localhost', 10000)
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    client = ThriftHive.Client(protocol)
    transport.open()

    try:
        client.execute("CREATE TABLE r(a STRING, b INT, c DOUBLE)")
        # HiveQL's load statement is LOAD DATA, not LOAD TABLE.
        client.execute("LOAD DATA LOCAL INPATH '/path' INTO TABLE r")

        # Stream the result set one row at a time until fetchOne() yields None.
        client.execute("SELECT * FROM r")
        while True:
            row = client.fetchOne()
            if row is None:
                break
            print(row)

        # Re-run the query and fetch every row in one call.
        client.execute("SELECT * FROM r")
        print(client.fetchAll())
    finally:
        # Always release the connection, even if one of the queries fails.
        transport.close()

except Thrift.TException as tx:
    print('%s' % (tx.message))

# SQLite

In [2]:
# SQLite works great as the database engine for most low to medium traffic websites (which is to say, most websites). 
# The amount of web traffic that SQLite can handle depends on how heavily the website uses its database. 
# Generally speaking, any site that gets fewer than 100K hits/day should work fine with SQLite. 
# The 100K hits/day figure is a conservative estimate, not a hard upper bound. SQLite has been demonstrated to work
# with 10 times that amount of traffic.

# HDF5

# Apache Accumulo	

# Apache Parquet	

Decent Python support.

Apache Parquet is a columnar storage format available to any project in the Hadoop ecosystem, regardless of the choice of data processing framework, data model or programming language.



# EventStore

An open-source, functional database with support for Complex Event Processing. It provides a persistence engine for applications using event sourcing, or for storing time-series data. The Event Store server is written in C# and C++ and runs on Mono or the .NET CLR, on Linux or Windows. Applications using Event Store can be written in JavaScript. Event sourcing (ES) is a way of persisting your application's state by storing the history of events that determines the current state of your application.

# Akiban Server	
Akiban Server is an open source database that brings document stores and relational databases together. Developers get powerful document access alongside surprisingly powerful SQL.	

# SenseiDB	
Open-source, distributed, realtime, semi-structured database. Some Features: Full-text search, Fast realtime updates, Structured and faceted search, BQL: SQL-like query language, Fast key-value lookup, High performance under concurrent heavy update and query volumes, Hadoop integration	

# InfluxDB
InfluxDB is an open source distributed time series database with no external dependencies. It's useful for recording metrics, events, and performing analytics. It has a built-in HTTP API so you don't have to write any server side code to get up and running. InfluxDB is designed to be scalable, simple to install and manage, and fast to get data in and out. It aims to answer queries in real-time. That means every data point is indexed as it comes in and is immediately available in queries that should return under 100ms.	