# RedPanda Example Usage

## 1. Create Database Model

Create a model representing a table in the DB

In [1]:
import redpanda

# Call form of print: identical output on Python 2 for a single argument,
# and valid syntax on Python 3 (the bare print statement is not).
print(redpanda.__version__)

0.1.8


In [2]:
import sqlalchemy
import sqlalchemy.ext.declarative
import redpanda.mixins

# Declare the SQLAlchemy declarative base. ORM models in this notebook
# (such as Widget) subclass it, and Base.metadata is later used to create
# their tables.
Base = sqlalchemy.ext.declarative.declarative_base()

# Declare our model
class Widget(redpanda.mixins.RedPandaMixin, Base):
    """ ORM model for one row of the "widgets" table.

    Combines the SQLAlchemy declarative Base with RedPandaMixin; the
    notebook later calls Widget.redpanda() and Widget.redparse(), which
    are presumably supplied by that mixin.
    """
    __tablename__ = 'widgets'
    id            = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
    timestamp     = sqlalchemy.Column(sqlalchemy.DateTime)
    name          = sqlalchemy.Column(sqlalchemy.String)
    kind          = sqlalchemy.Column(sqlalchemy.String)
    units         = sqlalchemy.Column(sqlalchemy.Integer)

    # Class-defined RedPanda read_sql() arguments
    # This allows us to forgo passing these into Widget.redpanda()
    __read_sql__ = {
        'index_col'   : ['timestamp'],
        'parse_dates' : ['timestamp'] }

    def __repr__(self):
        """ Return a one-line debug summary of this widget.

        Attributes that are still None (e.g. on a partially populated
        instance) are rendered as None instead of raising, so that simply
        displaying an object never fails.
        """
        # Only format the timestamp when there is one; None has no strftime
        timestamp = self.timestamp
        if timestamp is not None:
            timestamp = timestamp.strftime("%Y-%m-%d %H:%M:%S")

        # Keep the original integer (%d) rendering whenever units is set
        units = self.units
        if units is not None:
            units = "%d" % units

        return "<Widget id: %s timestamp: '%s' name: '%s' kind: '%s' units: %s>" % \
            (self.id, timestamp, self.name, self.kind, units)

## 2. Database Engine

Create an in-memory SQLite engine

In [3]:
# A SQLite URL with no file path yields an in-memory database, so the data
# exists only for the lifetime of this engine.
engine = sqlalchemy.create_engine('sqlite://')

## 3. Populate the Database

Fill the "widgets" table with some data

In [4]:
from datetime import datetime
import random
import random_words

def randdate(maxday=31):
    """ Generate a random datetime in 2015 with minute resolution.

    Args:
        maxday: Largest day-of-month that may be drawn. It is clamped to
            the length of the chosen month and to a minimum of 1, so any
            integer is accepted.

    Returns:
        A datetime.datetime whose seconds and microseconds are zero.
    """
    # Local import keeps this definition self-contained within the cell
    import calendar

    year = 2015
    # random.randint() includes BOTH endpoints, so the bounds below are the
    # exact valid ranges; no invalid values are drawn and no retry is needed.
    month = random.randint(1, 12)
    # monthrange() returns (weekday of first day, number of days in month)
    days_in_month = calendar.monthrange(year, month)[1]
    last_day = max(1, min(maxday, days_in_month))
    day    = random.randint(1, last_day)
    hour   = random.randint(0, 23)
    minute = random.randint(0, 59)
    return datetime(year, month, day, hour, minute)

def widgetgen():
    """ Lazily yield 75 random widgets: 25 rounds, one widget per kind. """
    words = random_words.RandomWords()
    for _round in range(25):
        for kind in ('fizzer', 'buzzer', 'bopper'):
            # Draw in the order name, timestamp, units so the sequence of
            # random calls is unchanged.
            word  = words.random_word()
            when  = randdate()
            count = random.randint(0, 100)
            yield Widget(timestamp=when, name=word, kind=kind, units=count)

# Set up our database
# Import the ORM subpackage explicitly: `import sqlalchemy` alone does not
# guarantee that `sqlalchemy.orm` is loaded, and relying on another package
# importing it as a side effect is fragile.
import sqlalchemy.orm

# Create the tables for every model registered on Base
Base.metadata.create_all(engine)
sessionmaker = sqlalchemy.orm.sessionmaker(bind=engine)
sessiongen   = sqlalchemy.orm.scoped_session(sessionmaker)
session      = sessiongen()
# Insert in chronological order so that ids increase with the timestamp
session.add_all(sorted(widgetgen(), key=lambda x: x.timestamp))
session.commit()

## 4. RedPanda Model-to-DataFrame

Use the `Widget.redpanda()` method to create a framable query. Note that because `Widget.__read_sql__` is defined on the class, the resulting DataFrame is indexed by `timestamp` without any extra arguments.

In [5]:
# Build the framable query, run it against the engine, and preview the rows
(
    Widget.redpanda()
    .frame(engine)
    .head()
)

Unnamed: 0_level_0,id,name,kind,units
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02 10:15:00,1,deviations,fizzer,33
2015-01-12 06:23:00,2,safeguard,fizzer,34
2015-01-17 07:43:00,3,railroad,bopper,45
2015-01-22 08:02:00,4,schoolhouses,buzzer,98
2015-01-23 23:21:00,5,diagnosis,fizzer,26


### Adding constraints

Limit results to November 2015

In [6]:
# Restrict the query to timestamps from the first to the last second of
# November 2015 before framing it.
(
    Widget.redpanda()
    .filter(Widget.timestamp.between('2015-11-01', '2015-11-30 23:59:59'))
    .frame(engine)
)

Unnamed: 0_level_0,id,name,kind,units
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-11-09 01:44:00,63,kilograms,buzzer,85
2015-11-09 23:50:00,64,bolts,fizzer,67
2015-11-20 04:27:00,65,ovens,bopper,39


### Aggregation

Pivot the table into the total units per month (rows) and widget kind (columns)

In [7]:
import pandas
# pandas.Grouper(freq=...) is the supported replacement for the deprecated
# pandas.TimeGrouper; "M" groups the timestamp index by month end.
Widget.redpanda().frame(engine)\
    .groupby([pandas.Grouper(freq="M"), "kind"]).units.sum().unstack()

kind,bopper,buzzer,fizzer
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-31,45.0,98.0,93
2015-02-28,71.0,180.0,385
2015-03-31,10.0,7.0,149
2015-04-30,89.0,77.0,150
2015-05-31,151.0,,79
2015-06-30,71.0,150.0,62
2015-07-31,273.0,35.0,20
2015-08-31,,148.0,2
2015-09-30,284.0,57.0,100
2015-10-31,85.0,223.0,152


## 5. RedPanda DataFrame-to-Model

Use `Widget.redparse()` to parse a DataFrame into ORM objects

In [8]:
frame = Widget.redpanda().frame(engine)
modelgen = Widget.redparse(frame, parse_index=True)
# The next() builtin and the call form of print work on both Python 2.6+
# and Python 3; the .next() method and the print statement are Python 2 only.
print(next(modelgen))

<Widget id: 1 timestamp: '2015-01-02 10:15:00' name: 'deviations' kind: 'fizzer' units: 33>
