Skip to content
Browse files

initial commit

  • Loading branch information...
0 parents commit c83f4ca42e7d8fd3c917139a9f1a926da754de45 @etrepum committed
1 .gitignore
@@ -0,0 +1 @@
+/.hg
18 Makefile
@@ -0,0 +1,18 @@
+# Files built by the default target; slides.pdf is commented out of the list.
+OUTPUT=slides.html# slides.pdf
+
+all: $(OUTPUT)
+
+# Remove the default outputs and the by-products of the LaTeX/PDF build.
+# NOTE(review): slides.{...} relies on shell brace expansion -- confirm that
+# the shell make invokes supports it.
+clean:
+ rm -f $(OUTPUT) slides.{snm,aux,out,log,nav,toc,latex}
+
+# Convert the reST source to Beamer LaTeX with the bundled rst2beamer.py, then
+# replace the empty date command with an input of ui/author.latex
+# (sed -i.old edits in place and keeps a .old backup).
+slides.latex: slides.txt ui/beamerdefs.txt ui/stylesheet.latex ui/author.latex
+ ./bin/rst2beamer.py --stylesheet=ui/stylesheet.latex --documentoptions=14pt slides.txt $@
+ sed -i.old 's/\\date{}/\\input{ui\/author.latex}/' $@
+
+# Render the PDF from the generated LaTeX.
+slides.pdf: slides.latex
+ pdflatex slides.latex
+
+# Build the S5 HTML slides using the theme under ui/mochikit.
+slides.html: slides.txt includes/*.html
+ rst2s5.py --theme-url ui/mochikit slides.txt $@
+
+.PHONY: all clean
1 bin/README.txt
@@ -0,0 +1 @@
+curl -O http://codespeak.net/svn/user/antocuni/bin/rst2beamer.py
170 bin/rst2beamer.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+A docutils script converting restructured text into Beamer-flavoured LaTeX.
+
+Beamer is a LaTeX document class for presentations. Via this script, ReST can
+be used to prepare slides. It can be called::
+
+ rst2beamer.py infile.txt > outfile.tex
+
+where ``outfile.tex`` contains the produced Beamer LaTeX.
+
+See <http://www.agapow.net/programming/python/rst2beamer> for more details.
+
+"""
+# TODO: modifications for handout sections?
+# TODO: sections and subsections?
+# TODO: enable beamer themes?
+# TODO: convert document metadata to front page fields?
+# TODO: toc-conversion?
+# TODO: fix descriptions
+
+# Unless otherwise stated, created by P-M Agapow on 2007-08-21
+# and open for academic & non-commercial use and modification.
+
+__docformat__ = 'restructuredtext en'
+__author__ = "Paul-Michael Agapow <agapow@bbsrc.ac.uk>"
+__version__ = "0.2"
+
+
+### IMPORTS ###
+
+import locale
+from docutils.core import publish_cmdline, default_description
+from docutils.writers.latex2e import Writer as Latex2eWriter
+from docutils.writers.latex2e import LaTeXTranslator, DocumentClass
+from docutils import nodes
+
+## CONSTANTS & DEFINES: ###
+
+BEAMER_SPEC = (
+ 'Beamer options',
+ 'These are derived almost entirely from the LaTeX2e options',
+ tuple (
+ [
+ (
+ 'Specify theme.',
+ ['--theme'],
+ {'default': '', }
+ ),
+ (
+ 'Specify document options. Multiple options can be given, '
+ 'separated by commas. Default is "10pt,a4paper".',
+ ['--documentoptions'],
+ {'default': '', }
+ ),
+ ] + list (Latex2eWriter.settings_spec[2][2:])
+ ),
+)
+
+# Setting defaults for the Beamer writer (assigned to
+# BeamerWriter.settings_defaults): latin-1 output encoding and the
+# ``beamer`` document class.
+BEAMER_DEFAULTS = {
+ 'output_encoding': 'latin-1',
+ 'documentclass': 'beamer',
+}
+
+
+### IMPLEMENTATION ###
+
+try:
+ locale.setlocale (locale.LC_ALL, '')
+except:
+ pass
+
+class BeamerTranslator (LaTeXTranslator):
+ """
+ A converter for docutils elements to beamer-flavoured latex.
+ """
+
+ def __init__ (self, document):
+ LaTeXTranslator.__init__ (self, document)
+ self.head_prefix = [x for x in self.head_prefix if ('{typearea}' not in x)]
+ hyperref_posn = [i for i in range (len (self.head_prefix)) if ('{hyperref}' in self.head_prefix[i])]
+ self.head_prefix[hyperref_posn[0]] = '\\usepackage{hyperref}\n'
+ self.head_prefix.extend ([
+ '\\definecolor{rrblitbackground}{rgb}{0.55, 0.3, 0.1}\n',
+ '\\newenvironment{rtbliteral}{\n',
+ '\\begin{ttfamily}\n',
+ '\\color{rrblitbackground}\n',
+ '}{\n',
+ '\\end{ttfamily}\n',
+ '}\n',
+ ])
+ # this fixes the hardcoded section titles in docutils 0.4
+ self.d_class = DocumentClass ('article')
+
+ def begin_frametag (self):
+ return '\\begin{frame}\n'
+
+ def end_frametag (self):
+ return '\\end{frame}\n'
+
+ def visit_section (self, node):
+ if (self.section_level == 0):
+ self.body.append (self.begin_frametag())
+ LaTeXTranslator.visit_section (self, node)
+
+ def depart_section (self, node):
+ # Remove counter for potential subsections:
+ LaTeXTranslator.depart_section (self, node)
+ if (self.section_level == 0):
+ self.body.append (self.end_frametag())
+
+ def visit_title (self, node):
+ if (self.section_level == 1):
+ self.body.append ('\\frametitle{%s}\n\n' % self.encode(node.astext()))
+ raise nodes.SkipNode
+ else:
+ LaTeXTranslator.visit_title (self, node)
+
+ def depart_title (self, node):
+ if (self.section_level != 1):
+ LaTeXTranslator.depart_title (self, node)
+
+ def visit_literal_block(self, node):
+ if not self.active_table.is_open():
+ self.body.append('\n\n\\smallskip\n\\begin{rtbliteral}\n')
+ self.context.append('\\end{rtbliteral}\n\\smallskip\n\n')
+ else:
+ self.body.append('\n')
+ self.context.append('\n')
+ if (self.settings.use_verbatim_when_possible and (len(node) == 1)
+ # in case of a parsed-literal containing just a "**bold**" word:
+ and isinstance(node[0], nodes.Text)):
+ self.verbatim = 1
+ self.body.append('\\begin{verbatim}\n')
+ else:
+ self.literal_block = 1
+ self.insert_none_breaking_blanks = 1
+
+ def depart_literal_block(self, node):
+ if self.verbatim:
+ self.body.append('\n\\end{verbatim}\n')
+ self.verbatim = 0
+ else:
+ self.body.append('\n')
+ self.insert_none_breaking_blanks = 0
+ self.literal_block = 0
+ self.body.append(self.context.pop())
+
+
+class BeamerWriter (Latex2eWriter):
+ """
+ A docutils writer that modifies the translator and settings for beamer.
+ """
+ # Option specification and setting defaults declared at module level.
+ settings_spec = BEAMER_SPEC
+ settings_defaults = BEAMER_DEFAULTS
+
+ def __init__(self):
+ # Initialise the base writer first, then substitute the beamer translator.
+ Latex2eWriter.__init__(self)
+ self.translator_class = BeamerTranslator
+
+
+# Script entry point: run the docutils command-line front end with the Beamer
+# writer, prefixing the standard docutils description with a Beamer summary.
+if __name__ == '__main__':
+ description = (
+ "Generates Beamer-flavoured LaTeX for PDF-based presentations." + default_description)
+ publish_cmdline (writer=BeamerWriter(), description=description)
+
+
+### END ######################################################################
+
BIN images/649px-Bloom_filter.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN images/i-has-minions.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN images/mochi_ad_sales.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN images/sc4_pub_ss_cassandra003_copy.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
BIN images/we-await-ur-instrucsions.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 includes/logo.html
@@ -0,0 +1,6 @@
+<script type="text/javascript">
+ // When the load callback fires, append a DIV with id "mochikit_logo" and
+ // class "draggable" to document.body.
+ // NOTE(review): addLoadEvent, appendChildNodes and DIV are not defined in
+ // this include -- presumably supplied by the slide theme's script; confirm.
+ addLoadEvent(function () {
+ appendChildNodes(document.body,
+ DIV({id: "mochikit_logo", 'class': 'draggable'}));
+ });
+</script>
67 notes.txt
@@ -0,0 +1,67 @@
+Theory:
+
+* http://queue.acm.org/detail.cfm?id=1394128
+* http://www.readwriteweb.com/archives/is_the_relational_database_doomed.php
+* http://research.microsoft.com/en-us/um/people/lamport/pubs/pubs.html#paxos-simple
+* http://en.wikipedia.org/wiki/Lamport_timestamps
+* http://en.wikipedia.org/wiki/Vector_clocks
+* http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=755678
+
+Caching:
+
+* http://www.danga.com/memcached/
+* Invalidation is hard
+
+Bloom filters:
+
+* http://en.wikipedia.org/wiki/Bloom_filter
+* http://www.youtube.com/watch?v=947gWqwkhu0
+* http://github.com/jaybaird/python-sbf/tree/master
+* http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
+* Bloom Filter, Scalable Bloom Filter, Counting Bloom Filter
+
+Cassandra:
+
+* http://code.google.com/p/the-cassandra-project/
+* http://www.slideshare.net/jhammerb/data-presentations-cassandra-sigmod
+* http://perspectives.mvdirona.com/2009/02/07/FacebookCassandraArchitectureAndDesign.aspx
+
+Column Databases:
+
+* http://hypertable.org/
+* http://db.csail.mit.edu/projects/cstore/
+* http://monetdb.cwi.nl/
+* http://code.google.com/appengine/docs/python/datastore/
+
+Bitmap Indexes:
+
+* https://codeforge.lbl.gov/projects/fastbit/
+
+Key-value stores:
+
+* http://blog.plathome.com/2009/02/first-key-value-storage-meeting-held.html
+* http://www.oracle.com/technology/products/berkeley-db/index.html
+* http://tokyocabinet.sourceforge.net/
+* http://code.google.com/p/redis/
+
+Distributed Key-Value stores:
+
+* http://opensource.plurk.com/LightCloud/
+* http://www.metabrew.com/article/anti-rdbms-a-list-of-distributed-key-value-stores/
+* http://delicious.com/RichardJones
+* http://en.wikipedia.org/wiki/Vector_clocks
+* http://en.wikipedia.org/wiki/Lamport_timestamps
+
+Distributed Databases:
+
+* http://www.allthingsdistributed.com/2007/10/amazons_dynamo.html
+* http://labs.google.com/papers/bigtable.html
+* http://wiki.apache.org/hadoop/Hbase
+* http://www.hypertable.org/
+
+
+Document Databases:
+
+* http://www.mongodb.org/
+* http://couchdb.apache.org/
+* http://code.google.com/p/thrudb/
1,022 slides.html
@@ -0,0 +1,1022 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="Docutils 0.5: http://docutils.sourceforge.net/" />
+<meta name="version" content="S5 1.1" />
+<title>Drop ACID and think about data</title>
+<meta name="author" content="Bob Ippolito" />
+<meta name="date" content="March 2009" />
+<style type="text/css">
+
+/*
+:Author: David Goodger (goodger@python.org)
+:Id: $Id: html4css1.css 5196 2007-06-03 20:25:28Z wiemann $
+:Copyright: This stylesheet has been placed in the public domain.
+
+Default cascading style sheet for the HTML output of Docutils.
+
+See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
+customize this style sheet.
+*/
+
+/* used to remove borders from tables and images */
+.borderless, table.borderless td, table.borderless th {
+ border: 0 }
+
+table.borderless td, table.borderless th {
+ /* Override padding for "table.docutils td" with "! important".
+ The right padding separates the table cells. */
+ padding: 0 0.5em 0 0 ! important }
+
+.first {
+ /* Override more specific margin styles with "! important". */
+ margin-top: 0 ! important }
+
+.last, .with-subtitle {
+ margin-bottom: 0 ! important }
+
+.hidden {
+ display: none }
+
+a.toc-backref {
+ text-decoration: none ;
+ color: black }
+
+blockquote.epigraph {
+ margin: 2em 5em ; }
+
+dl.docutils dd {
+ margin-bottom: 0.5em }
+
+/* Uncomment (and remove this text!) to get bold-faced definition list terms
+dl.docutils dt {
+ font-weight: bold }
+*/
+
+div.abstract {
+ margin: 2em 5em }
+
+div.abstract p.topic-title {
+ font-weight: bold ;
+ text-align: center }
+
+div.admonition, div.attention, div.caution, div.danger, div.error,
+div.hint, div.important, div.note, div.tip, div.warning {
+ margin: 2em ;
+ border: medium outset ;
+ padding: 1em }
+
+div.admonition p.admonition-title, div.hint p.admonition-title,
+div.important p.admonition-title, div.note p.admonition-title,
+div.tip p.admonition-title {
+ font-weight: bold ;
+ font-family: sans-serif }
+
+div.attention p.admonition-title, div.caution p.admonition-title,
+div.danger p.admonition-title, div.error p.admonition-title,
+div.warning p.admonition-title {
+ color: red ;
+ font-weight: bold ;
+ font-family: sans-serif }
+
+/* Uncomment (and remove this text!) to get reduced vertical space in
+ compound paragraphs.
+div.compound .compound-first, div.compound .compound-middle {
+ margin-bottom: 0.5em }
+
+div.compound .compound-last, div.compound .compound-middle {
+ margin-top: 0.5em }
+*/
+
+div.dedication {
+ margin: 2em 5em ;
+ text-align: center ;
+ font-style: italic }
+
+div.dedication p.topic-title {
+ font-weight: bold ;
+ font-style: normal }
+
+div.figure {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+div.footer, div.header {
+ clear: both;
+ font-size: smaller }
+
+div.line-block {
+ display: block ;
+ margin-top: 1em ;
+ margin-bottom: 1em }
+
+div.line-block div.line-block {
+ margin-top: 0 ;
+ margin-bottom: 0 ;
+ margin-left: 1.5em }
+
+div.sidebar {
+ margin: 0 0 0.5em 1em ;
+ border: medium outset ;
+ padding: 1em ;
+ background-color: #ffffee ;
+ width: 40% ;
+ float: right ;
+ clear: right }
+
+div.sidebar p.rubric {
+ font-family: sans-serif ;
+ font-size: medium }
+
+div.system-messages {
+ margin: 5em }
+
+div.system-messages h1 {
+ color: red }
+
+div.system-message {
+ border: medium outset ;
+ padding: 1em }
+
+div.system-message p.system-message-title {
+ color: red ;
+ font-weight: bold }
+
+div.topic {
+ margin: 2em }
+
+h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
+h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
+ margin-top: 0.4em }
+
+h1.title {
+ text-align: center }
+
+h2.subtitle {
+ text-align: center }
+
+hr.docutils {
+ width: 75% }
+
+img.align-left {
+ clear: left }
+
+img.align-right {
+ clear: right }
+
+ol.simple, ul.simple {
+ margin-bottom: 1em }
+
+ol.arabic {
+ list-style: decimal }
+
+ol.loweralpha {
+ list-style: lower-alpha }
+
+ol.upperalpha {
+ list-style: upper-alpha }
+
+ol.lowerroman {
+ list-style: lower-roman }
+
+ol.upperroman {
+ list-style: upper-roman }
+
+p.attribution {
+ text-align: right ;
+ margin-left: 50% }
+
+p.caption {
+ font-style: italic }
+
+p.credits {
+ font-style: italic ;
+ font-size: smaller }
+
+p.label {
+ white-space: nowrap }
+
+p.rubric {
+ font-weight: bold ;
+ font-size: larger ;
+ color: maroon ;
+ text-align: center }
+
+p.sidebar-title {
+ font-family: sans-serif ;
+ font-weight: bold ;
+ font-size: larger }
+
+p.sidebar-subtitle {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+p.topic-title {
+ font-weight: bold }
+
+pre.address {
+ margin-bottom: 0 ;
+ margin-top: 0 ;
+ font-family: serif ;
+ font-size: 100% }
+
+pre.literal-block, pre.doctest-block {
+ margin-left: 2em ;
+ margin-right: 2em }
+
+span.classifier {
+ font-family: sans-serif ;
+ font-style: oblique }
+
+span.classifier-delimiter {
+ font-family: sans-serif ;
+ font-weight: bold }
+
+span.interpreted {
+ font-family: sans-serif }
+
+span.option {
+ white-space: nowrap }
+
+span.pre {
+ white-space: pre }
+
+span.problematic {
+ color: red }
+
+span.section-subtitle {
+ /* font-size relative to parent (h1..h6 element) */
+ font-size: 80% }
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px }
+
+table.docinfo {
+ margin: 2em 4em }
+
+table.docutils {
+ margin-top: 0.5em ;
+ margin-bottom: 0.5em }
+
+table.footnote {
+ border-left: solid 1px black;
+ margin-left: 1px }
+
+table.docutils td, table.docutils th,
+table.docinfo td, table.docinfo th {
+ padding-left: 0.5em ;
+ padding-right: 0.5em ;
+ vertical-align: top }
+
+table.docutils th.field-name, table.docinfo th.docinfo-name {
+ font-weight: bold ;
+ text-align: left ;
+ white-space: nowrap ;
+ padding-left: 0 }
+
+h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
+h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
+ font-size: 100% }
+
+ul.auto-toc {
+ list-style-type: none }
+
+</style>
+<!-- configuration parameters -->
+<meta name="defaultView" content="slideshow" />
+<meta name="controlVis" content="hidden" />
+<!-- style sheet links -->
+<script src="ui/mochikit/slides.js" type="text/javascript"></script>
+<link rel="stylesheet" href="ui/mochikit/slides.css"
+ type="text/css" media="projection" id="slideProj" />
+<link rel="stylesheet" href="ui/mochikit/outline.css"
+ type="text/css" media="screen" id="outlineStyle" />
+<link rel="stylesheet" href="ui/mochikit/print.css"
+ type="text/css" media="print" id="slidePrint" />
+<link rel="stylesheet" href="ui/mochikit/opera.css"
+ type="text/css" media="projection" id="operaFix" />
+
+<style type="text/css">
+#currentSlide {display: none;}
+</style>
+</head>
+<body>
+<div class="layout">
+<div id="controls"></div>
+<div id="currentSlide"></div>
+<div id="header">
+
+</div>
+<div id="footer">
+<h1>Drop ACID and think about data</h1>
+
+</div>
+</div>
+<div class="presentation">
+<div class="slide" id="slide0">
+<h1 class="title">Drop ACID and think about data</h1>
+<table class="docinfo" frame="void" rules="none">
+<col class="docinfo-name" />
+<col class="docinfo-content" />
+<tbody valign="top">
+<tr><th class="docinfo-name">Author:</th>
+<td>Bob Ippolito</td></tr>
+<tr><th class="docinfo-name">Date:</th>
+<td>March 2009</td></tr>
+<tr class="field"><th class="docinfo-name">Venue:</th><td class="field-body">PyCon 2009</td>
+</tr>
+</tbody>
+</table>
+<!-- Definitions of interpreted text roles (classes) for S5/HTML data. -->
+<!-- This data file has been placed in the public domain. -->
+<!-- Colours
+======= -->
+<!-- Text Sizes
+========== -->
+<!-- Display in Slides (Presentation Mode) Only
+========================================== -->
+<!-- Display in Outline Mode Only
+============================ -->
+<!-- Display in Print Only
+===================== -->
+<!-- Display in Handout Mode Only
+============================ -->
+<!-- Incremental Display
+=================== -->
+<!-- colors -->
+<!-- =========================== -->
+<!-- general useful commands -->
+<!-- =========================== -->
+<!-- closed bracket -->
+<!-- =========================== -->
+<!-- example block -->
+<!-- =========================== -->
+<!-- alert block -->
+<!-- =========================== -->
+<!-- columns -->
+<!-- =========================== -->
+<script type="text/javascript">
+ addLoadEvent(function () {
+ appendChildNodes(document.body,
+ DIV({id: "mochikit_logo", 'class': 'draggable'}));
+ });
+</script>
+
+</div>
+<div class="slide" id="bob-s-perspective">
+<h1>Bob's Perspective</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Startup with lots of data:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Cofounded Mochi Media in 2005</li>
+<li>MochiBot analytics platform (for Flash)</li>
+<li>MochiAds ad serving platform (for Flash games)</li>
+<li>Other cool services for game developers</li>
+</ul>
+</div>
+<div class="slide" id="mochi-ad-sales">
+<h1>Mochi Ad Sales</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Hard Sell:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<img alt="images/mochi_ad_sales.jpg" src="images/mochi_ad_sales.jpg" />
+</div>
+<div class="slide" id="what-s-acid">
+<h1>What's ACID?</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">A promise ring your DBMS wears:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+<tr class="field"><th class="field-name">Atomicity:</th><td class="field-body">all or nothing</td>
+</tr>
+<tr class="field"><th class="field-name">Consistency:</th><td class="field-body">no explosions</td>
+</tr>
+<tr class="field"><th class="field-name">Isolation:</th><td class="field-body">no fights</td>
+</tr>
+<tr class="field"><th class="field-name">Durability:</th><td class="field-body">no lying</td>
+</tr>
+</tbody>
+</table>
+</div>
+<div class="slide" id="acid-trips">
+<h1>ACID Trips</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Scalability and reliability:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Downtime is unacceptable</li>
+<li>Reliable is &gt;= 2 nodes</li>
+<li>Scalable is ... more</li>
+<li>Networks make it hard</li>
+<li>Networks make it hard</li>
+<li>Networks make it hard</li>
+</ul>
+</div>
+<div class="slide" id="what-can-i-have">
+<h1>What can I have?</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">CAP theorem says pick two:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Consistency</li>
+<li>Availability</li>
+<li>Partition tolerance</li>
+</ul>
+</div>
+<div class="slide" id="turn-up-the-base">
+<h1>Turn up the BASE</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Write smarter applications:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Basically Available</li>
+<li>Soft state</li>
+<li>Eventually consistent</li>
+</ul>
+</div>
+<div class="slide" id="base-jumping">
+<h1>BASE jumping</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Everyone else is doing it:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Google</li>
+<li>Amazon</li>
+<li>eBay</li>
+<li>Yahoo!</li>
+<li>Facebook</li>
+<li>...</li>
+</ul>
+</div>
+<div class="slide" id="bigtable">
+<h1>BigTable</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Google:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Paxos (Chubby)</li>
+<li>Single-master</li>
+<li>Distributed tablets via GFS</li>
+<li>Row/Column db hybrid</li>
+<li>Compression (BMDiff, Zippy)</li>
+<li>Versioned (Row, Column, Timestamp)</li>
+<li>Bloom filters</li>
+</ul>
+</div>
+<div class="slide" id="bigtable-pros">
+<h1>BigTable Pros</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Pros:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Compression = Awesome</li>
+<li>Clients are probably simple</li>
+<li>Integrates with map/reduce</li>
+</ul>
+</div>
+<div class="slide" id="bigtable-cons">
+<h1>BigTable Cons</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Cons:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Proprietary to Google</li>
+<li>Single-master</li>
+</ul>
+</div>
+<div class="slide" id="bigtable-diagram">
+<h1>BigTable Diagram</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Single-master:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<img alt="images/i-has-minions.jpg" src="images/i-has-minions.jpg" />
+</div>
+<div class="slide" id="dynamo">
+<h1>Dynamo</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Amazon:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Key/Value store</li>
+<li>Consistent hashing</li>
+<li>Vector clocks</li>
+<li>Read repair</li>
+</ul>
+</div>
+<div class="slide" id="dynamo-pros">
+<h1>Dynamo Pros</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Pros:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>No master</li>
+<li>Highly available for write</li>
+<li>Knobs to make it fast to read</li>
+<li>&quot;Simple&quot; (lots of half-baked clones!)</li>
+</ul>
+</div>
+<div class="slide" id="dynamo-cons">
+<h1>Dynamo Cons</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Cons:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Proprietary to Amazon</li>
+<li>Clients need to be smart</li>
+<li>No compression</li>
+<li>Not suitable for column-like workloads</li>
+<li>Just a Key/Value store</li>
+</ul>
+</div>
+<div class="slide" id="dynamo-diagram">
+<h1>Dynamo Diagram</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Smart client:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<img alt="images/we-await-ur-instrucsions.jpg" src="images/we-await-ur-instrucsions.jpg" />
+</div>
+<div class="slide" id="cassandra">
+<h1>Cassandra</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Facebook -&gt; Apache:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Open source!</li>
+<li>No master like Dynamo</li>
+<li>Storage model more like BigTable</li>
+</ul>
+</div>
+<div class="slide" id="cassandra-pros">
+<h1>Cassandra Pros</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Pros:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>OPEN SOURCE</li>
+<li>Incrementally scalable</li>
+<li>Minimal administration</li>
+</ul>
+</div>
+<div class="slide" id="cassandra-cons">
+<h1>Cassandra Cons</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Cons:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Not polished</li>
+<li>No compression yet</li>
+</ul>
+</div>
+<div class="slide" id="cassandra-diagram">
+<h1>Cassandra Diagram</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Soul Calibur:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<img alt="images/sc4_pub_ss_cassandra003_copy.jpg" src="images/sc4_pub_ss_cassandra003_copy.jpg" />
+</div>
+<div class="slide" id="distributed-musings">
+<h1>Distributed Musings</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">New Hotness:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Distributed databases are the new web framework</li>
+<li>... except none of them are awesome yet</li>
+<li>I don't think we need another half-baked Dynamo clone</li>
+</ul>
+</div>
+<div class="slide" id="key-value-stores">
+<h1>Key-Value Stores</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Simple and Fast:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Similar to a Python dict</li>
+<li>Keys usually bytes, probably limited</li>
+<li>Values usually bytes, often have fewer limits</li>
+<li>Extremely fast, simple</li>
+</ul>
+</div>
+<div class="slide" id="memcached">
+<h1>Memcached</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Key/Value store as cache:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>No persistence</li>
+<li>RAM only</li>
+<li>Throws data away (on purpose)</li>
+<li>Lightning fast</li>
+<li>&quot;Everyone&quot; uses it</li>
+</ul>
+</div>
+<div class="slide" id="caching-immutable-data">
+<h1>Caching Immutable Data</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">If only data never changed:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Immutable is easy, do that</li>
+</ul>
+</div>
+<div class="slide" id="caching-mutable-data">
+<h1>Caching Mutable Data</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Invalidation sucks:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Mutable is hard</li>
+<li>Failed transactions?</li>
+<li>Concurrent writers?</li>
+<li>Dependent cache keys?</li>
+<li>You will get it wrong and it will be hard to debug</li>
+</ul>
+</div>
+<div class="slide" id="tokyo-cabinet-tyrant">
+<h1>Tokyo Cabinet/Tyrant</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Not your mom's BerkeleyDB:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Disk persistent</li>
+<li>Very performant</li>
+<li>Actively developed</li>
+<li>Similar replication strategy to MySQL</li>
+</ul>
+</div>
+<div class="slide" id="redis">
+<h1>Redis</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Still very new:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Not just a Key/Value store</li>
+<li>Matching on key spaces</li>
+<li>Values can be bytes, lists or sets</li>
+<li>Requires full store in RAM</li>
+<li>Might be a nice cache server?</li>
+</ul>
+</div>
+<div class="slide" id="document-databases">
+<h1>Document Databases</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Schema-free:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Very easy to use</li>
+<li>Document Versioning</li>
+<li>Great for storing documents</li>
+</ul>
+</div>
+<div class="slide" id="couchdb">
+<h1>CouchDB</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Document DB Poster Child:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Apache project</li>
+<li>Asynchronous replication</li>
+<li>JSON based</li>
+<li>Views materialized on demand (not indexes)</li>
+<li>Neat admin UI</li>
+</ul>
+</div>
+<div class="slide" id="mongodb">
+<h1>MongoDB</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">C++'s revenge:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Fast</li>
+<li>JSON and BSON (binary JSON-ish)</li>
+<li>Asynchronous replication with auto-sharding &quot;soon&quot;</li>
+<li>Index support</li>
+<li>Nested documents</li>
+<li>Advanced queries</li>
+</ul>
+</div>
+<div class="slide" id="column-databases">
+<h1>Column Databases</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Data Warehousing:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Sequential reads are awesome</li>
+<li>Columns compress better than rows</li>
+<li>Doesn't waste I/O on uninteresting columns</li>
+</ul>
+</div>
+<div class="slide" id="monetdb">
+<h1>MonetDB</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Research project:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Tried really hard to get it to work</li>
+<li>Crashes a lot and corrupts your data</li>
+<li>Do not waste your time</li>
+</ul>
+</div>
+<div class="slide" id="luciddb">
+<h1>LucidDB</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Sounds interesting:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Java/C++ open source data warehouse</li>
+<li>No clustering</li>
+<li>No experience yet</li>
+</ul>
+</div>
+<div class="slide" id="vertica">
+<h1>Vertica</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">We paid for it:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Commercial (based on C-Store)</li>
+<li>Clustered</li>
+<li>Would still prefer open source</li>
+</ul>
+</div>
+<div class="slide" id="bitmap-indexes">
+<h1>Bitmap Indexes</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Sequential Scans can be fast:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>1-N bits per row of data</li>
+<li>Can apply logical operations across indexes</li>
+<li>Can be compressed (BBC, WAH)</li>
+<li>FastBit is a good implementation</li>
+</ul>
+</div>
+<div class="slide" id="bitmap-index-uses">
+<h1>Bitmap Index Uses</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Big Queries:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>PostgreSQL 8.1+ in-memory for some queries</li>
+<li>Almost a requirement for column stores</li>
+<li>FastBit is a great implementation (WAH)</li>
+</ul>
+</div>
+<div class="slide" id="bloom-filters-are-neat">
+<h1>Bloom Filters are Neat</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">But our Princess is in another castle:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Probabilistic data structure</li>
+<li>False positives at a known error</li>
+<li>Constant space</li>
+<li>I won't bore you with the math</li>
+</ul>
+</div>
+<div class="slide" id="bloom-filter-diagram">
+<h1>Bloom Filter Diagram</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Actually Relevant:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<img alt="images/649px-Bloom_filter.png" src="images/649px-Bloom_filter.png" />
+</div>
+<div class="slide" id="bloom-filter-uses">
+<h1>Bloom Filter Uses</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name" colspan="2">Find stuff, maybe:</th></tr>
+<tr><td>&nbsp;</td><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Approximate counting of a large set (e.g. unique IPs from logs)</li>
+<li>Knowing that data is definitely NOT stored somewhere, e.g. remote cache</li>
+<li>Several variants (Counting Bloom Filter, Scalable Bloom Filter, ...)</li>
+</ul>
+</div>
+<div class="slide" id="questions">
+<h1>Questions?</h1>
+<table class="docutils field-list" frame="void" rules="none">
+<col class="field-name" />
+<col class="field-body" />
+<tbody valign="top">
+<tr class="field"><th class="field-name">Open Space:</th><td class="field-body"></td>
+</tr>
+</tbody>
+</table>
+<ul class="simple">
+<li>Open Space TODAY &#64; 5pm, Lambert. See Jonathan Ellis</li>
+</ul>
+</div>
+</div>
+</body>
+</html>
BIN slides.pdf
Binary file not shown.
386 slides.txt
@@ -0,0 +1,386 @@
+.. include:: <s5defs.txt>
+.. include:: ui/beamerdefs.txt
+
+.. raw:: html
+ :file: includes/logo.html
+
+================================
+ Drop ACID and think about data
+================================
+
+:Author:
+ Bob Ippolito
+:Date:
+ March 2009
+:Venue:
+ PyCon 2009
+
+Bob's Perspective
+=================
+
+:Startup with lots of data:
+
+* Cofounded Mochi Media in 2005
+* MochiBot analytics platform (for Flash)
+* MochiAds ad serving platform (for Flash games)
+* Other cool services for game developers
+
+Mochi Ad Sales
+==============
+
+:Hard Sell:
+
+.. image:: images/mochi_ad_sales.jpg
+
+What's ACID?
+============
+
+:A promise ring your DBMS wears:
+
+:Atomicity:
+ all or nothing
+:Consistency:
+ no explosions
+:Isolation:
+ no fights
+:Durability:
+ no lying
+
+ACID Trips
+==========
+
+:Scalability and reliability:
+
+* Downtime is unacceptable
+* Reliable is >= 2 nodes
+* Scalable is ... more
+* Networks make it hard
+* Networks make it hard
+* Networks make it hard
+
+What can I have?
+================
+
+:CAP theorem says pick two:
+
+* Consistency
+* Availability
+* Partition tolerance
+
+Turn up the BASE
+================
+
+:Write smarter applications:
+
+* Basically Available
+* Soft state
+* Eventually consistent
+
+BASE jumping
+============
+
+:Everyone else is doing it:
+
+* Google
+* Amazon
+* eBay
+* Yahoo!
+* Facebook
+* ...
+
+BigTable
+========
+
+:Google:
+
+* Paxos (Chubby)
+* Single-master
+* Distributed tablets via GFS
+* Row/Column db hybrid
+* Compression (BMDiff, Zippy)
+* Versioned (Row, Column, Timestamp)
+* Bloom filters
+
+BigTable Pros
+=============
+
+:Pros:
+
+* Compression = Awesome
+* Clients are probably simple
+* Integrates with map/reduce
+
+BigTable Cons
+=============
+
+:Cons:
+
+* Proprietary to Google
+* Single-master
+
+BigTable Diagram
+================
+
+:Single-master:
+
+.. image:: images/i-has-minions.jpg
+
+Dynamo
+======
+
+:Amazon:
+
+* Key/Value store
+* Consistent hashing
+* Vector clocks
+* Read repair
+
+Dynamo Pros
+===========
+
+:Pros:
+
+* No master
+* Highly available for write
+* Knobs to make it fast to read
+* "Simple" (lots of half-baked clones!)
+
+Dynamo Cons
+===========
+
+:Cons:
+
+* Proprietary to Amazon
+* Clients need to be smart
+* No compression
+* Not suitable for column-like workloads
+* Just a Key/Value store
+
+Dynamo Diagram
+==============
+
+:Smart client:
+
+.. image:: images/we-await-ur-instrucsions.jpg
+
+Cassandra
+=========
+
+:Facebook -> Apache:
+
+* Open source!
+* No master like Dynamo
+* Storage model more like BigTable
+
+Cassandra Pros
+==============
+
+:Pros:
+
+* OPEN SOURCE
+* Incrementally scalable
+* Minimal administration
+
+Cassandra Cons
+==============
+
+:Cons:
+
+* Not polished
+* No compression yet
+
+Cassandra Diagram
+=================
+
+:Soul Calibur:
+
+.. image:: images/sc4_pub_ss_cassandra003_copy.jpg
+
+Distributed Musings
+===================
+
+:New Hotness:
+
+* Distributed databases are the new web framework
+* ... except none of them are awesome yet
+* I don't think we need another half-baked Dynamo clone
+
+Key-Value Stores
+================
+
+:Simple and Fast:
+
+* Similar to a Python dict
+* Keys usually bytes, probably limited
+* Values usually bytes, often have fewer limits
+* Extremely fast, simple
+
+Memcached
+=========
+
+:Key/Value store as cache:
+
+* No persistence
+* RAM only
+* Throws data away (on purpose)
+* Lightning fast
+* "Everyone" uses it
+
+Caching Immutable Data
+======================
+
+:If only data never changed:
+
+* Immutable is easy, do that
+
+Caching Mutable Data
+====================
+
+:Invalidation sucks:
+
+* Mutable is hard
+* Failed transactions?
+* Concurrent writers?
+* Dependent cache keys?
+* You will get it wrong and it will be hard to debug
+
+Tokyo Cabinet/Tyrant
+====================
+
+:Not your mom's BerkeleyDB:
+
+* Disk persistent
+* Very performant
+* Actively developed
+* Similar replication strategy to MySQL
+
+Redis
+=====
+
+:Still very new:
+
+* Not just a Key/Value store
+* Matching on key spaces
+* Values can be bytes, lists or sets
+* Requires full store in RAM
+* Might be a nice cache server?
+
+Document Databases
+==================
+
+:Schema-free:
+
+* Very easy to use
+* Document Versioning
+* Great for storing documents
+
+CouchDB
+=======
+
+:Document DB Poster Child:
+
+* Apache project
+* Asynchronous replication
+* JSON based
+* Views materialized on demand (not indexes)
+* Neat admin UI
+
+MongoDB
+=======
+
+:C++'s revenge:
+
+* Fast
+* JSON and BSON (binary JSON-ish)
+* Asynchronous replication with auto-sharding "soon"
+* Index support
+* Nested documents
+* Advanced queries
+
+Column Databases
+================
+
+:Data Warehousing:
+
+* Sequential reads are awesome
+* Columns compress better than rows
+* Doesn't waste I/O on uninteresting columns
+
+MonetDB
+=======
+
+:Research project:
+
+* Tried really hard to get it to work
+* Crashes a lot and corrupts your data
+* Do not waste your time
+
+LucidDB
+=======
+
+:Sounds interesting:
+
+* Java/C++ open source data warehouse
+* No clustering
+* No experience yet
+
+Vertica
+=======
+
+:We paid for it:
+
+* Commercial (based on C-Store)
+* Clustered
+* Would still prefer open source
+
+Bitmap Indexes
+==============
+
+:Sequential Scans can be fast:
+
+* 1-N bits per row of data
+* Can apply logical operations across indexes
+* Can be compressed (BBC, WAH)
+* FastBit is a good implementation
+
+Bitmap Index Uses
+=================
+
+:Big Queries:
+
+* PostgreSQL 8.1+ in-memory for some queries
+* Almost a requirement for column stores
+* FastBit is a great implementation (WAH)
+
+Bloom Filters are Neat
+======================
+
+:But our Princess is in another castle:
+
+* Probabilistic data structure
+* False positives at a known error rate
+* Constant space
+* I won't bore you with the math
+
+Bloom Filter Diagram
+====================
+
+:Actually Relevant:
+
+.. image:: images/649px-Bloom_filter.png
+
+Bloom Filter Uses
+=================
+
+:Find stuff, maybe:
+
+* Approximate counting of a large set (e.g. unique IPs from logs)
+* Knowing that data is definitely NOT stored somewhere, e.g. remote cache
+* Several variants (Counting Bloom Filter, Scalable Bloom Filter, ...)
+
+Questions?
+==========
+
+:Open Space:
+
+* Open Space TODAY @ 5pm, Lambert. See Jonathan Ellis
6 ui/author.latex
@@ -0,0 +1,6 @@
+\definecolor{rrblitbackground}{rgb}{0.0, 0.0, 0.0}
+
+\title[Drop ACID and think about data]{Drop ACID and think about data}
+\author[Bob Ippolito]{Bob Ippolito\\Mochi Media, Inc.}
+\institute[PyCon 2009]{PyCon 2009 - Chicago (actually Rosemont)}
+\date{March 28, 2009}
77 ui/beamerdefs.txt
@@ -0,0 +1,77 @@
+.. colors
+.. ===========================
+
+.. role:: green
+.. role:: red
+
+
+.. general useful commands
+.. ===========================
+
+.. |pause| raw:: latex
+
+ \pause
+
+.. |small| raw:: latex
+
+ {\small
+
+.. |end_small| raw:: latex
+
+ }
+
+
+.. closed bracket
+.. ===========================
+
+.. |>| raw:: latex
+
+ }
+
+
+.. example block
+.. ===========================
+
+.. |example<| raw:: latex
+
+ \begin{exampleblock}{
+
+
+.. |end_example| raw:: latex
+
+ \end{exampleblock}
+
+
+
+.. alert block
+.. ===========================
+
+.. |alert<| raw:: latex
+
+ \begin{alertblock}{
+
+
+.. |end_alert| raw:: latex
+
+ \end{alertblock}
+
+
+
+.. columns
+.. ===========================
+
+.. |column1| raw:: latex
+
+ \begin{columns}
+ \begin{column}{0.45\textwidth}
+
+.. |column2| raw:: latex
+
+ \end{column}
+ \begin{column}{0.45\textwidth}
+
+
+.. |end_columns| raw:: latex
+
+ \end{column}
+ \end{columns}
6,922 ui/mochikit/MochiKit/MochiKit.js
6,922 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
2 ui/mochikit/MochiKit/__package__.js
@@ -0,0 +1,2 @@
+dojo.hostenv.conditionalLoadModule({"common": ["MochiKit.MochiKit"]});
+dojo.hostenv.moduleLoaded("MochiKit.*");
BIN ui/mochikit/blank.gif
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
43 ui/mochikit/examples/dominterpreter.css
@@ -0,0 +1,43 @@
+#dominterpreter_text {
+ font-family: Monaco, "lucida console", Courier;
+ border: 1px solid #CCCCCC;
+ font-size: .60em;
+ padding: 2px 4px;
+ margin-top: .3em;
+ width: 96%;
+}
+
+#dominterpreter_text:focus {
+ background-color: #FFFEE3;
+}
+
+#dominterpreter_area {
+ display: block;
+ border: 1px solid #CCCCCC;
+ padding: 2px 4px;
+ margin-top: .3em;
+ width: 95%;
+ height: 11em;
+ overflow: auto;
+}
+
+#dominterpreter_output {
+ display: inline;
+ font-family: Monaco, "lucida console", Courier;
+ font-size: .60em;
+}
+
+#dominterpreter_output span {
+ white-space: -moz-pre-wrap; /* Mozilla */
+ white-space: -o-pre-wrap; /* Opera 7 */
+ white-space: pre-wrap; /* CSS 2.1 */
+ white-space: pre-line; /* CSS 3 (and 2.1 as well, actually) */
+ word-wrap: break-word; /* IE */
+ wrap-option: emergency; /* CSS 3 */
+}
+
+.code { color: blue; }
+.data { color: black; }
+.error { color: red; }
+.banner { color: green; }
+.invisible { display: none; }
329 ui/mochikit/examples/dominterpreter.js
@@ -0,0 +1,329 @@
+/*
+
+ DOMInterpreter: JavaScript Interactive DOMInterpreter
+
+*/
+// Constructor for the object that drives the in-page interpreter.
+// Its methods are later handed out as bare callbacks (connect(),
+// addLoadEvent(), addCallbacks()), so they are passed through
+// bindMethods here -- presumably MochiKit's helper that binds each
+// method to this instance; confirm against MochiKit.Base.
+DOMInterpreterManager = function () {
+ bindMethods(this);
+};
+
+DOMInterpreterManager.prototype.initialize = function () {
+
+ connect("dominterpreter_text", "onkeyup", this.keyUp);
+ connect("dominterpreter_text", "onfocus", this.setupWindowFunctions);
+ connect("dominterpreter_form", "onsubmit", this.submit);
+ // getElement("dominterpreter_text").focus();
+
+ this.banner();
+ this.lines = [];
+ this.history = [];
+ this.currentHistory = "";
+ this.historyPos = -1;
+ this.blockingOn = null;
+ if (typeof(this.doEval) == "undefined") {
+ // detect broken eval, warn at some point if a namespace ever gets used
+ this.doEval = function () {
+ return eval(arguments[0]);
+ }
+ }
+};
+
+DOMInterpreterManager.prototype.banner = function () {
+ var _ua = window.navigator.userAgent;
+ var ua = _ua.replace(/^Mozilla\/.*?\(.*?\)\s*/, "");
+ if (ua == "") {
+ // MSIE
+ ua = _ua.replace(/^Mozilla\/4\.0 \(compatible; MS(IE .*?);.*$/, "$1");
+ }
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "banner"},
+ "MochiKit v" + MochiKit.Base.VERSION + " [" + ua + "]",
+ BR(),
+ "Type your expression in the input box below and press return, or see the notes below for more information."
+ ),
+ BR()
+ );
+};
+
+DOMInterpreterManager.prototype.submit = function (event) {
+ if (this.blockingOn) {
+ try {
+ this.blockingOn.cancel();
+ } catch (e) {
+ this.showError(e);
+ }
+ this.blockingOn = null;
+ }
+ this.doSubmit();
+ this.doScroll();
+ event.stop();
+};
+
+DOMInterpreterManager.prototype.doScroll = function () {
+ var p = getElement("dominterpreter_output").lastChild;
+ if (typeof(p) == "undefined" || p == null) {
+ return;
+ }
+ var area = getElement("dominterpreter_area");
+ if (area.offsetHeight > area.scrollHeight) {
+ area.scrollTop = 0;
+ } else {
+ area.scrollTop = area.scrollHeight;
+ }
+};
+
+DOMInterpreterManager.prototype.moveHistory = function (dir) {
+ // totally bogus value
+ if (dir == 0 || this.history.length == 0) {
+ return;
+ }
+ var elem = getElement("dominterpreter_text");
+ if (this.historyPos == -1) {
+ this.currentHistory = elem.value;
+ if (dir > 0) {
+ return;
+ }
+ this.historyPos = this.history.length - 1;
+ elem.value = this.history[this.historyPos];
+ return;
+ }
+ if (this.historyPos == 0 && dir < 0) {
+ return;
+ }
+ if (this.historyPos == this.history.length - 1 && dir > 0) {
+ this.historyPos = -1;
+ elem.value = this.currentHistory;
+ return;
+ }
+ this.historyPos += dir;
+ elem.value = this.history[this.historyPos];
+}
+
+DOMInterpreterManager.prototype.runMultipleLines = function (text) {
+ var lines = rstrip(text).replace("\r\n", "\n").split(/\n/);
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "code"}, ">>> ", izip(lines, imap(BR, cycle([null]))))
+ );
+ this.runCode(text);
+}
+
+DOMInterpreterManager.prototype.areaKeyDown = function (e) {
+ var mod = e.modifier();
+ var hasMod = mod.alt || mod.ctrl || mod.meta;
+ if (e.key().string == 'KEY_ENTER' && hasMod) {
+ var elem = getElement("dominterpreter_textarea");
+ var text = elem.value;
+ elem.value = "";
+ this.runMultipleLines(text);
+ e.stop();
+ }
+};
+
+DOMInterpreterManager.prototype.keyUp = function (e) {
+ var key = e.key();
+ // if any meta key is pressed, don't handle the signal
+ if (e.modifier().any) {
+ return;
+ }
+ switch (key.string) {
+ case 'KEY_ARROW_UP': this.moveHistory(-1); break;
+ case 'KEY_ARROW_DOWN': this.moveHistory(1); break;
+ default: return;
+ }
+ e.stop();
+};
+
+DOMInterpreterManager.prototype.blockOn = function (d) {
+ var node = SPAN({"class": "banner"}, "blocking on " + repr(d) + "...");
+ this.blockingOn = d;
+ appendChildNodes("dominterpreter_output", node);
+ this.doScroll();
+ d.addBoth(function (res) {
+ swapDOM(node);
+ this.blockingOn = null;
+ if (res instanceof CancelledError) {
+ window.writeln(SPAN({"class": "error"}, repr(d) + " cancelled!"));
+ return undefined;
+ }
+ return res;
+ });
+ d.addCallbacks(this.showResult, this.showError);
+};
+
+DOMInterpreterManager.prototype.showError = function (e) {
+ if (typeof(e) != "object") {
+ e = new Error(e);
+ }
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "error"}, "Error:"),
+ TABLE({"class": "error"},
+ THEAD({"class": "invisible"}, TD({"colspan": 2})),
+ TFOOT({"class": "invisible"}, TD({"colspan": 2})),
+ TBODY(null,
+ map(function (kv) {
+ var v = kv[1];
+ if (typeof(v) == "function") {
+ return;
+ }
+ if (typeof(v) == "object") {
+ v = repr(v);
+ }
+ return TR(null,
+ TD({"class": "error"}, kv[0]),
+ TD({"class": "data"}, v)
+ );
+ }, sorted(items(e)))
+ )
+ )
+ );
+ window.last_exc = e;
+ this.doScroll();
+};
+
+// Candidate strategies for evaluating interpreter input inside an
+// optional namespace object, plus choose(), which probes them.
+// All functions read their inputs positionally through "arguments":
+// arguments[0] is the code string, arguments[1] the namespace
+// (window is used when it is absent).
+// NOTE(review): presumably no parameters are named so that no extra
+// local name is visible to the evaluated code -- confirm.
+EvalFunctions = {
+ // Evaluate the code with the namespace pushed onto the scope chain
+ // by a "with" statement.
+ evalWith: function () {
+ with (arguments[1] || window) { return eval(arguments[0]); };
+ },
+ // Evaluate the code by calling eval with the namespace as its "this".
+ evalCall: function () {
+ return eval.call(arguments[1] || window, arguments[0]);
+ },
+ // Try each strategy against a namespace whose __test__ property is
+ // this object and return the first strategy that hands it back;
+ // return undefined when neither does.
+ // NOTE(review): the probe string is "return __test__". A return
+ // statement in eval'd program text is a syntax error, so both probes
+ // look like they always throw and choose() always yields undefined,
+ // leaving initialize() to install its plain eval fallback. Confirm
+ // before changing: a working probe would switch the strategy in use.
+ choose: function () {
+ var ns = {__test__: this};
+ var e;
+ try {
+ if (this.evalWith("return __test__", ns) === this) {
+ return this.evalWith;
+ }
+ } catch (e) {
+ // pass
+ }
+ try {
+ if (this.evalCall("return __test__", ns) === this) {
+ return this.evalCall;
+ }
+ } catch (e) {
+ // pass
+ }
+ return undefined;
+ }
+};
+
+// Pick the evaluation strategy once, when the script is loaded.
+DOMInterpreterManager.prototype.doEval = EvalFunctions.choose();
+
+DOMInterpreterManager.prototype.doSubmit = function () {
+ var elem = getElement("dominterpreter_text");
+ var code = elem.value;
+ elem.value = "";
+ var isContinuation = false;
+ if (code.length >= 2 && code.lastIndexOf("//") == code.length - 2) {
+ isContinuation = true;
+ code = code.substr(0, code.length - 2);
+ }
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "code"}, ">>> ", code),
+ BR()
+ );
+ this.lines.push(code);
+ this.history.push(code);
+ this.historyPos = -1;
+ this.currentHistory = "";
+ if (isContinuation) {
+ return;
+ }
+ var allCode = this.lines.join("\n");
+ this.lines = [];
+ this.runCode(allCode);
+ return;
+};
+
+DOMInterpreterManager.prototype.runCode = function (allCode) {
+ var res;
+ try {
+ res = this.doEval(allCode);
+ } catch (e) {
+ // mozilla shows some keys more than once!
+ this.showError(e);
+ return;
+ }
+ this.showResult(res);
+};
+
+DOMInterpreterManager.prototype.setupWindowFunctions = function () {
+ window.writeln = function () {
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "data"}, arguments),
+ BR()
+ );
+ dominterpreterManager.doScroll();
+ };
+
+ window.clear = function () {
+ replaceChildNodes("dominterpreter_output");
+ getElement("dominterpreter_area").scrollTop = 0;
+ };
+
+ window.blockOn = function (d) {
+ if (!(d instanceof Deferred)) {
+ throw new TypeError(repr(d) + " is not a Deferred!");
+ }
+ dominterpreterManager.blockOn(d);
+ };
+
+ window.dir = function (o) {
+ // Python muscle memory!
+ return sorted(keys(o));
+ };
+
+ window.inspect = function (o) {
+ window._ = o;
+ if ((typeof(o) != "function" && typeof(o) != "object") || o == null) {
+ window.writeln(repr(o));
+ return;
+ }
+ var pairs = items(o);
+ if (pairs.length == 0) {
+ window.writeln(repr(o));
+ return;
+ }
+ window.writeln(TABLE({"border": "1"},
+ THEAD({"class": "invisible"}, TR(null, TD(), TD())),
+ TFOOT({"class": "invisible"}, TR(null, TD(), TD())),
+ TBODY(null,
+ map(
+ function (kv) {
+ var click = function () {
+ try {
+ window.inspect(kv[1]);
+ } catch (e) {
+ dominterpreterManager.showError(e);
+ }
+ return false;
+ }
+ return TR(null,
+ TD(null, A({href: "#", onclick: click}, kv[0])),
+ TD(null, repr(kv[1]))
+ );
+ },
+ pairs
+ )
+ )
+ ));
+ };
+}
+
+DOMInterpreterManager.prototype.showResult = function (res) {
+ if (typeof(res) != "undefined") {
+ window._ = res;
+ }
+ if (typeof(res) != "undefined") {
+ appendChildNodes("dominterpreter_output",
+ SPAN({"class": "data"}, repr(res)),
+ BR()
+ );
+ this.doScroll();
+ }
+};
+
+
+// The single shared interpreter instance. The window.* helpers set up
+// in setupWindowFunctions refer to it by this global name.
+dominterpreterManager = new DOMInterpreterManager();
+// Hand initialize to addLoadEvent -- presumably MochiKit's hook for
+// running a function once the page has loaded; confirm.
+addLoadEvent(dominterpreterManager.initialize);
76 ui/mochikit/examples/draggable.js
@@ -0,0 +1,76 @@
+/*
+
+ Drag: A Really Simple Drag Handler
+
+*/
+Drag = {
+ _move: null,
+ _down: null,
+
+ start: function(e) {
+ e.stop();
+
+ // We need to remember what we're dragging.
+ Drag._target = e.target();
+
+ /*
+ There's no cross-browser way to get offsetX and offsetY, so we
+ have to do it ourselves. For performance, we do this once and
+ cache it.
+ */
+ Drag._orig = elementPosition(Drag._target);
+ var mouse_pos = e.mouse().page;
+ Drag._offset = Drag._diff(
+ mouse_pos,
+ Drag._orig);
+ Drag._move = connect(document, 'onmousemove', Drag._drag);
+ Drag._down = connect(document, 'onmouseup', Drag._stop);
+
+ var monitor = $("drag_monitor");
+ if (monitor) {
+ replaceChildNodes(monitor,
+ "mouse: " + mouse_pos, BR(),
+ "logo: " + Drag._orig, BR());
+ }
+
+ },
+
+ _offset: null,
+ _target: null,
+
+ _diff: function(lhs, rhs) {
+ return new MochiKit.DOM.Coordinates(lhs.x - rhs.x, lhs.y - rhs.y);
+ },
+
+ _drag: function(e) {
+ e.stop();
+ var mouse_pos = e.mouse().page;
+ var new_pos = Drag._diff(mouse_pos, Drag._offset)
+ setElementPosition(
+ Drag._target,