# Introduction

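This example illustrates how to work with data stored in an on-disk SQLite database. The database itself is not included because it was produced by scraping a housing website; to run the notebook you need your own `properties.sqlite3` file containing a `properties` table and a `sales` table.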

In [2]:
import sqltables

# Accessing an existing database

In [3]:
def _sql_pow(base, exponent):
    """Power function exposed to SQL queries as ``pow``.

    Returns ``None`` (SQL NULL) when either argument is NULL, when the
    computation raises an arithmetic error (e.g. zero raised to a negative
    power, or float overflow), or when Python would produce a complex number
    (negative base with a fractional exponent), because a complex value
    cannot be handed back to SQLite.
    """
    if base is None or exponent is None:
        return None
    try:
        result = base ** exponent
    except ArithmeticError:
        return None
    return None if isinstance(result, complex) else result


# Open the scraped-housing database from the working directory.
db = sqltables.Database("properties.sqlite3")
# Make `pow(x, y)` callable from SQL; the queries below use it for the
# standard deviation.
db.create_function("pow", 2, _sql_pow)

# Table handles for the two source tables used throughout the notebook.
properties = db.query("select * from properties")
sales = db.query("select * from sales")

In [4]:
# Peek at the first two rows of `properties` to see which columns it has.
properties.view("select * from _ limit 2")

|residentialArea|postal|city|address|itemTypeName|bbrLink|soldLink|hasCoordinates|staticMapUrlstring|lat|lng|wishPropertyLocationLink|bbr|
|-|-|-|-|-|-|-|-|-|-|-|-|-|
|\'84\'|\'2300\'|\'København S\'|\'Jens Otto Krags Gade 12\, 2\. th\'|\'Ejerlejlighed\'|\'\~\/boligen\/101\-1\#bbr\'|\'\~\/boligen\/101\-1\'|1|None|55\.658569|12\.568486|\'https\:\/\/minside\.boligsiden\.dk\/oenskeboliger\?postal\=2300\&city\=K\%c3\%b8benhavn\+S\&street\=Jens\+Otto\+Krags\+Gade\&housenumber\=12\&floorandside\=2\%7cth\'|\'101\-1\'|
|\'107\'|\'2300\'|\'København S\'|\'Weidekampsgade 41\, 2\. th\'|\'Ejerlejlighed\'|\'\~\/boligen\/101\-1000\#bbr\'|\'\~\/boligen\/101\-1000\'|1|None|55\.666784|12\.5822|\'https\:\/\/minside\.boligsiden\.dk\/oenskeboliger\?postal\=2300\&city\=K\%c3\%b8benhavn\+S\&street\=Weidekampsgade\&housenumber\=41\&floorandside\=2\%7cth\'|\'101\-1000\'|


In [5]:
# Peek at the first two rows of `sales` to see which columns it has.
sales.view("select * from _ limit 2")

|bbr|saleDate|salePrice|saleTypeId|saleType|
|-|-|-|-|-|
|\'101\-431096\'|\'2018\-08\-01\'|2520000|1|\'Fri handel\'|
|\'101\-431096\'|\'2017\-09\-29\'|2345000|1|\'Fri handel\'|


# Join tables

In [6]:
# Attach the matching property record to every sale, joining on `bbr`.
# Inside the query `_` stands for the table `view` is called on (`sales`)
# and `p` for the table supplied through `bindings` (`properties`).
property_sales = sales.view("select * from _ join p on _.bbr = p.bbr", 
                            bindings={"p": properties})

# Create a function that transforms tables

In [7]:
def compute_stats(property_sales):
    """Summarise sale prices per city.

    Parameters
    ----------
    property_sales
        Table-like object exposing ``view(sql)``. It must provide ``city``
        and ``salePrice`` columns, and a two-argument SQL function ``pow``
        must be available on its database.

    Returns
    -------
    A view with one row per city and the columns ``city``, ``mean``
    (average sale price), ``std`` (population standard deviation) and
    ``rho`` (``std / mean``).
    """
    # The variance is computed as E[x^2] - E[x]^2. Floating-point rounding
    # can push that difference slightly below zero when all prices in a city
    # are (nearly) equal; raising a negative number to the power 0.5 would
    # then fail or yield a non-real result, so clamp the variance at zero.
    per_city = property_sales.view("""
    select 
        city,
        avg(salePrice) as mean,
        pow(max(avg(pow(salePrice,2.0))-pow(avg(salePrice),2.0), 0.0), 0.5) as std 
        from _ 
        group by city
    """)
    return per_city.view("select *, std/mean as rho from _")

In [8]:
# Materialise the per-city statistics and show two rows as a sanity check.
# The query has no ORDER BY, so which cities come first is up to SQLite.
list(compute_stats(property_sales))[:2]

[Row(city='Brønshøj', mean=1643912.7326475927, std=1291936.5023915744, rho=0.7858911709448559),
 Row(city='Frederiksberg', mean=1909967.35626842, std=1590687.5959055678, rho=0.8328349647888005)]

# Slice data and apply function to slices

In [9]:
# Collect the distinct property types; each result row holds exactly one
# column, which is unpacked into a plain string.
item_types = [
    item_type
    for (item_type,) in properties.view("select distinct itemTypeName from _")
]

In [10]:
# Show the property types found above.
item_types

['Ejerlejlighed', 'Villa', 'Rækkehus', 'Fritidsbolig']

In [11]:
# One statistics table per property type: restrict the joined sales to that
# type, then run the same per-city summary on the slice.
item_stats = {
    item_type: compute_stats(
        property_sales.table(
            "select * from _ where itemTypeName = ?",
            [item_type],
        )
    )
    for item_type in item_types
}

In [12]:
# The five cities with the highest mean sale price for villas.
item_stats["Villa"].view("select * from _ order by mean desc limit 5")

|city|mean|std|rho|
|-|-|-|-|
|\'København K\'|7626134\.277777778|6392582\.316826187|0\.8382467556929718|
|\'Frederiksberg C\'|5882047\.825242719|5777640\.072892616|0\.9822497614007764|
|\'Nordhavn\'|5646356\.5|3627243\.5|0\.6424042654763298|
|\'København Ø\'|5204032\.951704546|5429995\.779291962|1\.043420714219998|
|\'Hellerup\'|4906243\.431111111|3940130\.9778663856|0\.8030850962024256|
