## Read data

In [12]:
#import required libraries
import sqlite3
import pandas as pd 

In [13]:
# Open a connection to the SQLite database file used throughout the notebook
DB_PATH = "factbook.db"
conn = sqlite3.connect(DB_PATH)

In [14]:
# Fetch the column metadata of the facts table and print one row per column
schema_cursor = conn.execute("pragma table_info(facts);")
schema = schema_cursor.fetchall()
for column_info in schema:
    print(column_info)

(0, 'id', 'INTEGER', 1, None, 1)
(1, 'code', 'varchar(255)', 1, None, 0)
(2, 'name', 'varchar(255)', 1, None, 0)
(3, 'area', 'integer', 0, None, 0)
(4, 'area_land', 'integer', 0, None, 0)
(5, 'area_water', 'integer', 0, None, 0)
(6, 'population', 'integer', 0, None, 0)
(7, 'population_growth', 'float', 0, None, 0)
(8, 'birth_rate', 'float', 0, None, 0)
(9, 'death_rate', 'float', 0, None, 0)
(10, 'migration_rate', 'float', 0, None, 0)
(11, 'created_at', 'datetime', 0, None, 0)
(12, 'updated_at', 'datetime', 0, None, 0)


## Create index statement

In [19]:
# Compare the query plan of a population filter before and after indexing.
#
# Indexes created through this connection are stored in factbook.db, so they
# are still there the next time the notebook is run. Drop the indexes this
# notebook creates first; otherwise the "before" plan below would already use
# an index on population instead of showing the un-indexed baseline.
for index_name in ("pop_idx", "pop_growth_idx", "pop_pop_growth_idx"):
    conn.execute("drop index if exists {};".format(index_name))

population_filter_sql = "explain query plan select * from facts where population > 10000 ;"

# Baseline: no index covers the population column yet
query_plan_one = conn.execute(population_filter_sql).fetchall()
print(query_plan_one)

# Same query once population is indexed
conn.execute("create index if not exists pop_idx on facts(population)")
query_plan_two = conn.execute(population_filter_sql).fetchall()
print(query_plan_two)

[(0, 0, 0, 'SCAN TABLE facts')]
[(0, 0, 0, 'SEARCH TABLE facts USING INDEX pop_idx (population>?)')]


In [21]:
# Query plan for a filter on two columns (population and population_growth).
# NOTE(review): the printed plan depends on which indexes exist when this cell
# runs. The execution counts suggest it was last run after the following cell,
# which creates pop_growth_idx -- confirm the intended cell order.
two_column_filter_sql = "explain query plan select * from facts where population > 1000000 and population_growth < 0.05;"
query_plan_three = conn.execute(two_column_filter_sql).fetchall()
print(query_plan_three)

[(0, 0, 0, 'SEARCH TABLE facts USING INDEX pop_growth_idx (population_growth<?)')]


In [20]:
# Two-column filter with a separate single-column index on each filtered column
for create_index_sql in (
    "create index if not exists pop_idx on facts(population);",
    "create index if not exists pop_growth_idx on facts(population_growth);",
):
    conn.execute(create_index_sql)

plan_cursor = conn.execute("explain query plan select * from facts where population > 1000000 and population_growth < 0.05;")
query_plan_four = plan_cursor.fetchall()
print(query_plan_four)

[(0, 0, 0, 'SEARCH TABLE facts USING INDEX pop_growth_idx (population_growth<?)')]


In [22]:
# Build one multi-column index on (population, population_growth) and
# re-check the plan of the two-column filter
multi_column_index_sql = "create index if not exists pop_pop_growth_idx on facts(population, population_growth);"
conn.execute(multi_column_index_sql)

plan_cursor = conn.execute("explain query plan select * from facts where population > 1000000 and population_growth < 0.05;")
query_plan_five = plan_cursor.fetchall()
print(query_plan_five)

[(0, 0, 0, 'SEARCH TABLE facts USING INDEX pop_pop_growth_idx (population>?)')]


In [23]:
# Covering index: select only the two columns that pop_pop_growth_idx contains
# and inspect the resulting plan
multi_column_index_sql = "create index if not exists pop_pop_growth_idx on facts(population, population_growth);"
conn.execute(multi_column_index_sql)  # no-op when the index already exists

plan_cursor = conn.execute("explain query plan select population, population_growth from facts where population > 1000000 and population_growth < 0.05;")
query_plan_six = plan_cursor.fetchall()
print(query_plan_six)

[(0, 0, 0, 'SEARCH TABLE facts USING COVERING INDEX pop_pop_growth_idx (population>?)')]


In [24]:
# Covering index for a single-column query: only population is selected and
# filtered, so inspect which index the planner picks
multi_column_index_sql = "create index if not exists pop_pop_growth_idx on facts(population, population_growth);"
conn.execute(multi_column_index_sql)  # no-op when the index already exists

plan_cursor = conn.execute("explain query plan select population from facts where population > 1000000;")
query_plan_seven = plan_cursor.fetchall()
print(query_plan_seven)

[(0, 0, 0, 'SEARCH TABLE facts USING COVERING INDEX pop_idx (population>?)')]
