Activity 7-1
------------

The goal of this activity is to compute some BCNF decompositions using the tools from the last lecture.

First we'll load those tools, and some sample data:

In [1]:
def to_set(x):
    """Normalize ``x`` into a set of attributes.

    * A ``set`` is returned unchanged (the same object, not a copy).
    * A ``list``, ``tuple`` or ``frozenset`` is converted to a new ``set``.
    * A single ``str`` or ``int`` is wrapped in a one-element set.

    Raises:
        TypeError: if ``x`` is of any other type. ``TypeError`` is a
            subclass of ``Exception``, so ``except Exception`` still works.
    """
    if isinstance(x, set):
        return x
    # Tuples matter in practice: itertools.combinations yields tuples.
    if isinstance(x, (list, tuple, frozenset)):
        return set(x)
    if isinstance(x, (str, int)):
        return set([x])
    raise TypeError("Unrecognized type.")
def fd_to_str(fd):
    """Render a functional dependency ``(lhs, rhs)`` as ``"a,b -> c,d"``.

    Each side is normalized with ``to_set``; its attributes are converted to
    strings and sorted so the result does not depend on set iteration order.
    """
    # Tuple parameters (``def f((lhs, rhs))``) were removed in Python 3
    # (PEP 3113), so the pair is unpacked here instead of in the signature.
    lhs, rhs = fd
    sides = [",".join(sorted(map(str, to_set(side)))) for side in (lhs, rhs)]
    return " -> ".join(sides)
def fds_to_str(fds):
    """Format every FD in ``fds`` with ``fd_to_str``, separated by newline + tab."""
    rendered = [fd_to_str(fd) for fd in fds]
    return "\n\t".join(rendered)
def set_to_str(x):
    """Return the items of ``x`` joined by commas and wrapped in braces.

    The items must already be strings; they appear in the iteration order
    of ``x``. Example: ``set_to_str(['a', 'b'])`` gives ``"{a,b}"``.
    """
    joined = ",".join(x)
    return "{%s}" % joined
def fd_applies_to(fd, x):
    """Return True when the left-hand side of ``fd`` is contained in ``x``.

    ``fd`` is a ``(lhs, rhs)`` pair; both sides are normalized with
    ``to_set``, although only the left-hand side is used for the check.
    """
    determinant, _dependent = [to_set(side) for side in fd]
    return determinant.issubset(x)
def compute_closure(x, fds, verbose=False):
    """Compute the closure x^{+} of the attribute set ``x`` under ``fds``.

    Args:
        x: attribute or collection of attributes, normalized with ``to_set``.
        fds: iterable of ``(lhs, rhs)`` functional dependencies.
        verbose: when true, print each FD that enlarges the closure and the
            closure after applying it.

    Returns:
        A new set holding every attribute reachable from ``x``; the input
        is not modified.
    """
    closure = set(to_set(x))  # work on a copy so the caller's set is untouched
    grew = True
    # Sweep over the FDs until a full pass adds nothing new (fixed point).
    while grew:
        grew = False
        for fd in fds:
            lhs, rhs = [to_set(side) for side in fd]
            # Skip FDs that do not fire yet, or that would add nothing.
            if not lhs.issubset(closure) or rhs.issubset(closure):
                continue
            closure = closure | rhs
            if verbose:
                print("Using FD " + fd_to_str(fd))
                print("\t Updated x to " + set_to_str(closure))
            grew = True
    return closure
def is_superkey_for(A, X, fds, verbose=False):
    """Return True if the closure of ``A`` under ``fds`` contains all of ``X``.

    Args:
        A: candidate attribute(s); normalized by ``compute_closure``.
        X: target attribute(s). Normalized with ``to_set`` so a list or a
            single attribute name is accepted, not just a ``set``.
        fds: iterable of ``(lhs, rhs)`` functional dependencies.
        verbose: forwarded to ``compute_closure``.
    """
    target = to_set(X)
    return target.issubset(compute_closure(A, fds, verbose=verbose))
import itertools
def is_key_for(A, X, fds, verbose=False):
    """Return True if ``A`` is a key (a minimal superkey) for ``X`` under ``fds``.

    ``A`` is a key when it is a superkey for ``X`` and no subset obtained by
    dropping a single attribute is still a superkey. Checking only the
    subsets of size ``len(A) - 1`` is enough: adding attributes never shrinks
    a closure, so if any smaller subset were a superkey, one of these would
    be too.

    Args:
        A: candidate attribute(s); normalized with ``to_set`` so a single
            attribute name is not split into characters.
        X: target attribute set.
        fds: iterable of ``(lhs, rhs)`` functional dependencies.
        verbose: forwarded to every ``is_superkey_for`` call.
    """
    attrs = to_set(A)
    if not is_superkey_for(attrs, X, fds, verbose=verbose):
        return False
    if not attrs:
        # The empty set has no proper subsets; combinations(..., -1) would raise.
        return True
    # Generator + any() stops at the first smaller superkey found.
    return not any(
        is_superkey_for(set(subset), X, fds, verbose=verbose)
        for subset in itertools.combinations(attrs, len(attrs) - 1)
    )

In [2]:
# Load the `sql` IPython extension; the %sql / %%sql magics below rely on it.
%load_ext sql
# Connect to SQLite. The URL carries no file path (presumably an in-memory database; confirm).
%sql sqlite://

  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")


'Connected: None@None'

In [3]:
%%sql
-- Rebuild the sample table T from scratch and load its four rows.
DROP TABLE IF EXISTS T;
CREATE TABLE T (
    name    VARCHAR,
    ssn     VARCHAR,
    phone   VARCHAR,
    city    VARCHAR,
    zipcode INT
);
INSERT INTO T VALUES ('Fred', '123-45-6789', '650-555-1234', 'Palo Alto', 94306);
INSERT INTO T VALUES ('Fred', '123-45-6789', '650-555-2000', 'Palo Alto', 94306);
INSERT INTO T VALUES ('Joe', '987-65-4321', '415-555-3131', 'Palo Alto', 94306);
INSERT INTO T VALUES ('Joe', '987-65-4321', '415-555-4000', 'Palo Alto', 94306);

Done.
Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.


[]

### Exercise 1

Explicitly go through the steps of the BCNF algorithm using the `compute_closure` function above, then decompose the following table (i.e. by creating new SQL tables) into BCNF:

In [4]:
# Show every row of T, the table to be decomposed.
%sql SELECT * FROM T;

Done.


name,ssn,phone,city,zipcode
Fred,123-45-6789,650-555-1234,Palo Alto,94306
Fred,123-45-6789,650-555-2000,Palo Alto,94306
Joe,987-65-4321,415-555-3131,Palo Alto,94306
Joe,987-65-4321,415-555-4000,Palo Alto,94306


Given the following FDs:

In [5]:
# A: all attributes of table T.
A = {'name', 'ssn', 'phone', 'city', 'zipcode'}
# F: functional dependencies as (lhs, rhs) pairs: city -> zipcode and ssn -> name, city.
F = [
    ('city', 'zipcode'),
    ('ssn', {'name', 'city'}),
]

In [6]:
# Pick an X whose closure X^+ is neither X itself nor all of A:
# X determines something new, yet X is not a superkey.
X = {'ssn'}
compute_closure(X, F)

{'city', 'name', 'ssn', 'zipcode'}

In [7]:
# Y = X^+ - X: the attributes determined by ssn.
Y = {'city', 'name', 'zipcode'}
# Z = A - X^+: the attributes outside the closure.
Z = {'phone'}

We decompose $T$ into $T_1$, with attributes $X \cup Y$, and $T_2$, with attributes $X \cup Z$:

In [10]:
%%sql
-- Project T onto (ssn, name, city, zipcode) and (ssn, phone).
-- DISTINCT keeps each projection duplicate-free; without it T1 would store
-- one copy of a person's row per phone number.
DROP TABLE IF EXISTS T1;
CREATE TABLE T1 AS SELECT DISTINCT ssn, name, city, zipcode FROM T;
DROP TABLE IF EXISTS T2;
CREATE TABLE T2 AS SELECT DISTINCT ssn, phone FROM T;

Done.
Done.
Done.
Done.


[]

In [18]:
# Show the distinct rows of T1.
%sql SELECT DISTINCT * FROM T1;

Done.


ssn,name,city,zipcode
123-45-6789,Fred,Palo Alto,94306
987-65-4321,Joe,Palo Alto,94306


In [12]:
# Show the rows of T2 (ssn, phone).
%sql SELECT * FROM T2;

Done.


ssn,phone
123-45-6789,650-555-1234
123-45-6789,650-555-2000
987-65-4321,415-555-3131
987-65-4321,415-555-4000


In [13]:
# Second decomposition step: compute the closure of city, the left-hand side of city -> zipcode.
X = {'city'}
compute_closure(X, F)

{'city', 'zipcode'}

In [14]:
# Y = X^+ - X: the attribute determined by city.
Y = {'zipcode'}
# Z: the remaining T1 attributes outside X^+.
Z = {'ssn', 'name'}

We decompose $T_1$ into $T_{11}$, with attributes $X \cup Y$, and $T_{12}$, with attributes $X \cup Z$:

In [15]:
%%sql
-- Project T1 onto (city, zipcode) and (ssn, name, city).
-- DISTINCT keeps each projection duplicate-free; without it T11 would repeat
-- the same (city, zipcode) pair once per source row of T1.
DROP TABLE IF EXISTS T11;
CREATE TABLE T11 AS SELECT DISTINCT city, zipcode FROM T1;
DROP TABLE IF EXISTS T12;
CREATE TABLE T12 AS SELECT DISTINCT ssn, name, city FROM T1;

Done.
Done.
Done.
Done.


[]

In [17]:
# Show the distinct rows of T11 (city, zipcode).
%sql SELECT DISTINCT * FROM T11;

Done.


city,zipcode
Palo Alto,94306


In [19]:
# Show the distinct rows of T12 (ssn, name, city).
%sql SELECT DISTINCT * FROM T12;

Done.


ssn,name,city
123-45-6789,Fred,Palo Alto
987-65-4321,Joe,Palo Alto
