# Exercise 3: Creating Facts & Dimensions

In [1]:
# Load ipython-sql
%load_ext sql

# Setup database connection
# Define parameters
DB_ENDPOINT = 'localhost'
DB_NAME = 'pagila'
DB_USER = 'postgres'
DB_PASSWORD = 'postgres'
DB_PORT = '5432'

conn_string = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_ENDPOINT}:{DB_PORT}/{DB_NAME}"

# Connect
%sql $conn_string

### Star Schema - Entity Relationship Diagram

<img src="../../../images/cloud_data_warehouse_star_schema_pagila.png" width="50%"/>

### Create dimension tables

In [None]:
%%sql
-- Date dimension. Holds a date together with its calendar breakdown.
-- date_key is supplied by the loader (it is a plain INTEGER, not SERIAL).
CREATE TABLE dimDate (
    date_key    INTEGER   NOT NULL,
    date        DATE      NOT NULL,
    year        SMALLINT  NOT NULL,
    quarter     SMALLINT  NOT NULL,
    month       SMALLINT  NOT NULL,
    day         SMALLINT  NOT NULL,
    week        SMALLINT  NOT NULL,
    is_weekend  BOOLEAN   NOT NULL,
    PRIMARY KEY (date_key)
);

%%sql
-- Customer dimension. customer_key is an auto-generated surrogate key.
-- customer_id presumably carries the identifier from the source customer
-- table, and start_date / end_date presumably bound the validity of the row
-- (TODO confirm both against the load step).
CREATE TABLE dimCustomer (
    customer_key  SERIAL,
    customer_id   SMALLINT     NOT NULL,
    first_name    VARCHAR(45)  NOT NULL,
    last_name     VARCHAR(45)  NOT NULL,
    email         VARCHAR(50),
    address       VARCHAR(50)  NOT NULL,
    address2      VARCHAR(50),
    district      VARCHAR(20)  NOT NULL,
    city          VARCHAR(50)  NOT NULL,
    country       VARCHAR(50)  NOT NULL,
    postal_code   VARCHAR(10),
    phone         VARCHAR(20)  NOT NULL,
    active        BOOLEAN      NOT NULL,
    create_date   TIMESTAMP    NOT NULL,
    start_date    DATE         NOT NULL,
    end_date      DATE         NOT NULL,
    PRIMARY KEY (customer_key)
);

-- Movie dimension. movie_key is an auto-generated surrogate key.
-- film_id presumably carries the identifier from the source film table
-- (TODO confirm against the load step).
CREATE TABLE dimMovie (
    movie_key          SERIAL,
    film_id            SMALLINT      NOT NULL,
    title              VARCHAR(255)  NOT NULL,
    description        TEXT,
    -- year is not a built-in PostgreSQL type. This assumes the target
    -- database already defines it, for example as a domain (TODO confirm).
    release_year       year,
    language           VARCHAR(20)   NOT NULL,
    original_language  VARCHAR(20),
    rental_duration    SMALLINT      NOT NULL,
    length             SMALLINT      NOT NULL,
    rating             VARCHAR(5)    NOT NULL,
    special_features   VARCHAR(60)   NOT NULL,
    PRIMARY KEY (movie_key)
);

-- Store dimension. store_key is an auto-generated surrogate key.
-- The store address and the manager name are kept directly on the row.
-- start_date / end_date presumably bound the validity of the row
-- (TODO confirm against the load step).
CREATE TABLE dimStore (
    store_key           SERIAL,
    store_id            SMALLINT     NOT NULL,
    address             VARCHAR(50)  NOT NULL,
    address2            VARCHAR(50),
    district            VARCHAR(20)  NOT NULL,
    city                VARCHAR(50)  NOT NULL,
    country             VARCHAR(50)  NOT NULL,
    postal_code         VARCHAR(10),
    manager_first_name  VARCHAR(45)  NOT NULL,
    manager_last_name   VARCHAR(45)  NOT NULL,
    start_date          DATE         NOT NULL,
    end_date            DATE         NOT NULL,
    PRIMARY KEY (store_key)
);

In [9]:
%%sql
-- List every column of dimStore with its data type. PostgreSQL folds
-- unquoted identifiers to lower case, hence the lower-case literal.
SELECT cols.column_name, cols.data_type
  FROM information_schema.columns AS cols
 WHERE cols.table_name = 'dimstore'

 * postgresql://postgres:***@localhost:5432/pagila
12 rows affected.


column_name,data_type
store_key,integer
store_id,smallint
start_date,date
end_date,date
district,character varying
city,character varying
country,character varying
postal_code,character varying
manager_first_name,character varying
manager_last_name,character varying


### Create the fact table

In [15]:
%%sql
-- Sales fact table for the star schema. Each row carries one sales_amount
-- plus a key into every dimension table.
-- The REFERENCES clauses make PostgreSQL reject any fact row whose key has
-- no matching dimension row, so the four dimension tables must exist before
-- this statement runs.
CREATE TABLE factSales
(
  sales_key    SERIAL  PRIMARY KEY,
  date_key     integer NOT NULL REFERENCES dimDate (date_key),
  customer_key integer NOT NULL REFERENCES dimCustomer (customer_key),
  movie_key    integer NOT NULL REFERENCES dimMovie (movie_key),
  store_key    integer NOT NULL REFERENCES dimStore (store_key),
  sales_amount numeric NOT NULL
);

 * postgresql://postgres:***@localhost:5432/pagila
Done.


[]

In [16]:
%%sql
-- Confirm the factSales layout by listing its columns and data types.
SELECT
    cols.column_name,
    cols.data_type
FROM information_schema.columns AS cols
WHERE cols.table_name = 'factsales'

 * postgresql://postgres:***@localhost:5432/pagila
6 rows affected.


column_name,data_type
sales_key,integer
date_key,integer
customer_key,integer
movie_key,integer
store_key,integer
sales_amount,numeric
