# Database Normalization Script

## UDT and Utils schemas and essential UDTs

In [None]:
-- Create the [Udt] schema (holds every alias type) and the [Utils] schema (holds the metadata view).
-- NOTE(review): T-SQL requires CREATE SCHEMA and CREATE DEFAULT to run in their own batch; this cell
-- presumably relies on the statements being run one at a time or split by the client — confirm.
CREATE SCHEMA [Udt]

CREATE SCHEMA [Utils]

-- Create the essential alias types used throughout the database. They are not specific to
-- any one schema, so they are defined up front.
-- Each CREATE DEFAULT creates a standalone default object holding the placeholder value for the
-- type declared just above it. Nothing in the visible script binds these defaults to a type or
-- column; the migrations apply the same placeholder values explicitly with COALESCE.

-- Integer key type used for the ID columns of the new tables.
CREATE TYPE [Udt].[SurrogateKeyInt] FROM [int] NOT NULL

-- Flag type (used for ReviewRow, IsRHD, IsReseller, IsCreditRisk).
CREATE TYPE [Udt].[BitVal] FROM [bit] NOT NULL
CREATE DEFAULT [DF_Udt_BitVal] AS 0;

-- General-purpose name / short text type.
CREATE TYPE [Udt].[Name] FROM [nvarchar](150) NOT NULL
CREATE DEFAULT [DF_Udt_Name] AS N'(Unknown)';

-- Free-text comment type.
CREATE TYPE [Udt].[Comment] FROM [nvarchar](4000) NOT NULL
CREATE DEFAULT [DF_Udt_Comment] AS N'(None)';

-- Monetary amount type with two decimal places.
CREATE TYPE [Udt].[CurrencyAmount] FROM [decimal](18, 2) NOT NULL
CREATE DEFAULT [DF_Udt_CurrencyAmount] AS 0.00;

-- Date part of a transaction; 1900-01-01 is the placeholder value.
CREATE TYPE [Udt].[TransactionDate] FROM [datetime] NOT NULL
CREATE DEFAULT [DF_Udt_TransactionDate] AS '1900-01-01 00:00:00.000';

-- Time part of a transaction; midnight is the placeholder value.
CREATE TYPE [Udt].[TransactionTime] FROM [time](7) NOT NULL
CREATE DEFAULT [DF_Udt_TransactionTime] AS '00:00:00.0000000';

-- Metadata view (adapted from the professor's code).
-- Returns one row per column of every base table, together with:
--   * the alias type (domain) the column was declared with, if any,
--   * a printable data type (length or precision/scale included where it applies),
--   * the name and definition of the column's default constraint, if any,
--   * the name and clause of each CHECK constraint that references the column
--     (a column referenced by several CHECK constraints yields several rows).
CREATE VIEW [Utils].[uvw_FindColumnDefinitionPlusDefaultAndCheckConstraint]
AS
SELECT
    CONCAT(tbl.TABLE_SCHEMA, '.', tbl.TABLE_NAME) AS FullyQualifiedTableName,
    tbl.TABLE_SCHEMA AS SchemaName,
    tbl.TABLE_NAME AS TableName,
    col.COLUMN_NAME AS ColumnName,
    col.ORDINAL_POSITION AS OrdinalPosition,
    CONCAT(col.DOMAIN_SCHEMA, '.', col.DOMAIN_NAME) AS FullyQualifiedDomainName,
    col.DOMAIN_NAME AS DomainName,
    CASE
        WHEN col.DATA_TYPE = 'varchar' THEN CONCAT('varchar(', col.CharacterLengthText, ')')
        WHEN col.DATA_TYPE = 'char' THEN CONCAT('char(', col.CharacterLengthText, ')')
        WHEN col.DATA_TYPE = 'nvarchar' THEN CONCAT('nvarchar(', col.CharacterLengthText, ')')
        WHEN col.DATA_TYPE = 'nchar' THEN CONCAT('nchar(', col.CharacterLengthText, ')')
        WHEN col.DATA_TYPE = 'numeric' THEN CONCAT('numeric(', col.NUMERIC_PRECISION, ', ', col.NUMERIC_SCALE, ')')
        WHEN col.DATA_TYPE = 'decimal' THEN CONCAT('decimal(', col.NUMERIC_PRECISION, ', ', col.NUMERIC_SCALE, ')')
        ELSE col.DATA_TYPE
    END AS DataType,
    col.IS_NULLABLE AS IsNullable,
    dcn.DefaultName,
    col.COLUMN_DEFAULT AS DefaultNameDefinition,
    cc.CONSTRAINT_NAME AS CheckConstraintRuleName,
    cc.CHECK_CLAUSE AS CheckConstraintRuleNameDefinition
FROM
(
    -- Base tables only; views are excluded.
    SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
    FROM INFORMATION_SCHEMA.TABLES
    WHERE (TABLE_TYPE = 'BASE TABLE')
) AS tbl
INNER JOIN
(
    SELECT
        TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION, COLUMN_DEFAULT, IS_NULLABLE,
        DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, DOMAIN_SCHEMA, DOMAIN_NAME,
        -- INFORMATION_SCHEMA reports -1 for (max) columns; print 'max' rather than '-1'.
        CASE
            WHEN CHARACTER_MAXIMUM_LENGTH = -1 THEN 'max'
            ELSE CAST(CHARACTER_MAXIMUM_LENGTH AS varchar(11))
        END AS CharacterLengthText
    FROM INFORMATION_SCHEMA.COLUMNS
) AS col
    ON col.TABLE_CATALOG = tbl.TABLE_CATALOG
    AND col.TABLE_SCHEMA = tbl.TABLE_SCHEMA
    AND col.TABLE_NAME = tbl.TABLE_NAME
LEFT OUTER JOIN
(
    -- Name of the default constraint attached to each column (the catalog views expose the
    -- name; INFORMATION_SCHEMA.COLUMNS only exposes the definition).
    SELECT t.name AS TableName, schema_name(s.schema_id) AS SchemaName, ac.name AS ColumnName, d.name AS DefaultName
    FROM sys.all_columns AS ac
    INNER JOIN sys.tables AS t ON ac.object_id = t.object_id
    INNER JOIN sys.schemas AS s ON t.schema_id = s.schema_id
    INNER JOIN sys.default_constraints AS d ON ac.default_object_id = d.object_id
) AS dcn
    ON dcn.SchemaName = tbl.TABLE_SCHEMA
    AND dcn.TableName = tbl.TABLE_NAME
    AND dcn.ColumnName = col.COLUMN_NAME
LEFT OUTER JOIN
(
    -- CHECK constraints per column. Constraint names are only unique within a schema, so the
    -- match must include catalog and schema; matching on the name alone would pair a column
    -- with the clause of a same-named constraint that lives in another schema.
    SELECT cu.TABLE_SCHEMA, cu.TABLE_NAME, cu.COLUMN_NAME, c.CONSTRAINT_NAME, c.CHECK_CLAUSE
    FROM INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE AS cu
    INNER JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS AS c
        ON c.CONSTRAINT_CATALOG = cu.CONSTRAINT_CATALOG
        AND c.CONSTRAINT_SCHEMA = cu.CONSTRAINT_SCHEMA
        AND c.CONSTRAINT_NAME = cu.CONSTRAINT_NAME
) AS cc
    ON cc.TABLE_SCHEMA = tbl.TABLE_SCHEMA
    AND cc.TABLE_NAME = tbl.TABLE_NAME
    AND cc.COLUMN_NAME = col.COLUMN_NAME;



## Reference Schema

In [None]:
-- reference schema
-- The legacy source tables read later in this script ([Reference].[Staff], [Reference].[Budget],
-- [Reference].[Forex], ...) already live in [Reference], so an unconditional CREATE SCHEMA fails
-- with "already exists". Create it only when it is missing. CREATE SCHEMA must be the first
-- statement of its batch, so it is issued through EXEC to allow the guard.
IF SCHEMA_ID(N'Reference') IS NULL
    EXEC (N'CREATE SCHEMA [Reference]');

-- create reference udts
-- Two- and three-letter ISO country codes; 'UN' / 'UNK' are the placeholder values the
-- country migration substitutes for missing codes.
CREATE TYPE [Udt].[CountryISO2] FROM [nchar](2) NOT NULL
CREATE DEFAULT [DF_Udt_CountryISO2] AS N'UN';


CREATE TYPE [Udt].[CountryISO3] FROM [nchar](3) NOT NULL
CREATE DEFAULT [DF_Udt_CountryISO3] AS N'UNK';

-- Create the sales region and country tables.
-- ReviewRow is set to 1 by the migrations below for rows where a placeholder had to be
-- substituted for a missing source value.
-- Note: the WITH (...) ON [PRIMARY] clause in each table follows the last UNIQUE constraint,
-- so syntactically it belongs to that constraint only.

-- One row per distinct sales region name.
CREATE TABLE [Reference].[SalesRegion](
	[SalesRegionID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[SalesRegionName] [Udt].[Name] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
	CONSTRAINT [PK_Reference_SalesRegion] PRIMARY KEY CLUSTERED ([SalesRegionID] ASC),
    CONSTRAINT [AK_Reference_SalesRegion_Name] UNIQUE NONCLUSTERED ([SalesRegionName])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- One row per country. Name, ISO2 and ISO3 are each enforced unique; SalesRegionId points at
-- [Reference].[SalesRegion] (the foreign key is added after the data is loaded).
CREATE TABLE [Reference].[Country](
	[CountryId] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[CountryName] [Udt].[Name] NOT NULL,
	[CountryISO2] [Udt].[CountryISO2] NOT NULL,
	[CountryISO3] [Udt].[CountryISO3] NOT NULL,
	[SalesRegionId] [Udt].[SurrogateKeyInt] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Reference_Country] PRIMARY KEY CLUSTERED ([CountryId] ASC),
    CONSTRAINT [AK_Reference_Country_ISO2] UNIQUE NONCLUSTERED ([CountryISO2]),
    CONSTRAINT [AK_Reference_Country_ISO3] UNIQUE NONCLUSTERED ([CountryISO3]),
    CONSTRAINT [AK_Reference_Country_Name] UNIQUE NONCLUSTERED ([CountryName])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- Seed [Reference].[SalesRegion] with the distinct (right-trimmed) region names found in
-- [Data].[Country]. A missing region becomes '(Unknown)' and is flagged for review.
-- (Whether COALESCE is the right tool here is still open for discussion.)
INSERT INTO [Reference].[SalesRegion]
(
    [SalesRegionName],
    [ReviewRow]
)
SELECT DISTINCT
    COALESCE(RTRIM(src.SalesRegion), '(Unknown)') AS SalesRegionName,
    CASE WHEN src.SalesRegion IS NOT NULL THEN 0 ELSE 1 END AS ReviewRow
FROM [Data].[Country] AS src;

-- Load [Reference].[Country].
-- COALESCE supplies placeholders because the alias types are NOT NULL and the messy source
-- data contains NULLs that would otherwise make the insert fail. RTRIM strips the trailing
-- spaces from the ISO codes and from the region name used for the lookup.
-- Rows whose region cannot be resolved fall back to the '(Unknown)' region.
INSERT INTO [Reference].[Country]
(
    CountryName,
    CountryISO2,
    CountryISO3,
    SalesRegionId,
    ReviewRow
)
SELECT
    COALESCE(src.CountryName, '(Unknown)') AS CountryName,
    COALESCE(RTRIM(src.CountryISO2), 'UN') AS CountryISO2,
    COALESCE(RTRIM(src.CountryISO3), 'UNK') AS CountryISO3,
    COALESCE(
        region.SalesRegionID,
        (SELECT TOP 1 SalesRegionID FROM [Reference].[SalesRegion] WHERE SalesRegionName = '(Unknown)')
    ) AS SalesRegionID,
    -- Clean only when every source value was present; anything else is flagged.
    CASE
        WHEN src.CountryName IS NOT NULL
         AND src.CountryISO2 IS NOT NULL
         AND src.CountryISO3 IS NOT NULL
         AND src.SalesRegion IS NOT NULL
        THEN 0
        ELSE 1
    END AS ReviewRow
FROM [Data].[Country] AS src
LEFT JOIN [Reference].[SalesRegion] AS region
    ON region.SalesRegionName = RTRIM(src.SalesRegion);

-- Enforce Country -> SalesRegion now that both tables are populated
-- (WITH CHECK validates the rows that were just loaded).
ALTER TABLE [Reference].[Country] WITH CHECK
    ADD CONSTRAINT [FK_Country_SalesRegion]
    FOREIGN KEY ([SalesRegionId])
    REFERENCES [Reference].[SalesRegion] ([SalesRegionID]);




## Human Resources Schema

In [None]:
-- Human resources schema: alias types plus the Department and Staff tables.

CREATE SCHEMA [HumanResources]

-- Create the HR alias types. Both are nullable integers: a staff member may have no manager
-- and no department.
CREATE TYPE [Udt].[DepartmentID] FROM [int] NULL

CREATE TYPE [Udt].[ManagerID] FROM [int] NULL


-- Create the department and staff tables.
-- Note: the WITH (...) ON [PRIMARY] clause in each table follows the UNIQUE constraint, so
-- syntactically it belongs to that constraint only.

-- One row per distinct department name.
CREATE TABLE [HumanResources].[Department](
	[DepartmentID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[Name] [Udt].[Name] NOT NULL,
    CONSTRAINT [PK_HR_Department] PRIMARY KEY CLUSTERED ([DepartmentID] ASC),
    CONSTRAINT [AK_HR_Name] UNIQUE NONCLUSTERED ([Name])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- One row per staff member. ManagerID is a self-reference to StaffID and DepartmentID points
-- at [HumanResources].[Department]; both foreign keys are added after the data is loaded.
CREATE TABLE [HumanResources].[Staff](
	[StaffID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[StaffName] [Udt].[Name] NOT NULL,
	[ManagerID] [Udt].[ManagerID] NULL,
	[DepartmentID] [Udt].[DepartmentID] NULL,
    CONSTRAINT [PK_HR_Staff] PRIMARY KEY CLUSTERED ([StaffID] ASC),
    CONSTRAINT [AK_HR_Staff_Name] UNIQUE NONCLUSTERED ([StaffName])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- Migrate the HR data out of the legacy [Reference].[Staff] table.

-- Departments go first because the staff load looks up DepartmentID by name.
-- One row per distinct non-NULL department name.
INSERT INTO [HumanResources].[Department] ([Name])
SELECT src.Department
FROM [Reference].[Staff] AS src
WHERE src.Department IS NOT NULL
GROUP BY src.Department;

-- Staff: the department name is swapped for its new DepartmentID (NULL when the staff
-- member has no department, hence the outer join).
-- NOTE(review): ManagerID is copied from the source as-is while StaffID is regenerated by
-- IDENTITY; the self-referencing key below is only right if the new StaffID values happen to
-- equal the IDs the source ManagerID values refer to — confirm.
INSERT INTO [HumanResources].[Staff] ([StaffName], [ManagerID], [DepartmentID])
SELECT
    src.StaffName,
    src.ManagerID,
    dept.DepartmentID
FROM [Reference].[Staff] AS src
LEFT JOIN [HumanResources].[Department] AS dept
    ON src.Department = dept.Name;

-- Foreign keys are added after the load; WITH CHECK validates the rows just inserted.
ALTER TABLE [HumanResources].[Staff] WITH CHECK
    ADD CONSTRAINT [FK_Staff_Department]
    FOREIGN KEY ([DepartmentID])
    REFERENCES [HumanResources].[Department] ([DepartmentID]);

-- Self-reference: a manager is another staff row.
ALTER TABLE [HumanResources].[Staff] WITH CHECK
    ADD CONSTRAINT [FK_Staff_Manager]
    FOREIGN KEY ([ManagerID])
    REFERENCES [HumanResources].[Staff] ([StaffID]);


## Inventory Schema

In [None]:
-- inventory schema

CREATE SCHEMA [Inventory]

-- create inventory udts

-- Stock code stored as text; the all-zero GUID string is the placeholder value.
CREATE TYPE [Udt].[UniqueIdentifierCode] FROM [nvarchar](50) NOT NULL
CREATE DEFAULT [DF_Udt_UniqueIdentifierCode] AS N'00000000-0000-0000-0000-000000000000';

-- Model name / model variant text.
CREATE TYPE [Udt].[ModelName] FROM [nvarchar](150) NOT NULL
CREATE DEFAULT [DF_Udt_ModelName] AS N'(N/A)';


-- Four-digit year kept as text; '9999' is the placeholder value and the rule object
-- describes the expected all-digits format.
CREATE TYPE [Udt].[YearValue] FROM [char](4) NOT NULL
CREATE DEFAULT [DF_Udt_YearValue] AS N'9999';
CREATE RULE [CK_Udt_YearValue_Format] AS @value LIKE '[0-9][0-9][0-9][0-9]'

-- [Udt].[CurrencyAmount] and [Udt].[Comment] are deliberately NOT created here: both already
-- exist from the essential-UDT section at the top of the script (decimal(18, 2) and
-- nvarchar(4000)), and a second CREATE TYPE with the same name fails with "already exists".
-- NOTE(review): the removed statements asked for decimal(19, 4) and nvarchar(100); if those
-- definitions are the ones actually wanted, change the original CREATE TYPE statements instead.

-- Vehicle colour name.
CREATE TYPE [Udt].[ColorName] FROM [nvarchar](50) NOT NULL
CREATE DEFAULT [DF_Udt_ColorName] AS N'(Unknown)';

-- Create the make, model and stock tables.
-- ReviewRow is set to 1 by the migrations for rows where a source value was missing or a
-- lookup failed.

-- One row per vehicle make; CountryID points at [Reference].[Country] (FK added after load).
CREATE TABLE [Inventory].[Make](
    [MakeID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
    [MakeName] [Udt].[Name] NOT NULL,
    [CountryID] [Udt].[SurrogateKeyInt] NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Inventory_Make] PRIMARY KEY CLUSTERED ([MakeID] ASC),
    CONSTRAINT [AK_Inventory_Make_Name] UNIQUE NONCLUSTERED ([MakeName])
) ON [PRIMARY]

-- One row per model; MakeID points at [Inventory].[Make] (FK added after load).
CREATE TABLE [Inventory].[Model](
    [ModelID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,        
    [MakeID] [Udt].[SurrogateKeyInt] NOT NULL,
    [ModelName] [Udt].[ModelName] NOT NULL,
    [ModelVariant] [Udt].[ModelName] NOT NULL,
    [YearFirstProduced] [Udt].[YearValue] NOT NULL,
    [YearLastProduced] [Udt].[YearValue] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Inventory_Model] PRIMARY KEY CLUSTERED ([ModelID] ASC),
) ON [PRIMARY]

-- One row per stock item (vehicle). StockCode is the unique code carried over from the
-- source data; the Stock -> Model foreign key is declared inline, so it is enforced while
-- the stock rows are being loaded.
CREATE TABLE [Inventory].[Stock](
    [StockID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
    [StockCode] [Udt].[UniqueIdentifierCode] NOT NULL,
    [ModelID] [Udt].[SurrogateKeyInt] NULL,
    [Cost] [Udt].[CurrencyAmount] NOT NULL,
    [RepairsCost] [Udt].[CurrencyAmount] NOT NULL,
    [PartsCost] [Udt].[CurrencyAmount] NOT NULL,
    [TransportInCost] [Udt].[CurrencyAmount] NOT NULL,
    [IsRHD] [Udt].[BitVal] NOT NULL,
    [Color] [Udt].[ColorName] NOT NULL,
    [BuyerComments] [Udt].[Comment] NOT NULL,
    [DateBought] [Udt].[TransactionDate] NOT NULL,
    [TimeBought] [Udt].[TransactionTime] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Inventory_Stock] PRIMARY KEY CLUSTERED ([StockID] ASC),
    CONSTRAINT [AK_Inventory_Stock_StockCode] UNIQUE NONCLUSTERED ([StockCode]),
    CONSTRAINT [FK_Stock_Model] FOREIGN KEY([ModelID])
    REFERENCES [Inventory].[Model] ([ModelID])
) ON [PRIMARY]

-- Data migration for the inventory schema: Make, then Model, then Stock, followed by the
-- foreign keys that are validated against the loaded rows.

-- Make data: resolve the source country code (right-trimmed) against CountryISO3.
-- A make whose country cannot be resolved falls back to the 'UNK' country row, if one exists
-- (CountryID is nullable, so the row still loads otherwise), and is flagged for review.
INSERT INTO [Inventory].[Make] ([MakeName], [CountryID], [ReviewRow])
SELECT 
    D.MakeName,
    COALESCE(C.CountryId, (SELECT CountryId FROM [Reference].[Country] WHERE CountryISO3 = 'UNK')),
    CASE WHEN C.CountryId IS NULL THEN 1 ELSE 0 END
FROM [Data].[Make] AS D
LEFT JOIN [Reference].[Country] AS C ON RTRIM(D.MakeCountry) = C.CountryISO3;

-- Model data: rows are inserted and ModelID is generated by IDENTITY.
-- NOTE(review): the original comment said the old model id is kept, but no column holding
-- the source ModelID is inserted here.
-- NOTE(review): the join below matches the source MakeID to the newly generated
-- [Inventory].[Make].MakeID, which only lines up if the IDENTITY values happen to equal the
-- source IDs — confirm.
-- NOTE(review): an unmatched make becomes MakeID 0; [Inventory].[Make] is IDENTITY(1,1), so
-- such a row would presumably fail FK_Model_Make WITH CHECK below — confirm.
INSERT INTO [Inventory].[Model] ([MakeID], [ModelName], [ModelVariant], [YearFirstProduced], [YearLastProduced], [ReviewRow])
SELECT
    COALESCE(M.MakeID, 0),
    COALESCE(D.ModelName, '(N/A)'),
    COALESCE(D.ModelVariant, '(N/A)'),
    COALESCE(D.YearFirstProduced, '9999'),
    COALESCE(D.YearLastProduced, '9999'),
    CASE WHEN D.MakeID IS NULL OR D.ModelName IS NULL OR D.ModelVariant IS NULL THEN 1 ELSE 0 END
FROM [Data].[Model] AS D
LEFT JOIN [Inventory].[Make] AS M ON D.MakeID = M.MakeID; 


-- Stock data: every NOT NULL column gets its placeholder when the source value is missing,
-- and the row is flagged for review if any source value was NULL.
-- NOTE(review): the original comment mentions a bridge column, but the join matches the
-- source ModelID directly to the newly generated [Inventory].[Model].ModelID — the same
-- "IDENTITY equals source ID" assumption as above; confirm.
-- NOTE(review): StockCode is unique, so more than one NULL source code would collide on
-- 'placeholder' — confirm the source has at most one.
INSERT INTO [Inventory].[Stock] ([StockCode], [ModelID], [Cost], [RepairsCost], [PartsCost], [TransportInCost], [IsRHD], [Color], [BuyerComments], [DateBought], [TimeBought], [ReviewRow])
SELECT
    COALESCE(D.StockCode, 'placeholder'),
    IM.ModelID AS ModelID, 
    COALESCE(D.Cost, 0.00),
    COALESCE(D.RepairsCost, 0.00),
    COALESCE(D.PartsCost, 0.00),
    COALESCE(D.TransportInCost, 0.00),
    COALESCE(D.IsRHD, 0),
    COALESCE(D.Color, '(Unknown)'),
    COALESCE(D.BuyerComments, '(None)'),
    COALESCE(D.DateBought, '1900-01-01 00:00:00.000'),
    COALESCE(D.TimeBought, '00:00:00.0000000'),
    CASE WHEN D.StockCode IS NULL OR D.ModelID IS NULL OR D.Cost IS NULL OR D.RepairsCost IS NULL OR D.PartsCost IS NULL OR D.TransportInCost IS NULL OR D.IsRHD IS NULL OR D.Color IS NULL OR D.BuyerComments IS NULL OR D.DateBought IS NULL OR D.TimeBought IS NULL THEN 1 ELSE 0 END AS ReviewRow
FROM [Data].[Stock] AS D
LEFT JOIN [Inventory].[Model] AS IM ON D.ModelID = IM.ModelID 

-- Create the remaining foreign keys; WITH CHECK validates the rows loaded above.
ALTER TABLE [Inventory].[Make] WITH CHECK
ADD CONSTRAINT [FK_Make_Country] FOREIGN KEY([CountryID])
REFERENCES [Reference].[Country] ([CountryId])


ALTER TABLE [Inventory].[Model] WITH CHECK
ADD CONSTRAINT [FK_Model_Make] FOREIGN KEY([MakeID])
REFERENCES [Inventory].[Make] ([MakeID])


## Sales Schema

In [None]:
-- Sales schema: alias types and the Customer table.

CREATE SCHEMA [Sales]


-- Create the sales alias types. Each CREATE DEFAULT creates a standalone default object with
-- the placeholder value for the type above it; nothing in the visible script binds them.

-- Lower / upper bound of a sales category.
CREATE TYPE [Udt].[Threshold] FROM [int] NOT NULL
CREATE DEFAULT [DF_Udt_Threshold] AS 0;

-- Fixed-width, eight-character invoice number.
CREATE TYPE [Udt].[InvoiceNumber] FROM [char](8) NOT NULL
CREATE DEFAULT [DF_Udt_InvoiceNumber] AS N'00000000';

-- Position of a line within a sales order.
CREATE TYPE [Udt].[LineItemNumber] FROM [tinyint] NOT NULL
CREATE DEFAULT [DF_Udt_LineItemNumber] AS 0;

-- Customer spend capacity label.
CREATE TYPE [Udt].[SpendCapacity] FROM [nvarchar](25) NOT NULL
CREATE DEFAULT [DF_Udt_SpendCapacity] AS N'(Unknown)';

-- Budget area label.
CREATE TYPE [Udt].[BudgetArea] FROM [nvarchar](25) NOT NULL
CREATE DEFAULT [DF_Udt_BudgetArea] AS N'(Unknown)';

-- Month number; the type itself is nullable.
CREATE TYPE [Udt].[MonthNumber] FROM [tinyint] NULL
CREATE DEFAULT [DF_Udt_MonthNumber] AS 0;


-- Create the customer, sales order and sales order detail tables
-- (customer first, because sales orders rely on it).

-- One row per customer. CountryID points at [Reference].[Country] (FK added after load).
-- OldCustomerID carries the source customer identifier so the sales order load can map
-- source orders to the new CustomerID; it is dropped at the end of the script.
CREATE TABLE [Sales].[Customer](
    [CustomerID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
    [CustomerName] [Udt].[Name] NOT NULL,
    [Address1] [Udt].[Name] NOT NULL,
    [Address2] [Udt].[Name] NOT NULL,
    [Town] [Udt].[Name] NOT NULL,
    [PostCode] [nvarchar](50) NOT NULL,
    [CountryID] [Udt].[SurrogateKeyInt] NOT NULL,
    [IsReseller] [Udt].[BitVal] NOT NULL,
    [IsCreditRisk] [Udt].[BitVal] NOT NULL,
    [OldCustomerID] [nvarchar](10) NOT NULL, -- Retained for SalesOrder link
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Sales_Customer] PRIMARY KEY CLUSTERED ([CustomerID] ASC),
) ON [PRIMARY]

-- Load customers from [Data].[Customer].
-- The country is resolved through its right-trimmed two-letter ISO code; customers whose
-- country cannot be resolved fall back to the 'UN' placeholder country. Every NOT NULL
-- column receives its placeholder when the source value is missing. The source CustomerID
-- is kept in OldCustomerID for the sales order load.
INSERT INTO [Sales].[Customer]
(
    [CustomerName],
    [Address1],
    [Address2],
    [Town],
    [PostCode],
    [CountryID],
    [IsReseller],
    [IsCreditRisk],
    [OldCustomerID],
    [ReviewRow]
)
SELECT
    COALESCE(src.CustomerName, '(Unknown)') AS CustomerName,
    COALESCE(src.Address1, '(Unknown)') AS Address1,
    COALESCE(src.Address2, '(Unknown)') AS Address2,
    COALESCE(src.Town, '(Unknown)') AS Town,
    COALESCE(src.PostCode, '(Unknown)') AS PostCode,
    COALESCE(
        ctry.CountryId,
        (SELECT CountryId FROM [Reference].[Country] WHERE CountryISO2 = 'UN')
    ) AS CountryID,
    COALESCE(src.IsReseller, 0) AS IsReseller,
    COALESCE(src.IsCreditRisk, 0) AS IsCreditRisk,
    COALESCE(src.CustomerID, N'0000') AS OldCustomerID,
    -- Clean only when all of the checked source values were present
    -- (Address2 and Town are not part of the check).
    CASE
        WHEN src.CustomerName IS NOT NULL
         AND src.Address1 IS NOT NULL
         AND src.PostCode IS NOT NULL
         AND src.Country IS NOT NULL
         AND src.IsReseller IS NOT NULL
         AND src.IsCreditRisk IS NOT NULL
         AND src.CustomerID IS NOT NULL
        THEN 0
        ELSE 1
    END AS ReviewRow
FROM [Data].[Customer] AS src
LEFT JOIN [Reference].[Country] AS ctry
    ON ctry.CountryISO2 = RTRIM(src.Country);


-- Create the sales order table: one row per invoice.
-- SalesOrderID is not an IDENTITY column; the load below supplies the source SalesID as the
-- key. CustomerID points at [Sales].[Customer] (FK added after load).
CREATE TABLE [Sales].[SalesOrder](
    [SalesOrderID] [Udt].[SurrogateKeyInt] NOT NULL,
    [CustomerID] [Udt].[SurrogateKeyInt] NOT NULL,
    [InvoiceNumber] [Udt].[InvoiceNumber] NOT NULL,
    [SaleDate] [Udt].[TransactionDate] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Sales_SalesOrder] PRIMARY KEY CLUSTERED ([SalesOrderID] ASC),
    CONSTRAINT [AK_Sales_SalesOrder_Invoice] UNIQUE NONCLUSTERED ([InvoiceNumber])
) ON [PRIMARY]


-- Load sales orders from [Data].[Sales].
-- The source SalesID is reused as the key. The source customer identifier is translated to
-- the new CustomerID through [Sales].[Customer].OldCustomerID; orders whose customer cannot
-- be resolved fall back to a customer named '(Unknown)'.
-- NOTE(review): if no '(Unknown)' customer exists the fallback yields NULL, which the
-- NOT NULL CustomerID column would reject — confirm.
INSERT INTO [Sales].[SalesOrder]
(
    [SalesOrderID],
    [CustomerID],
    [InvoiceNumber],
    [SaleDate],
    [ReviewRow]
)
SELECT
    COALESCE(src.SalesID, 0) AS SalesOrderID,
    COALESCE(
        cust.CustomerID,
        (SELECT TOP 1 CustomerID FROM [Sales].[Customer] WHERE CustomerName = '(Unknown)')
    ) AS CustomerID,
    COALESCE(src.InvoiceNumber, '00000000') AS InvoiceNumber,
    COALESCE(src.SaleDate, '1900-01-01 00:00:00.000') AS SaleDate,
    -- Clean only when every source value was present and the customer lookup succeeded.
    CASE
        WHEN src.SalesID IS NOT NULL
         AND src.CustomerID IS NOT NULL
         AND src.InvoiceNumber IS NOT NULL
         AND src.SaleDate IS NOT NULL
         AND cust.CustomerID IS NOT NULL
        THEN 0
        ELSE 1
    END AS ReviewRow
FROM [Data].[Sales] AS src
LEFT JOIN [Sales].[Customer] AS cust
    ON cust.OldCustomerID = src.CustomerID;



-- Create the sales order detail table: one row per line item of a sales order.
-- SalesOrderDetailID is not an IDENTITY column; the load below supplies the source
-- SalesDetailsID as the key. A line item number is unique within its order.
-- SalesOrderID and StockID point at [Sales].[SalesOrder] and [Inventory].[Stock]
-- (both foreign keys are added after the load).
CREATE TABLE [Sales].[SalesOrderDetail](
    [SalesOrderDetailID] [Udt].[SurrogateKeyInt] NOT NULL, 
    [SalesOrderID] [Udt].[SurrogateKeyInt] NOT NULL,
    [LineItemNumber] [Udt].[LineItemNumber] NOT NULL,
    [StockID] [Udt].[SurrogateKeyInt] NOT NULL,
    [SalePrice] [Udt].[CurrencyAmount] NOT NULL,
    [LineItemDiscount] [Udt].[CurrencyAmount] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Sales_SalesOrderDetail] PRIMARY KEY CLUSTERED ([SalesOrderDetailID] ASC),
    CONSTRAINT [AK_Sales_SalesOrderDetail_LineItem] UNIQUE NONCLUSTERED ([SalesOrderID], [LineItemNumber])
) ON [PRIMARY]

-- Load sales order details from [Data].[SalesDetails].
-- The source row's StockID holds the stock code, so it is mapped to the new integer StockID
-- through [Inventory].[Stock].StockCode; the order is matched on the carried-over SalesID.
-- NOTE(review): unresolved orders / stock items are stored as 0, which would presumably fail
-- the foreign keys added later WITH CHECK — confirm the source has no such rows.
INSERT INTO [Sales].[SalesOrderDetail]
(
    [SalesOrderDetailID],
    [SalesOrderID],
    [LineItemNumber],
    [StockID],
    [SalePrice],
    [LineItemDiscount],
    [ReviewRow]
)
SELECT
    COALESCE(src.SalesDetailsID, 0) AS SalesOrderDetailID,
    COALESCE(ord.SalesOrderID, 0) AS SalesOrderID,
    COALESCE(src.LineItemNumber, 0) AS LineItemNumber,
    COALESCE(stk.StockID, 0) AS StockID, -- new integer key looked up from the stock code
    COALESCE(src.SalePrice, 0.00) AS SalePrice,
    COALESCE(src.LineItemDiscount, 0.00) AS LineItemDiscount,
    -- Clean only when every source value was present and both lookups succeeded.
    CASE
        WHEN src.SalesDetailsID IS NOT NULL
         AND src.SalesID IS NOT NULL
         AND src.LineItemNumber IS NOT NULL
         AND src.StockID IS NOT NULL
         AND src.SalePrice IS NOT NULL
         AND src.LineItemDiscount IS NOT NULL
         AND ord.SalesOrderID IS NOT NULL
         AND stk.StockID IS NOT NULL
        THEN 0
        ELSE 1
    END AS ReviewRow
FROM [Data].[SalesDetails] AS src
LEFT JOIN [Sales].[SalesOrder] AS ord
    ON ord.SalesOrderID = src.SalesID
LEFT JOIN [Inventory].[Stock] AS stk
    ON stk.StockCode = src.StockID;


-- Create and migrate the other sales-related tables.

-- Sales category: a described band between a lower and an upper threshold.
CREATE TABLE [Sales].[SalesCategory](
    [SalesCategoryID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
    [LowerThreshold] [Udt].[Threshold] NOT NULL,
    [UpperThreshold] [Udt].[Threshold] NOT NULL,
    [CategoryDescription] [Udt].[Name] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
    CONSTRAINT [PK_Sales_SalesCategory] PRIMARY KEY CLUSTERED ([SalesCategoryID] ASC)
) ON [PRIMARY]

-- Load the sales categories from the legacy [Reference].[SalesCategory] table.
-- A missing lower bound becomes 0, a missing upper bound 99999999 and a missing
-- description '(Unknown)'; any such row is flagged for review.
INSERT INTO [Sales].[SalesCategory]
(
    [LowerThreshold],
    [UpperThreshold],
    [CategoryDescription],
    [ReviewRow]
)
SELECT
    COALESCE(src.LowerThreshold, 0),
    COALESCE(src.UpperThreshold, 99999999),
    COALESCE(src.CategoryDescription, '(Unknown)'),
    CASE
        WHEN src.LowerThreshold IS NOT NULL
         AND src.UpperThreshold IS NOT NULL
         AND src.CategoryDescription IS NOT NULL
        THEN 0
        ELSE 1
    END
FROM [Reference].[SalesCategory] AS src;


-- Budget: table plus a key-preserving load from the legacy [Reference].[Budget] table.
	
CREATE TABLE [Sales].[Budget](
	[BudgetID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[BudgetValue] [Udt].[CurrencyAmount] NOT NULL,
	[BudgetYear] [Udt].[YearValue] NOT NULL,
	[BudgetMonth] [Udt].[MonthNumber] NULL,
	[BudgetDetail] [Udt].[Comment] NOT NULL,
	[BudgetElement] [Udt].[Name] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_Budget] PRIMARY KEY CLUSTERED 
(
	[BudgetID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- IDENTITY_INSERT is switched on so the source BudgetKey can be written into the IDENTITY
-- column BudgetID; it is switched off again right after the load.
SET IDENTITY_INSERT [Sales].[Budget] ON;

-- Source values are copied as-is (no placeholders are substituted here); ReviewRow is 1 when
-- any source value is NULL.
-- NOTE(review): BudgetID, BudgetValue, BudgetYear, BudgetDetail and BudgetElement are
-- NOT NULL, so a NULL in those source columns would reject the insert rather than produce a
-- flagged row — confirm the source has none.
INSERT INTO [Sales].[Budget] (
    [BudgetID],
    [BudgetValue],
    [BudgetYear],
    [BudgetMonth],
    [BudgetDetail],
    [BudgetElement],
    [ReviewRow]
)
SELECT
    [BudgetKey],     
    [BudgetValue],
    [Year],
    [Month],
    [BudgetDetail],
    [BudgetElement],
    CASE 
        WHEN BudgetKey IS NULL OR BudgetValue IS NULL OR [Year] IS NULL OR [Month] IS NULL OR BudgetDetail IS NULL OR BudgetElement IS NULL THEN 1 
        ELSE 0
    END
FROM [Reference].[Budget]

SET IDENTITY_INSERT [Sales].[Budget] OFF;

-- Sales budget: table, load from the legacy [Reference].[SalesBudgets] table, and a month
-- range check added after the load.
-- NOTE(review): BudgetYear is declared NULL and BudgetMonth NOT NULL here, the opposite of
-- [Sales].[Budget] — confirm this is intended.
CREATE TABLE [Sales].[SalesBudget](
	[SalesBudgetID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[BudgetArea] [Udt].[BudgetArea] NOT NULL,
	[BudgetAmount] [Udt].[CurrencyAmount] NOT NULL,
	[BudgetYear] [Udt].[YearValue] NULL,
	[BudgetMonth] [Udt].[MonthNumber] NOT NULL,
	[DateUpdated] [Udt].[TransactionDate] NOT NULL,
	[Comments] [Udt].[Comment] NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_SalesBudget] PRIMARY KEY CLUSTERED 
(
	[SalesBudgetID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- Source values are copied as-is (no placeholders are substituted here); ReviewRow is 1 when
-- any source value is NULL.
-- NOTE(review): BudgetArea, BudgetAmount, BudgetMonth and DateUpdated are NOT NULL, so a
-- NULL in those source columns would reject the insert rather than produce a flagged row —
-- confirm the source has none.
INSERT INTO [Sales].[SalesBudget] (
    [BudgetArea],
    [BudgetAmount],
    [BudgetYear],
    [BudgetMonth],
    [DateUpdated],
    [Comments],
    [ReviewRow]
)
SELECT
    [BudgetArea],     
    [BudgetAmount],
    [BudgetYear],
    [BudgetMonth],
    [DateUpdated],
    [Comments],
    CASE 
        WHEN BudgetArea IS NULL OR BudgetAmount IS NULL OR BudgetYear IS NULL OR BudgetMonth IS NULL OR DateUpdated IS NULL OR Comments IS NULL THEN 1
        ELSE 0
    END
FROM [Reference].[SalesBudgets]


-- Restrict BudgetMonth to calendar months. The constraint is named CK_Budget_MonthRange but
-- is attached to [Sales].[SalesBudget], not [Sales].[Budget]. It is added after the load, so
-- the rows inserted above must already satisfy it for the ALTER to succeed.
ALTER TABLE [Sales].[SalesBudget]
ADD CONSTRAINT [CK_Budget_MonthRange]
CHECK ([BudgetMonth] BETWEEN 1 AND 12);

-- Forex: alias types, table and load from the legacy [Reference].[Forex] table.

-- Date an exchange rate applies to.
CREATE TYPE [Udt].[ExchangeDate] FROM [date] NOT NULL
CREATE DEFAULT Def_ExchangeDate AS GETDATE();

-- Three-character ISO currency code; the type itself is nullable.
CREATE TYPE [Udt].[ISOCurrencyCode] FROM [char](3) NULL
CREATE DEFAULT Def_ISOCurrencyCode AS 'USD';


-- One row per exchange date and currency.
-- NOTE(review): ExchangeRate uses [Udt].[CurrencyAmount], which this script defines as
-- decimal(18, 2), so rates are stored with two decimal places; the load's 0.0000 placeholder
-- suggests four were expected — confirm the precision is sufficient.
CREATE TABLE [Sales].[Forex](
	[ForexID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[ExchangeDate] [Udt].[ExchangeDate] NOT NULL,
	[ISOCurrency] [Udt].[ISOCurrencyCode] NULL,
	[ExchangeRate] [Udt].[CurrencyAmount] NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_Forex] PRIMARY KEY CLUSTERED 
(
	[ForexID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]


-- Missing values are replaced by placeholders ('1900-01-01', 'UNK', 0.0000) and the row is
-- flagged for review. The placeholders differ from the Def_* default objects above
-- (GETDATE() and 'USD').
INSERT INTO [Sales].[Forex] (
    [ExchangeDate],
    [ISOCurrency],
    [ExchangeRate],
    [ReviewRow]
)
SELECT
    COALESCE(F.ExchangeDate, '1900-01-01') AS ExchangeDate,     
    COALESCE(F.ISOCurrency, 'UNK') AS ISOCurrency,
    COALESCE(F.ExchangeRate, 0.0000) AS ExchangeRate,
    CASE 
        WHEN F.ExchangeDate IS NULL OR F.ISOCurrency IS NULL OR F.ExchangeRate IS NULL THEN 1
        ELSE 0
    END AS ReviewRow
FROM [Reference].[Forex] AS F;

-----------------------------
-- Marketing information: one row per customer with a recorded spend capacity.

-- [Udt].[SpendCapacity] is deliberately NOT created here: it already exists from the sales
-- UDT section earlier in this script (nvarchar(25)), and a second CREATE TYPE with the same
-- name fails with "already exists".
-- NOTE(review): the removed statement asked for nvarchar(15); if that width is the one
-- actually wanted, change the original CREATE TYPE instead.
CREATE DEFAULT Def_SpendCapacity AS '(Unknown)';


CREATE TABLE [Sales].[MarketingInformation](
	[MarketingInformationID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[CustomerID] [Udt].[SurrogateKeyInt] NOT NULL,
	[SpendCapacity] [Udt].[SpendCapacity] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_MarketingInformation] PRIMARY KEY CLUSTERED 
(
	[MarketingInformationID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- The foreign key is added before the load (the table is still empty), so it is enforced
-- on every inserted row.
ALTER TABLE [Sales].[MarketingInformation]  WITH CHECK 
ADD CONSTRAINT [FK_MarketingInformation_Customer] FOREIGN KEY ([CustomerID])
REFERENCES [Sales].[Customer] ([CustomerID]);

-- Load from the legacy [Reference].[MarketingInformation] table, matching customers by name
-- (source column CUST). Source rows without a matching customer are skipped by the inner join.
-- A missing spend capacity becomes '(Unknown)' and the row is flagged for review, in line
-- with the other migrations; without the placeholder the NOT NULL column would reject the
-- whole insert and the ReviewRow flag could never be set.
INSERT INTO [Sales].[MarketingInformation] (
    [CustomerID],
    [SpendCapacity],
    [ReviewRow]
)
SELECT
    c.[CustomerID],
    COALESCE(mi.[SpendCapacity], N'(Unknown)') AS SpendCapacity,
    CASE 
        WHEN c.CustomerID IS NULL OR mi.SpendCapacity IS NULL THEN 1
        ELSE 0
    END AS ReviewRow
FROM [Reference].[MarketingInformation] AS mi
INNER JOIN [Sales].[Customer] AS c
    ON c.CustomerName = mi.CUST;

------------------------- 
-- Marketing type: the distinct values found in the comma-separated
-- [Reference].[MarketingCategories].MarketingType column, normalised into their own table.
CREATE TYPE [Udt].[MarketingTypeName] FROM [nvarchar](50) NOT NULL
CREATE DEFAULT Def_MarketingTypeName AS '(Unknown)';

-- The CREATE TABLE is terminated with a semicolon on purpose: the next statement starts with
-- WITH (a common table expression), and T-SQL requires the preceding statement to be
-- terminated, otherwise the WITH is parsed as part of the CREATE TABLE and fails.
CREATE TABLE [Sales].[MarketingType](
	[MarketingTypeID] [Udt].[SurrogateKeyInt] IDENTITY(1,1) NOT NULL,
	[MarketingTypeName] [Udt].[MarketingTypeName] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_MarketingType] PRIMARY KEY CLUSTERED 
(
	[MarketingTypeID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY];

-- Split each comma-separated list into its elements, trim surrounding spaces and keep each
-- distinct value once.
WITH [SplitTypes] AS (
    SELECT DISTINCT
        LTRIM(RTRIM(value)) AS MarketingTypeName
    FROM [Reference].[MarketingCategories] mc
    CROSS APPLY STRING_SPLIT(mc.MarketingType, ',')
)
INSERT INTO [Sales].[MarketingType] (MarketingTypeName, ReviewRow)
SELECT [MarketingTypeName],
CASE 
    WHEN MarketingTypeName IS NULL THEN 1
    ELSE 0
END AS ReviewRow
FROM [SplitTypes];

-- Junction table linking makes to marketing types (many-to-many); the pair
-- (MakeID, MarketingTypeID) is the primary key. Both foreign keys are added after the load.
CREATE TABLE [Sales].[MakeMarketingType](
	[MakeID] [Udt].[SurrogateKeyInt] NOT NULL,
	[MarketingTypeID] [Udt].[SurrogateKeyInt] NOT NULL,
	[ReviewRow] [Udt].[BitVal] NOT NULL,
 CONSTRAINT [PK_MakeMarketingType] PRIMARY KEY CLUSTERED 
(
	[MakeID] ASC, [MarketingTypeID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]

-- Populate the make / marketing-type junction.
-- Each source row names a make and a comma-separated list of marketing types: the make is
-- resolved by name, the list is split, and every trimmed element is resolved against
-- [Sales].[MarketingType]. DISTINCT removes pairs that occur more than once.
INSERT INTO [Sales].[MakeMarketingType] ([MakeID], [MarketingTypeID], [ReviewRow])
SELECT DISTINCT
    mk.MakeID,
    mtype.MarketingTypeID,
    CASE WHEN mtype.MarketingTypeName IS NOT NULL THEN 0 ELSE 1 END AS ReviewRow
FROM [Reference].[MarketingCategories] AS cat
INNER JOIN [Inventory].[Make] AS mk
    ON mk.MakeName = cat.MakeName
CROSS APPLY STRING_SPLIT(cat.MarketingType, ',') AS piece
INNER JOIN [Sales].[MarketingType] AS mtype
    ON mtype.MarketingTypeName = LTRIM(RTRIM(piece.value));

-- Foreign keys are added after the load; WITH CHECK validates the rows just inserted.
ALTER TABLE [Sales].[MakeMarketingType] WITH CHECK
    ADD CONSTRAINT [FK_MakeMarketingType_Make]
    FOREIGN KEY ([MakeID])
    REFERENCES [Inventory].[Make] ([MakeID]);

ALTER TABLE [Sales].[MakeMarketingType] WITH CHECK
    ADD CONSTRAINT [FK_MakeMarketingType_MarketingType]
    FOREIGN KEY ([MarketingTypeID])
    REFERENCES [Sales].[MarketingType] ([MarketingTypeID]);


-- Create the remaining sales foreign keys now that every table involved is loaded.
-- WITH CHECK validates the existing rows, so each statement fails if a loaded row points at
-- a key that does not exist in the referenced table.

-- Customer -> Country
ALTER TABLE [Sales].[Customer] WITH CHECK
ADD CONSTRAINT [FK_Customer_Country] FOREIGN KEY([CountryID])
REFERENCES [Reference].[Country] ([CountryId])

-- SalesOrder -> Customer
ALTER TABLE [Sales].[SalesOrder] WITH CHECK
ADD CONSTRAINT [FK_SalesOrder_Customer] FOREIGN KEY([CustomerID])
REFERENCES [Sales].[Customer] ([CustomerID])

-- SalesOrderDetail -> SalesOrder
ALTER TABLE [Sales].[SalesOrderDetail] WITH CHECK
ADD CONSTRAINT [FK_SalesOrderDetail_SalesOrder] FOREIGN KEY([SalesOrderID])
REFERENCES [Sales].[SalesOrder] ([SalesOrderID])

-- SalesOrderDetail -> Stock
ALTER TABLE [Sales].[SalesOrderDetail] WITH CHECK
ADD CONSTRAINT [FK_SalesOrderDetail_Stock] FOREIGN KEY([StockID])
REFERENCES [Inventory].[Stock] ([StockID])




-- Cleanup: remove the migration helper column and the legacy objects.
-- WARNING: everything below is destructive. Run it only after the migrated data has been
-- verified; the DROP statements cannot be undone by this script.

-- Drop OldCustomerID: it was only retained to map source sales orders to the new CustomerID.
ALTER TABLE Sales.Customer
DROP COLUMN OldCustomerID;

-- Legacy [Data] schema: its tables and view are dropped first, then the schema itself.
DROP TABLE [Data].[Country];
DROP TABLE [Data].[Customer];
DROP TABLE [Data].[Make];
DROP TABLE [Data].[Model];
DROP TABLE [Data].[Sales];
DROP TABLE [Data].[PivotTable];
DROP TABLE [Data].[SalesDetails];
DROP TABLE [Data].[Stock];
DROP TABLE [Data].[SalesRegion];
DROP VIEW Data.SalesByCountry;
DROP SCHEMA [Data];


-- Legacy [DataTransfer] schema (per-year sales tables).
DROP TABLE [DataTransfer].[Sales2015];
DROP TABLE [DataTransfer].[Sales2016];
DROP TABLE [DataTransfer].[Sales2017];
DROP TABLE [DataTransfer].[Sales2018];
DROP SCHEMA [DataTransfer];

-- Legacy [Output] schema.
DROP TABLE [Output].[StockPrices];
DROP SCHEMA [Output];

-- Legacy tables in [Reference]. The schema itself is kept because the new
-- [Reference].[Country] and [Reference].[SalesRegion] tables live in it.
DROP TABLE [Reference].[Budget];
DROP TABLE [Reference].[Forex];
DROP TABLE [Reference].[MarketingCategories];
DROP TABLE [Reference].[MarketingInformation];
DROP TABLE [Reference].[SalesBudgets];
DROP TABLE [Reference].[SalesCategory];
DROP TABLE [Reference].[Staff];
DROP TABLE [Reference].[StaffHierarchy];
DROP TABLE [Reference].[YearlySales];

-- Legacy [SourceData] schema.
DROP TABLE [SourceData].[SalesInPounds];
DROP TABLE [SourceData].[SalesText];
DROP SCHEMA [SourceData];

## Inline Functions 

In [None]:
-- Replacement for the dropped Data.PivotTable.
-- Inline table-valued function: one row per stock colour with the net sales amount
-- (sale price minus line-item discount) for each of the years 2015-2018 as separate columns.
-- A colour with no sales in a given year shows 0.00 for that year.

CREATE FUNCTION [Sales].[fn_SalesByColorByYear] ()
RETURNS TABLE
AS
RETURN
(
    -- Net sales per colour and calendar year of the sale date.
    WITH ColorYearTotals AS
    (
        SELECT
            stk.Color,
            YEAR(ord.SaleDate) AS SaleYear,
            SUM(det.SalePrice - ISNULL(det.LineItemDiscount, 0.00)) AS TotalSaleAmount
        FROM [Sales].[SalesOrder] AS ord
        INNER JOIN [Sales].[SalesOrderDetail] AS det
            ON det.SalesOrderID = ord.SalesOrderID
        INNER JOIN [Inventory].[Stock] AS stk
            ON stk.StockID = det.StockID
        GROUP BY
            stk.Color,
            YEAR(ord.SaleDate)
    )
    -- Turn the year rows into columns; years outside the IN list are ignored.
    SELECT
        pvt.Color,
        ISNULL(pvt.[2015], 0.00) AS [2015],
        ISNULL(pvt.[2016], 0.00) AS [2016],
        ISNULL(pvt.[2017], 0.00) AS [2017],
        ISNULL(pvt.[2018], 0.00) AS [2018]
    FROM ColorYearTotals
    PIVOT
    (
        SUM(TotalSaleAmount)
        FOR SaleYear IN ([2015], [2016], [2017], [2018])
    ) AS pvt
)

-- Quick check of the function.
SELECT * FROM Sales.fn_SalesByColorByYear()



---------------------------------------------------------------------
-- Replacement for the dropped Output.StockPrices.
-- Inline table-valued function: the distinct (make, model, cost) combinations of the stock
-- items whose model and make can be resolved.


CREATE FUNCTION [Inventory].[fn_GetStockPrices] ()
RETURNS TABLE
AS
RETURN
(
    SELECT
        mk.MakeName,
        mdl.ModelName,
        stk.Cost
    FROM [Inventory].[Make] AS mk
    INNER JOIN [Inventory].[Model] AS mdl
        ON mdl.MakeID = mk.MakeID
    INNER JOIN [Inventory].[Stock] AS stk
        ON stk.ModelID = mdl.ModelID
    WHERE stk.Cost IS NOT NULL
    -- Grouping on every output column yields each combination once.
    GROUP BY
        mk.MakeName,
        mdl.ModelName,
        stk.Cost
)

-- Quick check of the function.
SELECT * FROM Inventory.fn_GetStockPrices()
---------------------------------------------------------------------
-- Replacement for the dropped Reference.StaffHierarchy.
-- Inline table-valued function: every staff member reachable from a top-level staff row
-- (ManagerID IS NULL), with the department name and the chain of names from the top of the
-- hierarchy down to the staff member, separated by ' -> '.

CREATE FUNCTION [HumanResources].[fn_StaffHierarchy] ()
RETURNS TABLE
AS
RETURN
(
    WITH OrgTree AS
    (
        -- Anchor: staff without a manager start a path with their own name.
        SELECT
            top_level.StaffID,
            top_level.StaffName,
            top_level.ManagerID,
            top_level.DepartmentID,
            CAST(top_level.StaffName AS NVARCHAR(MAX)) AS HierarchyPath
        FROM [HumanResources].[Staff] AS top_level
        WHERE top_level.ManagerID IS NULL

        UNION ALL

        -- Recursive step: append each direct report to its manager's path.
        SELECT
            report.StaffID,
            report.StaffName,
            report.ManagerID,
            report.DepartmentID,
            CAST(boss.HierarchyPath + ' -> ' + report.StaffName AS NVARCHAR(MAX)) AS HierarchyPath
        FROM OrgTree AS boss
        INNER JOIN [HumanResources].[Staff] AS report
            ON report.ManagerID = boss.StaffID
    )
    SELECT
        tree.StaffID,
        tree.StaffName,
        tree.ManagerID,
        -- Staff without a department are kept and labelled explicitly.
        COALESCE(dept.Name, N'(No Department)') AS DepartmentName,
        tree.HierarchyPath
    FROM OrgTree AS tree
    LEFT JOIN [HumanResources].[Department] AS dept
        ON dept.DepartmentID = tree.DepartmentID
)

-- Quick check of the function.
SELECT * FROM HumanResources.fn_StaffHierarchy()

---------------------------------------------------------------------
-- Replacement for the dropped Reference.YearlySales.
-- Inline table-valued function: one row per sales order line sold in the requested year,
-- with the make, model, customer, customer country, the stock item's cost components, the
-- sale price and the sale date.
-- @SaleYear: four-character year, e.g. '2017'; it is converted to an integer for the filter.

CREATE FUNCTION [Sales].[fn_GetSalesByYear]
(
    @SaleYear CHAR(4)
)
RETURNS TABLE
AS
RETURN
(
    SELECT
        mk.MakeName,
        mdl.ModelName,
        cust.CustomerName,
        ctry.CountryName,
        stk.Cost,
        stk.RepairsCost,
        stk.PartsCost,
        stk.TransportInCost,
        det.SalePrice,
        ord.SaleDate
    FROM [Sales].[SalesOrder] AS ord
    INNER JOIN [Sales].[SalesOrderDetail] AS det
        ON det.SalesOrderID = ord.SalesOrderID
    INNER JOIN [Sales].[Customer] AS cust
        ON cust.CustomerID = ord.CustomerID
    INNER JOIN [Reference].[Country] AS ctry
        ON ctry.CountryId = cust.CountryID
    INNER JOIN [Inventory].[Stock] AS stk
        ON stk.StockID = det.StockID
    INNER JOIN [Inventory].[Model] AS mdl
        ON mdl.ModelID = stk.ModelID
    INNER JOIN [Inventory].[Make] AS mk
        ON mk.MakeID = mdl.MakeID
    WHERE DATEPART(YEAR, ord.SaleDate) = CONVERT(INT, @SaleYear)
)

-- Quick check of the function.
SELECT * FROM Sales.fn_GetSalesByYear('2017')

---------------------------------------------------------------------
-- Replacement for the dropped SourceData.SalesInPounds.
-- Inline table-valued function: the distinct (make, model, cost) combinations of the stock
-- items whose model and make can be resolved, with the cost formatted as text with two
-- decimal places and a leading pound sign.

CREATE FUNCTION [Inventory].[fn_SalesInPounds] ()
RETURNS TABLE
AS
RETURN
(
    SELECT DISTINCT
        M.MakeName,
        MD.ModelName,
        -- NCHAR(163) is the pound sign. It is built from its code point because the literal
        -- had been corrupted into the two characters 'Â£' (UTF-8 bytes read as Latin-1), and
        -- a code point cannot be damaged by the script file's encoding.
        CONCAT(NCHAR(163), CONVERT(VARCHAR(50), CAST(S.Cost AS DECIMAL(18, 2)))) AS VehicleCost
    FROM [Inventory].[Stock] AS S
    INNER JOIN [Inventory].[Model] AS MD ON S.ModelID = MD.ModelID
    INNER JOIN [Inventory].[Make] AS M ON MD.MakeID = M.MakeID
    WHERE S.Cost IS NOT NULL
)

-- Quick check of the function.
SELECT * FROM Inventory.fn_SalesInPounds()