# Data Warehouse Reporting

***

## Project Description

In this assignment you will:

    Load data into Data Warehouse
    Write aggregation queries
    Create MQTs


## Import Libraries

In [1]:
import numpy as np
from numpy import count_nonzero, median, mean
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random


import datetime
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns',None)
#pd.set_option('display.max_rows',None)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format','{:.2f}'.format)

random.seed(0)
np.random.seed(0)
np.set_printoptions(suppress=True)

### Task 1 - Load data into the dimension table DimDate

In [2]:
dimdate = pd.read_csv("DimDate.csv", parse_dates=["date"])

In [3]:
dimdate.head()

Unnamed: 0,dateid,date,Year,Quarter,QuarterName,Month,Monthname,Day,Weekday,WeekdayName
0,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday
1,2,2019-01-02,2019,1,Q1,1,January,2,4,Wednesday
2,3,2019-01-03,2019,1,Q1,1,January,3,5,Thursday
3,4,2019-01-04,2019,1,Q1,1,January,4,6,Friday
4,5,2019-01-05,2019,1,Q1,1,January,5,7,Saturday


In [4]:
dimdate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1096 entries, 0 to 1095
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   dateid       1096 non-null   int64         
 1   date         1096 non-null   datetime64[ns]
 2   Year         1096 non-null   int64         
 3   Quarter      1096 non-null   int64         
 4   QuarterName  1096 non-null   object        
 5   Month        1096 non-null   int64         
 6   Monthname    1096 non-null   object        
 7   Day          1096 non-null   int64         
 8   Weekday      1096 non-null   int64         
 9   WeekdayName  1096 non-null   object        
dtypes: datetime64[ns](1), int64(6), object(3)
memory usage: 85.8+ KB


### Task 2 - Load data into the dimension table DimCategory

In [5]:
dimcat = pd.read_csv("DimCategory.csv")

In [6]:
dimcat.head()

Unnamed: 0,categoryid,category
0,1,Electronics
1,2,Books
2,3,Toys
3,4,Sports
4,5,Software


### Task 3 - Load data into the dimension table DimCountry

In [7]:
dimcountry = pd.read_csv("DimCountry.csv")

In [8]:
dimcountry.head()

Unnamed: 0,countryid,country
0,1,Argentina
1,2,Australia
2,3,Austria
3,4,Azerbaijan
4,5,Belgium


### Task 4 - Load data into the fact table FactSales

In [9]:
factsales = pd.read_csv("FactSales.csv")

In [10]:
factsales.head()

Unnamed: 0,dateid,categoryid,countryid,totalsales
0,1,4,44,47.84
1,1,2,30,33.22
2,1,3,28,51.51
3,1,3,16,50.48
4,1,3,34,51.3


### Task 5 - Create a grouping sets query

In [11]:
df = pd.merge(factsales,dimcountry,on='countryid')

In [12]:
df

Unnamed: 0,dateid,categoryid,countryid,totalsales,country
0,1,4,44,47.84,Sweden
1,1,1,44,42.50,Sweden
2,6,1,44,33.32,Sweden
3,7,4,44,45.61,Sweden
4,8,4,44,41.19,Sweden
...,...,...,...,...,...
4995,1026,1,6,62.47,Brazil
4996,1037,4,6,58.15,Brazil
4997,1041,3,6,52.81,Brazil
4998,1050,2,6,33.57,Brazil


In [13]:
df2 = pd.merge(df,dimcat,on='categoryid')

In [14]:
df2

Unnamed: 0,dateid,categoryid,countryid,totalsales,country,category
0,1,4,44,47.84,Sweden,Sports
1,7,4,44,45.61,Sweden,Sports
2,8,4,44,41.19,Sweden,Sports
3,84,4,44,38.34,Sweden,Sports
4,221,4,44,38.56,Sweden,Sports
...,...,...,...,...,...,...
4995,801,2,6,57.79,Brazil,Books
4996,960,2,6,70.36,Brazil,Books
4997,980,2,6,39.53,Brazil,Books
4998,1005,2,6,54.76,Brazil,Books


In [15]:
pd.DataFrame(df2.groupby(["country","category"])["totalsales"].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,totalsales
country,category,Unnamed: 2_level_1
Argentina,Books,1040.14
Argentina,Electronics,1256.45
Argentina,Sports,1235.77
Argentina,Toys,1414.56
Australia,Books,1097.23
...,...,...
Uruguay,Toys,640.17
Uzbekistan,Books,1181.51
Uzbekistan,Electronics,1302.73
Uzbekistan,Sports,620.87


### Task 6 - Create a rollup query

In [16]:
df3 = pd.merge(dimdate,df2,on='dateid')

In [17]:
df3

Unnamed: 0,dateid,date,Year,Quarter,QuarterName,Month,Monthname,Day,Weekday,WeekdayName,categoryid,countryid,totalsales,country,category
0,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday,4,44,47.84,Sweden,Sports
1,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday,1,44,42.50,Sweden,Electronics
2,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday,3,28,51.51,Netherlands,Toys
3,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday,3,16,50.48,Germany,Toys
4,1,2019-01-01,2019,1,Q1,1,January,1,3,Tuesday,3,34,51.30,Poland,Toys
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,1095,2021-12-30,2021,4,Q4,12,December,30,5,Thursday,4,48,46.51,Thailand,Sports
4996,1095,2021-12-30,2021,4,Q4,12,December,30,5,Thursday,4,50,47.47,Ukraine,Sports
4997,1095,2021-12-30,2021,4,Q4,12,December,30,5,Thursday,4,46,39.53,Taiwan,Sports
4998,1095,2021-12-30,2021,4,Q4,12,December,30,5,Thursday,1,24,45.73,Japan,Electronics


In [18]:
pd.DataFrame(df3.groupby(["Year","country"])["totalsales"].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,totalsales
Year,country,Unnamed: 2_level_1
2019,Argentina,1787.65
2019,Australia,1346.63
2019,Austria,1206.38
2019,Azerbaijan,1768.30
2019,Belgium,1610.13
...,...,...
2021,United Arab Emirates,1711.34
2021,United Kingdom,1487.01
2021,United States,1382.54
2021,Uruguay,1096.02


### Task 7 - Create a cube query

In [19]:
pd.DataFrame(df3.groupby(["Year","country"])["totalsales"].mean())

Unnamed: 0_level_0,Unnamed: 1_level_0,totalsales
Year,country,Unnamed: 2_level_1
2019,Argentina,51.08
2019,Australia,49.88
2019,Austria,52.45
2019,Azerbaijan,47.79
2019,Belgium,50.32
...,...,...
2021,United Arab Emirates,50.33
2021,United Kingdom,51.28
2021,United States,51.21
2021,Uruguay,49.82


### Task 8 - Create an MQT

In [20]:
pd.DataFrame(df3.groupby(["country"])["totalsales"].sum()).head(10)

Unnamed: 0_level_0,totalsales
country,Unnamed: 1_level_1
Argentina,4946.92
Australia,4255.85
Austria,4217.06
Azerbaijan,5404.97
Belgium,4688.93
Brazil,4032.77
Bulgaria,4369.05
Canada,4533.07
Cyprus,4790.88
Czech Republic,4405.4


#### Python code done by Dennis Lam