# Week 6: SQL and NoSQL

## Exercise 4.1 (simple query)
**The customer with customerID ALFKI has made a number of orders containing some products. Query for, and return, all orders made by ALFKI and the products they contain. Return at least the product id and the name of the products.**


### Expected outcome

#10643 with Rössle Sauerkraut, Spegesild and Chartreuse verte
#10692 with Vegie-spread
#10702 with Aniseed Syrup and Lakkalikööri
#10835 with Raclette Courdavault and Original Frankfurter grüne Soße
#10952 with Grandma's Boysenberry Spread and Rössle Sauerkraut
#11011 with Escargots de Bourgogne and Flotemysost


### Mongo db

In [1]:
from pymongo import MongoClient
# pprint library is used to make the output look more pretty
from pprint import pprint

mongodb_url = "mongodb://localhost:27017"

# connect to MongoDB
client = MongoClient(mongodb_url)
db=client.Northwind
# Issue the serverStatus command and print the results
serverStatusResult=db.command("serverStatus")
#pprint(serverStatusResult)

In [2]:
# Get the collections from the Northwind database
collection = db.Northwind
# print out all of the collections
pprint(db.collection_names(include_system_collections=False))

[u'suppliers',
 u'territories',
 u'regions',
 u'northwind',
 u'categories',
 u'products',
 u'employee-territories',
 u'orders',
 u'customers',
 u'shippers',
 u'order-details']


In [76]:
orders = db["order-details"].aggregate([
        {
            "$lookup":
                {
                    "from" : "orders",
                    "localField" : "OrderID",
                    "foreignField" : "OrderID",
                    "as" : "order"
                }
        },
        { "$match" : { "order.CustomerID":"ALFKI" } },
        {
            "$lookup":
                {
                    "from" : "products",
                    "localField" : "ProductID",
                    "foreignField" : "ProductID",
                    "as" : "productInfo"
                }
        },
        { "$project" : {"_id":0 ,"OrderID": 1, "ProductID": 1, "order":{"CustomerID":1}, "productInfo":{"ProductName":1}}},
        { "$unwind" : "$order"},
        { "$unwind" : "$productInfo" },
        { "$project" : {"_id":0 ,"OrderID": 1, "ProductID": 1, "CustomerID":"$order.CustomerID", "ProductName":"$productInfo.ProductName"} }
    ])

    
print "Results for exercise 4.2 MongoDB"
print
print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
    OrderID = "Order ID",ProductID = "Product ID", ProductName = "Product Name")
print "----------------------------------------"
for order in orders:
    print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
        OrderID = order['OrderID'],ProductID = order['ProductID'], ProductName = order['ProductName'].encode('utf-8'))

print
print "Total number of lines: {}".format(len(data))

Results for exercise 4.2 MongoDB

Order ID   Product ID   Product Name  
----------------------------------------
10643      28           Rössle Sauerkraut
10643      46           Spegesild     
10643      39           Chartreuse verte
10692      63           Vegie-spread  
10702      76           Lakkalikööri
10702      3            Aniseed Syrup 
10835      59           Raclette Courdavault
10835      77           Original Frankfurter grüne Soße
10952      6            Grandma's Boysenberry Spread
10952      28           Rössle Sauerkraut
11011      71           Flotemysost   
11011      58           Escargots de Bourgogne

Total number of lines: 12


### Sql lite db

In [75]:
# start with the sql query
import sqlite3

con = sqlite3.connect('/Users/GretarAtli/Documents/GitHub/Dtu/Dtu-ToolsForBigData/week_six/northwind.db')
con.text_factory = str

with con:
    
    cur = con.cursor()
   
    cur.execute(""" 
        SELECT ode.OrderID, o.CustomerID, ode.ProductID, p.ProductName
        FROM "Order Details" ode
        INNER JOIN  orders o on ode.OrderID = o.OrderID
        INNER JOIN Products p on ode.ProductID = p.ProductID
        WHERE CustomerId = "ALFKI" 
        """)
    
    data = cur.fetchall()

print "Results for exercise 4.1 Sql lite"
print
print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
    OrderID = "Order ID",ProductID = "Product ID", ProductName = "Product Name")
print "----------------------------------------"
for d in data:
    print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
        OrderID = d[0],ProductID = d[2], ProductName = d[3])

print
print "Total number of lines: {}".format(len(data))


Results for exercise 4.1 Sql lite

Order ID   Product ID   Product Name  
----------------------------------------
10643      28           R�ssle Sauerkraut
10643      39           Chartreuse verte
10643      46           Spegesild     
10692      63           Vegie-spread  
10702      3            Aniseed Syrup 
10702      76           Lakkalik��ri  
10835      59           Raclette Courdavault
10835      77           Original Frankfurter gr�ne So�e
10952      6            Grandma's Boysenberry Spread
10952      28           R�ssle Sauerkraut
11011      58           Escargots de Bourgogne
11011      71           Flotemysost   

Total number of lines: 12


## Exercise 5.2 (harder query)
**Get all orders (with products) made by ALFKI that contain at least 2 different product types.**

### Expected outcome

#10643 with Rössle Sauerkraut, Spegesild and Chartreuse verte
#10702 with Aniseed Syrup and Lakkalikööri
#10835 with Raclette Courdavault and Original Frankfurter grüne Soße
#10952 with Grandma's Boysenberry Spread and Rössle Sauerkraut
#11011 with Escargots de Bourgogne and Flotemysost


### Sql lite db

In [77]:
# start with the sql query
import sqlite3

con = sqlite3.connect('/Users/GretarAtli/Documents/GitHub/Dtu/Dtu-ToolsForBigData/week_six/northwind.db')
con.text_factory = str

with con:
    
    cur = con.cursor()
   
    cur.execute(""" 
        SELECT ode.OrderID, o.CustomerID, ode.ProductID, p.ProductName
        FROM "Order Details" ode
        INNER JOIN
            (
            SELECT  COUNT(p.ProductID) ,ode.OrderID, o.CustomerID
            From "Order Details" ode
            INNER JOIN  orders o on ode.OrderID = o.OrderID
            INNER JOIN Products p on ode.ProductID = p.ProductID
            WHERE CustomerId = "ALFKI"
            GROUP BY ode.OrderID
            HAVING COUNT(p.ProductID) > 1) filter
            ON ode.OrderID =filter.OrderID
        INNER JOIN Orders o on o.OrderID = ode.OrderID
        INNER JOIN Products p on ode.ProductID = p.ProductID
        """)
    
    data = cur.fetchall()
    
print "Results for exercise 4.2 Sql lite"
print
print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
    OrderID = "Order ID",ProductID = "Product ID", ProductName = "Product Name")
print "----------------------------------------"
for d in data:
    print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
        OrderID = d[0],ProductID = d[2], ProductName = d[3])

print
print "Total number of lines: {}".format(len(data))


Results for exercise 4.2 Sql lite

Order ID   Product ID   Product Name  
----------------------------------------
10643      28           R�ssle Sauerkraut
10643      39           Chartreuse verte
10643      46           Spegesild     
10702      3            Aniseed Syrup 
10702      76           Lakkalik��ri  
10835      59           Raclette Courdavault
10835      77           Original Frankfurter gr�ne So�e
10952      6            Grandma's Boysenberry Spread
10952      28           R�ssle Sauerkraut
11011      58           Escargots de Bourgogne
11011      71           Flotemysost   

Total number of lines: 11


### Mongo db

In [6]:
from pymongo import MongoClient
# pprint library is used to make the output look more pretty
from pprint import pprint

mongodb_url = "mongodb://localhost:27017"

# connect to MongoDB
client = MongoClient(mongodb_url)
db=client.Northwind
# Issue the serverStatus command and print the results
serverStatusResult=db.command("serverStatus")
#pprint(serverStatusResult)

In [73]:
orders = db["order-details"].aggregate([
        {
            "$lookup":
                {
                    "from" : "orders",
                    "localField" : "OrderID",
                    "foreignField" : "OrderID",
                    "as" : "order"
                }
        },
        { "$match" : { "order.CustomerID":"ALFKI" } },
        { "$group" : {"_id":"$OrderID", "OrderID_counter":{"$sum":1}} },
        {
            "$match": {
                "OrderID_counter": {
                    "$gt": 1
                }
            }
        },
        {
            "$lookup":
                {
                    "from" : "order-details",
                    "localField" : "_id",
                    "foreignField" : "OrderID",
                    "as" : "order"
                }
        },
        { "$unwind" : "$order" },
        { "$project" : {"OrderId":"$_id", "ProductID":"$order.ProductID", "_id":0} },
        {
            "$lookup":
                {
                    "from" : "products",
                    "localField" : "ProductID",
                    "foreignField" : "ProductID",
                    "as" : "productinfo"
                }
        },
        { "$unwind" : "$productinfo" },
        { "$project" : {"OrderId":1, "ProductID":1, "ProductName":"$productinfo.ProductName"} },
    ])

print "Results for exercise 4.2 MongoDB"
print
print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
    OrderID = "Order ID",ProductID = "Product ID", ProductName = "Product Name")
print "----------------------------------------"
for order in orders:
    print "{OrderID: <10} {ProductID: <12} {ProductName: <14}".format(
        OrderID = order['OrderId'],ProductID = order['ProductID'], ProductName = order['ProductName'].encode('utf-8'))

print
print "Total number of lines: {}".format(len(data))

Results for exercise 4.2 MongoDB

Order ID   Product ID   Product Name  
----------------------------------------
11011      71           Flotemysost   
11011      58           Escargots de Bourgogne
10835      59           Raclette Courdavault
10835      77           Original Frankfurter grüne Soße
10952      6            Grandma's Boysenberry Spread
10952      28           Rössle Sauerkraut
10702      76           Lakkalikööri
10702      3            Aniseed Syrup 
10643      28           Rössle Sauerkraut
10643      46           Spegesild     
10643      39           Chartreuse verte

Total number of lines: 10


## Make your own Exercises

Find the 10 customers that ordered most orders and find the total quantity each of them bought

### Mongodb

In [None]:
from pymongo import MongoClient
# pprint library is used to make the output look more pretty
from pprint import pprint

mongodb_url = "mongodb://localhost:27017"

# connect to MongoDB
client = MongoClient(mongodb_url)
db=client.Northwind
# Issue the serverStatus command and print the results
serverStatusResult=db.command("serverStatus")
#pprint(serverStatusResult)

In [84]:
top_ten_most_orders = db['order-details'].aggregate([
    
    {
    # find the data trough two documents we need, similar to join in sql
    "$lookup":
        {
          "from": "orders",
          "localField": "OrderID",
          "foreignField": "OrderID",
          "as": "orders_info"
        }
    },
    
    { "$group" : {"_id":"$orders_info.CustomerID", "OrderID_counter":{"$sum":1}, "Total_Quantity": {"$sum":"$Quantity"}}},  
    { "$sort": { "OrderID_counter": -1} },
    { "$limit": 10 }
])


print "Results for exercise 4.3 (Make your own) MongoDB"
print
print "{CustomerID: <12} {TotalOrders: <14} {TotalQty: <14}".format(
    CustomerID = "Customer Id",TotalOrders = "Total orders", TotalQty = "Total quantity")
print "------------------------------------------"
for order in top_ten_most_orders:
    print "{CustomerID: <12} {TotalOrders: <14} {TotalQty: <14}".format(
        CustomerID = order["_id"][0], TotalOrders = order["OrderID_counter"], TotalQty = order["Total_Quantity"])

print
print "Total number of lines: {}".format(len(data))


Results for exercise 4.3 (Make your own) MongoDB

Customer Id  Total orders   Total quantity
------------------------------------------
SAVEA        116            4958          
ERNSH        102            4543          
QUICK        86             3961          
RATTC        71             1383          
HUNGO        55             1684          
BERGS        52             1001          
FRANK        48             1525          
HILAA        45             1096          
FOLKO        45             1234          
BONAP        44             980           

Total number of lines: 10


### Sql lite db

In [80]:
# start with the sql query, þetta er rett hit var misskilningur
import sqlite3

# get connection to the database
con = sqlite3.connect('/Users/GretarAtli/Documents/GitHub/Dtu/Dtu-ToolsForBigData/week_six/northwind.db')

#sql did not support utf-8
con.text_factory = str

# open the database
with con:
    
    # get the cursor object
    cur = con.cursor()
    
    # make a querey search
    cur.execute(""" 
    SELECT CustomerID, COUNT(Orders.OrderID), SUM(ODE.Quantity)
        FROM  Orders
        INNER JOIN "Order Details" Ode on Orders.OrderID = Ode.OrderID
        GROUP BY CustomerID
        ORDER BY COUNT(Orders.OrderID) desc
        LIMIT 10
    """)
    
    data = cur.fetchall()
    
print "Results for exercise 4.3 (Make your own) Sql lite"
print
print "{CustomerID: <12} {TotalOrders: <14} {TotalQty: <14}".format(
    CustomerID = "Customer Id",TotalOrders = "Total orders", TotalQty = "Total quantity")
print "------------------------------------------"
for d in data:
    print "{CustomerID: <12} {TotalOrders: <14} {TotalQty: <14}".format(
        CustomerID = d[0],TotalOrders = d[1], TotalQty = d[2])

print
print "Total number of lines: {}".format(len(data))

    
    # print the results
print

Results for exercise 4.3 (Make your own) Sql lite

Customer Id  Total orders   Total quantity
------------------------------------------
SAVEA        116            4958          
ERNSH        102            4543          
QUICK        86             3961          
RATTC        71             1383          
HUNGO        55             1684          
BERGS        52             1001          
FRANK        48             1525          
FOLKO        45             1234          
HILAA        45             1096          
BONAP        44             980           

Total number of lines: 10

