Install libraries

In [None]:
!pip install pandas==1.3.0 dnspython==2.2.1 pymongo==4.1.1

Import Libraries

In [2]:
import datetime
import pymongo
import pandas as pd

Connect to DB

In [3]:
mongo_db_client = pymongo.MongoClient("mongodb+srv://nosql:nosql@cluster0.v4pfc.mongodb.net/progetto_db_2022?retryWrites=true&w=majority")
db = mongo_db_client.progetto_db_2022

 ### 1. For a specific customer, determine the quantities of each product purchased in a given period

In [4]:
input_cf = 'SNTFLC93K92P982F'
input_start_date = datetime.datetime(2021, 1, 1)
input_end_date = datetime.datetime(2021, 12, 31)

query_result = db.order.aggregate(
    [
        {
            '$match': {
                'customer': input_cf,
                'date': { '$gte' : input_start_date, '$lte' : input_end_date}
            }
        },
        {'$unwind': '$order_composition'},
        {
         '$lookup':
           {
             'from': 'product',
             'localField': "order_composition.product",
             'foreignField': "_id",
             'as': "product"
           }
        },
        {
            '$group':
             {
               '_id': { '$first': '$product._id'},
               'description': { '$first': {'$first': '$product.description' }},
               'count': { '$sum': '$order_composition.quantity' }
             }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,description,count
0,5,Batteria TESLA,1
1,1,Paraurti Posteriore P4X,2
2,6,Allarme Springfield,2
3,7,Allarme WakeMeUp,2
4,2,Paraurti anteriore B5X,2
5,4,Tergicristalli CleanUp,1


 ### 2. For a given product, determine the number of separate customers who purchased it in a given period

In [5]:
input_start_date = datetime.datetime(2021, 1, 1)
input_end_date = datetime.datetime(2021, 12, 31)
input_product_code = 4

query_result = db.customer.aggregate(
    [
        {
            '$lookup': {
                'from': 'order',
                'localField': '_id',
                'foreignField': 'customer',
                'as': 'order'
            }
        },
        {
          '$unwind': '$order'
        },
        {
          '$unwind': '$order.order_composition'
        },
        {
          '$unwind': '$order.order_composition.product'
        },
        {
            '$lookup': {
                'from': 'product',
                'localField': 'order.order_composition.product',
                'foreignField': '_id',
                'as': 'product'
            }
        },
        {
          '$unwind': '$product'
        },
        {
            '$match': {
                'product._id': input_product_code,
                'order.date': {'$lte': input_end_date, '$gte': input_start_date}
            }
        },
        {
            '$group': {
                '_id': {'prod_id':'$product._id', 'customer_id':'$_id'},
                'description': {'$first': '$product.description'},
                'customer_count': {'$sum': 1 }
            }
        },
        {
            '$group': {
                '_id': '$_id.prod_id',
                'description': {'$first': '$description'},
                'customer_count': {'$sum': 1 }
            }
        },
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,description,customer_count
0,4,Tergicristalli CleanUp,3


### 3. Identify all customers who have purchased a product in promotion, indicating the cap of residence

In [6]:
query_result = db.customer.aggregate(
    [

        {
            '$lookup': {
                'from': 'order',
                'localField': '_id',
                'foreignField': 'customer',
                'as': 'order'
            }
        },
        {
            '$lookup': {
                'from': 'product',
                'localField': 'order.order_composition.product',
                'foreignField': '_id',
                'as': 'product'
            }
        },
        {
            '$match': {
                'product.promotion': {'$elemMatch': {'$not': {'$size': 0 } }}
            }
        },
        {
            '$project': {
                '_id': '$_id',
                'name': '$name',
                'cap': '$cap'
            }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,name
0,TNICRT65J02S720J,Tonio Cartonio
1,CSTCSTO97J93F387C,Costantina Costa
2,PLOCRS54P83D223F,Paolo Crisanti
3,SNTFLC93K92P982F,Santina Filocomo


### 4. For a specific CAP, identify customers who have made purchases in a given period

In [7]:
input_start_date = datetime.datetime(2021, 1, 1)
input_end_date = datetime.datetime(2021, 12, 31)
input_cap = 65321

query_result = db.customer.aggregate(
    [
        {
            '$match': {'CAP': input_cap}
        },
        {
            '$lookup': {
                'from': 'order',
                'localField': '_id',
                'foreignField': 'customer',
                'as': 'orders'
            }
        },
        {
            '$match': {'orders.date': {'$lte': input_end_date, '$gte': input_start_date}}
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,name,birth_date,sex,CAP,address,city,province,region,orders
0,SNTFLC93K92P982F,Santina Filocomo,1993-01-20 00:00:00,F,65321,Via Santina,Reggio Calabria,RC,Calabria,"[{'_id': 3, 'date': 2021-01-09 00:00:00, 'cust..."


### 5. For a specific car model, identify the revenue of the items in a given period

In [8]:
input_vehicle_id = 2

query_result = db.product.aggregate(
    [
        {
         '$lookup':
           {
             'from': 'vehicle',
             'localField': "vehicle_compatible",
             'foreignField': "_id",
             'as': "vehicle"
           }
        },
        {
          '$unwind': '$vehicle'
        },
        {
         '$lookup':
           {
             'from': 'order',
             'localField': "_id",
             'foreignField': "order_composition.product",
             'as': "product_in_order"
           }
        },
        {
          '$unwind': '$product_in_order'
        },
        {
          '$unwind': '$product_in_order.order_composition'
        },
        {
            '$match': {
                '$expr': {
                    '$eq': ['$product_in_order.order_composition.product', '$vehicle._id'],
                    '$eq': ['$vehicle._id', input_vehicle_id]
                },
            }
        }, 
        {
            '$group': {
                '_id': '$_id',
                'description': {'$first': '$description'},
                'total_per_article': {'$sum': '$product_in_order.order_composition.price'}
            }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,description,total_per_article
0,5,Batteria TESLA,3952.4
1,2,Paraurti anteriore B5X,3293.4
2,4,Tergicristalli CleanUp,2929.1
3,3,Tergicristalli BOSCH T06,2058.6


### 6. For each item in promo determine the quantity sold in a given period 

In [9]:
input_start_date = datetime.datetime(2021, 2, 5)
input_end_date = datetime.datetime(2021, 4, 5)

query_result = db.order.aggregate(
    [
        {
            '$unwind': '$order_composition'
        }, 
        {
            '$match': {'date': {'$lte': input_end_date, '$gte': input_start_date}}
        },
        {
            '$lookup': {
                'from': 'product',
                'localField': 'order_composition.product',
                'foreignField': '_id',
                'as': 'product'
            }
        },
        {
            '$addFields': {'product': {'$first': '$product'}}
        }, 
        {
            '$unwind': '$product.promotion'
        }, 
        {
            '$match': {
                '$expr': {
                    '$and': [
                        {'$gt': ['$date', '$product.promotion.start']}, 
                        {'$lt': ['$date', '$product.promotion.end']}
                    ]
                }
            }
        }, 
        {
            '$group': {
                '_id': '$order_composition.product',
                'description': {'$first': '$product.description'},
                'quantity': {'$sum': '$order_composition.quantity'}
            }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,description,quantity
0,6,Allarme Springfield,1


### 7. Identify the customers who have not made an electronic payment in the current year

In [10]:
customer_pay_with_card = db.order.aggregate(
    [
        {
            '$match': {
                'payment_type': { '$eq' : 'card'},
                'date': { '$gte' : datetime.datetime(datetime.date.today().year, 1, 1)}
            }
        }
    ]
)
customer_pay_with_card = [elem['customer'] for elem in customer_pay_with_card]

query_result = db.customer.aggregate(
    [
        {
            '$match': { '_id': { '$nin': customer_pay_with_card } }
        },
    ]
)
pd.DataFrame(query_result)

Unnamed: 0,_id,name,birth_date,sex,CAP,address,city,province,region
0,CSTCSTO97J93F387C,Costantina Costa,1997-07-20 00:00:00,F,20020,Cia Costantina,Milano,MI,Lombardia
1,PLOCRS54P83D223F,Paolo Crisanti,1954-11-09 00:00:00,M,22350,Via Crisanti,Vicenza,VI,Veneto
2,SNTFLC93K92P982F,Santina Filocomo,1993-01-20 00:00:00,F,65321,Via Santina,Reggio Calabria,RC,Calabria
3,TNIVTL87I23P732H,Tania Vitale,1987-02-13 00:00:00,F,25460,via vitale,Rome,RM,Lazio


### 8. Identify the customer who spent more in a given period

In [11]:
input_start_date = datetime.datetime(2021, 10, 9)
input_end_date = datetime.datetime(2021, 11, 25)

query_result = db.order.aggregate(
    [
        {
            '$match': {'date': {'$lte': input_end_date, '$gte': input_start_date}}
        },
        {
            '$group': {
                '_id': '$customer',
                'total': {'$sum': '$total'}
            }
        },
        {
            '$sort': {'total': -1}
        },
        {
            '$limit': 1
        },
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,total
0,PLOCRS54P83D223F,644.0


### 9. For a specific CAP the average expense per invoice in a given period

In [12]:
input_cap = 65321
input_start_date = datetime.datetime(2021, 2, 20)
input_end_date = datetime.datetime(2021, 10, 22)


query_result = db.customer.aggregate(
    [
        {
            '$lookup': {
                'from': 'order',
                'localField': '_id',
                'foreignField': 'customer',
                'as': 'order'
            }
        }, {
            '$match': {
                'CAP': input_cap,
                'order.date': {'$lte': input_end_date, '$gte': input_start_date}
            }
        }, 
        {
            '$unwind': '$order'
        }, 
        {
            '$group': {
                '_id': '$_id',
                'cap': {'$first': '$CAP'},
                'avg': {'$avg': '$order.total'}
            }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,cap,avg
0,SNTFLC93K92P982F,65321,330.0


### 10. For each category of products the total price paid for those sold in a given period.


In [13]:
input_start_date = datetime.datetime(2021, 1, 1)
input_end_date = datetime.datetime(2021, 12, 31)

query_result = db.product.aggregate(
    [
        {
            '$lookup': {
                'from': 'order',
                'localField': '_id',
                'foreignField': 'order_composition.product',
                'as': 'order'
            }
        }, 
        {
            '$unwind': '$order'
        },
        {
            '$unwind': '$order.order_composition'
        }, 
        {
            '$match': {
                '$and': [
                    {'$expr': {'$eq': ['$_id', '$order.order_composition.product']}},
                    {'order.date': {'$lte': input_end_date, '$gte': input_start_date}}
                ]
            }
        }, 
        {
            '$group': {
                '_id': '$category',
                'total': {'$sum': '$order.order_composition.price'}
            }
        }
    ]
)

pd.DataFrame(query_result)

Unnamed: 0,_id,total
0,tergicristalli,62.6
1,batterie,630.0
2,paraurti,611.0
3,allarmi,3300.0
