Funciones agregadas MongoDB en cluster multidomain

In [426]:
import pandas as pd
import numpy as np
import os
import json
import random
import pymongo
from pymongo import MongoClient
import time
from pprint import pprint
from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne
import matplotlib.pyplot as plt
import psutil
import uuid
from bson import objectid

In [427]:
#Los resultados de medidas de tiempo en carga por dominios se almacenan en estos objetos.
#Se itera durante 100 iteraciones para sacar medias
#repeticiones
repeats = 100

In [428]:
#Ficheros de salida
resultados_etl_agg = '../Results/MongoDB/MongoDBAgregadas_test_{}.csv'

In [429]:
def save_results_to_csv(results,file):
    #Guardamos los resultados en csv
    from datetime import datetime
    
    csv_df = pd.DataFrame(results, columns=['Registros', 'Tiempo', 'CPU','Memoria'])
    dia = datetime.now().strftime("%d%m%Y_%H_%M_%S")
    csv_df.to_csv(file.format(str(dia)))

In [430]:
#Conexion unica a colecciones
connection = MongoClient('localhost', 27017, w=3) #Conexion con WriteConcern a 3 (primario y dos nodos consolidados)

#Creamos una base de datos para el trabajo llamada TFM
tfm_mongo_db = connection["tfm_mongo_database"]
tfm_mongo_db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, w=3), 'tfm_mongo_database')

In [431]:
CustomerProfileCollection = tfm_mongo_db["CustomerProfileCollection"]
CurrentAccountCollection = tfm_mongo_db["CurrentAccountCollection"]
PositionKeepingCollection = tfm_mongo_db["PositionKeepingCollection"]
print(connection.list_database_names())

['admin', 'config', 'local', 'tfm_mongo_database']


In [432]:
print(CustomerProfileCollection.count_documents({}))
print(CurrentAccountCollection.count_documents({}))
print(PositionKeepingCollection.count_documents({}))

1000000
1000000
1000000


In [433]:
#Generamos indices para adecuar el rendimiento
CustomerProfileCollection.create_index("PartyId", unique = True, dropDups = True)
CurrentAccountCollection.create_index("AccountId", unique = True, dropDups = True)
PositionKeepingCollection.create_index("AccountId", unique = True, dropDups = True)

'AccountId_1'

In [137]:
query = """SELECT AVG(Amount),MIN(Amount),MAX(Amount) FROM PositionKeepingDomainSchema.Amount am
INNER JOIN PositionKeepingDomainSchema.PositionKeeping pk ON pk.AmountId = am.AmountId
INNER JOIN CurrentAccountDomainSchema.CurrentAccount ca ON ca.AccountId = pk.AccountId
INNER JOIN CustomerProfileDomainSchema.CustomerProfile cp ON cp.PartyId = ca.PartyId
INNER JOIN CurrentAccountDomainSchema.AccountInfo ai ON ai.AccountId = ca.AccountId
INNER JOIN PositionKeepingDomainSchema.Currency cr ON cr.CurrencyId = am.CurrencyId
WHERE cr.Code = '{}'
AND cp.AccountRole = 'UK.OBIE.Principal'
AND ai.SchemeName LIKE 'UK.%'"""

# Funciones agregadas test multidomain

In [516]:
lookup_PositionKeeping = {
                '$lookup': {
                    'from' : 'PositionKeepingCollection',
                    'localField' : 'AccountId',
                    'foreignField' : 'AccountId',
                    'as' : 'PositionKeeping'
                }
            }
unwind_pk = {"$unwind": "$PositionKeeping"}
match_2 = { '$match': { 'Status': 'Enabled'}  }  

group = { '$group': { 
                '_id': '$PositionKeeping.AccountId', 
                'avg_amount': { '$avg': '$PositionKeeping.Amount.Amount' },
                'max_amount': { '$max': '$PositionKeeping.Amount.Amount' },
                'min_amount': { '$min': '$PositionKeeping.Amount.Amount' } 
                } 
        }

projection_pk = { '$project' : { '_id':0, 'AccountId': 1, 'PartyId':1 , 'PositionKeeping.Amount.Amount':1 } } 

limit = { '$limit' : 10}

pipeline = [match_2, lookup_PositionKeeping, group]
print(pipeline)

[{'$match': {'Status': 'Enabled'}}, {'$lookup': {'from': 'PositionKeepingCollection', 'localField': 'AccountId', 'foreignField': 'AccountId', 'as': 'PositionKeeping'}}, {'$group': {'_id': '$PositionKeeping.AccountId', 'avg_amount': {'$avg': '$PositionKeeping.Amount.Amount'}, 'max_amount': {'$max': '$PositionKeeping.Amount.Amount'}, 'min_amount': {'$min': '$PositionKeeping.Amount.Amount'}}}]


In [518]:
time_inicial = 0
time_final = 0
registers = []
lista = []
for iteracion in range(0,10): 
   
    time_inicial = time.time()
    result = CurrentAccountCollection.aggregate(pipeline)
    time_final = time.time() 
    used_cpu = psutil.cpu_percent()
    mem_used = psutil.virtual_memory().percent
    # Tupla con numero de registros, tiempo parcial de la transacción y tiempo acumulado de trxs
    total_time = round(time_final - time_inicial,3)
    registers.append((iteracion + 1, total_time ,used_cpu, mem_used))

In [519]:
#Guardamos los resultados Customer Profile
save_results_to_csv(registers,resultados_etl_agg)

In [520]:
connection.close()
print('Conexion cerrada')

Conexion cerrada
