Script de actualización de datos de MongoDB en un clúster multidominio

In [117]:
import pandas as pd
import numpy as np
import os
import json
import random
import pymongo
from pymongo import MongoClient
import time
from pprint import pprint
from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne
import matplotlib.pyplot as plt
import psutil
import uuid
from bson import objectid

In [118]:
# Timing measurements for the test are collected per repetition.
# The test is repeated 100 times so that averages can be computed.
# Number of repetitions
repeats = 100

In [119]:
# Output file template; the {} placeholder is filled in by str.format when
# the results are saved.
resultados_etl_update = '../Results/MongoDB/MongoDBUpdate_test_{}.csv'

In [120]:
def save_results_to_csv(results,file):
    """Write benchmark results to a timestamped CSV file.

    Args:
        results: Iterable of 5-item rows, in the column order
            ``Registros, Tiempo, CPU, Memoria, Rgs``.
        file: Output path template with one ``{}`` placeholder, which is
            replaced by the current local time formatted as
            ``ddmmYYYY_HH_MM_SS``.

    The parent directory of the resulting path is created when it does not
    exist, so a completed benchmark run is not lost to a missing results
    folder.
    """
    import os
    from datetime import datetime

    csv_df = pd.DataFrame(results, columns=['Registros', 'Tiempo', 'CPU','Memoria', 'Rgs'])
    dia = datetime.now().strftime("%d%m%Y_%H_%M_%S")
    destination = file.format(str(dia))

    # os.makedirs('') raises, so only create a directory when the path has one.
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    csv_df.to_csv(destination)

In [121]:
# Single client connection shared by all collections
connection = MongoClient('localhost', 27017, w=3) # Connection with write concern w=3 (original note: primary plus two secondary nodes)

# Handle to the working database for this project, named tfm_mongo_database
tfm_mongo_db = connection["tfm_mongo_database"]
# Notebook echo of the database handle
tfm_mongo_db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, w=3), 'tfm_mongo_database')

In [122]:
# Handles to the three collections of the working database
CustomerProfileCollection = tfm_mongo_db["CustomerProfileCollection"]
CurrentAccountCollection = tfm_mongo_db["CurrentAccountCollection"]
PositionKeepingCollection = tfm_mongo_db["PositionKeepingCollection"]
# Print the database names visible through this connection
print(connection.list_database_names())

['admin', 'config', 'local', 'tfm_mongo_database']


In [123]:
# Create unique indexes on the key fields to improve query performance.
# The obsolete dropDups option is intentionally not passed: current MongoDB
# servers ignore it, so it never removed duplicates and only suggested that
# it did. A unique index build still fails if duplicate keys exist.
CustomerProfileCollection.create_index("PartyId", unique = True)
CurrentAccountCollection.create_index("AccountId", unique = True)
PositionKeepingCollection.create_index("AccountId", unique = True)

'AccountId_1'

# Update test multidomain

In [124]:
# Possible code values. The original note describes them as country values
# that are iterated over to modify the records in a loop.
# NOTE(review): 'GBP' and 'EUR' look like currency codes while 'IND' and 'USA'
# look like country codes - confirm against the stored data.
country_list= ['IND', 'GBP', 'EUR', 'USA']
# Notebook echo of the list
country_list

['IND', 'GBP', 'EUR', 'USA']

In [125]:
    # SQL form of the update, kept as a string template ({} is a placeholder
    # for the currency code). NOTE(review): nothing in the visible notebook
    # references this variable - presumably kept for comparison; confirm.
    update_query = """UPDATE PositionKeepingDomainSchema.Amount a 
INNER JOIN PositionKeepingDomainSchema.PositionKeeping pk ON pk.AmountId = a.AmountId
INNER JOIN CurrentAccountDomainSchema.CurrentAccount ca ON ca.AccountId = pk.AccountId
INNER JOIN CurrentAccountDomainSchema.AccountInfo ai ON ai.AccountId = ca.AccountId
SET a.CurrencyId = (SELECT CurrencyId FROM PositionKeepingDomainSchema.Currency WHERE Code = '{}')
WHERE ca.Status = 'Enabled' AND ai.SchemeName LIKE 'UK.%'"""

In [None]:
# Update benchmark: for each repetition, select the current accounts that
# match the scheme-name and currency filters, set their Status to 'Modified'
# one document at a time, and record timing plus host metrics.
time_inicial = 0
time_final = 0
registers = []   # one result row per repetition
lista = []       # AccountId values updated in the most recent repetition

# Currency filter applied to the joined PositionKeeping documents; the code is
# picked once at random and reused for every repetition.
match = {'$match': { 'PositionKeeping.Amount.Currency.Code': random.choice(country_list)  }  }
lookup_PositionKeeping = {
                    '$lookup': {
                        'from' : 'PositionKeepingCollection',
                        'localField' : 'AccountId',
                        'foreignField' : 'AccountId',
                        'as' : 'PositionKeeping'
                    }
                }
lookup_CustomerProfile = {
                    '$lookup': {
                        'from' : 'CustomerProfileCollection',
                        'localField' : 'PartyId',
                        'foreignField' : 'PartyId',
                        'as' : 'CustomerProfile'
                    }
                }
unwind_ai = {"$unwind": "$AccountInfo"}
# Defined but not part of the pipeline below.
unwind_pk = {"$unwind": "$PositionKeeping"}
# Anchored and escaped so that it matches only values starting with "UK.",
# like the SQL condition LIKE 'UK.%'. The previous pattern 'UK.*' matched any
# value merely containing "UK".
match_2 = { '$match': {
                      'AccountInfo.SchemeName': { '$regex': r'^UK\.' }
                  }  
                }

# Defined but not part of the pipeline below.
projection_ca = { '$project' : { '_id':1 } } 

# match_2 runs before the $unwind to discard whole documents early and again
# after it to keep only the unwound AccountInfo entries that match.
pipeline = [match_2, lookup_CustomerProfile, unwind_ai, match_2, lookup_PositionKeeping, match]

# The first cpu_percent() call has no previous sample to compare against and
# returns a meaningless value; prime it so every recorded reading is valid.
psutil.cpu_percent()

for iteracion in range(0,repeats): 

    # A cursor can be consumed only once, so the aggregation is re-run for
    # every repetition; reusing a single cursor left all repetitions after the
    # first with nothing to update.
    # NOTE(review): the cursor is opened before the timer starts, as in the
    # original first repetition - confirm whether the query itself should be
    # part of the measured time.
    result = CurrentAccountCollection.aggregate(pipeline)
    lista.clear()

    time_inicial = time.time()    

    for doc in result:
        # Update the matching account by its key
        lista.append(doc['AccountId'])
        CurrentAccountCollection.update_one({'AccountId':doc['AccountId']},{'$set': {'Status':'Modified'}})
        
    time_final = time.time() 
    used_cpu = psutil.cpu_percent()
    mem_used = psutil.virtual_memory().percent
    # Row: repetition number, elapsed seconds for this repetition, CPU percent,
    # memory percent, and the number of documents whose Status is 'Modified'.
    total_time = round(time_final - time_inicial,3)
    registers.append((iteracion + 1, total_time ,used_cpu, mem_used,CurrentAccountCollection.count_documents({'Status': 'Modified'})))

In [None]:
# Number of current-account documents whose Status is 'Modified'
print(CurrentAccountCollection.count_documents({'Status': 'Modified'}))

In [None]:
# Notebook echo of result rows 10 to 19
registers[10:20]

In [None]:
# Save the collected update-test measurements to a CSV file
save_results_to_csv(registers,resultados_etl_update)

In [None]:
# Close the MongoDB client connection
connection.close()
print('Conexion cerrada')