Script de actualización de datos de MongoDB en un clúster multidominio

In [1]:
import pandas as pd
import numpy as np
import os
import json
import random
import pymongo
from pymongo import MongoClient
import time
from pprint import pprint
from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne
import matplotlib.pyplot as plt
import psutil
import uuid
from bson import objectid

In [2]:
# Number of repetitions of the timed test. The measurement loop runs this many
# iterations and stores one timing result per iteration, so that averages can
# be computed from the collected results.
repeats = 100

In [3]:
# Output file path template. The "{}" placeholder is filled in with a
# timestamp by save_results_to_csv when the results are written.
resultados_etl_update = '../Results/MongoDB/MongoDBUpdate_test_{}.csv'

In [4]:
def save_results_to_csv(results,file):
    """Write benchmark measurements to a timestamped CSV file.

    Args:
        results: Iterable of 4-item rows, written under the columns
            ``Registros``, ``Tiempo``, ``CPU`` and ``Memoria``.
        file: Path template with one ``{}`` placeholder, which is replaced
            by the current local time formatted as ``ddmmYYYY_HH_MM_SS``.

    The destination directory is created when it does not exist yet, so the
    results of a finished benchmark run are not lost to a missing folder.
    """
    from datetime import datetime

    dia = datetime.now().strftime("%d%m%Y_%H_%M_%S")
    output_path = file.format(dia)

    # A bare file name has no directory component; only create real folders.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    csv_df = pd.DataFrame(results, columns=['Registros', 'Tiempo', 'CPU','Memoria'])
    csv_df.to_csv(output_path)

In [5]:
# One shared client is used for every collection. The write concern w=3 asks
# for each write to be acknowledged by three members (the primary plus two
# more nodes).
connection = MongoClient(host='localhost', port=27017, w=3)

# Handle to the database used for this work (TFM).
tfm_mongo_db = connection["tfm_mongo_database"]
tfm_mongo_db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, w=3), 'tfm_mongo_database')

In [6]:
# Handles to the three collections of the working database.
CustomerProfileCollection = tfm_mongo_db["CustomerProfileCollection"]
CurrentAccountCollection = tfm_mongo_db["CurrentAccountCollection"]
PositionKeepingCollection = tfm_mongo_db["PositionKeepingCollection"]
# Print the database names visible through this connection.
print(connection.list_database_names())

['admin', 'config', 'local', 'tfm_mongo_database']


# Update test multidomain

In [7]:
# Aggregation pipeline executed on CurrentAccountCollection by the update
# test. It joins enabled accounts with PositionKeepingCollection and projects
# only the AccountId of the surviving documents.
pipeline = [
        # Keep only documents whose Status is 'Enabled'.
        {
          '$match': {
              'Status': 'Enabled'
          }  
        },
        # Flatten AccountInfo (presumably an array of sub-documents - TODO confirm).
        {
            '$unwind': '$AccountInfo'
        }, 
        # Join with PositionKeepingCollection on AccountId; matches land in
        # the 'PositionKeeping' array.
        {
            '$lookup': {
                'from' : 'PositionKeepingCollection',
                'localField' : 'AccountId',
                'foreignField' : 'AccountId',
                'as' : 'PositionKeeping'
            }
        },
        # One output document per joined PositionKeeping entry.
        {
            '$unwind': '$PositionKeeping'
        },
        # NOTE(review): this limit is applied BEFORE the filter below, so the
        # filter only ever sees a single document and the pipeline yields
        # nothing when that one document does not match. Confirm whether
        # '$limit' was meant to come after the '$match'.
        { 
            '$limit': 1
        },
        # Filter on scheme name and currency. Both regexes are unanchored, so
        # 'UK.*' matches "UK" anywhere in the value, not only at the start.
        {
          '$match': {
              'AccountInfo.SchemeName': { '$regex': 'UK.*' },              
              'PositionKeeping.Amount.Currency': { '$regex': ".*USA.*" }
          }  
        },
        # Return only the AccountId field.
        {
            '$project': {
                '_id':0,
                'AccountId': 1            
            }
        }
]

In [None]:
# One (iteration, elapsed seconds, CPU %, memory %) tuple per repetition.
registers = []

# psutil.cpu_percent() reports utilisation since its previous call and returns
# a meaningless 0.0 on the very first call. Prime it here so that the sample
# taken in the first iteration covers that iteration, like all the others.
psutil.cpu_percent()

for iteracion in range(repeats):

    # perf_counter() is monotonic and high resolution, so the measured
    # interval cannot be distorted by wall-clock adjustments.
    time_inicial = time.perf_counter()
    result = CurrentAccountCollection.aggregate(pipeline)

    for doc in result:
        # Flag every account returned by the pipeline as disabled.
        CurrentAccountCollection.update_one(
            {'AccountId': doc['AccountId']},
            {'$set': {'Status': 'Disabled_ch'}},
        )

    time_final = time.perf_counter()
    used_cpu = psutil.cpu_percent()
    mem_used = psutil.virtual_memory().percent
    # Elapsed time covers the aggregation plus the updates it triggered.
    total_time = round(time_final - time_inicial, 3)
    # Row layout: iteration number, elapsed seconds, CPU %, memory %.
    registers.append((iteracion + 1, total_time, used_cpu, mem_used))

In [None]:
# Save the measurements collected by the update test to a timestamped CSV file
save_results_to_csv(registers,resultados_etl_update)

In [None]:
# Release the MongoDB client now that the test has finished.
connection.close()
print('Conexion cerrada')