Script de select de datos MongoDB en cluster multidomain

In [27]:
!pip install mysql-connector==2.1.7
!pip install pandas
!pip install sqlalchemy
#requiere instalación adicional, consultar https://github.com/PyMySQL/mysqlclient
!pip install mysqlclient
!pip install numpy
!pip install pymysql



In [1]:
import pandas as pd
import numpy as np
import os
import json
import random
import pymongo
from pymongo import MongoClient
import time
from pprint import pprint
from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne
import matplotlib.pyplot as plt
import psutil
import uuid

In [2]:
# Timing measurements for the per-domain tests are stored in these objects.
# The test is repeated 100 times so that averages can be computed.
# Number of repetitions
repeats = 100

In [3]:
# Output files; the {} placeholder is filled in with a timestamp when the results are saved
resultados_mongodb_select = '../Results/MongoDB/MongoDBSelect_test_{}.csv'

In [4]:
def save_results_to_csv(results,file):
    """Write benchmark samples to a timestamped CSV file.

    Parameters
    ----------
    results : iterable of 4-item rows
        One row per sample; the items are stored under the columns
        'Registros', 'Tiempo', 'CPU' and 'Memoria', in that order.
    file : str
        Path template containing a ``{}`` placeholder, which is replaced with
        the current local time formatted as ``ddmmYYYY_HH_MM_SS``.

    The destination directory is created when it does not exist yet, so a
    finished benchmark run is not lost to a missing results folder.
    """
    from datetime import datetime

    csv_df = pd.DataFrame(results, columns=['Registros', 'Tiempo', 'CPU','Memoria'])
    dia = datetime.now().strftime("%d%m%Y_%H_%M_%S")
    output_path = file.format(str(dia))

    # Resolve the directory after formatting, in case the placeholder is part of it.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    csv_df.to_csv(output_path)

In [5]:
#Conexion unica a colecciones
connection = MongoClient('localhost', 27017, w=3) #Conexion con WriteConcern a 3 (primario y dos nodos consolidados)

#Creamos una base de datos para el trabajo llamada TFM
tfm_mongo_db = connection["tfm_mongo_database"]

In [6]:
# Handles to the three domain collections used by the multidomain select test.
CustomerProfileCollection, CurrentAccountCollection, PositionKeepingCollection = [
    tfm_mongo_db[collection_name]
    for collection_name in (
        "CustomerProfileCollection",
        "CurrentAccountCollection",
        "PositionKeepingCollection",
    )
]

# Show the databases visible through this connection.
database_names = connection.list_database_names()
print(database_names)

['admin', 'config', 'local', 'tfm_mongo_database']


# Select test multidomain

In [7]:
# Collect the PartyId of every customer profile (only that field is projected; _id is excluded).
# The bare id value is stored rather than the projected document, so that sampling
# from the list yields an id that can be used directly in a filter.
partyId_list = [
    customer["PartyId"]
    for customer in CustomerProfileCollection.find({}, {"_id": 0, "PartyId": 1})
    if "PartyId" in customer  # skip any document that lacks the field
]
len(partyId_list)

1000000

In [8]:
    select_query = """SELECT * FROM CustomerProfileDomainSchema.CustomerProfile cp
INNER JOIN CurrentAccountDomainSchema.CurrentAccount ca ON ca.PartyId = cp.PartyId
INNER JOIN CurrentAccountDomainSchema.AccountInfo ai ON ai.AccountId = ca.AccountId
INNER JOIN PositionKeepingDomainSchema.PositionKeeping pk ON pk.AccountId = ca.AccountId
INNER JOIN PositionKeepingDomainSchema.Amount am ON am.AmountId = pk.AmountId
INNER JOIN PositionKeepingDomainSchema.CreditLine cl ON cl.CreditLineId = pk.CreditLineId
INNER JOIN PositionKeepingDomainSchema.Currency cr ON cr.CurrencyId = am.CurrencyId
WHERE cp.PartyId = {}""".format(random.choice(partyId_list))

In [34]:
def execute_mongo_find_test(collection, party_id='4584fa76-b7fb-11ec-a2b2-9fdc6c308163'):
    """Run the multidomain select aggregation and consume its result.

    The pipeline matches one customer by ``PartyId``, joins the customer's
    documents from 'CurrentAccountCollection' (on ``PartyId``) and then from
    'PositionKeepingCollection' (on ``AccountId``), unwinding each join, and
    keeps a single resulting document.

    Parameters
    ----------
    collection
        Object exposing ``aggregate(pipeline)``; the pipeline starts from it.
    party_id : str, optional
        ``PartyId`` to look up. Defaults to the id this benchmark was originally
        hard-coded with, so existing single-argument calls behave as before.

    Returns
    -------
    int
        Number of documents fetched from the cursor (0 or 1 given the final
        ``$limit``). A return of 0 means the timed query matched nothing.
    """
    pipeline = [
        {
            '$match': {
                'PartyId': party_id
            }
        },
        {
            '$lookup': {
                'from' : 'CurrentAccountCollection',
                'localField' : 'PartyId',
                'foreignField' : 'PartyId',
                'as' : 'CurrentAccount'
            }
        },
        {
            '$unwind': '$CurrentAccount'
        },
        {
            '$lookup': {
                'from' : 'PositionKeepingCollection',
                'localField' : 'CurrentAccount.AccountId',
                'foreignField' : 'AccountId',
                'as' : 'PositionKeeping'
            }
        },
        {
            '$unwind': '$PositionKeeping'
        },
        {
            '$limit': 1
        }
    ]

    # Iterate the cursor to the end so the measured time includes fetching the result.
    fetched = 0
    for _ in collection.aggregate(pipeline):
        fetched += 1
    return fetched

In [32]:
# Preview of the joined document: one customer, joined with its current account
# and that account's position keeping entry.
match_customer = {'$match': {'PartyId': '4584fa76-b7fb-11ec-a2b2-9fdc6c308163'}}

join_current_account = {
    '$lookup': {
        'from': 'CurrentAccountCollection',
        'localField': 'PartyId',
        'foreignField': 'PartyId',
        'as': 'CurrentAccount',
    }
}

join_position_keeping = {
    '$lookup': {
        'from': 'PositionKeepingCollection',
        'localField': 'CurrentAccount.AccountId',
        'foreignField': 'AccountId',
        'as': 'PositionKeeping',
    }
}

preview_pipeline = [
    match_customer,
    join_current_account,
    {'$unwind': '$CurrentAccount'},
    join_position_keeping,
    {'$unwind': '$PositionKeeping'},
    {'$limit': 1},
]

result = CustomerProfileCollection.aggregate(preview_pipeline)
for doc in result:
    pprint(doc)

{'AccountRole': 'US.OBIE.Principal',
 'Address': [{'AddressLine': '5409 Stroman Trafficway Apt. 397',
              'AddressType': 'Port',
              'BuildingNumber': 82242,
              'Country': [{'Code': 'GBP',
                           'Description': 'Id dolorem a soluta harum iusto qui '
                                          'repellat. Possimus est ea ut ipsum '
                                          'quia quisquam facere.',
                           'ShortName': 'USA'}],
              'CountrySubDivision': 506.0,
              'PostCode': '64088-6495',
              'StreetName': 'Ruby Manor',
              'TownName': 'Anastacioshire'}],
 'BeneficialOwnership': 6,
 'CurrentAccount': {'AccountId': '76733c24-b7fb-11ec-a2b2-9fdc6c308163',
                    'AccountInfo': [{'Identification': 7.0,
                                     'Name': 'Zemlak-Gleichner',
                                     'SchemeName': 'UK.business'},
                                    {'Id

In [35]:
time_inicial = 0
time_final = 0
registers = []

# Prime psutil's CPU counter: a non-blocking cpu_percent() call reports usage since the
# previous call, so without this the first sample would not cover the first iteration.
psutil.cpu_percent()

for iteracion in range(repeats):
    # perf_counter() is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    time_inicial = time.perf_counter()
    execute_mongo_find_test(CustomerProfileCollection)
    time_final = time.perf_counter()
    used_cpu = psutil.cpu_percent()
    mem_used = psutil.virtual_memory().percent
    # Tuple: (iteration number, elapsed seconds for this run, CPU %, memory %)
    total_time = round(time_final - time_inicial, 3)
    registers.append((iteracion + 1, total_time, used_cpu, mem_used))

In [42]:
# Spot-check the samples recorded for iterations 11 to 20
registers[10:20]

[(11, 1.028, 16.9, 72.3),
 (12, 1.026, 17.7, 72.3),
 (13, 1.016, 16.8, 72.3),
 (14, 1.022, 17.6, 72.3),
 (15, 1.022, 17.1, 72.3),
 (16, 1.03, 17.5, 72.3),
 (17, 1.031, 17.8, 72.3),
 (18, 1.019, 17.6, 72.3),
 (19, 1.03, 17.2, 72.3),
 (20, 1.022, 17.0, 72.3)]

In [37]:
# Save the Customer Profile results
save_results_to_csv(registers,resultados_mongodb_select)

In [40]:
# Close the MongoDB client now that the measurements have been saved
connection.close()
print('Conexion cerrada')

Conexion cerrada
