Script de select datos Cassandra en cluster multidomain

In [50]:
!pip install mysql-connector==2.1.7
!pip install pandas
!pip install sqlalchemy
#requiere instalación adicional, consultar https://github.com/PyMySQL/mysqlclient
!pip install mysqlclient
!pip install numpy
!pip install pymysql



In [1]:
import pandas as pd
import numpy as np
import os
import json
import random
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
import time
from pprint import pprint
import psutil
import uuid
from cassandra.query import tuple_factory
from cassandra.query import dict_factory
from cassandra.query import BatchStatement, SimpleStatement
from cassandra.policies import RetryPolicy

In [2]:
#Los resultados de medidas de tiempo en carga por dominios se almacenan en estos objetos.
#Se itera durante 100 iteraciones para sacar medias
#repeticiones
repeats = 100

In [3]:
#Ficheros de salida
resultados_etl_select = '../Results/Cassandra/CassandraSelect_test_{}.csv'

In [4]:
def save_results_to_csv(results,file):
    #Guardamos los resultados en csv
    from datetime import datetime
    
    csv_df = pd.DataFrame(results, columns=['Registros', 'Tiempo', 'CPU','Memoria'])
    dia = datetime.now().strftime("%d%m%Y_%H_%M_%S")
    print(file.format(str(dia)))
    csv_df.to_csv(file.format(str(dia)))

In [5]:
from cassandra.cluster import Cluster, ExecutionProfile, EXEC_PROFILE_DEFAULT
from cassandra.policies import WhiteListRoundRobinPolicy, DowngradingConsistencyRetryPolicy
from cassandra.query import tuple_factory
from cassandra import ConsistencyLevel

profile = ExecutionProfile(
    load_balancing_policy=WhiteListRoundRobinPolicy(['127.0.0.1','172.17.0.2','172.17.0.3','172.17.0.4' ]),
    retry_policy=DowngradingConsistencyRetryPolicy(),
    consistency_level=ConsistencyLevel.ALL,
    serial_consistency_level=ConsistencyLevel.LOCAL_SERIAL,
    request_timeout=3600,
    row_factory=tuple_factory
)
cluster = Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: profile})
session = cluster.connect()
print(session.execute("SELECT release_version FROM system.local").one())

session.execute('USE currentaccountkeyspace')

rows = session.execute('SELECT count(*) FROM currentaccount')

for row in rows:
    print(row) 

('4.0.3',)
(999879,)


# Select test multidomain

In [6]:
#Obtenemos los posibles valores de pais. Se iterará por ellos para cambiar en bucle los registros
session.execute('USE customerprofilekeyspace')
result = session.execute("""SELECT PartyId FROM customerprofilekeyspace.customerprofile limit 1000;""")
partyId_list= []
for partyId in result:
    partyId_list.append(partyId[0])
print(len(partyId_list))

1000


In [7]:
random.choice(partyId_list)

'bc429194-bfc8-11ec-b0d9-ff5473b2f02a'

In [10]:
from cassandra import ConsistencyLevel
from cassandra.query import SimpleStatement
from cassandra import concurrent

registers = []
batch = BatchStatement(consistency_level=ConsistencyLevel.ALL)
account_id_list = []

#Cargas Masiva con Many
def selectCollectionCustomerProfile():
    
    SELECT_CURRENTACCOUNT_STMT = """SELECT * FROM currentaccountkeyspace.currentaccount WHERE partyid = '{}';"""            
    SELECT_CUSTOMER_PROFILE_STMT = """SELECT * FROM customerprofilekeyspace.customerprofile WHERE partyid = '{}';"""    
    SELECT_POSITIONKEEPING_STMT = """SELECT * FROM positionkeepingkeyspace.positionkeeping WHERE accountid = '{}';"""            
    
    session = cluster.connect('customerprofilekeyspace')
    iter = 0;
    i = 1
        
    for i in range(0,repeats): 
        
        partyid = random.choice(partyId_list)
        
        time_inicial = time.time()        
        print("----------------------------------------------------------------")
        #print(SELECT_CUSTOMER_PROFILE_STMT.format(partyid))
        result_cp = session.execute(SELECT_CUSTOMER_PROFILE_STMT.format(partyid))
        #print(SELECT_CURRENTACCOUNT_STMT.format(partyid))
        result_ca = session.execute(SELECT_CURRENTACCOUNT_STMT.format(partyid))
        
        for res_cp in result_cp:
            print("RESULTADO CUSTOMER PROFILE", res_cp)
            
        for res_ca in result_ca:
            print("RESULTADO CURRENT ACCOUNT", res_ca)
            #print(SELECT_POSITIONKEEPING_STMT.format(res_ca[0]))
            result_pk = session.execute(SELECT_POSITIONKEEPING_STMT.format(res_ca[0]))
            for res_pk in result_pk:
                print("RESULTADO POSITION_KEEPING", res_pk)                        
                time_final = time.time()           
                data_time_collection = round(time_final - time_inicial,3)
                used_cpu = psutil.cpu_percent()
                mem_used = psutil.virtual_memory().percent
                registers.append((iter,data_time_collection,used_cpu,mem_used))
                print((iter,data_time_collection,used_cpu,mem_used))
                iter += 1;
                time_inicial = time.time()
                print("----------------------------------------------------------------")
        i = i + 1
        
    return registers

In [11]:
registers = selectCollectionCustomerProfile()

----------------------------------------------------------------
RESULTADO CUSTOMER PROFILE ('bc2f454e-bfc8-11ec-b0d9-ff5473b2f02a', 'US.OBIE.Principal', '3', 'jazmin.sporer@example.org', 'Richmond Kerluke V', 'Mrs.', None, 'Angelita', '8446', 'pri', '523.949.5272')
RESULTADO CURRENT ACCOUNT ('474e35bc-bfcf-11ec-b0d9-ff5473b2f02a', [accountinfo(name='Mohr Group', identification='nan', schemename='ES.OBIE.SortCodeAccountNumber'), accountinfo(name='Franecki-Hodkiewicz', identification='nan', schemename='nan'), accountinfo(name='Kuvalis-Turcotte', identification='4.0', schemename='nan'), accountinfo(name='Kassulke, Tremblay and Kiehn', identification='7.0', schemename='nan')], 'Business', 'envisioneer extensible architectures', datetime.datetime(2015, 12, 9, 17, 7, 3), 'bc2f454e-bfc8-11ec-b0d9-ff5473b2f02a', 'Disabled', datetime.datetime(2015, 7, 13, 0, 13, 46))
----------------------------------------------------------------
RESULTADO CUSTOMER PROFILE ('bc3f975a-bfc8-11ec-b0d9-ff5473b2f0

RESULTADO CUSTOMER PROFILE ('bc1be724-bfc8-11ec-b0d9-ff5473b2f02a', 'DE.OBIE.Principal', '7', 'aglae.ruecker@example.com', 'Adella Renner', 'Miss', None, 'Joyce', '8110', 'pri', '706.320.7637')
RESULTADO CURRENT ACCOUNT ('4738f7d8-bfcf-11ec-b0d9-ff5473b2f02a', [accountinfo(name='Goyette, Lindgren and Von', identification='5.0', schemename='UK.business'), accountinfo(name='Gorczany-Buckridge', identification='3.0', schemename='nan'), accountinfo(name='Pacocha, Terry and Will', identification='8.0', schemename='ES.OBIE.SortCodeAccountNumber')], 'Business', 'synergize interactive paradigms', datetime.datetime(1987, 7, 17, 13, 20, 58), 'bc1be724-bfc8-11ec-b0d9-ff5473b2f02a', 'Enabled', datetime.datetime(1990, 8, 23, 4, 43, 35))
RESULTADO POSITION_KEEPING ('4738f7d8-bfcf-11ec-b0d9-ff5473b2f02a', 'IND', 'GBP', 11.342245, 'Est quidem repellendus dolor qui. Eos temporibus ea sunt. Pariatur sun', 0.0, 'Debit', False, 'typed', datetime.datetime(2014, 1, 24, 4, 56, 9), 'Business')
(12, 0.011, 73.

----------------------------------------------------------------
RESULTADO CUSTOMER PROFILE ('bc1e5950-bfc8-11ec-b0d9-ff5473b2f02a', 'UK.OBIE.Secundary', '0', 'carley16@example.org', 'Alana Davis', 'Prof.', None, 'Gilberto', '5419', 'sol', '1-734-523-91')
RESULTADO CURRENT ACCOUNT ('473ca5fe-bfcf-11ec-b0d9-ff5473b2f02a', [accountinfo(name='Macejkovic, Tillman and Boyer', identification='2.0', schemename='ES.OBIE.SortCodeAccountNumber'), accountinfo(name='Kassulke, Tremblay and Kiehn', identification='7.0', schemename='nan'), accountinfo(name='Hagenes-Stracke', identification='6.0', schemename='nan'), accountinfo(name='Blick-Sanford', identification='5.0', schemename='nan')], 'Business', 'deploy granular e-tailers', datetime.datetime(2014, 2, 15, 8, 13, 27), 'bc1e5950-bfc8-11ec-b0d9-ff5473b2f02a', 'Disabled', datetime.datetime(1993, 7, 16, 14, 7, 17))
RESULTADO POSITION_KEEPING ('473ca5fe-bfcf-11ec-b0d9-ff5473b2f02a', 'EUR', 'IND', 2752.6661, 'Dolores sint vero vero maiores nisi. Et qua

RESULTADO CURRENT ACCOUNT ('473af1aa-bfcf-11ec-b0d9-ff5473b2f02a', [accountinfo(name='Huels and Sons', identification='2.0', schemename='nan'), accountinfo(name='Bednar, Moore and Hamill', identification='3.0', schemename='UK.business'), accountinfo(name='Satterfield, Dare and Lynch', identification='4.0', schemename='UK.OBIE.SortCodeAccountNumber')], 'Particular', 'cultivate cross-platform ROI', datetime.datetime(2003, 12, 24, 4, 16, 32), 'bc1d427c-bfc8-11ec-b0d9-ff5473b2f02a', 'Enabled', datetime.datetime(1983, 2, 6, 8, 27, 42))
RESULTADO POSITION_KEEPING ('473af1aa-bfcf-11ec-b0d9-ff5473b2f02a', 'EUR', 'USA', 73259.8543261, 'Ut cumque sint laudantium quis impedit. Qui dolore sit aut ut nulla hi', 231417.19139000002, 'Credit', False, 'typed', datetime.datetime(1984, 3, 29, 3, 54, 42), 'Particular')
(46, 0.009, 85.7, 92.2)
----------------------------------------------------------------
RESULTADO POSITION_KEEPING ('473af1aa-bfcf-11ec-b0d9-ff5473b2f02a', 'IND', 'EUR', 18.70654, 'Est qui

RESULTADO CUSTOMER PROFILE ('bc21fd30-bfc8-11ec-b0d9-ff5473b2f02a', 'DE.OBIE.Principal', '5', 'brandt54@example.org', 'Kurt Thiel', 'Prof.', None, 'Candelario', '3888', 'pri', '(948)824-930')
RESULTADO CURRENT ACCOUNT ('4740d872-bfcf-11ec-b0d9-ff5473b2f02a', [accountinfo(name='Konopelski PLC', identification='8.0', schemename='UK.business'), accountinfo(name='Connelly, Jaskolski and Heathcote', identification='5.0', schemename='UK.business'), accountinfo(name='Nolan-Collier', identification='5.0', schemename='ES.OBIE.SortCodeAccountNumber'), accountinfo(name='Walker, Steuber and Oberbrunner', identification='1.0', schemename='UK.OBIE.SortCodeAccountNumber')], 'Particular', 'revolutionize vertical paradigms', datetime.datetime(2006, 7, 3, 4, 41, 17), 'bc21fd30-bfc8-11ec-b0d9-ff5473b2f02a', 'Disabled', datetime.datetime(2007, 12, 13, 0, 16, 20))
----------------------------------------------------------------
RESULTADO CUSTOMER PROFILE ('bc2683dc-bfc8-11ec-b0d9-ff5473b2f02a', 'UK.OBIE.Se

RESULTADO POSITION_KEEPING ('47628328-bfcf-11ec-b0d9-ff5473b2f02a', 'GBP', 'USA', 64636.22148099, 'Est quidem repellendus dolor qui. Eos temporibus ea sunt. Pariatur sun', 17.697217000000006, 'Credit', False, 'typed', datetime.datetime(1978, 1, 3, 10, 23, 32), 'Business')
(76, 0.011, 83.3, 92.3)
----------------------------------------------------------------


In [12]:
#Guardamos los resultados Customer Profile
save_results_to_csv(registers,resultados_etl_select)

../Results/Cassandra/CassandraSelect_test_20042022_11_50_12.csv


In [76]:
cluster.shutdown()
print('Conexion cerrada')

Conexion cerrada
