In [22]:
import  pandas              as pd
import  numpy               as np
import  seaborn             as sns
from    matplotlib          import pyplot   as plt
from    sklearn.datasets    import make_blobs
from    sklearn.datasets    import make_moons
import  random

## 0.2. Helper Functions


In [40]:
# ===================================================================
# Configura os gráficos
def jupyter_settings():
    %matplotlib inline
    # %pylab inline
    
    plt.style.use('bmh')
    plt.rcParams['figure.figsize'] = [19, 7]
    plt.rcParams['font.size'] = 21

    # display(HTML('<style>.conteiner{width:100% !important;}</style>'))

    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option('display.expand_frame_repr', False)
    
    # configura o pandas para quantidade de casas decimais
    pd.set_option('display.float_format', lambda x: '%.2f' % x)

    sns.set()

jupyter_settings()

def generate_series(type="blobs", multiplier=100, samples=100000, n_centers=15, category=[None]) -> pd.Series:
    """
    Generated a Pandas Series object from especified type

    Attributes:
        type (str): Tipo dos dados a ser retornado na série: blobs, neg_blobs, moons, neg_moons ou cat
        multiplier (int): Valor a ser usado como multiplicador dos valores individuais
        samples (int): Quantidade de linhas a serem geradas
        n_centers (int): Número de centróides para o tipo blobs
        category (list): Quando selecionado o tipo type='cat' category deve ser uma 
            lista de valores que serão gerados
    """
    s = pd.Series(object)

    # Generate blobs
    if type == "blobs":
        X, y = make_blobs(n_samples=samples, centers=int(random.uniform(1, n_centers)), n_features=2)
        s1 = pd.Series( 
            np.concatenate(
            (
                np.abs(X[:, 0] * int(random.uniform(0, multiplier/4))), 
                np.abs(X[:, 0] * int(random.uniform(0, multiplier/2))), 
                np.abs(X[:, 0] * int(random.uniform(0, multiplier)))
            ),
            axis=0
            ))
        s = pd.Series( np.concatenate( (s1[:int(samples/1.6)].values, s1[int(samples/1.6)*2:].values), axis=0 ) )
        s = s[:samples]
    
    if type == "blobs2":
        list_a = [0 for x in range( int(samples * 0.1) )]
        list_b = [random.randint(0, multiplier * 0.2) for x in range( int(samples * 0.2) )]
        list_c = [random.randint(0, multiplier * 0.5) for x in range( int(samples * 0.3) )]
        list_d = [random.randint(0, multiplier) for x in range( int(samples * 0.4) )]
        s = pd.Series( list_a + list_b + list_c + list_d )

    # Generate negative blobs
    if type == "neg_blobs":
        X, y = make_blobs(n_samples=samples, centers=random.randint(1, n_centers), n_features=2)
        X = sorted( np.abs(X[:, 0]) * multiplier )
        metade = int( len(X) / 2 )
        quarto = int( len(X) / 4 )
        Y = [ i * random.randint(0, 1) for i in X[: metade] ]
        W = [ i * random.uniform(0, 1) for i in X[metade : metade + quarto] ]
        Z = [ i ** random.randint(1, 2) for i in X[metade + quarto :] ]
        s = pd.Series( X + Y + W + Z )[:samples]

    # Generate negative blobs
    if type == "neg":
        X, y = make_blobs(n_samples=samples, centers=random.randint(1, n_centers), n_features=2)
        X = sorted( np.abs(X[:, 0]) * multiplier )
        metade = int( len(X) / 2 )
        quarto = int( len(X) / 3 )
        Y = [ i * random.randint(0, 1) for i in X[: metade] ]
        W = [ i * random.uniform(0, 1) for i in X[metade : metade + quarto] ]
        Z = [ i ** random.randint(1, 2) for i in X[metade + quarto :] ]
        s = pd.Series( X + Y + W + Z )[abs(len( X + Y + W + Z ) - samples):]

    # Generate moons
    if type == "moons":
        X, y = make_moons(n_samples=samples, noise=0.5)
        s = pd.Series( np.abs( X[:, 1] * multiplier*10 ) )
        
    # Generate negative moons
    if type == "neg_moons":
        X, y = make_moons(n_samples=samples, noise=0.5)
        s = pd.Series( X[:, 1] * multiplier*10 )

    # Generate categoricals
    if type == "cat":
        X, y = make_blobs(n_samples=samples, centers=len(category), n_features=2)
        s = pd.Series(y)

    return s


In [39]:
samples = 99000
n_centers = 15
multiplier = 10

X, y = make_blobs(n_samples=samples, centers=random.randint(1, n_centers), n_features=2)
X = sorted( np.abs(X[:, 0]) * multiplier )
metade = int( len(X) / 2 )
quarto = int( len(X) / 3 )
Y = [ i * random.randint(0, 1) for i in X[: metade] ]
W = [ i * random.uniform(0, 1) for i in X[metade : metade + quarto] ]
Z = [ i ** random.randint(1, 2) for i in X[metade + quarto :] ]
s = pd.Series( X + Y + W + Z )[abs(len( X + Y + W + Z ) - samples):]

len(s)

99000

In [24]:
# samples = 20
# multi = 10
# list_a = [0 for x in range( int(samples * 0.1) )]
# list_b = [random.randint(0, multi * 0.2) for x in range( int(samples * 0.2) )]
# list_c = [random.randint(0, multi * 0.5) for x in range( int(samples * 0.3) )]
# list_d = [random.randint(0, multi) for x in range( int(samples * 0.4) )]
# final = list_a + list_b + list_c + list_d #np.concatenate(list_a, list_b, list_c, list_d)
# display(sorted(final))
# display(sum(final))
# display(sum(final) / len(final))
# display( sorted(final)[int(len(final) / 2)] )

In [25]:
# samples = 10
# multiplier = 10
# n_centers = 15
# X, y = make_blobs(n_samples=samples, centers=random.randint(1, n_centers), n_features=2)
# X = sorted( np.abs(X[:, 0]) )
# metade = int( len(X) / 2 )
# quarto = int( len(X) / 4 )
# Y = [ i * random.randint(0, 1) for i in X[: metade] ]
# W = [ i * random.uniform(0, 1) for i in X[metade : metade + quarto] ]
# Z = [ i ** random.randint(1, 2) for i in X[metade + quarto :] ]
# final = X + Y + W + Z

# display(sorted(final))
# # display(Y)
# display(sum(final))
# display(sum(final) / len(final))
# display( sorted(final)[int(len(final) / 2)] )

# Dados
## Movimentação
    - saldo
    - renda fixa
    - fundos
    - ações
    - coe
    - previdência
    - tesouro
    - mini índice
    - mini dolar
    - opções
    - trader
  
## RFM
    - lifetime
    - recency
    - frequency
    - investidor qualificado
    - classificação renda
  
## Social
    - idade
    - gênero
    - estado civil
    - filhos
    - formação
    - profissão
  
## Renda
    - renda pessoal
    - renda familiar
    - imóvel
    - valor imóvel
    - carro
    - valor carro
    - score de crédito

## Outros
    - perfil nps
    - plataformas digitais
    - 

In [26]:
# ## Movimentação
#     - saldo
#     - renda fixa
#     - fundos
#     - ações
#     - coe
#     - previdência
#     - tesouro
#     - mini índice
#     - mini dolar
#     - opções
#     - trader

In [27]:
df_movimentacao = pd.DataFrame(
    columns=[
        "saldo",
        "renda fixa",
        "fundos",
        "ações",
        "coe",
        "previdência",
        "tesouro",
        "mini índice",
        "mini dólar",
        "opções",
        "trader",
    ]
)

In [42]:
df_movimentacao.trader = generate_series( 'cat', 10, 100000, 15, ['sim', 'não'] )
df_movimentacao.saldo = generate_series( 'neg', 10, 99000, 15 )
df_movimentacao['renda fixa'] = generate_series( 'neg', 10, 91258, 10 )
df_movimentacao.fundos = generate_series( 'neg', 10, 41382, 25 )
df_movimentacao['ações'] = generate_series( 'neg', 100, 53970, 50 )
df_movimentacao.coe = generate_series( 'neg', 100, 23966, 15 )
df_movimentacao['previdência'] = generate_series( 'neg', 100, 21742, 15 )
df_movimentacao.tesouro = generate_series( 'neg', 1000, 76410, 15 )
df_movimentacao['mini índice'] = generate_series( 'neg', 1000, 6452, 15 )
df_movimentacao['mini dólar'] = generate_series( 'neg', 10000, 6236, 15 )
df_movimentacao['opções'] = generate_series( 'neg', 10000, 5270, 15 )
display(df_movimentacao.sample(5))
display(df_movimentacao.describe().T)

Unnamed: 0,saldo,renda fixa,fundos,ações,coe,previdência,tesouro,mini índice,mini dólar,opções,trader
29178,,,,,0.0,365.95,,,,,1
57926,,,0.0,155.46,,,,,,,0
15087,,,,,,,,,,,1
16712,,,,,,,,,,,0
64109,,,2.71,271.74,,,,,,,0


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
saldo,1000.0,0.09,0.11,0.0,0.0,0.01,0.17,0.33
renda fixa,8742.0,1.88,2.42,0.0,0.0,0.09,3.73,7.64
fundos,41382.0,710.12,2388.61,0.0,0.0,14.88,37.41,17764.66
ações,46030.0,6272.89,56330.91,0.0,0.0,211.59,381.87,539680.56
coe,23966.0,53242.45,187169.32,0.0,0.33,149.18,281.02,1474406.61
previdência,21742.0,59537.68,201594.15,0.0,0.0,258.84,463.44,1478952.54
tesouro,23590.0,576.26,732.83,0.0,0.0,6.75,1170.97,2234.0
mini índice,6452.0,3457208.57,11404548.14,0.0,0.0,3239.07,4488.63,81051647.1
mini dólar,6236.0,677265042.89,2266056638.94,0.0,2.07,21745.07,46954.84,14432005291.1
opções,5270.0,700110431.24,2419448922.21,0.0,248.62,24107.55,39146.86,14826349582.47
