In [4]:
import pandas as pd
import random

In [33]:
def gerar_dataframe_transacoes(num_rows, sender_pool, receiver_pool, amount_range, date_range, seed=None):
  """Generate a DataFrame of simulated transactions.

  Args:
    num_rows: Number of candidate transactions to draw. The returned frame can
      contain fewer rows, because draws whose sender equals the receiver are
      dropped after generation.
    sender_pool: Sequence of possible senders (must be non-empty when
      num_rows > 0).
    receiver_pool: Sequence of possible receivers (must be non-empty when
      num_rows > 0).
    amount_range: (min, max) integer bounds of the transaction amount; both
      ends are inclusive.
    date_range: (start, end) of the timestamp window. Timestamps are one per
      day starting at `start` while that sequence fits inside the window;
      when it would run past `end`, they are spread evenly over
      [start, end] instead.
    seed: Optional seed for a private random generator, making the result
      reproducible. When None, the module-level `random` generator is used.

  Returns:
    A pandas DataFrame with the columns 'sender', 'receiver', 'amount',
    'timestamp' and 'fraud_label'. Row labels are the original draw positions,
    so the index has gaps wherever a sender == receiver row was removed.

  Raises:
    ValueError: If the end of date_range is earlier than its start.
  """
  start = pd.Timestamp(date_range[0])
  end = pd.Timestamp(date_range[1])
  if end < start:
    raise ValueError(
        f"date_range end ({date_range[1]!r}) is earlier than start ({date_range[0]!r})"
    )

  # Fall back to the module-level generator when no seed is given, so callers
  # that already rely on random.seed(...) keep getting the same draws.
  rng = random if seed is None else random.Random(seed)

  # Draw order (senders, receivers, amounts, labels) is kept stable so that a
  # given seed always maps to the same frame.
  senders = [rng.choice(sender_pool) for _ in range(num_rows)]
  receivers = [rng.choice(receiver_pool) for _ in range(num_rows)]
  amounts = [rng.randint(*amount_range) for _ in range(num_rows)]

  # One timestamp per day from the start; if that overshoots the requested end
  # date, compress the sequence so it spans exactly [start, end].
  timestamps = pd.date_range(start=start, periods=num_rows, freq='D')
  if len(timestamps) > 0 and timestamps[-1] > end:
    timestamps = pd.date_range(start=start, end=end, periods=num_rows)

  fraud_label = [rng.randint(0, 1) for _ in range(num_rows)]

  df = pd.DataFrame({
      'sender': senders,
      'receiver': receivers,
      'amount': amounts,
      'timestamp': timestamps,
      'fraud_label': fraud_label
  })

  # A transaction from an account to itself is not meaningful; drop those rows.
  df = df.loc[df.sender != df.receiver]

  return df


In [6]:

# Example usage: parameters for a small simulated transaction set.
num_rows = 100
sender_pool = list('ABCDEFGHIJ')
receiver_pool = list(sender_pool)  # same account ids, but an independent list
amount_range = (100, 5000)  # inclusive (min, max) transaction amount
date_range = ('2023-01-01', '2023-12-31')  # (start, end) as ISO date strings



In [34]:
# Build the simulated transactions from the example parameters defined above.
df = gerar_dataframe_transacoes(
    num_rows=num_rows,
    sender_pool=sender_pool,
    receiver_pool=receiver_pool,
    amount_range=amount_range,
    date_range=date_range,
)


In [37]:
# Sanity check: gerar_dataframe_transacoes already drops rows where the sender
# equals the receiver, so this filter should leave the frame unchanged.
df[df["sender"] != df["receiver"]]

Unnamed: 0,sender,receiver,amount,timestamp,fraud_label
0,F,B,3091,2023-01-01,0
1,G,J,3958,2023-01-02,0
2,J,H,2590,2023-01-03,1
3,C,H,1684,2023-01-04,1
4,J,E,3141,2023-01-05,1
...,...,...,...,...,...
94,I,F,1947,2023-04-05,1
95,B,I,4222,2023-04-06,0
96,I,C,3190,2023-04-07,1
97,G,J,1969,2023-04-08,0
