# Create sample transaction files to test trade finding methods
----

In this notebook we want to:
- Create trade reports for some sample companies.
- Each report should include: the transaction timestamp (second-level accuracy), the traded instrument identifier, the price, the quantity, and identifiers for the counterparties (buyer and seller) involved in the trade.
- We need methods to create some sample files (csv format).
- We need methods to introduce imperfections into the files too.

In [1]:
# import libs
import pandas as pd
import numpy as np
import random
import os
import warnings
warnings.filterwarnings("ignore")
from datetime import timedelta, time, datetime

In [2]:
def get_different_co(name, dict_input):
    """
    Get a different trading company name than the one specified.

    inputs:
    -------
    name (string): the company name that must not be returned.
    dict_input (dict): mapping whose values are the candidate company names.

    outputs:
    --------
    One of the values of dict_input, picked uniformly at random among the
    distinct values that differ from name.

    raises:
    -------
    ValueError: if dict_input holds no value other than name.
    """
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # candidate order (and therefore the pick for a given random seed) does
    # not depend on set iteration order.
    candidates = [co for co in dict.fromkeys(dict_input.values()) if co != name]
    if not candidates:
        raise ValueError(f"no company other than {name!r} is available")
    # random.choice can return every candidate, including when name is not
    # one of the values in dict_input.
    return random.choice(candidates)
    

In [3]:
# Sample counterparties, numbered from 1 in the order listed.
_COMPANY_NAMES = (
    "Acme Trading Inc",
    "Joe Bloggs Investments",
    "Trotters Independent Traders",
    "Another Company",
    "Glengarry Glen Ross",
)
trading_companies = dict(enumerate(_COMPANY_NAMES, start=1))

# Sample instrument identifiers, numbered from 1 in the order listed.
_INSTRUMENT_CODES = (
    "GB-01234567-01",
    "GB-01234567-02",
    "GB-01234567-03",
    "GB-01234567-04",
    "GB-01234567-05",
)
instrument_identifiers = dict(enumerate(_INSTRUMENT_CODES, start=1))

In [10]:
def create_sample_transaction_file(row_num, folder_name,
                                   trading_companies=trading_companies,
                                   instrument_identifiers=instrument_identifiers):
    """
    Generate random sample trades and write one trade report (csv) per
    trading company.

    Each generated trade has a timestamp in December 2022, a buyer, a
    different seller, an instrument, a quantity and a price. For every
    company a file "<company>_trades_dec_2022.csv" is written containing the
    trades it took part in, seen from its side ("bought from" / "sold to"
    plus the counterparty), sorted by timestamp.

    inputs:
    -------
    row_num (int): the number of trades (rows) to generate.
    folder_name (string): where the files will be saved; created if missing.
    trading_companies (dict): mapping whose values are the company names.
    instrument_identifiers (dict): mapping whose values are the instrument ids.

    outputs:
    --------
    df (pandas.DataFrame): all generated trades, with columns
    0 (row number), timestamp, buyer, seller, instrument, quantity, price.

    raises:
    -------
    ValueError: if trades are requested but there are fewer than two
    distinct companies, or no instruments, to draw from.
    """
    # Draw from the dict values rather than from fixed keys so that any
    # mapping passed by the caller works, whatever its size or keys.
    company_names = list(trading_companies.values())
    instrument_codes = list(instrument_identifiers.values())

    if row_num > 0:
        if len(set(company_names)) < 2:
            raise ValueError("at least two distinct trading companies are required")
        if not instrument_codes:
            raise ValueError("at least one instrument identifier is required")

    # to_csv does not create missing directories
    os.makedirs(folder_name, exist_ok=True)

    # Column 0 holds the row number; it is kept in the returned frame and
    # dropped from the per-company files.
    df = pd.DataFrame(np.arange(0, row_num))

    # NOTE: days are drawn from 1-30, so 31 December is never generated.
    df["timestamp"] = [
        datetime(2022, 12, random.randint(1, 30),
                 random.randint(0, 23), random.randint(0, 59),
                 random.randint(0, 59), 0).isoformat(timespec='seconds')
        for _ in range(row_num)
    ]
    df["buyer"] = [random.choice(company_names) for _ in range(row_num)]
    df["seller"] = [get_different_co(buyer, trading_companies) for buyer in df["buyer"]]
    df["instrument"] = [random.choice(instrument_codes) for _ in range(row_num)]
    df["quantity"] = [random.randint(1, 9) * 10000 for _ in range(row_num)]
    df["price"] = [random.randint(1, 9) / 100 for _ in range(row_num)]

    for company in company_names:
        print(f"processing:  {company}")

        # .assign returns a new frame, so nothing is written onto a slice of df
        bought_df = (
            df[df.buyer == company]
            .assign(trade="bought from", counterparty=lambda trades: trades.seller)
            .drop(["buyer", "seller"], axis=1)
        )
        sold_df = (
            df[df.seller == company]
            .assign(trade="sold to", counterparty=lambda trades: trades.buyer)
            .drop(["buyer", "seller"], axis=1)
        )

        # ISO-8601 strings sort chronologically
        output_df = (
            pd.concat([bought_df, sold_df])
            .sort_values("timestamp")
            .drop([0], axis=1)
        )

        output_path = os.path.join(folder_name, f"{company}_trades_dec_2022.csv")
        output_df.to_csv(output_path, index=False)

    return df
    

In [11]:
%%time
# Generate 100 random trades, write one csv per company into "sample_data",
# and keep the returned table of all trades for inspection below.
sample_df = create_sample_transaction_file(100, "sample_data")

processing:  Acme Trading Inc
processing:  Joe Bloggs Investments
processing:  Trotters Independent Traders
processing:  Another Company
processing:  Glengarry Glen Ross
CPU times: total: 62.5 ms
Wall time: 65.4 ms


In [12]:
# Show all generated trades ordered by timestamp (ISO-8601 strings sort chronologically)
sample_df.sort_values("timestamp")

Unnamed: 0,0,timestamp,buyer,seller,instrument,quantity,price
6,6,2022-12-01T11:35:21,Joe Bloggs Investments,Trotters Independent Traders,GB-01234567-01,20000,0.01
0,0,2022-12-01T13:29:59,Another Company,Acme Trading Inc,GB-01234567-05,30000,0.08
85,85,2022-12-02T15:56:14,Joe Bloggs Investments,Trotters Independent Traders,GB-01234567-03,90000,0.09
76,76,2022-12-02T18:19:09,Acme Trading Inc,Another Company,GB-01234567-02,50000,0.02
46,46,2022-12-02T21:05:49,Another Company,Acme Trading Inc,GB-01234567-05,90000,0.09
...,...,...,...,...,...,...,...
72,72,2022-12-29T07:56:12,Trotters Independent Traders,Joe Bloggs Investments,GB-01234567-05,90000,0.04
73,73,2022-12-29T21:33:25,Trotters Independent Traders,Acme Trading Inc,GB-01234567-01,80000,0.08
52,52,2022-12-30T01:51:13,Glengarry Glen Ross,Trotters Independent Traders,GB-01234567-02,30000,0.03
97,97,2022-12-30T07:24:40,Acme Trading Inc,Glengarry Glen Ross,GB-01234567-05,50000,0.07
