In [130]:
import json
from pathlib import Path

import pandas as pd
import numpy as np
from datetime import datetime as datetime

In [133]:
# Run configuration: where the input is read from and where the result goes.
input_filename = '/users/jkimball/Desktop/CAX/all.txt'
output_file = "/tmp/real_output.csv"

# Inline input text; left empty in this cell.
csv_data = ""

In [134]:
# Inline sample input, one pipe-delimited record per line. The short rows stop
# after four values (plus a trailing pipe); the two long rows carry 21 leading
# values followed by alternating name/value pairs (CP_FLAG, CP_TKR, ...).
csv_data = """
7970 Hello World Equity|114714|1000|300|
9057 JP Equity|157888|1000|300|
9058 JP Equity|157888|1000|300|
8054 JP Equity|114805|0|0|206076815|ACQUIS|U|RAC Electric Vehicles Inc|N.A.|N.A.|N.A.|N.A.|N.A.|20191112|20191112|20191129|BBG000BB6ZY1|BBG001FFF251|8053|JP|6|CP_FLAG|1|CP_TKR|8053 JP|CP_NAME|Sumitomo Corp|CP_ID_BB_COMP|114805|CP_UNIT|RAC Electric Vehicles Inc|CP_TOT_AMT|500.000000
8053 JP Equity|114805|0|0|206076815|ACQUIS|U|RAC Electric Vehicles Inc|N.A.|N.A.|N.A.|N.A.|N.A.|20191112|20191112|20191129|BBG000BB6ZY1|BBG001FFF251|8053|JP|6|CP_FLAG|1|CP_TKR|8053 JP|CP_NAME|Sumitomo Corp|CP_ID_BB_COMP|114805|CP_UNIT|RAC Electric Vehicles Inc|CP_TOT_AMT|500.000000
7887 JP Equity|157889|1000|300|
8058 Goodbye World Equity|114151|1000|300|
"""

In [135]:
import io

# A non-empty inline payload takes precedence over the on-disk path:
# `input_filename` is rebound to an in-memory text buffer holding that payload.
if len(csv_data) != 0:
    input_filename = io.StringIO(csv_data)

In [136]:
# Read BBG File

# Names given to the leading, fixed-position values of every record, in order.
cols = [
    'BB_Code',
    'BB_Company_Id',
    'BB_Security_Id',
    'Rcode',
    'BB_Action_Id',
    'Mnemonic',
    'BB_Flag',
    'BB_Company_Name',
    'BB_Vendor_Id_Type',
    'BB_Vendor_Id',
    'BB_Currency',
    'Market_Sector_Desc',
    'BB_Unique',
    'Ann_Date',
    'Eff_Date',
    'Amd_Date',
    'BB_Global',
    'BB_Global_Company_Id',
    'BB_Security_Id_Desc',
    'BB_Feed_Source',
    'Nfields',
]

# How many fixed columns there are; values past this position in a record are
# read as name/value pairs by field_dict_func.
count = len(cols)

def field_dict_func(value):
    """Build a dict from the name/value pairs that trail the fixed columns.

    `value` is one raw pipe-delimited record. Its first `count` fields are
    skipped; the rest is read as alternating name, value, name, value, ...
    A trailing name with no value after it is dropped, and when a name
    repeats the last value wins.
    """
    trailing = value.split('|')[count:]
    names = trailing[0::2]
    values = trailing[1::2]
    return {name: val for name, val in zip(names, values)}

def standard_field_func(value):
    """Return the first `count` pipe-delimited fields of `value` as a tuple.

    Records with fewer than `count` fields are right-padded with empty
    strings, so the result always has exactly `count` entries.
    """
    parts = value.split('|')
    missing = count - len(parts)
    if missing > 0:
        parts.extend([''] * missing)
    return tuple(parts[:count])

# Start with an empty frame; the read cell that follows fills `bbg`.
bbg = pd.DataFrame()

In [138]:
def _parse_chunk(lines):
    """Split one chunk of raw records into the fixed columns plus `field_dict`.

    `lines` is a Series of raw pipe-delimited strings. The result has exactly
    the columns named in `cols` (short records are padded with None, long ones
    truncated) and a `field_dict` column built by field_dict_func from the
    values that follow the fixed columns.
    """
    parsed = lines.str.split('|', expand=True)
    # Pad chunks whose widest record has fewer than `count` values.
    for i in range(len(parsed.columns), count):
        parsed[i] = None
    # Copy so the column assignments below act on an independent frame rather
    # than on a slice of `parsed`.
    parsed = parsed.iloc[:, 0:count].copy()
    parsed.columns = cols
    parsed['field_dict'] = lines.apply(field_dict_func)
    return parsed


# `input_filename` may be an in-memory buffer that a previous run of this cell
# already read to the end; rewind it so the cell can be re-run safely.
if hasattr(input_filename, 'seek'):
    input_filename.seek(0)

chunks = pd.read_csv(
    input_filename,
    # NOTE(review): presumably a token that never occurs in the data, so each
    # whole line lands in column 0 and is split on '|' afterwards - confirm.
    sep='f~o~o~b~a~r~',
    engine='python',
    header=None,
    skip_blank_lines=True,
    iterator=True,
    chunksize=10000,
)

# Collect the parsed chunks and concatenate once: concatenating inside the
# loop re-copies everything accumulated so far on each iteration, and building
# `bbg` from scratch here keeps re-runs of this cell from duplicating rows.
frames = [_parse_chunk(chunk[0]) for chunk in chunks]
if frames:
    bbg = pd.concat(frames, sort=False)
else:
    bbg = pd.DataFrame(columns=cols + ['field_dict'])

In [140]:
# Validate
#
# Every record that carries an Nfields value must have exactly that many
# name/value pairs in its field_dict.

def _nfields_ok(nfields, field_dict):
    """Return True when `nfields` parses as an integer equal to len(field_dict)."""
    try:
        return int(nfields) == len(field_dict)
    except (TypeError, ValueError):
        # A blank or non-numeric Nfields can never match; report the row as a
        # failure instead of aborting with a bare int() conversion error.
        return False


# Only rows that actually declare an Nfields value are checked.
declared = bbg.loc[bbg['Nfields'].notnull(), ['Nfields', 'field_dict']]

# Index labels of the rows whose declared count disagrees with what was parsed.
check = [
    row_label
    for row_label, nfields, field_dict in zip(
        declared.index, declared['Nfields'], declared['field_dict']
    )
    if not _nfields_ok(nfields, field_dict)
]

if check:
    raise ValueError('Bloomberg Nfields value check failure on rows: {0}'.format(str(check)))


In [142]:
# Display the (rows, columns) size of the parsed frame as a quick sanity check.
bbg.shape

(7, 22)

In [144]:
# Write the parsed frame to `output_file` as pipe-delimited text, leaving out
# the index column.
bbg.to_csv(
    path_or_buf=output_file,
    sep="|",
    index=False,
    escapechar=None,
)

In [145]:
# Optional self-test. It runs only when a truthy `TESTING` name exists when
# this cell executes (presumably injected by an external test runner - confirm).
# The NameError guard covers just the `TESTING` lookup, so a NameError raised
# while defining or launching the tests is no longer silently swallowed.
try:
    _run_tests = bool(TESTING)
except NameError:
    _run_tests = False

if _run_tests:
    import unittest

    class TestNotebook(unittest.TestCase):
        """Spot checks on the frame parsed from the sample input."""

        def test_validate_dividend(self):
            rows = bbg.loc[bbg.BB_Code == '8053 JP Equity']
            # Fail with a readable message rather than an IndexError when the
            # expected record is missing.
            self.assertFalse(rows.empty, "no row found for '8053 JP Equity'")
            self.assertEqual(rows.Eff_Date.values[0], '20191112')

    # argv=[''] keeps unittest from parsing the kernel's own command line;
    # exit=False keeps it from calling sys.exit() inside the notebook.
    rc = unittest.main(argv=[''], verbosity=2, exit=False)

    if len(rc.result.failures) + len(rc.result.errors) > 0:
        raise Exception("Tests failed!")