In [1]:
from glob import glob
from ddl_parser import DDL2AlembicParser

## Tables

In [2]:
# Text files of interest; the part of each name before ".txt" is used as the
# table name when matching against the parsed DDLs.
files_list = [
    # btg_* files
    "btg_fl_retailer_assgnt.txt",
    "btg_pbc_retailer_assgnt.txt",
    "btg_pbc_route_data.txt",
    "btg_pep_calendar.txt",
    "btg_ref_calendar.txt",
    # fl_* files
    "fl_cust.txt",
    "fl_prod.txt",
    "fl_sales_2018.txt",
    "fl_sales_2019.txt",
    "fl_sales_2020.txt",
    "fl_sales_2021.txt",
    # gsc_* / mdm_* / new_* files
    "gsc_msa_demo.txt",
    "mdm_msa_fl_xref.txt",
    "mdm_msa_pbc_xref.txt",
    "new_pbc_mkt_loc_struct.txt",
    # pbc_* files
    "pbc_cust.txt",
    "pbc_prod.txt",
    "pbc_sales_2019.txt",
    "pbc_sales_2020.txt",
    "pbc_sales_2021.txt",
]

In [3]:
# Table name = everything before the first '.' in each file name.
tables_list = [file_name.partition('.')[0] for file_name in files_list]
tables_list

['btg_fl_retailer_assgnt',
 'btg_pbc_retailer_assgnt',
 'btg_pbc_route_data',
 'btg_pep_calendar',
 'btg_ref_calendar',
 'fl_cust',
 'fl_prod',
 'fl_sales_2018',
 'fl_sales_2019',
 'fl_sales_2020',
 'fl_sales_2021',
 'gsc_msa_demo',
 'mdm_msa_fl_xref',
 'mdm_msa_pbc_xref',
 'new_pbc_mkt_loc_struct',
 'pbc_cust',
 'pbc_prod',
 'pbc_sales_2019',
 'pbc_sales_2020',
 'pbc_sales_2021']

In [4]:
# Number of table names derived from files_list.
len(tables_list)

20

## Convert: DDL File --> Alembic

In [5]:
# Read the Hive DDL script. The context manager closes the file handle as soon
# as the contents are in memory instead of leaving that to garbage collection.
with open('sql/btg_all_tpt_tbls_hive_txt_ddl.hql') as ddl_file:
    file_contents = ddl_file.read()

# Split into one raw DDL string per ';'-terminated statement and drop every
# whitespace-only fragment. This covers the piece after the final ';' whatever
# it looks like ('', '\n', '\n\n', ...), not just an exact '\n'.
# NOTE(review): a plain split also cuts at any ';' inside a string literal or
# comment -- confirm the script contains none.
ddl_list = [ddl for ddl in file_contents.split(';') if ddl.strip()]

In [6]:
# Names of the wanted tables (those in tables_list) that have a DDL in the file,
# in the order the DDLs appear. Each DDL is parsed exactly once: the inner
# generator yields the table name (element 0 of convert()'s result) and the
# outer comprehension filters on it, instead of calling convert() once for the
# filter and a second time for the value.
found_tables = [
    table_name
    for table_name in (
        # a fresh parser per DDL, as before, so no parser state carries over
        DDL2AlembicParser().convert(raw_table)[0] for raw_table in ddl_list
    )
    if table_name in tables_list
]
found_tables

['fl_cust',
 'fl_prod',
 'fl_sales_2020',
 'fl_sales_2021',
 'fl_sales_2019',
 'fl_sales_2018',
 'pbc_cust',
 'pbc_prod',
 'pbc_sales_2020',
 'pbc_sales_2021',
 'pbc_sales_2019',
 'gsc_msa_demo',
 'mdm_msa_fl_xref',
 'mdm_msa_pbc_xref',
 'btg_pep_calendar',
 'btg_ref_calendar',
 'btg_pbc_route_data',
 'btg_fl_retailer_assgnt',
 'btg_pbc_retailer_assgnt']

In [7]:
# Number of wanted tables for which a DDL was found; compare with len(tables_list).
len(found_tables)

19

### Missing tables 
Which of the tables in `tables_list` have no matching DDL in the HQL file?

In [8]:
# Wanted tables that never showed up among the parsed DDL names.
set(tables_list).difference(found_tables)

{'new_pbc_mkt_loc_struct'}

## Write Alembic notation to file

In [9]:
# Inspect what convert() returns for the first DDL (a tuple, unpacked below as
# (table_name, formatted_table)).
type(DDL2AlembicParser().convert(ddl_list[0]))

tuple

In [10]:
%%time
# Convert every DDL statement and write the Alembic code of the wanted tables
# (those named in tables_list) to alembic_file.py. One status line is printed
# per statement: "wrote", "skipped" (parsed but not wanted) or "Failed on".
with open('alembic_file.py', 'w') as py_file:
    # start=1 so the printed position is 1-based without manual arithmetic
    for position, raw_table in enumerate(ddl_list, start=1):
        stripped_raw_table = raw_table.strip()

        # An empty fragment (e.g. whatever follows the final ';') holds no DDL,
        # so there is nothing to hand to the parser.
        if not stripped_raw_table:
            continue

        try:
            table_name, formatted_table = DDL2AlembicParser().convert(stripped_raw_table)
        except Exception as err:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt / SystemExit still stop the cell, and report
            # why the statement failed instead of hiding the reason.
            print(f"Failed on: \"{stripped_raw_table[:30]}\"")
            print(f"\t{type(err).__name__}: {err}")
            print('-'*50)
            continue

        if table_name in tables_list:
            # banner comment with the table name, then the generated code
            print("#"*30,             file=py_file)
            print(f"#\t{table_name}", file=py_file)
            print("#"*30,             file=py_file)
            print(formatted_table,    file=py_file)

            print(f"wrote table:\t({position})\t{table_name}")

        else:
            print(f"skipped table:\t({position})\t{table_name}")

        print('-'*50)

wrote table:	(1)	fl_cust
--------------------------------------------------
wrote table:	(2)	fl_prod
--------------------------------------------------
wrote table:	(3)	fl_sales_2020
--------------------------------------------------
wrote table:	(4)	fl_sales_2021
--------------------------------------------------
wrote table:	(5)	fl_sales_2019
--------------------------------------------------
skipped table:	(6)	fl_sales_2017
--------------------------------------------------
wrote table:	(7)	fl_sales_2018
--------------------------------------------------
wrote table:	(8)	pbc_cust
--------------------------------------------------
wrote table:	(9)	pbc_prod
--------------------------------------------------
wrote table:	(10)	pbc_sales_2020
--------------------------------------------------
wrote table:	(11)	pbc_sales_2021
--------------------------------------------------
wrote table:	(12)	pbc_sales_2019
--------------------------------------------------
skipped table:	(13)	pbc_sales_