# Hermitage I5 migration notebook

This notebook runs the ETL process that migrates Hermitage's I5 data to a new Odoo 15 system.
It reads csv/tsv files from the `input_csv_files` directory, applies the transformations needed to make them importable in Odoo 15, and then loads the data into Hermitage's new Odoo 15 instance.

### Imports
- pandas: to make transformations on the data
- Models' migration config
- Import function

In [1]:
import pandas

from models_migration_config import models_migration_config

from import_functions import import_data, import_ignored_fields

# Directory holding the csv/tsv files exported from I5. The trailing slash is
# required: file names are appended to this value directly to build input paths.
INPUT_CSV_FILES_PATH = 'input_csv_files/'


**Transform Vendors from I5**

In [None]:
def separate_address_into_different_columns(vendors_file_path=None, output_file_path=None):
    """Split the I5 vendor address lines into separate address columns.

    Reads the tab-separated I5 vendor export, strips the fixed-width padding
    from every column, renames ``AMADR1`` to ``street`` and derives
    ``street2``, ``city``, ``state_id`` and ``zip`` from ``AMADR2``/``AMADR3``.
    The result is written as a comma-separated file without the index.

    Args:
        vendors_file_path: Path (str) of the vendor TSV to read. Defaults to
            ``APMASTER - Vendors.tsv`` inside ``INPUT_CSV_FILES_PATH``.
        output_file_path: Path (str) of the CSV to write. Defaults to
            ``i5.vendors.csv`` inside ``GENERATED_CSV_FILES_PATH``.
    """
    import re

    if vendors_file_path is None:
        vendors_file_path = f'{INPUT_CSV_FILES_PATH}APMASTER - Vendors.tsv'
    if output_file_path is None:
        # NOTE(review): GENERATED_CSV_FILES_PATH is not defined in the part of
        # the notebook visible here; confirm it is set before this cell runs.
        # Resolving it up front makes a missing constant fail before any work.
        output_file_path = f'{GENERATED_CSV_FILES_PATH}i5.vendors.csv'

    vendors_dataframe = pandas.read_csv(vendors_file_path, sep='\t')

    # Sample of the input (tab separated, values padded with spaces):
    # AMVEND  AMNAME                     AMADR1                 AMADR2             AMADR3                     AMTELE      AMFRGT  AMTERM
    # 1       BROAN MFG OR EDN           4641 PAYSPHERE CIRCLE  CHICAGO,IL 60674                              8778470145  1250    2%10TH
    # 172     HUNTER FAN COMPANY         P O BOX 19773                             PALATINE IL 60055-9773     9017441200  1000    1%2OTH
    # 10003   LEDVANCE OR IMARK          P O BOX 72524          (OSRAM)            CLEVELAND OH 44192         8002555042  1000    2%90D
    # 10004   ADVANCE TRANSFORMER CO     P O BOX 100332         ATLANTA GA  30384                             0           750     2%10TH
    # 10005   LUTRON ELECTRONICS CO INC  P O BOX 644396                            PITTSBURGH, PA 15264-4396  8005239466  500     1%10TH

    col_names = ['AMVEND', 'AMNAME', 'AMADR1', 'AMADR2', 'AMADR3', 'AMTELE', 'AMFRGT', 'AMTERM']
    for col_name in col_names:
        column = vendors_dataframe[col_name]
        # astype(str) would turn missing cells into the literal text 'nan',
        # which is truthy and would later be parsed as a city; keep them empty.
        vendors_dataframe[col_name] = column.astype(str).str.strip().where(column.notna(), '')

    # Move AMADR1 to the end (as 'street') for better visibility. rename()
    # returns a new frame, so the columns added below do not write into a slice.
    vendors_dataframe = vendors_dataframe[
        ['AMVEND', 'AMNAME', 'AMADR2', 'AMADR3', 'AMTELE', 'AMFRGT', 'AMTERM', 'AMADR1']
    ].rename(columns={'AMADR1': 'street'})

    # 4 digits are accepted as well as 5 (see the doctest below); the whole
    # token must match so that longer digit runs are not mistaken for a ZIP.
    zip_pattern = re.compile(r'\d{4,5}(-\d{4})?')

    def is_zip(zip_code: str) -> bool:
        """
        >>> is_zip('6067')
        True
        >>> is_zip('60674')
        True
        >>> is_zip('60674-1234')
        True
        >>> is_zip('8778470145')
        False
        """
        return zip_pattern.fullmatch(zip_code) is not None

    us_states = {
        'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY',
        'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
        'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
    }

    def is_a_state(state: str) -> bool:
        """Return True when *state* is one of the two-letter codes in us_states."""
        return state in us_states

    def clean_address_field(address_field: str) -> str:
        """Replace commas with spaces and collapse every whitespace run to one space."""
        return ' '.join(address_field.replace(',', ' ').split())

    def split_address(address2: str, address3: str) -> tuple:
        """Return ``(street2, city, state_id, zip)`` for one vendor row."""
        address2 = clean_address_field(address2)
        address3 = clean_address_field(address3)

        street2 = city = state = zip_code = ''
        if address3:
            # Three address lines: the middle one is a second street line.
            street2 = address2
            city_state_zip = address3
        else:
            city_state_zip = address2
        if not city_state_zip:
            return street2, city, state, zip_code

        # Walk the tokens right to left: trailing ZIP/state tokens are peeled
        # off and everything from the first other token leftwards is the city.
        tokens = city_state_zip.split(' ')[::-1]
        for i, token in enumerate(tokens):
            if is_zip(token):
                zip_code = token
            elif is_a_state(token):
                state = token
            else:
                city = ' '.join(tokens[i:][::-1])
                break

        # If something is still missing, look for it in street2. The street2
        # check guards the whole condition (the original lacked parentheses).
        if street2 and not (zip_code and state and city):
            tokens = street2.split(' ')[::-1]
            for i, token in enumerate(tokens):
                if not zip_code and is_zip(token):
                    zip_code = token
                elif not state and is_a_state(token):
                    state = token
                elif not city:
                    city = ' '.join(tokens[i:][::-1])
                    break
        return street2, city, state, zip_code

    # Build the new columns from plain lists: assigning to the row objects
    # yielded by DataFrame.iterrows() is not guaranteed to reach the frame.
    parsed_rows = [
        split_address(address2, address3)
        for address2, address3 in zip(vendors_dataframe['AMADR2'], vendors_dataframe['AMADR3'])
    ]
    for position, column_name in enumerate(('street2', 'city', 'state_id', 'zip')):
        vendors_dataframe[column_name] = [values[position] for values in parsed_rows]

    vendors_dataframe.to_csv(output_file_path, index=False, sep=',')


# Run the vendor transformation: reads the I5 vendors TSV and writes i5.vendors.csv.
separate_address_into_different_columns()