# ETLs

## Imports

In [776]:
import pandas as pd
import sqlite3
import pyodbc

select_tables = "SELECT name FROM sqlite_master WHERE type='table'"

sales_con = sqlite3.connect("go_sales.sqlite")
sales_tables = pd.read_sql_query(select_tables, sales_con)

sales_country       = pd.read_sql_query("SELECT * FROM country;", sales_con)
order_details       = pd.read_sql_query("SELECT * FROM order_details;", sales_con)
order_header        = pd.read_sql_query("SELECT * FROM order_header;", sales_con)
order_method        = pd.read_sql_query("SELECT * FROM order_method;", sales_con)
product             = pd.read_sql_query("SELECT * FROM product;", sales_con)
product_line        = pd.read_sql_query("SELECT * FROM product_line;", sales_con)
product_type        = pd.read_sql_query("SELECT * FROM product_type;", sales_con)
sales_retailer_site = pd.read_sql_query("SELECT * FROM retailer_site;", sales_con)
return_reason       = pd.read_sql_query("SELECT * FROM return_reason;", sales_con)
returned_item       = pd.read_sql_query("SELECT * FROM returned_item;", sales_con)
sales_branch        = pd.read_sql_query("SELECT * FROM sales_branch;", sales_con)
sales_staff         = pd.read_sql_query("SELECT * FROM sales_staff;", sales_con)
SALES_TARGETData    = pd.read_sql_query("SELECT * FROM SALES_TARGETData;", sales_con)
sqlite_sequence     = pd.read_sql_query("SELECT * FROM sqlite_sequence;", sales_con)
print("Imported sales tables")

staff_con = sqlite3.connect("go_staff.sqlite")
staff_tables = pd.read_sql_query(select_tables, staff_con)

course            = pd.read_sql_query("SELECT * FROM course;", staff_con)
sales_branch      = pd.read_sql_query("SELECT * FROM sales_branch;", staff_con)
sales_staff       = pd.read_sql_query("SELECT * FROM sales_staff;", staff_con)
satisfaction      = pd.read_sql_query("SELECT * FROM satisfaction;", staff_con)
satisfaction_type = pd.read_sql_query("SELECT * FROM satisfaction_type;", staff_con)
training          = pd.read_sql_query("SELECT * FROM training;", staff_con)
print("Imported staff tables")

crm_con = sqlite3.connect("go_crm.sqlite")
crm_tables = pd.read_sql_query(select_tables, crm_con)
                           
age_group             = pd.read_sql_query("SELECT * FROM age_group;", crm_con)
crm_country           = pd.read_sql_query("SELECT * FROM country;", crm_con)
retailer              = pd.read_sql_query("SELECT * FROM retailer;", crm_con)
retailer_contact      = pd.read_sql_query("SELECT * FROM retailer_contact;", crm_con)
retailer_headquarters = pd.read_sql_query("SELECT * FROM retailer_headquarters;", crm_con)
retailer_segment      = pd.read_sql_query("SELECT * FROM retailer_segment;", crm_con)
crm_retailer_site     = pd.read_sql_query("SELECT * FROM retailer_site;", crm_con)
retailer_type         = pd.read_sql_query("SELECT * FROM retailer_type;", crm_con)
sales_demographic     = pd.read_sql_query("SELECT * FROM sales_demographic;", crm_con)
sales_territory       = pd.read_sql_query("SELECT * FROM sales_territory;", crm_con)
print("Imported crm tables")

inventory_level = pd.read_csv("GO_SALES_INVENTORY_LEVELSData.csv")
print("Imported inventory table")

sales_forecast = pd.read_csv("GO_SALES_PRODUCT_FORECASTData.csv")
print("Imported sales product forecast table")

Imported sales tables
Imported staff tables
Imported crm tables
Imported inventory table
Imported sales product forecast table


## SQL Server connection

In [777]:
servername = 'DESKTOP-9F8A8PF\\MSSQLSERVER01'
database = 'Datawarehouse'

sql_server_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={servername};DATABASE={database};Trusted_Connection=yes")
cursor = sql_server_conn.cursor()

try:
    cursor.execute("SELECT * FROM Test")
    test = cursor.fetchall()
    print(test)
except pyodbc.Error as e:
    print(e)

[(1, 'test      '), (2, 'test2     ')]


In [778]:
"""
Flexible method to merge two tables
- NaN values of one dataframe can be filled by the other dataframe
- Uses all available columns
- Errors when a row of the two dataframes doesn't match (df1 has 'A' and df2 has 'B' in row)
"""
def merge_tables(df1, df2, index_col):
    # Ensure 'CODE' is set as the index for both DataFrames
    if index_col not in df1.columns or index_col not in df2.columns:
        raise KeyError(f"{index_col} must be a column in both DataFrames.")
    
    df1 = df1.set_index(index_col)
    df2 = df2.set_index(index_col)

    # Identify common and exclusive columns
    common_columns = df1.columns.intersection(df2.columns)
    exclusive_df1 = df1.columns.difference(df2.columns)
    exclusive_df2 = df2.columns.difference(df1.columns)

    # Concatenate exclusive columns from each DataFrame onto the other
    df1_combined = pd.concat([df1, df2[exclusive_df2]], axis=1, sort=False)
    df2_combined = pd.concat([df2, df1[exclusive_df1]], axis=1, sort=False)

    # Resolve common columns with nulls and conflicts
    for col in common_columns:
        # Align the Series from both DataFrames for comparison
        series1, series2 = df1_combined[col].align(df2_combined[col])

        # Check for conflicts (non-null values that do not match)
        conflict_mask = (~series1.isnull() & ~series2.isnull() & (series1 != series2))
        if conflict_mask.any():
            raise ValueError(f"Merge failed due to conflict in column '{col}'")

        # Use values from df2 where df1 is null (prioritizing df1 values)
        df1_combined[col] = series1.combine_first(series2)

    return df1_combined

# Merge duplicate tables into single table
retailer_site = merge_tables(sales_retailer_site, crm_retailer_site, 'RETAILER_SITE_CODE')
# Column name mismatch
sales_country = sales_country.rename(columns={'COUNTRY': 'COUNTRY_EN'})
country = merge_tables(sales_country, crm_country, 'COUNTRY_CODE')

## Rename Dictionary

In [779]:
rename_mapping = {
    'ACTIVE_INDICATOR': 'ACTIVE_INDICATOR_bool',
    'ADDRESS1': 'ADDRESS1_address',
    'ADDRESS2': 'ADDRESS2_address',
    'CITY': 'CITY_name',
    'COMPANY_NAME': 'COMPANY_name',
    'COUNTRY_CODE': 'COUNTRY_id',
    'COUNTRY_EN': 'COUNTRY_name',
    'COURSE_CODE': 'COURSE_id',
    'COURSE_DESCRIPTION': 'COURSE_description',
    'COUNTRY_LANGUAGE_code': 'COUNTRY_LANGUAGE_code',
    'CURRENCY_NAME': 'CURRENCY_name',
    'DATE_HIRED': 'DATE_HIRED_date',
    'DESCRIPTION': 'PRODUCT_description',
    'EMAIL': 'EMAIL_address',
    'EXPECTED_VOLUME': 'EXPECTED_VOLUME_volume',
    'EXTENSION': 'EXTENSION_number',
    'E_MAIL': 'EMAIL_address',
    'FAX': 'FAX_phone',
    'FIRST_NAME': 'FIRST_NAME_name',
    'FLAG_IMAGE': 'FLAG_image',
    'GENDER': 'GENDER_char',
    'INTRODUCTION_DATE': 'PRODUCT_INTRODUCTION_DATE_date',
    'JOB_POSITION_EN': 'JOB_POSITION_name',
    'LANGUAGE': 'LANGUAGE_name',
    'LAST_NAME': 'LAST_NAME_name',
    'MANAGER_CODE': 'MANAGER_code',
    'MARGIN': 'PRODUCT_MARGIN_percentage',
    'MONTH': 'MONTH_number',
    'ORDER_DATE': 'ORDER_DATE_date',
    'ORDER_DETAIL_CODE': 'ORDER_DETAIL_id',
    'ORDER_METHOD_CODE': 'ORDER_METHOD_id',
    'ORDER_METHOD_EN': 'ORDER_METHOD_name',
    'ORDER_NUMBER': 'ORDER_number',
    'PHONE': 'PHONE_phone',
    'POSITION_EN': 'POSITION_name',
    'POSTAL_ZONE': 'POSTAL_ZONE_code',
    'PRODUCTION_COST': 'PRODUCT_PRODUCTION_COST_money',
    'PRODUCT_IMAGE': 'PRODUCT_image',
    'PRODUCT_LINE_CODE': 'PRODUCT_LINE_code',
    'PRODUCT_LINE_EN': 'PRODUCT_LINE_name',
    'PRODUCT_NAME': 'PRODUCT_name',
    'PRODUCT_NUMBER': 'PRODUCT_id',
    'QUANTITY': 'QUANTITY_number',
    'REGION': 'REGION_name',
    'RETAILER_CODE': 'RETAILER_id',
    'RETAILER_CODEMR': 'RETAILER_MR_id',
    'RETAILER_CONTACT_CODE': 'RETAILER_CONTACT_id',
    'RETAILER_NAME': 'RETAILER_name',
    'RETAILER_SITE_CODE': 'RETAILER_SITE_code',
    'RETAILER_TYPE_CODE': 'RETAILER_TYPE_code',
    'RETAILER_TYPE_EN': 'RETAILER_TYPE_name',
    'RETURN_CODE': 'RETURN_code',
    'RETURN_DATE': 'RETURN_DATE_date',
    'RETURN_DESCRIPTION_EN': 'RETURN_REASON_description',
    'RETURN_QUANTITY': 'RETURN_QUANTITY_number',
    'RETURN_REASON_CODE': 'RETURN_REASON_code',
    'SALES_BRANCH_CODE': 'SALES_BRANCH_code',
    'SALES_STAFF_CODE': 'SALES_STAFF_code',
    'SALES_TERRITORY_CODE': 'SALES_TERRITORY_code',
    'SATISFACTION_TYPE_CODE': 'SATISFACTION_TYPE_id',
    'SATISFACTION_TYPE_DESCRIPTION': 'SATISFACTION_TYPE_description',
    'SEGMENT_CODE': 'SEGMENT_code',
    'SEGMENT_LANGUAGE_code': 'SEGMENT_LANGUAGE_code',
    'TERRITORY_NAME_EN': 'TERRITORY_name',
    'UNIT_COST': 'UNIT_COST_money',
    'UNIT_PRICE': 'UNIT_PRICE_money',
    'UNIT_SALE_PRICE': 'UNIT_SALE_PRICE_money',
    'WORK_PHONE': 'WORK_PHONE_phone',
    'YEAR': 'YEAR_number'
}

# List of all vetted columns
valid_columns = list(rename_mapping.values())

# Filters out all columns of dataframe that aren't typed
def filterColumns(dataframe):
    valid_columns_set = set(valid_columns)
    actual_columns_set = set(dataframe.columns)
    intersection_columns = list(actual_columns_set.intersection(valid_columns_set))

    # Use the intersection result to filter columns from dataframe
    return dataframe[intersection_columns]

# Filters out all columns of dataframe that aren't typed
def excludeColumns(dataframe, column_names):
    return dataframe[dataframe.columns.difference(column_names)]

def sizeCheck(dataframe, expected_column_count):
    actual_column_count = len(dataframe.columns)
    if actual_column_count == expected_column_count:
        print(f'Table has {expected_column_count} columns')
    else:
        raise Exception(f'Table has {actual_column_count} columns, expected {expected_column_count}')


column_types = {
    'name': 'VARCHAR(80)',
    'image': 'VARCHAR(60)',
    'id': 'INT',
    'description': 'NTEXT',
    'money': 'DECIMAL(19,4)',
    'percentage': 'DECIMAL(12,12)',
    'date': 'a',
    'code': 'a',
    'char': 'CHAR(1)',
    'number': 'INT',
    'phone': 'VARCHAR(30)',
    'address': 'VARCHAR(80)',
    'bool': 'BIT',
    'volume': 'a'
}

def getTypes():
    types = {}
    for column in rename_mapping.values():
        column_type = column.rsplit('_', 1)[1]
        types[column_type] = ''
    return types

column_types

{'name': 'VARCHAR(80)',
 'image': 'VARCHAR(60)',
 'id': 'INT',
 'description': 'NTEXT',
 'money': 'DECIMAL(19,4)',
 'percentage': 'DECIMAL(12,12)',
 'date': 'a',
 'code': 'a',
 'char': 'CHAR(1)',
 'number': 'INT',
 'phone': 'VARCHAR(30)',
 'address': 'VARCHAR(80)',
 'bool': 'BIT',
 'volume': 'a'}

## Product ETL

In [780]:
product_etl = pd.merge(product, product_type, on="PRODUCT_TYPE_CODE")
product_etl = pd.merge(product_etl, product_line, on="PRODUCT_LINE_CODE")\
    [[
        "PRODUCT_NAME", "PRODUCT_IMAGE", "PRODUCT_NUMBER",
        "DESCRIPTION", "LANGUAGE", "PRODUCTION_COST",
        "MARGIN", "INTRODUCTION_DATE", "PRODUCT_LINE_CODE",
        "PRODUCT_LINE_EN"
    ]]\
    .rename(columns=rename_mapping)

product_etl

Unnamed: 0,PRODUCT_name,PRODUCT_image,PRODUCT_id,PRODUCT_description,LANGUAGE_name,PRODUCT_PRODUCTION_COST_money,PRODUCT_MARGIN_percentage,PRODUCT_INTRODUCTION_DATE_date,PRODUCT_LINE_code,PRODUCT_LINE_name
0,TrailChef Water Bag,P01CE1CG1.jpg,1,"Lightweight, collapsible bag to carry liquids ...",EN,4,.33,15-2-2011,1,Camping Equipment
1,TrailChef Utensils,P10CE1CG1.jpg,10,"Spoon, fork and knife set made of a light yet ...",EN,10,.4,15-2-2011,1,Camping Equipment
2,Insect Bite Relief,P100OP4FA17.jpg,100,The Insect Bite Relief helps the itching and s...,EN,3,.5,15-2-2011,4,Outdoor Protection
3,Hailstorm Steel Irons,P101GE5IR18.jpg,101,Iron is 17-4 stainless steel. Shafts are grap...,EN,305.54,.43,15-12-2019,5,Golf Equipment
4,Hailstorm Titanium Irons,P102GE5IR18.jpg,102,Made entirely of pure titanium. The ultimate i...,EN,380.95,.51,10-12-2019,5,Golf Equipment
...,...,...,...,...,...,...,...,...,...,...
110,Sun Shield,P91OP4SS16.jpg,95,"PABA free sunscreen, SPF 30, poison oak and iv...",EN,3,.5,15-2-2011,4,Outdoor Protection
111,Compact Relief Kit,P96OP4FA17.jpg,96,A personal first aid kit is recommended for ev...,EN,16.43,.28,15-2-2011,4,Outdoor Protection
112,Deluxe Family Relief Kit,P96OP4FA17.jpg,97,A complete medical kit suitable for families w...,EN,25,.28,5-3-2013,4,Outdoor Protection
113,Calamine Relief,P98OP4FA17.jpg,98,Use the Calamine Relief for allergic skin reac...,EN,3,.5,15-2-2011,4,Outdoor Protection


## Sales Staff ETL

## Satisfaction type ETL

In [782]:
satisfaction_type_etl = satisfaction_type.rename(columns=rename_mapping)

satisfaction_type_etl = filterColumns(satisfaction_type_etl)

sizeCheck(satisfaction_type_etl,2)
satisfaction_type_etl

Table has 2 columns


Unnamed: 0,SATISFACTION_TYPE_description,SATISFACTION_TYPE_id
0,Not satisfied,1
1,Less than satisfied,2
2,Satisfied,3
3,Very Satisfied,4
4,More than satisfied,5


## Course ETL

In [783]:
course_etl = course.rename(columns=rename_mapping)

course_etl = filterColumns(course_etl)

sizeCheck(course_etl,2)
course_etl

Table has 2 columns


Unnamed: 0,COURSE_description,COURSE_id
0,GO Orientation,1
1,GO Communication,2
2,GO Sales 1,3
3,GO Sales 2,4
4,GO Marketing 1,5
5,GO Marketing 2,6
6,GO Marketing 3,7
7,GO Management 1,8
8,GO Management 2,9


## Sales Forecast ETL

In [765]:
sales_forecast_etl = sales_forecast[["YEAR", "MONTH", "EXPECTED_VOLUME"]]\
        .rename(columns=rename_mapping)

sales_forecast_etl = filterColumns(sales_forecast_etl)

sizeCheck(sales_forecast_etl,3)
sales_forecast_etl

Table has 3 columns


Unnamed: 0,MONTH_number,EXPECTED_VOLUME_volume,YEAR_number
0,12,383,2022
1,1,80,2021
2,2,51,2021
3,3,214,2021
4,4,300,2021
...,...,...,...
3867,8,282,2022
3868,9,920,2022
3869,10,1081,2022
3870,11,398,2022


## Retailer Contact ETL

In [762]:
retailer_contact_etl = pd.merge(retailer_contact, retailer_site, on='RETAILER_SITE_CODE')
retailer_contact_etl = pd.merge(retailer_contact_etl, country, on='COUNTRY_CODE')
retailer_contact_etl = pd.merge(retailer_contact_etl, sales_territory, on='SALES_TERRITORY_CODE')\
    .rename(columns=rename_mapping)

retailer_contact_etl = filterColumns(retailer_contact_etl)

sizeCheck(retailer_contact_etl,23)
retailer_contact_etl

Table has 23 columns


Unnamed: 0,RETAILER_CONTACT_id,EXTENSION_number,ADDRESS2_address,RETAILER_id,RETAILER_SITE_code,CITY_name,LAST_NAME_name,GENDER_char,JOB_POSITION_name,LANGUAGE_name,...,REGION_name,TERRITORY_name,FLAG_image,EMAIL_address,ACTIVE_INDICATOR_bool,COUNTRY_name,POSTAL_ZONE_code,ADDRESS1_address,FIRST_NAME_name,CURRENCY_name
0,10,2489,Bureau 1061,93,14,Montréal,Smith,M,Chief Purchaser,EN,...,Québec,Americas,F04,FSmith@legolfeurinc.com,1,Canada,H2Y 2W2,"500, Place d'Armes",Frank,dollars
1,100,,,104,85,Las Vegas,Yates,F,Stock Manager,EN,...,Nevada,Americas,F03,Y2883@emertxe.com,1,United States,89118,738 Stoney Road,Amanda,dollars
2,101,,,104,84,Carson City,Schmidt,F,Assistant Purchaser,EN,...,Nevada,Americas,F03,S8832@emertxe.com,1,United States,89763,7543 South Carson Street,Paula,dollars
3,102,445,,104,88,Houston,Tao,M,District Manager,EN,...,Texas,Americas,F03,T8839@emertxe.com,1,United States,77112,83 Nirson Road,Micheal,dollars
4,103,,,105,92,Miami,Murphy,M,District Manager,EN,...,Florida,Americas,F03,Murphy@eyedimensions3.com,0,United States,33021,1809 Tusalane Avenue,Jack,dollars
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,95,,P.O. Box 4390,104,87,Chattanooga,Gomes,F,Site Manager,EN,...,Tennessee,Americas,F03,G1289@emertxe.com,1,United States,37405-0955,1210 Somerville Avenue,Jennifer,dollars
387,96,,,104,91,Charleston,Moon,M,Site Assistant Manager,EN,...,West Virginia,Americas,F03,M0019@emertxe.com,1,United States,25301-1299,1733 Christopher Street,Timothy,dollars
388,97,,,104,90,Seattle,Fischer,M,Stock Manager,EN,...,Washington,Americas,F03,F9943@emertxe.com,1,United States,98154,17432 Aurora Ave. N.,Paul,dollars
389,98,,,104,89,Alexandria,Wilkes,M,Site Assistant Manager,EN,...,Virginia,Americas,F03,W3219@emertxe.com,1,United States,22323,628 Prince Street,Derek,dollars


## Retailer ETL

In [763]:
# Merge tables
retailer_etl = pd.merge(retailer, retailer_headquarters, on='RETAILER_CODEMR')
retailer_etl = pd.merge(retailer_etl, retailer_type, on='RETAILER_TYPE_CODE')

# Merge and rename language columns
retailer_etl = pd.merge(retailer_etl, retailer_segment, on='SEGMENT_CODE').rename(columns={'LANGUAGE':'SEGMENT_LANGUAGE_code'})
retailer_etl = pd.merge(retailer_etl, country, on='COUNTRY_CODE').rename(columns={'LANGUAGE':'COUNTRY_LANGUAGE_code'})

# Remove columns early due to naming conflicts
retailer_etl = excludeColumns(retailer_etl, ['TRIAL219','TRIAL222_x','TRIAL222_y','TRIAL222'])

# Rename columns
retailer_etl = pd.merge(retailer_etl, sales_territory, on='SALES_TERRITORY_CODE')\
    .rename(columns=rename_mapping)

# Exclude invalid columns
retailer_etl = filterColumns(retailer_etl)

sizeCheck(retailer_etl,22)
retailer_etl

Table has 22 columns


Unnamed: 0,COUNTRY_LANGUAGE_code,SEGMENT_code,ADDRESS2_address,RETAILER_id,CITY_name,COMPANY_name,SALES_TERRITORY_code,COUNTRY_id,FAX_phone,REGION_name,...,SEGMENT_LANGUAGE_code,FLAG_image,RETAILER_TYPE_name,COUNTRY_name,POSTAL_ZONE_code,RETAILER_name,ADDRESS1_address,RETAILER_TYPE_code,RETAILER_MR_id,CURRENCY_name
0,EN,3,,100,Lincoln,Golf Masters,1,3,1 (402) 475-4717,Nebraska,...,EN,F03,Golf Shop,United States,68538,Golf Masters,2845 South Second Street,1,70,dollars
1,EN,5,,101,New York,The Marketplace,1,3,1 (212) 477-9716,New York,...,EN,F03,Department Store,United States,10039,The Marketplace,1902 Lancaster Street,2,71,dollars
2,EN,7,P.O. Box 224,102,Hanover,Camping Equipment Online,1,3,1 (603) 448-6412,New Hampshire,...,EN,F03,Direct Marketing,United States,03761,Camping Equipment Online,332 South Main Street,3,72,dollars
3,EN,9,,103,Portland,Tamarack Outfitter Rentals,1,3,1 (503) 285-0894,Oregon,...,EN,F03,Equipment Rental Store,United States,97295,Tamarack Outfitter Rentals,582 NE Tomahawk Island Drive,5,73,dollars
4,EN,2,,104,Houston,Extreme Outdoors,1,3,1 (713) 524-3215,Texas,...,EN,F03,Outdoors Shop,United States,77112,Extreme Outdoors,83 Nirson Road,6,74,dollars
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,EN,3,Bureau 1061,93,Montréal,Le Golfeur,1,4,1 (514) 741-1610,Québec,...,EN,F04,Golf Shop,Canada,H2Y 2W2,Le Golfeur,"500, Place d'Armes",1,8,dollars
83,EN,1,,95,Ottawa,Falcon Outfitters,1,4,1 (613) 740-6701,Ontario,...,EN,F04,Outdoors Shop,Canada,K1G 4Z0,Falcon Outfitters,9755 Riverside Drive,6,5,dollars
84,EN,12,,96,Toronto,Maximum Sports,1,4,1 (416) 491-8738,Ontario,...,EN,F04,Sports Store,Canada,M8Y 4K8,Maximum Sports,"Suite 12, 801 Dundas Street",8,6,dollars
85,EN,8,,97,Toronto,Act'N'Up Fitness,1,4,1 (416) 496-5066,Ontario,...,EN,F04,Warehouse Store,Canada,M2P 4F6,Act'N'Up Fitness,"87, 2399 Sommerset Avenue",4,7,dollars


## Order ETL

In [764]:
order_etl = pd.merge(order_header, order_method, on='ORDER_METHOD_CODE').rename(columns=rename_mapping)

order_etl = excludeColumns(order_etl, ['RETAILER_SITE_code', 'SALES_BRANCH_code'])
order_etl = filterColumns(order_etl)

sizeCheck(order_etl,7)
order_etl

Table has 7 columns


Unnamed: 0,ORDER_METHOD_name,RETAILER_name,ORDER_METHOD_id,RETAILER_CONTACT_id,ORDER_DATE_date,ORDER_number,SALES_STAFF_code
0,Sales visit,Ultra Sports,7,6,2020-04-14,1153,50
1,E-mail,Ultra Sports,4,6,2020-10-01,1154,50
2,Sales visit,Ultra Sports,7,65,2020-04-21,1155,49
3,Web,Ultra Sports,5,65,2020-09-09,1156,49
4,Telephone,Ultra Sports,2,66,2020-04-10,1157,50
...,...,...,...,...,...,...,...
5355,E-mail,Preben's T°y,4,329,2020-01-13,9475,29
5356,Web,Preben's T°y,5,327,2021-01-02,9476,29
5357,Telephone,Preben's T°y,2,329,2021-01-18,9477,29
5358,Sales visit,Preben's T°y,7,327,2022-01-15,9478,27


## Return reason ETL

In [688]:
# Rename all columns
return_reason_etl = return_reason.rename(columns=rename_mapping)

# Filter out columns
return_reason_etl = filterColumns(return_reason_etl)

print(len(return_reason_etl.columns))
return_reason_etl

2


Unnamed: 0,RETURN_REASON_code,RETURN_REASON_description
0,1,Defective product
1,2,Incomplete product
2,3,Wrong product ordered
3,4,Wrong product shipped
4,5,Unsatisfactory product


## Returned Item ETL

In [791]:
# Rename columns
returned_item_etl = returned_item.rename(columns=rename_mapping)

# Exclude invalid columns
returned_item_etl = filterColumns(returned_item_etl)

sizeCheck(returned_item_etl,5)
returned_item_etl

Table has 5 columns


Unnamed: 0,RETURN_REASON_code,RETURN_DATE_date,ORDER_DETAIL_id,RETURN_QUANTITY_number,RETURN_code
0,5,1-8-2021 04:10:24,84858,8,1491
1,2,6-12-2020 18:46:19,84440,2,1492
2,4,24-6-2021 09:23:14,84867,22,1493
3,3,21-7-2021 00:00:09,84873,20,1494
4,1,7-10-2020 05:13:58,84488,2,1496
...,...,...,...,...,...
701,3,6-6-2021 09:22:12,114972,24,2465
702,5,6-10-2021 12:31:25,114983,66,2466
703,4,31-3-2022 15:39:38,115215,64,2467
704,1,23-12-2022 19:48:50,115171,2,2468


## Order Details ETL

In [793]:
# Rename columns
order_detail_etl = order_details.rename(columns=rename_mapping)

# Remove unnecessary columns
order_detail_etl = filterColumns(order_detail_etl)

order_detail_etl

Unnamed: 0,PRODUCT_id,UNIT_COST_money,ORDER_DETAIL_id,UNIT_PRICE_money,UNIT_SALE_PRICE_money,QUANTITY_number,ORDER_number
0,96,16.1,100000,22.54,22.54,16,8462
1,96,16.1,100001,22.54,22.54,20,9111
2,96,16.1,100002,22.54,22.54,24,8451
3,96,16.1,100003,22.54,22.54,18,8453
4,96,16.1,100004,22.54,22.54,20,8439
...,...,...,...,...,...,...,...
43058,95,2.76,99995,5.55,5.55,146,8459
43059,95,2.76,99996,5.55,5.55,172,8457
43060,95,2.76,99997,5.55,5.55,192,9267
43061,95,2.76,99998,5.55,5.55,192,8441
