In [1]:
# System imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import hvplot.pandas
import panel as pn
from pathlib import Path
from dotenv import load_dotenv
from panel.interact import interact
from panel import widgets
from string import digits
import csv
import json
import numpy as np
import dateparser

# Initialise Panel for use inside the notebook (no optional extensions are requested here).
pn.extension()

# IPython line magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# Local imports
import sys
# Extend the module search path before the imports below run. The path is relative,
# so it is resolved against the kernel's working directory.
# NOTE(review): presumably ../lib2 holds Constants.py and PreprocessingTools.py — confirm.
sys.path.append("../lib2")

from Constants import Constants
from PreprocessingTools import CustomerNameCleaningFunctions
from PreprocessingTools import MappingFunctions


In [3]:
# Construct tools
# One shared debug level is handed to both preprocessing helpers.
# NOTE(review): 0 is presumably the quietest setting — confirm in PreprocessingTools.
debug_level = 0

# Project constants; the cells below read the RAW_* / ANON_* file paths from it.
constants = Constants()

# Name-cleaning helper first, then the mapping helper (same construction order as before).
tool_customer, tool_mapping = (
    CustomerNameCleaningFunctions(debug_level),
    MappingFunctions(debug_level),
)


In [4]:
# Build the anonymous customer name mapping
#
# Stages: read the five raw CSV exports, pull the customer names out of each index,
# clean them, build one name mapping per dataset, merge the mappings, write the result.
# The DataFrames and the *_customers_raw lists stay in the namespace for later cells.

## Each raw export paired with the column that holds the customer name
raw_sources = (
    (constants.RAW_ATLAS_FILE_PATH, "Customers"),
    (constants.RAW_FORECAST_DATA_FILE_PATH, "Organization Name"),
    (constants.RAW_REVENUE2020_FILE_PATH, "Name"),
    (constants.RAW_REVENUE2020A_FILE_PATH, "Payee Name"),
    (constants.RAW_ATLAS_2_FILE_PATH, "Customers"),
)

## Read the datasets into Pandas DataFrames, indexed by customer name
atlas, forecast, revenue2020, revenue2020A, atlas2 = [
    pd.read_csv(csv_path, index_col=name_column)
    for csv_path, name_column in raw_sources
]

## Extract paying customers (the index values of each frame)
(
    atlas_customers_raw,
    forecast_customers_raw,
    revenue2020_customers_raw,
    revenue2020A_customers_raw,
    atlas2_customers_raw,
) = [
    frame.index.to_list()
    for frame in (atlas, forecast, revenue2020, revenue2020A, atlas2)
]

## Clean up names
(
    atlas_customers_cleaned,
    forecast_customers_cleaned,
    revenue2020_customers_cleaned,
    revenue2020A_customers_cleaned,
    atlas2_customers_cleaned,
) = [
    tool_customer.cleanup_customer_names(raw_names)
    for raw_names in (
        atlas_customers_raw,
        forecast_customers_raw,
        revenue2020_customers_raw,
        revenue2020A_customers_raw,
        atlas2_customers_raw,
    )
]

## Build name mapping (one per dataset)
(
    name_mapping_atlas,
    name_mapping_forecast,
    name_mapping_revenue2020,
    name_mapping_revenue2020A,
    name_mapping_atlas2,
) = [
    tool_mapping.build_name_mapping(cleaned_names)
    for cleaned_names in (
        atlas_customers_cleaned,
        forecast_customers_cleaned,
        revenue2020_customers_cleaned,
        revenue2020A_customers_cleaned,
        atlas2_customers_cleaned,
    )
]

## Combine name maps
# When a key occurs in more than one mapping, the entry from the later dataset wins.
customer_name_mapping = {
    **name_mapping_atlas,
    **name_mapping_forecast,
    **name_mapping_revenue2020,
    **name_mapping_revenue2020A,
    **name_mapping_atlas2,
}

## Export the name mapping
tool_mapping.write_customer_name_mapping(customer_name_mapping)


FileNotFoundError: [Errno 2] No such file or directory: '..\\..\\Resources\\01_Raw\\ATLAS.csv'

In [5]:
# Anonymize the raw data files
#
# Requires the "Build the anonymous customer name mapping" cell to have run in this
# kernel: it provides the five DataFrames (atlas, forecast, revenue2020, revenue2020A,
# atlas2) and the matching *_customers_raw name lists used here.

## Import name mapping
customer_name_mapping = tool_mapping.read_customer_name_mapping()

## Clean up names
atlas_customers_cleaned         = tool_customer.cleanup_customer_names(atlas_customers_raw)
forecast_customers_cleaned      = tool_customer.cleanup_customer_names(forecast_customers_raw)
revenue2020_customers_cleaned   = tool_customer.cleanup_customer_names(revenue2020_customers_raw)
revenue2020A_customers_cleaned  = tool_customer.cleanup_customer_names(revenue2020A_customers_raw)
atlas2_customers_cleaned        = tool_customer.cleanup_customer_names(atlas2_customers_raw)

## Map the customer names
# Every cleaned name goes through the mapping before export. Writing the cleaned names
# directly would put real customer names into the ANON_* files.
# NOTE(review): presumably anonymize_customer_list returns a list parallel to its input —
# confirm in PreprocessingTools.
atlas_customers_anonymized          = tool_customer.anonymize_customer_list(atlas_customers_cleaned, customer_name_mapping)
forecast_customers_anonymized       = tool_customer.anonymize_customer_list(forecast_customers_cleaned, customer_name_mapping)
revenue2020_customers_anonymized    = tool_customer.anonymize_customer_list(revenue2020_customers_cleaned, customer_name_mapping)
revenue2020A_customers_anonymized   = tool_customer.anonymize_customer_list(revenue2020A_customers_cleaned, customer_name_mapping)
atlas2_customers_anonymized         = tool_customer.anonymize_customer_list(atlas2_customers_cleaned, customer_name_mapping)

## Fix the customers in the DataFrames
# Each frame is rebuilt with the anonymized names under its original customer column label.
atlas           = tool_customer.set_customers_index(atlas, atlas_customers_anonymized, "Customers")
forecast        = tool_customer.set_customers_index(forecast, forecast_customers_anonymized, "Organization Name")
revenue2020     = tool_customer.set_customers_index(revenue2020, revenue2020_customers_anonymized, "Name")
revenue2020A    = tool_customer.set_customers_index(revenue2020A, revenue2020A_customers_anonymized, "Payee Name")
atlas2          = tool_customer.set_customers_index(atlas2, atlas2_customers_anonymized, "Customers")

## Export
atlas.to_csv(constants.ANON_ATLAS_FILE_PATH)
forecast.to_csv(constants.ANON_FORECAST_DATA_FILE_PATH)
revenue2020.to_csv(constants.ANON_REVENUE2020_FILE_PATH)
revenue2020A.to_csv(constants.ANON_REVENUE2020A_FILE_PATH)
atlas2.to_csv(constants.ANON_ATLAS_2_FILE_PATH)
