In [None]:
"""
There is the customer_master.xlsx file as a definition table for data preparation. 
It helps to exlude wrong inforamtion in an internal data source which cannot be corrected.

columns: ['Customer', 'Tier', 'Accounts', 'Company', 'the_newest_OPCO',
       'Customer Name', 'ONE_NAME', 'Indirect/Direct', 'Channel3', 'New Type4']

There is a budget file which budget information for two fiscal years. 

columns: ['sold_to_customer', 'sold_to_customer_n', 'Country', 'Country Name',
       'City', 'Type', 'OI Bgt FY21', 'OI Bgt FY22', 'Comments']

Main goal of this code to combine these two files in one which would be used as one of 
Tableau data sources.

"""

In [45]:
import pandas as pd
import numpy as np

#budget df preparation
budget_df = pd.read_excel('data_files/budget_file.xlsx', sheet_name='Budget')
budget_df = budget_df.loc[:,['sold_to_customer','sold_to_customer_n','Type', 'OI Bgt FY21', 'OI Bgt FY22']]
budget_df.sold_to_customer = budget_df.sold_to_customer.astype('str') # the internal database has ID beginning with 0 in str format
budget_df.sold_to_customer = "0" + budget_df.sold_to_customer

#customer master df preparation
customer_df = pd.read_excel('data_files/customer_master.xlsx', sheet_name='data')
customer_df = customer_df.loc[:,['Customer', 'Tier', 'the_newest_OPCO',
       'Customer Name', 'ONE_NAME', 'Indirect/Direct', 'Channel3', 'New Type4']]
customer_df = customer_df.rename(columns={'Customer':'sold_to_customer','the_newest_OPCO':'OPCO', 'Channel3':'Channel', 'New Type4':'Type'})
customer_df.sold_to_customer = customer_df.sold_to_customer.astype('str') # the internal database has ID beginning with 0 in str format
customer_df.sold_to_customer = "0" + customer_df.sold_to_customer
customer_df = customer_df.set_index('sold_to_customer')

In [46]:
df = budget_df.join(customer_df, on='sold_to_customer', how = 'left', lsuffix = 'l_', rsuffix = 'r_')

In [47]:
writer = pd.ExcelWriter('budget_file.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='budget_by_month')
writer.save()

In [51]:

df2=customer_df.dropna(subset=['ONE_NAME'])
df2

Unnamed: 0_level_0,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Type
sold_to_customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0200004036,A,YEF-NL,HORN-EC&P LTD,Horn Engineering,Indirect,Channel,Distributor
0200071729,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR
0200072362,A,YEF-E,"APLIQUEM MICROONES 21, S.L.",APLIQUEM MICROONES 21,Indirect,Channel,Distributor
0200072549,A?,YEF-I,ASCON TECNOLOGIC SRL,ASCON TECNOLOGIC,Indirect,Channel,Distributor
0200072578,B,YEF-NL,ASML NETHERLANDS B.V.,ASML NETHERLANDS,Indirect,Channel,OEM
...,...,...,...,...,...,...,...
0nan,,YEF-I,MARTEC SRL Martec,MARTEC,Indirect,Channel,Agent
0nan,,YEF-I,MEDITER SAS G Corradi,MEDITER,Indirect,Channel,Agent
0nan,,YEF-I,Musumeci Felice,MUSUMECI,Indirect,Channel,Agent
0nan,,YEF-I,Musumeci Group Srl,MUSUMECI,Indirect,Channel,Agent


In [52]:
budget_df

Unnamed: 0,sold_to_customer,sold_to_customer_n,Type,OI Bgt FY21,OI Bgt FY22
0,0200261563,AB Amber Grid,VAR,0.0,0.0
1,0200071729,ABB Switzerland Ltd.IFS-S,VAR,30000.0,30000.0
2,0200071769,AC SOLUTION SRL,,0.0,0.0
3,0200071769,AC SOLUTION SRL,0,0.0,0.0
4,0200079037,ADAMA MAKHTESHIM LTD,Distributer,0.0,0.0
...,...,...,...,...,...
144,00,SONNEK Engineering,OEM,0.0,150000.0
145,00,Guerra,Agent,0.0,80000.0
146,00,Guerra,distributor,0.0,20000.0
147,00,Baglieri Maurizio,Agent,0.0,600000.0
