In [None]:
"""
There is the customer_master.xlsx file as a definition table for data preparation. 
It helps to exlude wrong inforamtion in an internal data source which cannot be corrected.

columns: ['Customer', 'Tier', 'Accounts', 'Company', 'the_newest_OPCO',
       'Customer Name', 'ONE_NAME', 'Indirect/Direct', 'Channel3', 'New Type4']

There is a budget file which budget information for two fiscal years. 

columns: ['sold_to_customer', 'sold_to_customer_n', 'Country', 'Country Name',
       'City', 'Type', 'OI Bgt FY21', 'OI Bgt FY22', 'Comments']

Main goal of this code to combine these two files in one which would be used as one of 
Tableau data sources.

"""

In [112]:
import pandas as pd
import numpy as np

#budget df preparation
budget_df = pd.read_excel('data_files/budget_file.xlsx', sheet_name='budget_by_month', index_col='sold_to_customer')
budget_df = budget_df.reset_index()
budget_df.sold_to_customer = budget_df.sold_to_customer.astype('str') # the internal database has ID beginning with 0 in str format
budget_df.sold_to_customer = "0" + budget_df.sold_to_customer

#customer master df preparation
customer_df = pd.read_excel('data_files/customer_master.xlsx', sheet_name='data')
customer_df = customer_df.loc[:,['Customer', 'Tier', 'the_newest_OPCO',
       'Customer Name', 'ONE_NAME', 'Indirect/Direct', 'Channel3', 'New Type4']]
customer_df = customer_df.rename(columns={'Customer':'sold_to_customer','the_newest_OPCO':'OPCO', 'Channel3':'Channel', 'New Type4':'Type'})
customer_df.sold_to_customer = customer_df.sold_to_customer.astype('str') # the internal database has ID beginning with 0 in str format
customer_df.sold_to_customer = "0" + customer_df.sold_to_customer
customer_df=customer_df.dropna(subset=['ONE_NAME'])
#customer_df = customer_df.set_index('sold_to_customer')

In [114]:
customer_df

Unnamed: 0,sold_to_customer,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Type
0,0200004036,A,YEF-NL,HORN-EC&P LTD,Horn Engineering,Indirect,Channel,Distributor
1,0200004637,,YEF-F,JGC HOLDINGS CORPORATION,,Direct,Direct,End User
2,0200005189,,YEF-GB,CAMERON INTERNATIONAL,,Direct,Direct,End User
3,0200007722,,YEF-NL,SHELL GLOBAL SOLUTIONS US INC,,Direct,Direct,End User
4,0200008176,,YEF-TR,CAYELI BAKIR ISLETMELERI A.S.,,Direct,Direct,Others
...,...,...,...,...,...,...,...,...
14310,0200339571,,,,,Direct,Direct,Others
14311,0200343982,,,,,Direct,Direct,End User
14312,0200301189,,,,,Direct,Direct,End User
14313,0200077094,,,,,Direct,Direct,End User


In [93]:
df=customer_df.dropna(subset=['ONE_NAME'])

In [94]:
df

Unnamed: 0,sold_to_customer,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Type
0,0200004036,A,YEF-NL,HORN-EC&P LTD,Horn Engineering,Indirect,Channel,Distributor
44,0200071729,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR
254,0200072362,A,YEF-E,"APLIQUEM MICROONES 21, S.L.",APLIQUEM MICROONES 21,Indirect,Channel,Distributor
340,0200072549,A?,YEF-I,ASCON TECNOLOGIC SRL,ASCON TECNOLOGIC,Indirect,Channel,Distributor
350,0200072578,B,YEF-NL,ASML NETHERLANDS B.V.,ASML NETHERLANDS,Indirect,Channel,OEM
...,...,...,...,...,...,...,...,...
14134,0nan,,YEF-I,MARTEC SRL Martec,MARTEC,Indirect,Channel,Agent
14135,0nan,,YEF-I,MEDITER SAS G Corradi,MEDITER,Indirect,Channel,Agent
14136,0nan,,YEF-I,Musumeci Felice,MUSUMECI,Indirect,Channel,Agent
14137,0nan,,YEF-I,Musumeci Group Srl,MUSUMECI,Indirect,Channel,Agent


In [103]:
dft = budget_df.merge(df, how = 'cross', on='sold_to_customer', suffixes=('_x', '_y'))

MergeError: Can not pass on, right_on, left_on or set right_index=True or left_index=True

In [98]:
writer = pd.ExcelWriter('/Users/aleksejgukov/Desktop/budget_file.xlsx', engine='xlsxwriter')
dft.to_excel(writer, sheet_name='budget_by_month')
writer.save()

In [88]:
df2=customer_df.dropna(subset=['ONE_NAME'])

In [89]:
df2

Unnamed: 0_level_0,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Type
sold_to_customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0200004036,A,YEF-NL,HORN-EC&P LTD,Horn Engineering,Indirect,Channel,Distributor
0200071729,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR
0200072362,A,YEF-E,"APLIQUEM MICROONES 21, S.L.",APLIQUEM MICROONES 21,Indirect,Channel,Distributor
0200072549,A?,YEF-I,ASCON TECNOLOGIC SRL,ASCON TECNOLOGIC,Indirect,Channel,Distributor
0200072578,B,YEF-NL,ASML NETHERLANDS B.V.,ASML NETHERLANDS,Indirect,Channel,OEM
...,...,...,...,...,...,...,...
0nan,,YEF-I,MARTEC SRL Martec,MARTEC,Indirect,Channel,Agent
0nan,,YEF-I,MEDITER SAS G Corradi,MEDITER,Indirect,Channel,Agent
0nan,,YEF-I,Musumeci Felice,MUSUMECI,Indirect,Channel,Agent
0nan,,YEF-I,Musumeci Group Srl,MUSUMECI,Indirect,Channel,Agent


In [65]:
writer = pd.ExcelWriter('/Users/aleksejgukov/Desktop/budget_file.xlsx', engine='xlsxwriter')
df3.to_excel(writer, sheet_name='budget_by_month')
writer.save()

In [70]:
budget_df = budget_df.set_index('sold_to_customer')

In [71]:
budget_df

Unnamed: 0_level_0,sold_to_customer_n,Type,OI Bgt FY21,OI Bgt FY22
sold_to_customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0200261563,AB Amber Grid,VAR,0.0,0.0
0200071729,ABB Switzerland Ltd.IFS-S,VAR,30000.0,30000.0
0200071769,AC SOLUTION SRL,,0.0,0.0
0200079037,ADAMA MAKHTESHIM LTD,Distributer,0.0,0.0
0200288068,Arte Teknoloji Sanayi Ve Ticaret A.S.,Distributer,300000.0,375000.0
...,...,...,...,...
0200311128,ITA.CA ENGINEERING SRL,VAR,0.0,250000.0
0200327885,Score VAC Limited,OEM,0.0,100000.0
0200285374,SONNEK Engineering GmbH,OEM,0.0,150000.0
0200283059,Baglieri Maurizio,Agent,0.0,600000.0


In [72]:
df = budget_df.join(df2, on='sold_to_customer', how = 'left', lsuffix = 'l_', rsuffix = 'r_')

In [73]:
writer = pd.ExcelWriter('/Users/aleksejgukov/Desktop/budget_file.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='budget_by_month')
writer.save()

In [75]:
new = pd.concat([df,df2])

In [78]:
new

Unnamed: 0_level_0,sold_to_customer_n,Typel_,OI Bgt FY21,OI Bgt FY22,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Typer_,Type
sold_to_customer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0200261563,AB Amber Grid,VAR,0.0,0.0,,,,,,,,
0200071729,ABB Switzerland Ltd.IFS-S,VAR,30000.0,30000.0,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR,
0200071769,AC SOLUTION SRL,,0.0,0.0,,,,,,,,
0200079037,ADAMA MAKHTESHIM LTD,Distributer,0.0,0.0,,,,,,,,
0200288068,Arte Teknoloji Sanayi Ve Ticaret A.S.,Distributer,300000.0,375000.0,A,YEF-TR,Arte Teknoloji Sanayi Ve Ticaret A.S.,Arte Teknoloji,Indirect,Channel,Distributor,
...,...,...,...,...,...,...,...,...,...,...,...,...
0nan,,,,,,YEF-I,MARTEC SRL Martec,MARTEC,Indirect,Channel,,Agent
0nan,,,,,,YEF-I,MEDITER SAS G Corradi,MEDITER,Indirect,Channel,,Agent
0nan,,,,,,YEF-I,Musumeci Felice,MUSUMECI,Indirect,Channel,,Agent
0nan,,,,,,YEF-I,Musumeci Group Srl,MUSUMECI,Indirect,Channel,,Agent


In [76]:
writer = pd.ExcelWriter('/Users/aleksejgukov/Desktop/budget_file.xlsx', engine='xlsxwriter')
new.to_excel(writer, sheet_name='budget_by_month')
writer.save()

In [79]:
new2 = new.reset_index()
new2 = new.drop_duplicates(subset='sold_to_customer', keep = 'first')

KeyError: Index(['sold_to_customer'], dtype='object')

In [80]:
new2 = new.reset_index()

In [81]:
new2

Unnamed: 0,sold_to_customer,sold_to_customer_n,Typel_,OI Bgt FY21,OI Bgt FY22,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Typer_,Type
0,0200261563,AB Amber Grid,VAR,0.0,0.0,,,,,,,,
1,0200071729,ABB Switzerland Ltd.IFS-S,VAR,30000.0,30000.0,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR,
2,0200071769,AC SOLUTION SRL,,0.0,0.0,,,,,,,,
3,0200079037,ADAMA MAKHTESHIM LTD,Distributer,0.0,0.0,,,,,,,,
4,0200288068,Arte Teknoloji Sanayi Ve Ticaret A.S.,Distributer,300000.0,375000.0,A,YEF-TR,Arte Teknoloji Sanayi Ve Ticaret A.S.,Arte Teknoloji,Indirect,Channel,Distributor,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,0nan,,,,,,YEF-I,MARTEC SRL Martec,MARTEC,Indirect,Channel,,Agent
292,0nan,,,,,,YEF-I,MEDITER SAS G Corradi,MEDITER,Indirect,Channel,,Agent
293,0nan,,,,,,YEF-I,Musumeci Felice,MUSUMECI,Indirect,Channel,,Agent
294,0nan,,,,,,YEF-I,Musumeci Group Srl,MUSUMECI,Indirect,Channel,,Agent


In [84]:
new2 = new2.drop_duplicates('sold_to_customer', keep = 'first')

In [85]:
new2

Unnamed: 0,sold_to_customer,sold_to_customer_n,Typel_,OI Bgt FY21,OI Bgt FY22,Tier,OPCO,Customer Name,ONE_NAME,Indirect/Direct,Channel,Typer_,Type
0,0200261563,AB Amber Grid,VAR,0.0,0.0,,,,,,,,
1,0200071729,ABB Switzerland Ltd.IFS-S,VAR,30000.0,30000.0,B,YEF-NL,ABB Switzerland Ltd.IFS-S,ABB Switzerland,Indirect,Channel,VAR,
2,0200071769,AC SOLUTION SRL,,0.0,0.0,,,,,,,,
3,0200079037,ADAMA MAKHTESHIM LTD,Distributer,0.0,0.0,,,,,,,,
4,0200288068,Arte Teknoloji Sanayi Ve Ticaret A.S.,Distributer,300000.0,375000.0,A,YEF-TR,Arte Teknoloji Sanayi Ve Ticaret A.S.,Arte Teknoloji,Indirect,Channel,Distributor,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
273,0200320323,,,,,B,YEF-GB,HALLIBURTON ENERGY SERVICES INC,Halliburton,Indirect,Other,,OEM
276,0200324731,,,,,C+,YEF-B,Powerspex Instrumentation B.V.,Powerspex Instrumentation,Indirect,Other,,VAR
278,0200326681,,,,,A,YEF-F,SEFI,SEFI,Indirect,Channel,,Distributor
281,0200338288,,,,,A,YEF-F,SEFI,SEFI,Indirect,Channel,,Distributor


In [86]:
writer = pd.ExcelWriter('/Users/aleksejgukov/Desktop/budget_file.xlsx', engine='xlsxwriter')
new2.to_excel(writer, sheet_name='budget_by_month')
writer.save()