# Catalog Content Analysis
The purpose of this Analysis is to compare one supplier catalog to other supplier catalogs; identifying whether shifting spend would benefit UC.  

In [1]:
import pandas as pd
import zipfile
import numpy as np
import jellyfish
import glob
import sys
import seaborn as sns
import ipysankeywidget
%matplotlib inline


print("Pandas Version:" + str(pd.__version__))
print("Numpy Version:" + str(np.__version__))
print('Python version: ' + sys.version)

Pandas Version:0.18.1
Numpy Version:1.11.1
Python version: 3.5.2 |Anaconda custom (64-bit)| (default, Jul  5 2016, 11:41:13) [MSC v.1900 64 bit (AMD64)]


### Helper Functions
These help process the data, normalize the data, and conduct minimal ETL.  They are seperated into functions because they are used multiple times throughout this analysis.

In [2]:
##################################################################################################################
####  CODE TO NORMALIZE MANUFACTURER NAME AND CREATE UNIQUE PART NUMBERS
##################################################################################################################
def sdx(word):
    norm = jellyfish.soundex(word)
    return norm

def NormalizedManufacturer(DFrame, ManufacturerColumnName, UOMColumnName, ManufacturerCatalogColumn):    
    #Create the soundex value for each manufacturer name
    DFrame[ManufacturerColumnName] = DFrame[ManufacturerColumnName].astype('str')
    DFrame['Normalized Mfgr'] = DFrame[ManufacturerColumnName].apply(sdx)
    DFrame['Normalized Mfr Part'] = DFrame[ManufacturerCatalogColumn].str.replace('-','')

    #Create a new column and fill it with the normalized part name.  I use the '\' for readability of code
    DFrame['UniqueID'] = DFrame[['Normalized Mfgr', 
                                 'Normalized Mfr Part', 
                                 UOMColumnName]].astype('str').apply(lambda x: '|'.join(x), axis=1)
    
    DFrame.drop('Normalized Mfr Part', axis=1)
    return DFrame

##################################################################################################################
####  CODE TO IMPORT CATALOG PRICING
##################################################################################################################

def PullCatalogs(Path):
    allFiles = glob.glob(Path + "/*.txt")
    frame = pd.DataFrame()
    list_ = []
    for file_ in allFiles:
        print("Reading in '{0}'".format(file_))
        df = pd.read_table(file_, index_col=None, header=0, low_memory=False, 
                           encoding='iso-8859-1', dtype={'Supplier Number': str})
        list_.append(df)
        frame = pd.concat(list_)
    return frame

##################################################################################################################
####  PULL DATA FUNCTION WHICH EXTRACTS THE DATA AND CONSOLIDATES IT
##################################################################################################################
def PullData(FolderPath,FileList,FileType):
    
    #for file in directory
    for fileArchive in FileList:
        #Set the zip file object
        zf = zipfile.ZipFile(FolderPath + fileArchive, 'r')

        if FileType == 'PO':
            
            #Set the filename by testing the path to see if PO or Invoice are in the path
            filename = 'POData_' + fileArchive[:-3] + 'csv'
        
            #Try to call the archive and pull the data from the from the archive 
            try:         
                Data = Data.append(pd.read_csv(zf.open(filename), low_memory=False, thousands=',', 
                                               parse_dates=True, encoding='iso-8859-1', 
                                               dtype={'Supplier Number': str,
                                                      'Supplier ID': str}),ignore_index=True)

                print('%s appended to POData' %filename)

            except:
                #Try to use the data as a seed for a new DataFrame
                try:
                    Data = pd.read_csv(zf.open(filename), low_memory=False, thousands=',', parse_dates=True, 
                                       encoding='iso-8859-1', dtype={'Supplier Number': str,
                                                                     'Supplier ID': str})

                    print('%s used to seed the Data' %filename)

                #conclude the file doesnt exist
                except:
                    print('%s doesnt have a Data file' %fileArchive)

            else:
                pass
        
    Data['File'] = FolderPath + fileArchive
    
    return Data

##################################################################################################################
####  FIX THE UOM
##################################################################################################################

def UOMFix(DataFrame, UOMColumn):
    Pack =['PACK', 'PK', 'PACKAGE']
    Case =['CASE','CS','STACK','UOS','PKS','RACKS','RK','TRAY','UNI','INSERTS','CA']
    Bag = ['BAG','BG']
    Box = ['BOX','BX']
    Each= ['UNIT','BOTTLE','EACH','EA','RACK','KIT','TUBES','PIECES','REFILLS','ROLL']
    
    DataFrame[UOMColumn] = DataFrame[UOMColumn].str.replace('.*('+ '|'.join(Pack) +')$','PK', case=False) \
                                               .str.replace('.*('+ '|'.join(Case)+')$','CA', case=False) \
                                               .str.replace('.*('+ '|'.join(Bag) +')$','BG', case=False) \
                                               .str.replace('.*('+ '|'.join(Box) +')$','BX', case=False) \
                                               .str.replace('.*('+ '|'.join(Each) +')$','EA', case=False)
    return DataFrame             

##################################################################################################################
####  FIX SOME OF THE MOST WILD MANUFACTURER NAMES
##################################################################################################################

def MfrFix(DataFrame, MfrCol):
    
    DataFrame[MfrCol] = DataFrame[MfrCol].str.replace('CORNING DISCOVERY LABWARE PHASE II', 'Discovery Labware', case=False) \
                                         .str.replace('Bd Vacutainer Labware Medical', 'BD', case=False) \
                                         .str.replace('Bd Diagnostic Systems', 'BD', case=False)
    
    return DataFrame



##################################################################################################################
####  FUNCTION TO COUNT DISTINCT 
##################################################################################################################

def UniqueCount(x):
    return len(x.unique())



In [29]:
DataSet.head(25).to_csv('C:/Users/Aclark/Desktop/UCSDSpend.csv')

### Read in the PO Data and Catalog Data

We first need to pull the items we actually procured during the previous year.  This allows us to later on gauge the impact, using quantities, of the price differences between the catalog suppliers. The data files are pulled from the zipfiles using the Catalog price extract and PO Full Extract. 

In [7]:
#Path to the files you are looking to import
SpendPath = 'C:/Users/aclark/Box Sync/Marketplace Content/Spend/'
CatalogPath = 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison'


files = [
         'UCSDAugust2017.zip',
         'UCSDDecember2017.zip',
         'UCSDJanuary2018.zip',
         'UCSDNov2017.zip',
         'UCSDOctober2017.zip',
         'UCSDSeptember2017.zip'
        ]

#Read in the data from the Full Download Zip Files
print('Reading in the Spend Data')
DataSet = PullData(SpendPath,files,FileType='PO')

#Normalize the UOM
print()
print('Standardizing the Unit of Measures')
DataSet = UOMFix(DataSet, 'Amount/UOM & UOM')

#Establish the UniqueID for each item
print()
print('Setting Normalized Manufacturer Part Number')
DataSet = NormalizedManufacturer(DataSet, 'Manufacturer', 'Amount/UOM & UOM', 'Manufacturer Catalog No.')

print()
print('Spend file is done')

#Read in the data
print()
print('Reading in the Catalog Data')
Catalogs = PullCatalogs(CatalogPath)

#Normalize the data and cleanse it
#Remove price removed prices
print()
print('Standardizing Columns')
Catalogs = Catalogs[(Catalogs['Price']!= 'Price Removed') & (Catalogs['Price'] !=  'Call for price')]
Catalogs['Price'] = Catalogs['Price'].astype('float')

#Standardize the UOMs
print()
print('Standardizing the Unit of Measures')
Catalogs = UOMFix(Catalogs, 'Packaging UOM')

#Fix the Manufacturer Names that dont easily match 
print()
print('Standardizing the Manufacturer Names')
Catalogs = MfrFix(Catalogs, 'Manufacturer Name')


print()
print('Setting Normalized Manufacturer Part Number')
Catalogs = NormalizedManufacturer(Catalogs, 'Manufacturer Name', 'Packaging UOM', 'Manufacturer Part Number')

print()
print()
Catalogs.groupby('Supplier Number').agg({'List Price': ['count'],
                                         'Price': ['count'],
                                         'Manufacturer Name': [UniqueCount],

                                         'UniqueID': [UniqueCount]
                                         }).fillna(0)


Reading in the Spend Data
POData_UCSDAugust2017.csv used to seed the Data
POData_UCSDDecember2017.csv appended to POData
POData_UCSDJanuary2018.csv appended to POData
POData_UCSDNov2017.csv appended to POData
POData_UCSDOctober2017.csv appended to POData
POData_UCSDSeptember2017.csv appended to POData

Standardizing the Unit of Measures

Setting Normalized Manufacturer Part Number

Spend file is done

Reading in the Catalog Data
Reading in 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison\E_K_SCIE_Organiza_V060_20171024140353509_001.txt'
Reading in 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison\FISHER_S_Universi_V058_20171024125029455_001.txt'
Reading in 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison\NETA_SCI_Universi_V032_20171024132656089_001.txt'
Reading in 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison\SPECTRUM_Universi_V027_20171024134922010_001.txt'
Reading in 'C:/Users/aclark/Box Sync/Marketplace Content/Comparison\TAKARA_B_Organiza_V003_2017

Unnamed: 0_level_0,Manufacturer Name,List Price,Price,UniqueID
Unnamed: 0_level_1,UniqueCount,count,count,UniqueCount
Supplier Number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
30004713,7,3532,3532,3532
30006713,76,6223,6225,6174
30007623,1858,0,197894,197829
30019746,209,113795,113795,113788
30022011,21,0,1254,1245
30022401,1143,0,133179,132937
30186061,45,101140,104221,104206


In [39]:
#Modify the Catalog Supplier Number so it works --- Jenky Adhoc thing
Catalogs['Supplier Number'] = Catalogs['Supplier Number'].str.replace('0030007623','00089577E') \
                                                         .str.replace('0030022401','00039350E') \
                                                         .str.replace('0030102465','00031848E') \
                                                         .str.replace('0030013546','00039396E')
            
Catalogs['Supplier Number'].unique()

array(['0030006713', '00089577E', '0030186061', '0030019746', '0030004713',
       '0030022011', '00039350E'], dtype=object)

### Filter the Spend

In [40]:
#Append the supplier name from the spend data to the catalogs using UCSF Supplier Number
CatalogAnalysis = Catalogs.merge(DataSet.groupby(['IFIS Vendor Number', 'Supplier Name', 'SciQuest Supplier Number']).agg({'PO ID': UniqueCount}).reset_index(),
                                 how='inner',
                                 left_on='Supplier Number', 
                                 right_on='IFIS Vendor Number')


#Set the suppliers to include in the SpendData using the catalog content 
supplier_filter = '|'.join(CatalogAnalysis['Supplier Name'].unique())

#Apply the spend filter to weed out transactions that dont apply to this analysis
Filtered_Spend = DataSet[DataSet['Supplier Name'].str.contains(supplier_filter, case=False) &  
                                                             (DataSet['Normalized Mfgr'].notnull()) &
                                                             (DataSet['Extended Price'] >= 0) &
                                                             (~DataSet['Supplier Name'].str.contains('ASHEVILLE')) &
                                                             (~DataSet['UniqueID'].str.contains('n500')) &
                                                             (~DataSet['Manufacturer'].str.contains('nan')) &
                                                             (~DataSet['UniqueID'].str.contains('nan'))]
                       
#Summarize the filtered spend data to see the impact of the filter
Filtered_Spend.groupby(['Supplier Name','IFIS Vendor Number']).agg({'Extended Price': 'sum',
                                             'UniqueID':[UniqueCount],
                                             'PO No.': [UniqueCount]}).fillna(0)



Unnamed: 0_level_0,Unnamed: 1_level_0,PO No.,Extended Price,UniqueID
Unnamed: 0_level_1,Unnamed: 1_level_1,UniqueCount,sum,UniqueCount
Supplier Name,IFIS Vendor Number,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Fisher Scientific,00089577E,14359,4031810.67,11416
"VWR International, LLC",00039350E,5246,1129482.41,4432




### For each Catalog item, add columns to display the min channel price

In [42]:
#Find the lowest price for each unique ID
LowPrice = CatalogAnalysis.groupby(['UniqueID']).agg({'Price':'min'}).reset_index()


#Merge the Supplier Name, Part Number for each lowest price
MinPriceChannel = LowPrice.merge(CatalogAnalysis.groupby(['UniqueID','Supplier Name','Part Number','Supplier Number', 'SciQuest Supplier Number']).agg({'Price':'min'}).reset_index(), 
                                 how='inner', 
                                 left_on=['UniqueID','Price'], 
                                 right_on=['UniqueID','Price'],
                                 suffixes=('_Catalog', '_MinPrice'))


#merge the minimal price channel back to the Catalog, appending the information to additional columns
CatalogAnalysis = CatalogAnalysis.merge(MinPriceChannel, 
                          how='inner', 
                          left_on=['UniqueID'], 
                          right_on=['UniqueID'],
                          suffixes=('_Catalog', '_MinPrice'))


#Merge the quantity and extended price that went through each channel
CatalogAnalysis = CatalogAnalysis.merge(DataSet.groupby(['UniqueID', 'Supplier Name']).agg(sum)[['Extended Price','Quantity']],
                          how='inner',
                          left_on=['UniqueID','Supplier Name_Catalog'],
                          right_index=True)


CatalogAnalysis['Impact'] = CatalogAnalysis['Quantity'] * (CatalogAnalysis['Price_MinPrice']-CatalogAnalysis['Price_Catalog'])

### Overall Analysis
Identify the total dollars and Count of items that need to be rigged to correct suboptimal spend

In [43]:
Analysis = CatalogAnalysis[CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']] \
                        .groupby(['Supplier Name_Catalog',
                                  'Supplier Name_MinPrice',
                                  'UniqueID']).agg({'Extended Price':'sum',
                                                         # 'UniqueID': UniqueCount,
                                                          'Impact': sum}).reset_index()

Analysis[(Analysis['Supplier Name_MinPrice'] != Analysis['Supplier Name_Catalog']) & 
         (~Analysis['UniqueID'].str.contains('n500')) & 
         (Analysis['Impact'] <= -150)].sort_values('Impact', ascending=True)


Unnamed: 0,Supplier Name_Catalog,Supplier Name_MinPrice,UniqueID,Extended Price,Impact
279,Fisher Scientific,"VWR International, LLC",K112|KK2602|EA,13119.75,-1674.75
603,"VWR International, LLC",Fisher Scientific,M346|30243394|EA,2773.17,-589.34
271,Fisher Scientific,"VWR International, LLC",I532|4315|PK,1735.14,-580.14
167,Fisher Scientific,"VWR International, LLC",D241|W985863|CA,1238.9,-526.45
74,Fisher Scientific,"VWR International, LLC",B522|PM996|EA,1718.6,-516.75
42,Fisher Scientific,"VWR International, LLC",B355|41003|EA,2748.9,-514.29
293,Fisher Scientific,"VWR International, LLC",K516|34120|CA,1442.4,-510.4
168,Fisher Scientific,"VWR International, LLC",D241|W985865|CA,989.4,-419.44
274,Fisher Scientific,"VWR International, LLC",I532|4510|PK,923.58,-394.92
284,Fisher Scientific,"VWR International, LLC",K516|01804|CA,1228.54,-393.66


### Manufacturer Analysis
For a selected Manufacturer, Identify the spend that needs to be moved to correct suboptimal spend

In [None]:
MfrName = 'Kapa Bio'

MfrAnalysis = CatalogAnalysis[CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']] \
                              .groupby(['Supplier Name_Catalog',
                                        'Supplier Name_MinPrice',
                                        'Manufacturer Name',
                                        'Part Number_Catalog',
                                        'Part Number_MinPrice',
                                        'Quantity']).agg({'Extended Price':'sum',
                                                                   'Impact': sum}).reset_index()


MfrAnalysis[(MfrAnalysis['Manufacturer Name'].str.contains(MfrName, case=False)) &
            (MfrAnalysis['Supplier Name_Catalog'] != MfrAnalysis['Supplier Name_MinPrice'])]

### Product List From Supplier X to Supplier Y
Generate the list of items that need to be rigged from one supplier to the other.  This excludes price so that we can provide this to the suppliers as intel

In [17]:
#Identify the items from a pair of suppliers
FromSupplier = 'MEDLINE INDUSTRIES, INC'
ToSupplier = 'MEDLINE INDUSTRIES, INC'


#Price Lag is where we move stuff away from the supplier we identify
CatalogAnalysis[
#                (CatalogAnalysis['Supplier Name_MinPrice'] == ToSupplier) &
                (CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']) &
                (CatalogAnalysis['Supplier Name_Catalog'] == FromSupplier) &
                (CatalogAnalysis['Manufacturer Name'] != 'nan') &
                (CatalogAnalysis['Supplier Name_MinPrice'] != CatalogAnalysis['Supplier Name_Catalog'])][['Part Number_Catalog',
                                                              'Product Description',
                                                              'Packaging UOM',
                                                              'Manufacturer Name',
                                                              'Manufacturer Part Number',
                                                              'Supplier Name_MinPrice',
                                                              'Part Number_MinPrice',                            
                                                              'Extended Price',
                                                              'Quantity',
                                                              'Impact']].to_csv('C:/users/aclark/desktop/PriceLag.csv')


#Price Lag is where we move stuff away from the supplier we identify
CatalogAnalysis[
                (CatalogAnalysis['Supplier Name_MinPrice'] == ToSupplier) &
                (CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']) &
                (CatalogAnalysis['Manufacturer Name'] != 'nan') &
#                (CatalogAnalysis['Supplier Name_Catalog'] == FromSupplier) &
                (CatalogAnalysis['Supplier Name_MinPrice'] != CatalogAnalysis['Supplier Name_Catalog'])][['Part Number_Catalog',
                                                              'Product Description',
                                                              'Packaging UOM',
                                                              'Manufacturer Name',
                                                              'Manufacturer Part Number',
                                                              'Supplier Name_MinPrice',
                                                              'Part Number_MinPrice',                            
                                                              'Extended Price',
                                                              'Quantity',
                                                              'Impact']].to_csv('C:/users/aclark/desktop/PriceLead.csv')

### Find some items worth blocking

In [45]:
#Price Lag is where we move stuff away from the supplier we identify
blk = CatalogAnalysis[CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']] \
                              .groupby(['Supplier Name_Catalog',
                                        'Supplier Name_MinPrice',
                                        'Manufacturer Name',
                                        'Part Number_Catalog',
                                        'Part Number_MinPrice',
                                        'Quantity']).agg({'Extended Price':'sum',
                                                          'Impact': sum}).reset_index().sort_values('Impact', ascending=True)


blk[(blk['Manufacturer Name'] != 'nan') & (blk['Supplier Name_Catalog'] != blk['Supplier Name_MinPrice'])].to_csv('C:/Users/Aclark/Desktop/UCSDPriceAnalysis.csv')

### Generate the Rigging File for BearBuy

In [None]:
#We need the following format for the import file
#SupplierID, Part Number, *Viewable, *Easy Buy, *Easy Buy Text, *SS
#The fields with '*' are editable


#Eliminate extraneous Supplier ID's.  BearBuy will bounce these lines out as errors anyway, but its cleaner.
BogusSupplierID = ['13888834']


#Filter the spend to those items we want to move
CatalogTemplate = CatalogAnalysis[(CatalogAnalysis['Price_MinPrice'] < CatalogAnalysis['Price_Catalog']) &
                                  (~CatalogAnalysis['Supplier ID_MinPrice'].isin(BogusSupplierID)) & 
                                  (~CatalogAnalysis['UniqueID'].str.contains('nan'))] \
                                                 .groupby(['Supplier Number_MinPrice',
                                                           'Supplier ID_MinPrice',
                                                           'Part Number_MinPrice', 
                                                           'Part Number_Catalog',
                                                           ]).agg({'UniqueID': UniqueCount}).reset_index()

#Seed the lowest Price Items with the Part for the target item
CatalogTemplate['* Viewable'] = 'True'
CatalogTemplate['* Easy Buy'] = ''
CatalogTemplate['* Easy Buy Text'] = ''
CatalogTemplate.drop('UniqueID', axis=1, inplace=True)

CatalogTemplate = CatalogTemplate.rename(columns={'Part Number_Catalog': '* SS',
                                                  'Supplier ID_MinPrice': 'SupplierId',
                                                  'Part Number_MinPrice': 'Part Number'})

CatalogTemplate = CatalogTemplate[['SupplierId',
                           'Part Number', 
                           '* Viewable',
                           '* Easy Buy', 
                           '* Easy Buy Text',
                           '* SS']]

#Make the dtype of all columns strings
CatalogTemplate = CatalogTemplate.astype(str)

#concatenate all of the part numbers into *SS to seed search results
CatalogTemplate.groupby(['SupplierId',
                                           'Part Number',
                                           '* Viewable',
                                           '* Easy Buy',
                                           '* Easy Buy Text']).agg({'* SS': lambda x: "%s" % ', '.join(x)}).reset_index()


CatalogTemplate.to_csv('C:/Users/Aclark/Desktop/RiggingFile.txt', sep='\t', index=False)

### Shipping Analysis

In [None]:
DataSet. \
groupby(['Shipping Method']).agg({'PO #': UniqueCount}).sort_values(by='PO #',ascending=False)

In [None]:

#Set the suppliers to include in the SpendData
supplier_filter = '|'.join(['Abcam',
                            'Biolegend',
                            'Santa Cruz Bio',
                            'Stemcell Tech',
                            'Jacksom Immuno'
                            ])

#Apply the spend filter to weed out transactions that dont apply to this analysis
Filtered_Spend = DataSet[DataSet['Supplier Name'].str.contains(supplier_filter, case=False)]
                       

DataSet[DataSet['Supplier Name'].str.contains(supplier_filter, case=False)] \
.groupby(['Supplier Name', 'Shipping Method']).agg({'PO #': UniqueCount}).sort_values(by='PO #',ascending=False)

In [None]:
CatalogAnalysis.groupby('PO ID').agg({'Extended Price': sum}).sum()

### Blocked Item Analysis

In [19]:
#Identify all items where we shouldnt have them blocked

CatalogAnalysis[(CatalogAnalysis['Product visible'] ==False) & 
                (CatalogAnalysis['Price_Catalog'] <= CatalogAnalysis['Price_MinPrice'])
               ][['Supplier Name_Catalog','Part Number_Catalog', 
                  'Price_Catalog', 'Supplier Name_MinPrice','Part Number_Catalog','Price_MinPrice', 'UniqueID']]



Unnamed: 0,Supplier Name_Catalog,Part Number_Catalog,Price_Catalog,Supplier Name_MinPrice,Part Number_Catalog.1,Price_MinPrice,UniqueID
64299,ABCAM INC,ab735-250ul,416.13,ABCAM INC,ab735-250ul,416.13,A125|ab735|EA
158046,FISHER SCIENTIFIC,A18SK4,29.19,FISHER SCIENTIFIC,A18SK4,29.19,F262|A18SK4|EA
161436,FISHER SCIENTIFIC,HC7001GAL,10.20,FISHER SCIENTIFIC,HC7001GAL,10.20,F262|HC7001GAL|EA
190164,FISHER SCIENTIFIC,NC9745682,12.72,FISHER SCIENTIFIC,NC9745682,12.72,C416|0065304|EA
195452,FISHER SCIENTIFIC,NC9122380,105.39,FISHER SCIENTIFIC,NC9122380,105.39,P652|95120F|CA
195731,FISHER SCIENTIFIC,NC9355077,277.39,FISHER SCIENTIFIC,NC9355077,277.39,L241|S930025MG|EA
195785,FISHER SCIENTIFIC,NC9392943,280.84,FISHER SCIENTIFIC,NC9392943,280.84,I511|ANTPM2|EA
195861,FISHER SCIENTIFIC,NC9445282,653.31,FISHER SCIENTIFIC,NC9445282,653.31,L524|V4SC2096|EA
201610,FISHER SCIENTIFIC,NC9681029,142.78,FISHER SCIENTIFIC,NC9681029,142.78,H226|CAT 5616|EA
202268,FISHER SCIENTIFIC,50980495,34.27,FISHER SCIENTIFIC,50980495,34.27,E423|15714S|EA


### Supplier X is too expensive

In [36]:
Supplier = 'VWR INTERNATIONAL, INC.'

OverPriced = CatalogAnalysis[(CatalogAnalysis['Supplier Name_Catalog'] == Supplier) & 
                (CatalogAnalysis['Price_Catalog'] != CatalogAnalysis['Price_MinPrice']) & 
                (CatalogAnalysis['Supplier Name_Catalog'] != CatalogAnalysis['Supplier Name_MinPrice'])][['Supplier Name_Catalog',
                  'Part Number_Catalog', 'Product visible', 
                  'Price_Catalog', 'Supplier Name_MinPrice', 'Manufacturer Name',
                  'Part Number_MinPrice','Price_MinPrice', 'Extended Price']].sort_values('Extended Price', ascending=False)

#OverPriced.to_csv('C:/Users/Aclark/Desktop/Overpriced.csv')
OverPriced


Unnamed: 0,Supplier Name_Catalog,Part Number_Catalog,Product visible,Price_Catalog,Supplier Name_MinPrice,Manufacturer Name,Part Number_MinPrice,Price_MinPrice,Extended Price
702402,"VWR INTERNATIONAL, INC.",90000-762 (EA),True,641.22,FISHER SCIENTIFIC,BD,DF0140074,627.5300,13371.16
702422,"VWR INTERNATIONAL, INC.",90000-368 (EA),True,337.28,FISHER SCIENTIFIC,BD,DF0118072,294.9200,7274.74
171956,"VWR INTERNATIONAL, INC.",95042-736 (EA),True,168.33,FISHER SCIENTIFIC,LONZA WALKERSVILLE INC,NC9525043,165.4100,7100.59
802790,"VWR INTERNATIONAL, INC.",102037-BL (EA),True,220.69,FISHER SCIENTIFIC,Biolegend,50402945,207.3600,5972.29
726828,"VWR INTERNATIONAL, INC.",CV9841N (CS),True,137.76,FISHER SCIENTIFIC,DuPont,19147761,91.6100,5802.22
702410,"VWR INTERNATIONAL, INC.",90000-722 (EA),True,339.35,FISHER SCIENTIFIC,BD,DF0127071,280.4700,5566.14
194155,"VWR INTERNATIONAL, INC.",101369-400 (EA),True,220.42,FISHER SCIENTIFIC,BIOSYNTH INTERNATIONAL INC MS,50853139,197.2900,4992.15
202269,"VWR INTERNATIONAL, INC.",100496-496 (EA),True,37.51,FISHER SCIENTIFIC,Electron Microscopy Sciences,50980495,34.2700,4756.08
129871,"VWR INTERNATIONAL, INC.",82050-482 (CS),True,31.45,"E&K SCIENTIFIC PRODUCTS, INC.",GREINER BIO-ONE,EK-34180,27.0500,4691.59
129485,"VWR INTERNATIONAL, INC.",82050-842 (CS),True,77.37,FISHER SCIENTIFIC,GREINER BIO-ONE,07000208,76.1400,3974.88


In [4]:
[i for i in DataSet]

['PO ID',
 'PO #',
 'Creation Date',
 'Original Revision Date',
 'Last Revision Date',
 'Last Distribution Date',
 'Workflow Completion Date',
 'PO Closed Date',
 'Workflow Folder/Step Name',
 'Approver/Assignee Name',
 'Supplier ID',
 'Supplier Name',
 'Supplier Number',
 'Supplier Preference',
 'Suppl-Cust Acct #',
 'PR Line ID',
 'PO Line ID',
 'PO Line #',
 'Item Type',
 'Spot Buy Flag',
 'Form Type',
 'SKU/Catalog #',
 'Product Description',
 'Manufacturer',
 'Mfr Catalog #',
 'Amount/UOM & UOM',
 'Product Size',
 'Category Preference',
 'Category Level 1',
 'Category Level 2',
 'Category Level 3',
 'Category Level 4',
 'Category Level 5',
 'Category Name',
 'CAS #',
 'UNSPSC',
 'Commodity Code',
 'Radioactive',
 'Hazmat',
 'Controlled',
 'RadMinor',
 'Select Agent',
 'Toxin',
 'Recycled',
 'Green Product',
 'Green Product Description',
 'LEED Compliance Details',
 'Energy Star',
 'ProdFlag 10',
 'Quantity',
 'Unit Price',
 'Unit Price Date',
 'Extended Price',
 'List Price',
 'Li

In [6]:
DataSet.groupby(['Supplier Name','Form Type']).agg({'Extended Price': sum,
                                      'PO #': UniqueCount}).sort_values('Extended Price', ascending=False).to_csv('C:/Users/Aclark/Desktop/suppliers.csv')

In [21]:
DataSet['Commodity Code'].unique()

array([ 70,  25,  12, 107, 198, 111, 278, 130, 307, 332, 323,  27, 195,
       134,   5, 170,  82,  93, 150, 329, 166, 226, 322, 256, 147, 205,
       184, 208, 109, 330, 243, 331,  85, 262,  17,  89, 120,  55, 325,
       188, 298, 327, 265, 227, 328, 258, 324, 326, 333], dtype=int64)