In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the freight cost analysis export into a DataFrame.
file_path = "../../data/input/Freight_Cost_Analysis_CY2024-03.25.csv"
df = pd.read_csv(
    file_path,
    encoding="latin1",   # decode the file as Latin-1 rather than the UTF-8 default
    low_memory=False,    # read the file in one pass so each column gets a single inferred dtype
)
df.head()

In [None]:
# Show the column labels of the loaded frame to see which fields are available.
df.columns

Index(['SITE', 'SITE DESCRIPTION', 'SUPPLIER NO', 'SUPPLIER NAME',
       'INVOICE ID', 'INVOICE NO', 'DATE POSTED', 'PROJECT ID', 'PROJECT NAME',
       'ACCOUNT', 'ACCOUNT DESCRIPTION', 'PLANNED DELIVERY DATE',
       'SHIP TO ZIP', 'PO NO', 'PO LINE NO', 'PO REL NO', 'RECEIPT NO',
       'PART NO', 'PART DESCRIPTION', 'COMM 1', 'COMM 2', 'PO PURCH QTY',
       'PURCH UOM', 'PO INV QTY', 'INV UOM', 'INVOICED LINE QTY',
       'INVOICE LINE TOTAL', 'PO PRICE'],
      dtype='object')

In [None]:
# Count distinct projects, invoices and POs per account, sorted so the
# accounts with the most distinct invoices come first.
#
# The value columns are selected with a list (double brackets). Indexing a
# groupby with a bare tuple of keys -- ['A', 'B', 'C'] -- is deprecated and
# raises in pandas >= 2.0.
summary = (
    df.groupby(['ACCOUNT', 'ACCOUNT DESCRIPTION'])[['PROJECT ID', 'INVOICE ID', 'PO NO']]
    .nunique()
    .reset_index()
    .sort_values(by='INVOICE ID', ascending=False)
    # rename() returns a new frame, so the cell can be re-run safely
    # (no inplace mutation of an already-displayed object).
    .rename(columns={'INVOICE ID': 'Unique Invoice Count'})
)
summary

  summary = df.groupby(['ACCOUNT', 'ACCOUNT DESCRIPTION'])['PROJECT ID','INVOICE ID','PO NO'].nunique().reset_index().sort_values(by='INVOICE ID', ascending=False)


Unnamed: 0,ACCOUNT,ACCOUNT DESCRIPTION,PROJECT ID,Unique Invoice Count,PO NO
23,2008,Received Not Yet Invoiced,26528,152613,138849
72,5504,PROJECT Freight,17863,55942,45783
66,5205,PROJECT PO Variance M181 & M182,11269,24488,22183
63,5200,PROJECT Supplies and Materials,3723,7403,3868
28,2015,Sales Tax Payable,2246,7069,4040
...,...,...,...,...,...
59,3000,Distribution (Kiefers),0,1,0
122,7200,Accrued Credit Card Expense,0,1,0
1,1225,Other Receivable,0,1,0
86,5851,COGS - Procurement Services,0,1,1


In [None]:
# For every project / invoice / supplier combination, count how many
# distinct PO numbers and invoice numbers appear on its lines.
pivot_table = pd.pivot_table(
    df,
    index=['PROJECT NAME', 'PROJECT ID', 'INVOICE ID', 'SUPPLIER NO', 'SUPPLIER NAME'],
    values=['PO NO', 'INVOICE NO'],
    aggfunc=pd.Series.nunique,
)

# Order by project, then invoice, while those keys are still index levels,
# then flatten the index back into ordinary columns.
pivot_table = pivot_table.sort_values(by=['PROJECT ID', 'INVOICE ID'])
pivot_table = pivot_table.reset_index()

pivot_table

Unnamed: 0,PROJECT NAME,PROJECT ID,INVOICE ID,SUPPLIER NO,SUPPLIER NAME,INVOICE NO,PO NO
0,AMITA Health Holding,1000-D,442358,108164,"Mannington Mills, Inc.",1,1
1,AMITA Health Holding,1000-D,448262,125814,All Tile Holdings LLC,1,0
2,AMITA Health Holding,1000-D,448663,890,All Surfaces,1,1
3,AMITA Health Holding,1000-D,462578,100613,"Florstar Sales, Inc",1,1
4,AMITA Health Holding,1000-D,489433,100613,"Florstar Sales, Inc",1,1
...,...,...,...,...,...,...,...
161190,Longvine,SPWV33527,472107,123788,Castle Rock Interior Solutions LLC,1,1
161191,Longvine,SPWV33527,481195,103121,Professional Flooring Supply Co,1,1
161192,Longvine,SPWV33527,491662,124876,J & M Installers LLC,1,1
161193,Longvine,SPWV33527,503059,121591,Custom Floors,1,1


In [None]:
# Keep only the lines that were posted to account 5504
matching_invoices = df.loc[df['ACCOUNT'].eq(5504)]

# Flag every line whose invoice has at least one line on account 5504
# (adds the flag column to df in place)
df['Has Matching Account 5504'] = df['INVOICE ID'].isin(
    matching_invoices['INVOICE ID']
)

# Show the invoice id next to its flag
df.loc[:, ['INVOICE ID', 'Has Matching Account 5504']]

Unnamed: 0,INVOICE ID,Has Matching Account 5504
0,433731,False
1,433340,True
2,433340,True
3,433340,True
4,433340,True
...,...,...
441702,1731885,False
441703,1731996,False
441704,1732937,False
441705,1736749,False


In [None]:
# Preview the first two rows of df to inspect its columns and sample values.
df.head(2)

Unnamed: 0,SITE,SITE DESCRIPTION,SUPPLIER NO,SUPPLIER NAME,INVOICE ID,INVOICE NO,DATE POSTED,PROJECT ID,PROJECT NAME,ACCOUNT,...,COMM 1,COMM 2,PO PURCH QTY,PURCH UOM,PO INV QTY,INV UOM,INVOICED LINE QTY,INVOICE LINE TOTAL,PO PRICE,Has Matching Account 5504
0,BNB,Beckers New Brighton,102548,Lonseal Flooring,433731,0007795-CM,3-Jan-24,2311121922,REGIONS HOSPITAL 4TH MRI,5400,...,,,,,,,,-600.0,,False
1,BNB,Beckers New Brighton,104716,Hank's Specialties,433340,173373,3-Jan-24,2312127706,UOFM MOLECULAR & CELLULAR BIOLOGY P,2008,...,1ACC,1ACC,4.0,EA,4.0,EA,4.0,18.4,18.4,True


In [None]:
# Attach the 'Has Matching Account 5504' flag from df to the pivot_table.
#
# df holds one row per invoice line, so INVOICE ID repeats there. Merging the
# raw two-column slice would repeat every pivot row once per matching line
# and inflate the table. The flag is derived from INVOICE ID alone, so keeping
# one row per invoice loses no information.
invoice_flags = (
    df[['INVOICE ID', 'Has Matching Account 5504']]
    .drop_duplicates(subset='INVOICE ID')
)

pivot_table = (
    pivot_table
    # Drop a flag column left over from a previous run of this cell, so
    # re-running does not create '_x' / '_y' suffixed duplicates.
    .drop(columns=['Has Matching Account 5504'], errors='ignore')
    .merge(
        invoice_flags,
        on='INVOICE ID',
        how='left',
        # Fail loudly if the right side ever has duplicate invoice ids again.
        validate='many_to_one',
    )
)

# Display the updated pivot_table
pivot_table

Unnamed: 0,PROJECT NAME,PROJECT ID,INVOICE ID,SUPPLIER NO,SUPPLIER NAME,INVOICE NO,PO NO,Exists in Pivot Table,Has Matching Account 5504_x,Has Matching Account 5504_y
0,AMITA Health Holding,1000-D,442358,108164,"Mannington Mills, Inc.",1,1,,True,True
1,AMITA Health Holding,1000-D,442358,108164,"Mannington Mills, Inc.",1,1,,True,True
2,AMITA Health Holding,1000-D,442358,108164,"Mannington Mills, Inc.",1,1,,True,True
3,AMITA Health Holding,1000-D,442358,108164,"Mannington Mills, Inc.",1,1,,True,True
4,AMITA Health Holding,1000-D,448262,125814,All Tile Holdings LLC,1,0,,False,False
...,...,...,...,...,...,...,...,...,...,...
1664346,Longvine,SPWV33527,503059,121591,Custom Floors,1,1,True,False,False
1664347,Longvine,SPWV33527,503059,121591,Custom Floors,1,1,True,False,False
1664348,Longvine,SPWV33527,503059,121591,Custom Floors,1,1,True,False,False
1664349,Longvine,SPWV33527,503059,121591,Custom Floors,1,1,True,False,False
