This program is designed for the FedEx shipping report of the Goodwill Central Coast E-commerce Department. It takes the FedEx shipping report spreadsheet (.xls format) and provides the following: aggregated shipping totals by invoice number, aggregated shipping totals for a specified date range, and aggregated residential shipping charges for a specified date range.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw FedEx shipping report. The path is relative, so the workbook
# must sit in the directory the notebook is run from.
df = pd.read_excel("fed_ex_05-10.xls")

Data Cleaning & Organizing 

In [3]:
# Reduce df to the columns needed for the analysis.
# Besides the invoice/shipment identifiers and the base transportation charge, each row
# carries eight (description, amount) surcharge pairs whose column names differ only by
# a suffix: "" for the first pair, then ".1" through ".7".
base_columns = ["Invoice Date", "Shipment Date", "Invoice Number",
                "Express or Ground Tracking ID", "Original Customer Reference",
                "Transportation Charge Amount"]

charge_columns = []
for suffix in [""] + [f".{n}" for n in range(1, 8)]:
    charge_columns += [f"Tracking ID Charge Description{suffix}",
                       f"Tracking ID Charge Amount{suffix}"]

# .copy() gives an independent frame, so the column assignments made in later cells
# do not trigger pandas' SettingWithCopyWarning.
df = df[base_columns + charge_columns + ["Net Charge Amount"]].copy()

In [4]:
# Parse both date columns into datetime64 values: each column is first cast to str
# and then handed to pd.to_datetime.
# NOTE(review): presumably the sheet stores dates as numbers such as 20220506, which is
# why the str cast comes first — confirm against the raw report.
for date_col in ("Invoice Date", "Shipment Date"):
    df[date_col] = pd.to_datetime(df[date_col].astype(str))

In [5]:
# Inspect dtypes and non-null counts per column to verify the cleaning steps.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16750 entries, 0 to 16749
Data columns (total 23 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Invoice Date                      16750 non-null  datetime64[ns]
 1   Shipment Date                     16750 non-null  datetime64[ns]
 2   Invoice Number                    16750 non-null  int64         
 3   Express or Ground Tracking ID     16750 non-null  int64         
 4   Original Customer Reference       16729 non-null  object        
 5   Transportation Charge Amount      16739 non-null  float64       
 6   Tracking ID Charge Description    10209 non-null  object        
 7   Tracking ID Charge Amount         10209 non-null  float64       
 8   Tracking ID Charge Description.1  3851 non-null   object        
 9   Tracking ID Charge Amount.1       3851 non-null   float64       
 10  Tracking ID Charge Description.2  741 non-null

Aggregated Total Charges by Invoice #

In [6]:
# Print, for every invoice, the base transportation charge, the surcharges summed per
# description, and the grand totals.
#
# The surcharge column pairs are discovered from df.columns rather than listed by hand,
# so every "Tracking ID Charge Description*/Amount*" pair present in df contributes to
# the totals.
desc_prefix = "Tracking ID Charge Description"
amount_prefix = "Tracking ID Charge Amount"
charge_suffixes = [col[len(desc_prefix):] for col in df.columns if col.startswith(desc_prefix)]

invoice_nums = list(df["Invoice Number"].unique()) #all invoice numbers, in order of first appearance
for invoice_num in invoice_nums:
    df_invoice = df[df["Invoice Number"] == invoice_num]
    transportation_base = df_invoice["Transportation Charge Amount"].sum()

    print("INVOICE NUMBER", invoice_num)
    print("Transportation Base:", transportation_base)

    #stack all description columns on top of each other, creating one long column
    id_series = pd.concat([df_invoice[desc_prefix + suffix] for suffix in charge_suffixes])
    #stack the matching amount columns in the same order so rows stay aligned
    amount_series = pd.concat([df_invoice[amount_prefix + suffix] for suffix in charge_suffixes])

    #combine description & amount into one two-column frame
    combo = pd.concat([id_series, amount_series], axis=1)
    combo.columns = ["Additional_Charges", "Totals"] #explicit names; no reliance on positional 0/1 labels
    extra_charges = combo["Totals"].sum()

    #sum the amounts per surcharge description, largest first
    print(combo["Totals"].groupby(combo["Additional_Charges"]).sum().sort_values(ascending=False))

    print()
    print("Total 'Extra Charges' NOT including Transportation Base:", extra_charges)

    print("Total Transportation Base + 'Extra Charges':", transportation_base + extra_charges)

    #the invoice's own net charge, printed for comparison with the line above
    print("Total Net Shipping Charges:", df_invoice["Net Charge Amount"].sum())
    print('*********************************************************************************')

INVOICE NUMBER 762252859
Transportation Base: 2547.37
Additional_Charges
Declared Value         178.00
DAS Resi               115.50
DAS Extended Resi       49.95
Adult Signature          8.32
DAS Comm                 8.25
Additional Handling      4.50
Name: Totals, dtype: float64

Total 'Extra Charges' NOT including Transportation Base: 364.52
Total Transportation Base + 'Extra Charges': 2911.89
Total Net Shipping Charges: 2911.8900000000003
*********************************************************************************
INVOICE NUMBER 762953919
Transportation Base: 4183.92
Additional_Charges
Declared Value         254.00
DAS Resi               193.05
DAS Extended Resi      101.75
Oversize Charge         61.79
Additional Handling     13.50
DAS Comm                11.55
AHS - Dimensions         4.50
Adult Signature          4.16
DAS Extended Comm        3.70
Name: Totals, dtype: float64

Total 'Extra Charges' NOT including Transportation Base: 648.0
Total Transportation Base + 'Extra 

Shipment Date Filter

In [7]:
# Keep only the shipments whose Shipment Date falls inside the reporting window.
# Series.between includes both endpoints by default; edit the two dates below to
# report on a different period.
date_range_ship = df[df["Shipment Date"].between('2022-04-01','2022-04-30')] #filter by date range

In [8]:
# Totals for all shipments inside the selected shipment-date range.
#
# The surcharge column pairs are discovered from date_range_ship.columns rather than
# listed by hand, so every "Tracking ID Charge Description*/Amount*" pair present in the
# frame contributes to the totals.
desc_prefix = "Tracking ID Charge Description"
amount_prefix = "Tracking ID Charge Amount"
charge_suffixes = [col[len(desc_prefix):] for col in date_range_ship.columns
                   if col.startswith(desc_prefix)]

#stack all description columns on top of each other, creating one long column
id_april_shipment = pd.concat([date_range_ship[desc_prefix + suffix] for suffix in charge_suffixes])

#stack the matching amount columns in the same order so rows stay aligned
amount_april_shipment = pd.concat([date_range_ship[amount_prefix + suffix] for suffix in charge_suffixes])

#combine description & amount into one two-column frame
combo_april = pd.concat([id_april_shipment, amount_april_shipment], axis=1)
combo_april.columns = ["Additional_Charges", "Totals"] #explicit names; no reliance on positional 0/1 labels

transportation_base = date_range_ship["Transportation Charge Amount"].sum()
extra_charges = combo_april["Totals"].sum()

print("TOTALS FOR ALL INVOICES WITHIN SPECIFIED SHIPPING DATE RANGE")
print("TRANSPORTATION BASE CHARGE:", transportation_base)
#sum the amounts per surcharge description, largest first
print(combo_april["Totals"].groupby(combo_april["Additional_Charges"]).sum().sort_values(ascending=False))
print("*****************************************************************************************")
print("Total 'Extra Charges' NOT including Transportation Base:", extra_charges)
print("April Shipment TOTALS:", transportation_base + extra_charges)
#net charge as reported on the rows themselves, printed for comparison with the line above
print("Net Charge Amount:",date_range_ship["Net Charge Amount"].sum())

TOTALS FOR ALL INVOICES WITHIN SPECIFIED SHIPPING DATE RANGE
TRANSPORTATION BASE CHARGE: 20555.9
Additional_Charges
Residential            2858.30
Declared Value         1124.24
DAS Resi                787.76
DAS Extended Resi       488.76
Address Correction      163.24
Unauthorized OS         129.56
Oversize Charge         128.82
DAS Comm                 80.84
AHS - Dimensions         40.47
Additional Handling      31.53
Adult Signature          25.92
DAS Extended Comm        13.44
Direct Signature         13.00
DAS Hawaii Comm           6.25
Indirect Signature        4.32
Name: Totals, dtype: float64
*****************************************************************************************
Total 'Extra Charges' NOT including Transportation Base: 5896.45
April Shipment TOTALS: 26452.350000000002
Net Charge Amount: 26452.35


In [9]:
# Sanity check: the most recent Shipment Date that made it into the filtered range.
date_range_ship["Shipment Date"].max() #check to see latest date in report

Timestamp('2022-04-29 00:00:00')

In [10]:
# Sanity check on the full (unfiltered) frame: the most recent Invoice Date in the report.
df["Invoice Date"].max() #Check to see latest invoice date

Timestamp('2022-05-06 00:00:00')

Residential Charges for Date Range 

In [11]:
# Build one long (Shipment Date, Shipment_Description) table for the selected date range:
# one row per shipment per surcharge slot, keeping the original row index.
#
# The description columns are discovered from date_range_ship.columns rather than listed
# by hand, so every "Tracking ID Charge Description*" column present in the frame is
# searched for residential charges.
desc_prefix = "Tracking ID Charge Description"
desc_columns = [col for col in date_range_ship.columns if col.startswith(desc_prefix)]

#repeat the Shipment Date column once per description column so both stacks line up row for row
date_april_totals = pd.concat([date_range_ship["Shipment Date"]] * len(desc_columns))

#stack all description columns on top of each other, creating one long column
id_april_totals = pd.concat([date_range_ship[col] for col in desc_columns])

#final df; the description column is named explicitly instead of renaming a positional 0 label
total_res = pd.concat([date_april_totals, id_april_totals.rename("Shipment_Description")], axis=1)


In [12]:
# Keep just the rows whose description is 'Residential', newest shipment first.
is_residential = total_res["Shipment_Description"] == 'Residential'
Residential_Charges = total_res[is_residential].sort_values(by="Shipment Date", ascending=False)

In [13]:
# Display the residential charges ordered by Shipment Date, ascending (the sort_values default);
# the result is only displayed, Residential_Charges itself is not modified.
Residential_Charges.sort_values(by='Shipment Date')

Unnamed: 0,Shipment Date,Shipment_Description
13130,2022-04-01,Residential
13746,2022-04-01,Residential
13780,2022-04-01,Residential
13784,2022-04-01,Residential
13790,2022-04-01,Residential
...,...,...
16735,2022-04-28,Residential
16728,2022-04-28,Residential
16727,2022-04-28,Residential
16740,2022-04-29,Residential


In [14]:
# Number of rows in Residential_Charges, i.e. how many 'Residential' charge entries
# fall inside the selected shipment-date range.
len(Residential_Charges) #Total Residential Charges in specified date range

1415