In [1]:
# Packages
import os
import pandas as pd
from google.cloud import bigquery
from google.oauth2 import service_account

# Authenticate GCP/BQ and connect
credentials = service_account.Credentials.from_service_account_file(os.environ['gcp_credentials'])
project_id = 'freightwaves-data-science'
client = bigquery.Client(credentials=credentials, project=project_id)





In [2]:
# List of accessorial charges, ordered by number of occurrences
query_job = client.query("""
    select
      accessorial_charge_description
      ,count(accessorial_charge_description) count
    from `freightwaves-data-factory.warehouse.beetlejuice`
    group by accessorial_charge_description
    order by count(accessorial_charge_description) desc
    """)

query = query_job.result().to_dataframe()
query.head()



Unnamed: 0,accessorial_charge_description,count
0,FUEL SURCHARGE,75255549
1,BASE CHARGE,25097381
2,FUEL CHARGE,19805842
3,DISCOUNT,11886246
4,BASE CHARGE ...,9760971


In [3]:
# Percentage of all loads that have accessorial charges besides base & fuel
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
  from `freightwaves-data-factory.warehouse.beetlejuice`
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
)

select
  true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on 1=1
""")

query = query_job.result().to_dataframe()
query.head()


Unnamed: 0,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,47454195,113217127,0.419143


In [4]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by transportation mode
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,transportation_mode_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by transportation_mode_description
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,transportation_mode_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by transportation_mode_description
)

select
  al.transportation_mode_description
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.transportation_mode_description = true_accessorial.transportation_mode_description
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc
""")

query = query_job.result().to_dataframe()
query.head()


Unnamed: 0,transportation_mode_description,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,TOWAWAY SERVICE,1,1,1.0
1,BREAKBULK OCEAN,69,69,1.0
2,LEASE CARS,2,2,1.0
3,CUST PICKUP/EXPENSE,52883,52883,1.0
4,RAIL FLAT CAR,7,7,1.0


In [5]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by origin and destination state
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by origin_state
    ,destination_state
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by origin_state, destination_state
)

select
  al.origin_state
  ,al.destination_state
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.origin_state = true_accessorial.origin_state
  and al.destination_state = true_accessorial.destination_state
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc, origin_state, destination_state
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,origin_state,destination_state,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,*,KY,1,1,1.0
1,-,CO,4,4,1.0
2,.,.,2,2,1.0
3,.,KA,20,20,1.0
4,.,WE,2,2,1.0


In [6]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by primary_naics_code
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,primary_naics_code
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by primary_naics_code
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,primary_naics_code
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by primary_naics_code
)

select
  al.primary_naics_code
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.primary_naics_code = true_accessorial.primary_naics_code
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,primary_naics_code,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,442299,3195,3195,1.0
1,323111,24,24,1.0
2,312112,4736,4736,1.0
3,336340,17,17,1.0
4,334419,71,71,1.0


In [14]:
# Total accessorial charges divided by total amount paid, by state-to-state lane
query_job = client.query("""
with accessorial_charges as (
  select 
    sum(accessorial_charge_amount) as accessorial_charges_sum
    ,cass_shipment_id
    ,origin_state
    ,destination_state
    ,accessorial_charge_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
    and amount_paid is not null
  group by
    cass_shipment_id
    ,origin_state
    ,destination_state
    ,accessorial_charge_description
  order by cass_shipment_id
),
total_accessorial_charges as (
  select
    sum(ac.accessorial_charges_sum) as total_accessorial_charges
    ,ac.origin_state
    ,ac.destination_state
  from accessorial_charges as ac
  group by ac.origin_state, ac.destination_state
),
amount_paid as (
  select
    max(amount_paid) as amount_paid
    ,cass_shipment_id
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where amount_paid is not null
  group by
    cass_shipment_id
    ,origin_state
    ,destination_state
  order by cass_shipment_id
),
total_amount_paid as (
  select
    sum(ap.amount_paid) total_amount_paid
    ,ap.origin_state
    ,ap.destination_state
  from amount_paid as ap
  group by ap.origin_state, ap.destination_state
)

select
  t_acc.total_accessorial_charges
  ,t_amt.total_amount_paid
  ,safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) as pct_of_total_charge
  ,t_acc.origin_state
  ,t_acc.destination_state
from total_accessorial_charges as t_acc
join total_amount_paid as t_amt
  on t_acc.origin_state = t_amt.origin_state
    and t_acc.destination_state = t_amt.destination_state
order by safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,total_accessorial_charges,total_amount_paid,pct_of_total_charge,origin_state,destination_state
0,105.66,10.0,10.566,AB,DK
1,40.0,4.0,10.0,CA,HM
2,1023.57,113.03,9.055737,IN,NW
3,223.76,26.0,8.606154,CH,26
4,3286.04,494.78,6.641416,OR,38


In [13]:
# Total accessorial charges divided by total amount paid, by shipper
query_job = client.query("""
with accessorial_charges as (
  select 
    sum(accessorial_charge_amount) as accessorial_charges_sum
    ,cass_shipment_id
    ,shipper_master_code
    ,accessorial_charge_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
    and amount_paid is not null
  group by
    cass_shipment_id
    ,shipper_master_code
    ,accessorial_charge_description
  order by cass_shipment_id
),
total_accessorial_charges as (
  select
    sum(ac.accessorial_charges_sum) as total_accessorial_charges
    ,ac.shipper_master_code
  from accessorial_charges as ac
  group by ac.shipper_master_code
),
amount_paid as (
  select
    max(amount_paid) as amount_paid
    ,cass_shipment_id
    ,shipper_master_code
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where amount_paid is not null
  group by
    cass_shipment_id
    ,shipper_master_code
  order by cass_shipment_id
),
total_amount_paid as (
  select
    sum(ap.amount_paid) total_amount_paid
    ,ap.shipper_master_code
  from amount_paid as ap
  group by ap.shipper_master_code
)

select
  t_acc.total_accessorial_charges
  ,t_amt.total_amount_paid
  ,safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) as pct_of_total_charge
  ,t_acc.shipper_master_code
from total_accessorial_charges as t_acc
join total_amount_paid as t_amt
  on t_acc.shipper_master_code = t_amt.shipper_master_code
order by safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,total_accessorial_charges,total_amount_paid,pct_of_total_charge,shipper_master_code
0,544288200.0,337753500.0,1.611496,1462
1,34941280.0,23255280.0,1.502509,1451
2,3680420000.0,3004877000.0,1.224815,1452
3,281571400.0,235246600.0,1.19692,1446
4,235137100.0,228976200.0,1.026906,1361


In [12]:
# Total accessorial charges divided by total amount paid, by carrier
query_job = client.query("""
with accessorial_charges as (
  select 
    sum(accessorial_charge_amount) as accessorial_charges_sum
    ,cass_shipment_id
    ,unique_carrier_id
    ,accessorial_charge_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
    and amount_paid is not null
  group by
    cass_shipment_id
    ,unique_carrier_id
    ,accessorial_charge_description
  order by cass_shipment_id
),
total_accessorial_charges as (
  select
    sum(ac.accessorial_charges_sum) as total_accessorial_charges
    ,ac.unique_carrier_id
  from accessorial_charges as ac
  group by ac.unique_carrier_id
),
amount_paid as (
  select
    max(amount_paid) as amount_paid
    ,cass_shipment_id
    ,unique_carrier_id
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where amount_paid is not null
  group by
    cass_shipment_id
    ,unique_carrier_id
  order by cass_shipment_id
),
total_amount_paid as (
  select
    sum(ap.amount_paid) total_amount_paid
    ,ap.unique_carrier_id
  from amount_paid as ap
  group by ap.unique_carrier_id
)

select
  t_acc.total_accessorial_charges
  ,t_amt.total_amount_paid
  ,safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) as pct_of_total_charge
  ,t_acc.unique_carrier_id
from total_accessorial_charges as t_acc
join total_amount_paid as t_amt
  on t_acc.unique_carrier_id = t_amt.unique_carrier_id
order by safe_divide(t_acc.total_accessorial_charges, t_amt.total_amount_paid) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,total_accessorial_charges,total_amount_paid,pct_of_total_charge,unique_carrier_id
0,150.0,1.0,150.0,TL26164R26164
1,45.0,1.0,45.0,FR61702F16511
2,740933.8,23852.84,31.062708,AR60666AM9W5S
3,63.0,7.0,9.0,IS60197V28341
4,20831.25,3956.79,5.264684,PY61437P61437
