In [2]:
# Packages
import os
import pandas as pd
from google.cloud import bigquery
from google.oauth2 import service_account

# Authenticate GCP/BQ and connect
credentials = service_account.Credentials.from_service_account_file(os.environ['gcp_credentials'])
project_id = 'freightwaves-data-science'
client = bigquery.Client(credentials=credentials, project=project_id)



In [12]:
# List of accessorial charges, ordered by number of occurrences
query_job = client.query("""
    select
      accessorial_charge_description
      ,count(accessorial_charge_description) count
    from `freightwaves-data-factory.warehouse.beetlejuice`
    group by accessorial_charge_description
    order by count(accessorial_charge_description) desc
    """)

query = query_job.result().to_dataframe()
query.head()



Unnamed: 0,accessorial_charge_description,count
0,FUEL SURCHARGE,75219781
1,BASE CHARGE,25023829
2,FUEL CHARGE,19625809
3,DISCOUNT,11861836
4,BASE CHARGE ...,9760971


In [11]:
# Percentage of all loads that have accessorial charges besides base & fuel
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
  from `freightwaves-data-factory.warehouse.beetlejuice`
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
)

select
  true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on 1=1
""")

query = query_job.result().to_dataframe()
query.head()


Unnamed: 0,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,47222308,112961602,0.418039


In [13]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by transportation mode
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,transportation_mode_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by transportation_mode_description
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,transportation_mode_description
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by transportation_mode_description
)

select
  al.transportation_mode_description
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.transportation_mode_description = true_accessorial.transportation_mode_description
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc
""")

query = query_job.result().to_dataframe()
query.head()


Unnamed: 0,transportation_mode_description,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,BACKHAUL,66,66,1.0
1,RAIL FLAT CAR,7,7,1.0
2,MILITARY AIRLIFT SVC,30,30,1.0
3,BREAKBULK OCEAN,69,69,1.0
4,WAREHOUSE,785,785,1.0


In [15]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by origin and destination state
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by origin_state
    ,destination_state
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by origin_state
    ,destination_state
)

select
  al.origin_state
  ,al.destination_state
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.origin_state = true_accessorial.origin_state
  and al.destination_state = true_accessorial.destination_state
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc, origin_state, destination_state
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,origin_state,destination_state,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,*,KY,1,1,1.0
1,-,CO,4,4,1.0
2,.,.,2,2,1.0
3,.,KA,20,20,1.0
4,.,WE,2,2,1.0


In [16]:
# Percentage of all loads that have accessorial charges besides base & fuel, grouped by primary_naics_code
query_job = client.query("""
with all_loads as (
  select 
    count(distinct cass_shipment_id) as all_loads
    ,primary_naics_code
  from `freightwaves-data-factory.warehouse.beetlejuice`
  group by primary_naics_code
),
loads_with_true_accessorial as (
  select
    count(distinct cass_shipment_id) as loads_with_true_accessorial
    ,primary_naics_code
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
  group by primary_naics_code
)

select
  al.primary_naics_code
  ,true_accessorial.loads_with_true_accessorial
  ,al.all_loads
  ,safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) as pct_loads_with_accessorial
from all_loads as al
join loads_with_true_accessorial as true_accessorial
  on al.primary_naics_code = true_accessorial.primary_naics_code
order by safe_divide(true_accessorial.loads_with_true_accessorial, al.all_loads) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,primary_naics_code,loads_with_true_accessorial,all_loads,pct_loads_with_accessorial
0,237110,2,2,1.0
1,312112,4736,4736,1.0
2,336340,17,17,1.0
3,334419,71,71,1.0
4,442299,3195,3195,1.0


In [17]:
# Total accessorial charges divided by total amount paid, by state-to-state lane
query_job = client.query("""
with accessorial_charges_summed as (
  select
    sum(accessorial_charge_amount) as total_accessorial_charges
    ,amount_paid
    ,origin_state
    ,destination_state
  from `freightwaves-data-factory.warehouse.beetlejuice`
  where accessorial_charge_description not like "%Fuel%"
    and accessorial_charge_description not like "%FUEL%"
    and accessorial_charge_description not like "%BASE%"
    and origin_state is not null
    and destination_state is not null
  group by 
    amount_paid
    ,origin_state
    ,destination_state
  order by origin_state, destination_state
)

select
  sum(total_accessorial_charges) as total_accessorial_charges
  ,sum(amount_paid) as total_amount_paid
  ,safe_divide(sum(accessorial_charges_summed.total_accessorial_charges), sum(amount_paid)) as pct_of_total_charge
  ,origin_state
  ,destination_state
from accessorial_charges_summed
group by origin_state, destination_state
order by safe_divide(sum(accessorial_charges_summed.total_accessorial_charges), sum(amount_paid)) desc
""")

query = query_job.result().to_dataframe()
query.head()

Unnamed: 0,total_accessorial_charges,total_amount_paid,pct_of_total_charge,origin_state,destination_state
0,14790.0,85.0,174.0,37,IN
1,14689177.28,139234.88,105.499263,TN,UP
2,6675.0,75.0,89.0,NT,MI
3,3235.0,40.0,80.875,CA,42
4,13080.0,180.0,72.666667,PU,CT
