# Import libraries

In [1]:
import vortexasdk as v
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import dateutil.relativedelta
import time
import plotly.express as px
import plotly.graph_objects as go

# Search Product & Geography IDs
Use the code snippets below to search and save Vortexa product and geography IDs.


In [2]:
# search for product ids (remove hashtags below to search)

# product_search = v.Products().search(term=['Naphtha']).to_df()
# print (product_search.to_string(index=False))

# Saved Vortexa product IDs, copied from the search output above.
# The short names are the notebook's own abbreviations -- presumably
# cpp = clean petroleum products, lpg = liquefied petroleum gas,
# dpp = dirty petroleum products; confirm against the product search results.
cpp='b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg='364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
dpp='5de0b00094e0fd7542c10f9f8a71b4008d55750f21dc905cda9b0f7f5f76bc08'
crude='54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
naphtha='3e4db72ef7027de928ce55703a213a546fd86d2debe6f2e9c85f3a5f9d53e8dd'

# search for geography ids (remove hashtags to search)

# full_length_df = v.Geographies().search(term=["Norway"]).to_df()
# print(full_length_df.to_string(index=False))

# Saved Vortexa geography IDs, copied from the search output above.
# NOTE(review): abbreviations are not defined in this notebook -- presumably
# meg = Middle East Gulf, eos / wos = East / West of Suez, nwe = North West Europe,
# usac = US Atlantic Coast, gom = Gulf of Mexico; verify with the geography search.
meg='0427e0f9d52b38c1a98b68c59b8fd80cb1c508e44882e96611e48ef5b140d927'
atlantic_basin='67be333ea46a3cd312e207b05e2c5c4502a9353e39ec81642a93e3b23cc3c10a'
pacific_basin='715b6aab56723cb55c3ddbdc12a2851b379919010f119cf98765d4debbdb793d'
eos='9247125b0b7d8c78d68fb91a5e1ec0cd1b1c4f7276e3385dbe197004b6081229'
wos='1c9a4323f8d513ab37238bfbbcf02e39621791c1e941a286d2342ef7359a8f57'
# Convenience groupings of the IDs defined just above
basins=[atlantic_basin, pacific_basin]
hemispheres=[eos, wos]
wc_india='b18ac6665f23d0eaca2015d3e7924f668f606b26489ae055ec4e7916dfd8cc03'
europe='f39d455f5d38907394d6da3a91da4e391f9a34bd6a17e826d6042761067e88f4'
nwe='c5460c5a4ece7b64ffc0cc280aeade60d364423e8e062ef4a11494352fe6fdbb'
usac='2d8f42426b74af03caa9055df1952d22a011f2a210b53b9132955a89fc552433'
padd3='4e79eb8e84d26f5c3c0006283bc1aa52c170b58d667be9848e136afea91a57e9'
gom='37c8c4eeb730d1cd41f90ca6bf95c923222b0734b1b0336a475acce821f87ebd'

# Functions
*The below code contains functions for processing Vortexa data for visualisation. Run the cell below to define functions needed for the examples below. The functions include:*

**get_search_blocks:** creates 4-year time blocks for querying data over a longer period than the 4-year limit.

**moving_average:** calculates moving averages of numerical data.

**vessel_availability:** queries and plots vessel availability data.

**voyages_time_series_with_split:** queries and plots voyages data, with the option to split.

**post_ballast_distribution:** queries and plots tanker activity after discharging on a specified laden route.

**fleet_distribution_per_location:** queries and plots vessel counts per selected location.

**voyages_time_series:** queries voyages time series data and converts to format required for seasonal data.

**seasonal_charts:** converts time series data to seasonal data (min, max, average and previous year).

**plot_seasonal:** plots seasonal charts.

**complete_seasonal_voyages:** one function which queries voyages time series data, converts to seasonal data and plots.

**prop_per_status:** queries and plots fleet activity per operational status, with the option to view vessel counts or fleet percentage.


In [3]:
# Helper that slices the period from a start date up to `today` into 4-year search windows
def get_search_blocks(start_y, start_m, start_d, today):

    """
    Vortexa's API maximum search is 4 years and starts in 2016.
    Build the list of (window_start, window_end) datetime tuples that cover
    start date -> `today` in consecutive windows of at most 4 years.

    Each full window ends one second before the next one starts; the final
    window is cut off at `today`.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the first window's start.
    today : datetime
        Upper bound of the last window.

    Returns
    -------
    list of (datetime, datetime)
        Always contains at least one tuple.
    """

    four_years = dateutil.relativedelta.relativedelta(years=4)
    one_second = dateutil.relativedelta.relativedelta(seconds=1)

    window_start = datetime(start_y, start_m, start_d)
    window_end = window_start + four_years - one_second

    blocks = []

    while True:
        # A window reaching beyond `today` is truncated to `today`
        blocks.append((window_start, today if window_end > today else window_end))

        # Stop as soon as the current window reaches (or passes) `today`
        if not window_end < today:
            break

        window_start = window_start + four_years
        window_end = window_end + four_years

    return blocks

# function to create a moving average
def moving_average(data, period, option, col='value'):
    """
    Smooth numeric columns of a data frame with a trailing rolling mean.

    The window uses ``min_periods=1``, so the first ``period - 1`` rows are
    averaged over the rows available so far instead of being NaN.
    The input frame is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to smooth.
    period : int
        Rolling window length, in rows.
    option : {'multiple', 'single'}
        'multiple': the first column is kept as-is and every other column is
        smoothed; column names and order are unchanged.
        'single': only `col` is smoothed; it is returned as the last column,
        after all other columns in their original order.
    col : str, default 'value'
        Column to smooth when ``option == 'single'``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If `option` is neither 'multiple' nor 'single'.
    """

    if option == 'multiple':

        # smooth everything except the first (date) column
        smoothed = data.iloc[:, 1:].rolling(window=period, min_periods=1).mean()

        # put the untouched first column back in front
        moving_avg_df = pd.concat([data.iloc[:, :1], smoothed], axis=1)
        moving_avg_df.columns = list(data.columns)

    elif option == 'single':

        # all columns except the one being smoothed, in their original order
        other_cols = [name for name in data.columns if name != col]

        # assign the smoothed column directly under its own name; no temporary
        # column name is needed, so it cannot clash with an existing column
        moving_avg_df = data[other_cols].copy()
        moving_avg_df[col] = data[col].rolling(window=period, min_periods=1).mean()

    else:
        # previously fell through to `return` with the result variable unbound
        raise ValueError(f"option must be 'multiple' or 'single', got {option!r}")

    return moving_avg_df

# Freight rates function
def freight_rates(start_y, start_m, start_d, rates, unit, freq, plot, title):
    """
    Download freight pricing time series for one or more routes and
    optionally plot them.

    One `v.FreightPricingTimeseries().search(...)` call is made per route,
    from the given start date up to now. Routes are aligned on their date
    (`key`) values rather than on row position, so routes with different
    date coverage cannot be shifted against each other. Rows where any
    route has no value are dropped.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    rates : list of str
        Route codes; each is passed as `routes` to the search and becomes
        a column name in the result.
    unit
        Passed to the search as `breakdown_property`.
    freq
        Passed to the search as `breakdown_frequency`.
    plot : bool
        If true, show a plotly line chart of all routes.
    title : str
        Chart title.

    Returns
    -------
    pandas.DataFrame
        Column 'Date' (strings formatted "%d-%m-%Y") followed by one column
        per route.
    """

    # Query window: start date -> now
    today = datetime.today()
    time_min = datetime(start_y, start_m, start_d)

    # One date-indexed series per route
    per_route = []

    for route in rates:
        route_df = v.FreightPricingTimeseries().search(
            time_min=time_min,
            time_max=today,
            routes=route,
            breakdown_property=unit,
            breakdown_frequency=freq).to_df()

        per_route.append(route_df.set_index('key')['value'].rename(route))

    # Align all routes on their dates
    final = pd.concat(per_route, axis=1)
    final.index.name = 'Date'
    final = final.reset_index()
    final['Date'] = pd.to_datetime(final['Date'])

    # Replace blanks with pandas NA values, then remove incomplete rows
    final = final.replace('', pd.NA).dropna()

    # Keep rows chronological even if the alignment had to merge differing date sets
    final = final.sort_values(by='Date')

    # If desired, plot rates
    if plot:

        fig = px.line(
            final,
            x="Date",
            y=list(final.columns)[1:],
            title=title,
            labels={
                "Date":"Date",
            },
            )
        fig.update_layout(xaxis_rangeslider_visible = True)
        fig.show()

    # Reformat dates for export to excel
    final['Date'] = final['Date'].dt.strftime("%d-%m-%Y")

    return final

# Vessel availability function
def vessel_availability(start_y, start_m, start_d, region, port, prod, prod_excl, vessel_class, vessel_class_excl, heading_to, heading_to_excl, status, laycan_min, laycan_max, plot, ma_period, title):
    """
    Download the vessel availability time series from a start date up to now
    and optionally plot it.

    The period is split with `get_search_blocks`, one
    `v.VesselAvailabilityTimeseries().search(...)` call is made per block,
    and the blocks are stacked into one frame with a fresh 0..n-1 index.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    region, port
        Passed as `filter_region` / `filter_port`.
    prod, prod_excl
        Passed as `filter_products` / `exclude_products`.
    vessel_class, vessel_class_excl
        Passed as `filter_vessel_classes` / `exclude_vessel_classes`.
    heading_to, heading_to_excl
        Passed as `filter_destination` / `exclude_destination`.
    status
        Passed as `filter_vessel_status`.
    laycan_min, laycan_max
        Bounds of the single `filter_days_to_arrival` window.
    plot : bool
        If true, show a plotly line chart.
    ma_period : int or None
        If not None, the series is smoothed with `moving_average` over this
        many rows and the title gets a "(<ma_period>-day MA)" suffix.
    title : str
        Chart title.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date' (strings formatted '%d-%m-%Y') and
        'Vessel Availability'.
    """

    today = datetime.today()
    search_blocks = get_search_blocks(start_y, start_m, start_d, today)

    # Collect the blocks and concatenate once at the end
    block_frames = []

    for time_min, time_max in search_blocks:

        print(f"Downloading vessel availabiity for period: {time_min} to {time_max}")

        # Pull availability time series data
        ts_result = v.VesselAvailabilityTimeseries().search(
            filter_time_min=time_min,
            filter_time_max=time_max,
            filter_region=region,
            filter_port=port,
            filter_products=prod,
            exclude_products=prod_excl,
            filter_vessel_classes=vessel_class,
            exclude_vessel_classes=vessel_class_excl,
            filter_destination=heading_to,
            exclude_destination=heading_to_excl,
            filter_days_to_arrival=[{"min": laycan_min, "max": laycan_max}], # specify laycan window
            filter_vessel_scrubbers="disabled",
            filter_vessel_status=status,
            use_reference_port=True
            ).to_df()

        block_frames.append(
            ts_result[["key", "count"]].rename(
                columns={"key": "Date", "count": "Vessel Availability"}
            )
        )

    # ignore_index avoids repeated index labels when more than one block is stacked
    result_dfs = pd.concat(block_frames, ignore_index=True)

    # Convert explicitly (as freight_rates does) so the .dt accessor below
    # does not depend on the dtype the SDK happened to return
    result_dfs['Date'] = pd.to_datetime(result_dfs['Date'])

    if ma_period is None:
        data = result_dfs

    else:
        data = moving_average(data=result_dfs, period=ma_period, option='multiple')
        title = title+f' ({ma_period}-day MA)'

    if plot: # plot data if desired

        fig = px.line(
            data, # data to plot
            title=title, # title set as input
            x="Date",
            y=list(data.columns)[1:],
            labels={
                "value":'No. of vessels' # unit label
            },
            )
        fig.update_layout(xaxis_rangeslider_visible = True)
        fig.show()

    # Reformat dates for export / merging with freight rates
    data['Date'] = data['Date'].dt.strftime('%d-%m-%Y')

    return data


def fr_va_combination(start_y, start_m, start_d, region, port, prod, prod_excl, vessel_class, vessel_class_excl, heading_to, heading_to_excl, status, laycan_min, laycan_max, va_ma_period, rates, unit, plot, title):
    """
    Combine daily freight rates and vessel availability in one frame and
    optionally plot them on a dual-axis chart.

    Freight rates come from `freight_rates` (frequency fixed to 'day') and
    availability from `vessel_availability`; the two are left-merged on
    their 'Date' strings, so only dates with freight rates are kept.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    region, port, prod, prod_excl, vessel_class, vessel_class_excl,
    heading_to, heading_to_excl, status, laycan_min, laycan_max
        Forwarded unchanged to `vessel_availability`.
    va_ma_period : int or None
        Forwarded to `vessel_availability` as `ma_period`.
    rates : list of str
        Route codes forwarded to `freight_rates`.
    unit : str
        Forwarded to `freight_rates`; also selects the left axis title
        ('cost' -> '$/ton', 'tce' -> '$/day', 'route' -> 'WS'). Any other
        value is used as the axis title as-is.
    plot : bool
        If true, show the chart: every freight-rate column on the left axis,
        the availability column on the right axis.
    title : str
        Chart title.

    Returns
    -------
    pandas.DataFrame
        'Date' (datetime, not reformatted), the freight-rate columns, then
        the availability column; all value columns are floats.
    """

    frs_df=freight_rates(start_y=start_y, start_m=start_m, start_d=start_d,
                         rates=rates, unit=unit, freq='day',
                         plot=False, title='freight rates')

    va_df=vessel_availability(start_y=start_y, start_m=start_m, start_d=start_d,
                              region=region, port=port,
                              prod=prod, prod_excl=prod_excl,
                              vessel_class=vessel_class, vessel_class_excl=vessel_class_excl,
                              heading_to=heading_to, heading_to_excl=heading_to_excl,
                              status=status, laycan_min=laycan_min, laycan_max=laycan_max,
                              ma_period=va_ma_period,
                              plot=False, title='availability')

    combined_df = pd.merge(frs_df, va_df, on='Date', how='left')

    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format="%d-%m-%Y", errors='coerce')

    # Convert every value column to float (not just the first two), so the
    # frame is uniform when more than one route is requested
    value_cols = list(combined_df.columns)[1:]
    combined_df[value_cols] = combined_df[value_cols].astype(float)

    # The merge keeps the freight-rate columns first, then the availability ones
    n_rate_cols = len(frs_df.columns) - 1
    rate_cols = value_cols[:n_rate_cols]
    availability_cols = value_cols[n_rate_cols:]

    # Left-axis title; unknown units fall back to the unit name itself
    fr_unit = {'cost': '$/ton', 'tce': '$/day', 'route': 'WS'}.get(unit, str(unit))

    if plot:

        fig = go.Figure()

        # Freight rates on the LHS y-axis
        for rate_col in rate_cols:
            fig.add_trace(
                go.Scatter(
                    x=combined_df['Date'],
                    y=combined_df[rate_col],
                    name=rate_col,
                    yaxis='y1'
                )
            )

        # Vessel availability on the RHS y-axis
        for availability_col in availability_cols:
            fig.add_trace(
                go.Scatter(
                    x=combined_df['Date'],
                    y=combined_df[availability_col],
                    name=availability_col,
                    yaxis='y2'
                )
            )

        # Update layout to include secondary y-axis
        fig.update_layout(
            title=title,
            xaxis=dict(
                title='Date'
            ),
            yaxis=dict(
                title=fr_unit
            ),
            yaxis2=dict(
                title='No. of vessels',
                overlaying='y',
                side='right'
            ),
            xaxis_rangeslider_visible=True
        )

        fig.show()

    return combined_df

def _pivot_split_breakdown(result):
    """
    Reshape one split VoyagesTimeseries block into a wide frame.

    `result` is expected to hold a 'key' column plus groups of
    `breakdown.{i}.id/label/value/count` columns starting at the fourth
    column. The output has a 'date' column (datetime, ascending) and one
    column per non-blank split label, ordered by descending total value.
    Date/label combinations with no record are filled with 0.0.
    """

    # Four breakdown.* columns per split entry
    breakdown_cols = list(result.columns)[3:]
    n_splits = len(breakdown_cols) // 4

    # Stack the (date, label, value) triples of every split entry
    parts = []
    for i in range(n_splits):
        part = result[['key', f'breakdown.{i}.label', f'breakdown.{i}.value']].copy()
        part.columns = ['date', 'label', 'value']
        parts.append(part)
    stacked = pd.concat(parts, ignore_index=True)

    # Blank labels mark entries with nothing to show; they become 0.0 below
    stacked = stacked[stacked['label'] != ''].copy()
    stacked['value'] = pd.to_numeric(stacked['value'])
    stacked['date'] = pd.to_datetime(stacked['date'])

    # Column order: largest total first
    label_order = (
        stacked.groupby('label')['value']
        .sum()
        .sort_values(ascending=False)
        .index.tolist()
    )
    all_dates = pd.Index(stacked['date'].unique()).sort_values()

    if not label_order:
        return pd.DataFrame({'date': all_dates})

    # One row per date, one column per label; first record wins on duplicates
    wide = (
        stacked.groupby(['date', 'label'])['value']
        .first()
        .unstack('label')
        .reindex(index=all_dates, columns=label_order)
        .fillna(0.0)
    )

    return wide.rename_axis(index='date', columns=None).reset_index()


# Function for getting flows data and splitting
def voyages_time_series_with_split(start_y, start_m, start_d, origin, destination, locs, prod, prod_excl, vessel_class, vessel_class_excl, risk_rating, risk_rating_excl, age_min, age_max, freq, option, operator, title, split, plot, plot_type, show_top_x):
    """
    Download a voyages time series from a start date up to now, optionally
    split by a property, and optionally plot it.

    The period is split with `get_search_blocks`; one
    `v.VoyagesTimeseries().search(...)` call is made per block and the
    blocks are stacked.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    origin, destination, locs
        Passed as `origins`, `destinations`, `locations`.
    prod, prod_excl
        Passed as `latest_products` / `latest_products_excluded`.
    vessel_class, vessel_class_excl
        Passed as `vessels` / `vessels_excluded`.
    risk_rating, risk_rating_excl
        Passed as `vessel_risk_level` / `vessel_risk_level_excluded`.
    age_min, age_max
        Passed as `vessel_age_min` / `vessel_age_max`.
    freq
        Passed as `breakdown_frequency`.
    option : str
        Passed as `breakdown_property`; also selects the y-axis label. An
        option without a known label uses the option name as the label.
    operator
        Passed as `breakdown_unit_operator`.
    title : str
        Chart title; when `split` is None it is also the name of the value
        column in the returned frame.
    split : str or None
        Passed as `breakdown_split_property`. None means no split.
    plot : bool
        If true, show a chart.
    plot_type : {'area', 'line', 'bar'}
        Chart type; any other value shows no chart.
    show_top_x : int
        Only the first `show_top_x` value columns are kept; the rest are
        summed into an 'other' column. Use a large number to keep all.

    Returns
    -------
    pandas.DataFrame
        'Date' followed by the value column(s); missing values are 0. The
        'Date' column is not reformatted to strings.
    """

    today = datetime.today()
    search_blocks = get_search_blocks(start_y, start_m, start_d, today)

    # One processed frame per search block
    block_frames = []

    for time_min, time_max in search_blocks:

        print(f"Downloading {option} for period: {time_min} to {time_max}")

        result = v.VoyagesTimeseries().search(
            time_min=time_min,
            time_max=time_max,
            origins=origin,
            destinations=destination,
            locations=locs,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            vessels=vessel_class,
            vessels_excluded=vessel_class_excl,
            vessel_risk_level=risk_rating,
            vessel_risk_level_excluded=risk_rating_excl,
            vessel_age_min=age_min,
            vessel_age_max=age_max,
            breakdown_property=option,
            breakdown_frequency=freq,
            breakdown_split_property=split,
            breakdown_unit_operator=operator,
            ).to_df(columns='all')

        if split is not None:
            # One column per split label
            block_frames.append(_pivot_split_breakdown(result))

        else:
            # If no split, just select and rename relevant columns
            block_frames.append(result[['key', 'value']].rename(columns={'key': 'date'}))

    result_dfs = pd.concat(block_frames, ignore_index=True)

    # If you wish to only show the top x split properties in the plot, put the rest into 'other'
    all_cols = list(result_dfs.columns)
    if len(all_cols) > (show_top_x + 1):

        first_x = all_cols[:(show_top_x + 1)]
        rest = all_cols[(show_top_x + 1):]

        # other column is sum of everything not in top x
        result_dfs['other'] = result_dfs[rest].sum(axis=1)
        result_dfs2 = result_dfs[first_x + ['other']]

    # If you want all split properties to show, set show_top_x to a large number and no 'other' category will be made
    else:
        result_dfs2 = result_dfs

    # Units for the y axis label; unknown options fall back to the option name
    # instead of leaving the label undefined
    y_axis_labels = {
        'vessel_count': 'No. of vessels',
        'utilisation': "No. of vessels",
        'cargo_quantity': "tonne-days",
        'dwt': "dwt",
        'cubic_capacity': "cubic meters",
        'tonne_miles': "tonne-miles",
        'avg_speed': "knots",
    }
    y_axis_label = y_axis_labels.get(option, option)

    if plot: # plot data if desired

        plotter = {'area': px.area, 'line': px.line, 'bar': px.bar}.get(plot_type)

        if plotter is not None:

            fig = plotter(
                result_dfs2, # data to plot
                title=title, # title set as input
                x="date",
                y=list(result_dfs2.columns)[1:],
                labels={
                    "date":"Date",
                    # plotly names the stacked value column "value", or "_value"
                    # when the frame already has a "value" column (no-split case);
                    # map both so the unit label shows either way
                    "value":y_axis_label,
                    "_value":y_axis_label # unit label
                },
                )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()

    # Rename date column (dates are left as returned, not formatted to strings)
    result_dfs2 = result_dfs2.rename(columns={'date': 'Date'})

    if split is None:
        result_dfs2 = result_dfs2.rename(columns={'value': title})

    result_dfs2 = result_dfs2.fillna(0)

    return result_dfs2

def _top_x_with_other(frame, show_top_x):
    """
    Keep the first column plus the next `show_top_x` columns of `frame` and
    sum all remaining columns into a new 'other' column. Returns a copy;
    when there are no remaining columns the copy has no 'other' column.
    """

    cols = list(frame.columns)

    if len(cols) <= show_top_x + 1:
        return frame.copy()

    kept = frame[cols[:show_top_x + 1]].copy()
    kept['other'] = frame[cols[show_top_x + 1:]].sum(axis=1)

    return kept


# Function for post ballast distribution
def post_ballast_distribution(origin, origin_excl, destination, destination_excl, vessel_class, product, product_excl, start_y, start_m, start_d, end_y, end_m, end_d, show_top_x, plot, option, title):
    """
    Show where vessels ballast to after discharging on a laden route.

    Steps:
      1. search the laden voyages on the route in the time frame
         (`v.VoyagesSearchEnriched().search`),
      2. keep those that ended on or before the end date and have a
         `next_voyage_id`,
      3. fetch those next voyages and keep the ones whose 'VOYAGE STATUS'
         is 'Ballast',
      4. count them per calendar month of 'START DATE' and per
         'FINAL DESTINATION SHIPPING REGION' (blank -> 'Undetermined').

    Parameters
    ----------
    origin, origin_excl, destination, destination_excl
        Passed as `origins`, `origins_excluded`, `destinations`,
        `destinations_excluded` of the laden search.
    vessel_class, product, product_excl
        Passed as `vessels`, `products`, `products_excluded`.
    start_y, start_m, start_d, end_y, end_m, end_d : int
        Start date (00:00:00) and end date (23:59:59) of the time frame.
    show_top_x : int
        Number of destinations shown individually (most frequent first);
        the rest are summed into 'other'.
    plot : bool
        If true, show a chart.
    option : {'counts', 'proportions'}
        'counts' shows a bar chart of voyage counts, 'proportions' an area
        chart of monthly shares.
    title : str
        Chart title.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame, pandas.DataFrame)
        Monthly counts, monthly proportions (both with a 'Date' column
        formatted '%b-%Y'), and the ballast voyages frame with an added
        'months' column.

    Raises
    ------
    ValueError
        If no laden voyage with a next voyage id is found, because the
        second search would then run without a voyage filter.
    """

    dest_col = 'FINAL DESTINATION SHIPPING REGION'

    # set date objects
    start = datetime(start_y, start_m, start_d)
    end = datetime(end_y, end_m, end_d, 23, 59, 59)

    # Pull the laden voyages which occurred in the required timeframe
    route = v.VoyagesSearchEnriched().search(
        origins = origin,
        origins_excluded = origin_excl,
        destinations = destination,
        destinations_excluded = destination_excl,
        time_min = start,
        time_max = end,
        vessels = vessel_class,
        products = product,
        products_excluded = product_excl
    )

    # Convert to dataframe
    route = pd.DataFrame(route)

    # Drop voyages without an end timestamp and sort the rest by it
    route["end_timestamp"] = pd.to_datetime(route["end_timestamp"])
    route = route.dropna(subset=['end_timestamp']).sort_values(by='end_timestamp', ascending=True)

    # Remove voyages that end past the specified end date
    # (tz_convert(None) requires the timestamps to be timezone-aware)
    route = route[route['end_timestamp'].dt.tz_convert(None) <= pd.to_datetime(end)]

    # Remove voyages still in progress (i.e. voyages with no next voyage ID)
    route = route.dropna(subset=['next_voyage_id'])

    # Get the next voyage IDs
    next_voyage_id_list = [x for x in route["next_voyage_id"].unique() if x != '']

    if not next_voyage_id_list:
        raise ValueError(
            "No laden voyages with a following voyage were found for the "
            "given route and time frame."
        )

    # Get voyages corresponding to the next voyage IDs
    df = v.VoyagesSearchEnriched().search(
        voyage_id = next_voyage_id_list,
        columns = "all").to_df()

    # Sort them by their start dates (end date of laden voyage/discharge date)
    df["START DATE"] = pd.to_datetime(df["START DATE"])
    df = df.sort_values(by='START DATE', ascending=True)

    # Relabel blank destinations as Undetermined
    df[dest_col] = df[dest_col].replace([''], 'Undetermined')

    # Remove laden results
    df = df.loc[df["VOYAGE STATUS"] == 'Ballast'].reset_index(drop=True)

    # Destinations ranked by how often they occur (most frequent first)
    dests = df[dest_col].value_counts().index.tolist()

    # Convert dates of ballast voyages to months and years for counting purposes
    df["months"] = df['START DATE'].dt.strftime('%m-%Y')

    # Complete list of months in the time frame. Start from the first day of
    # the start month: 'MS' only yields month starts on or after `start`, so
    # a mid-month start date would otherwise lose its first month.
    month_start = start.replace(day=1)
    dates = list(pd.date_range(start=month_start, end=end, freq='MS').strftime('%m-%Y'))

    # Voyages per month (rows) and destination (columns); absent pairs are 0
    counts = pd.crosstab(df['months'], df[dest_col]).reindex(
        index=dates, columns=dests, fill_value=0
    )

    # Share of each destination within its month (NaN for months with no voyages)
    proportions = counts.div(counts.sum(axis=1), axis=0)

    raw_counts_df = counts.rename_axis(index='Date', columns=None).reset_index()
    df_props = proportions.rename_axis(index='Date', columns=None).reset_index()

    # If you wish to only see the top x destinations, put the rest into 'other'
    raw_counts_df2 = _top_x_with_other(raw_counts_df, show_top_x)
    df_props2 = _top_x_with_other(df_props, show_top_x)

    # Real dates for the x axis
    df_props2['Date'] = pd.to_datetime(df_props2['Date'], format='%m-%Y')
    raw_counts_df2['Date'] = pd.to_datetime(raw_counts_df2['Date'], format='%m-%Y')

    if plot:

        if option == 'counts':

            # Plot ballast distribution data (counts)
            fig = px.bar(
                raw_counts_df2,
                x="Date",
                y=list(raw_counts_df2.columns)[1:],
                labels={
                    "Date":"Date",
                    "value":"Number of voyages"
                },
                title=title
            )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()

        elif option == 'proportions':

            # Plot ballast distribution data (proportions)
            fig = px.area(
                df_props2,
                x="Date",
                y=list(df_props2.columns)[1:],
                labels={
                    "Date":"Date",
                    "value":"Proportion of voyages"
                },
                title=title
            )
            fig.update_layout(xaxis_rangeslider_visible = True)
            fig.show()

    # Month labels for export
    raw_counts_df2['Date'] = raw_counts_df2['Date'].dt.strftime('%b-%Y')
    df_props2['Date'] = df_props2['Date'].dt.strftime('%b-%Y')

    return raw_counts_df2, df_props2, df

# Function for fleet distribution per location
def fleet_distribution_per_location(start_y, start_m, start_d, locs_ids, prod, prod_excl, vessel_class, vessel_class_excl, risk_rating, risk_rating_excl, age_min, age_max, freq, title, ma_period, plot):
    """
    Download vessel counts over time for several locations and optionally
    plot them as one line per location.

    All geographies are fetched with `v.Geographies().search()` to look up
    the names of the requested ids. For each matching geography,
    `voyages_time_series_with_split` is called with option 'vessel_count',
    operator 'sum' and no split. The per-location series are aligned on
    their 'Date' values.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    locs_ids : list of str or str
        Geography id(s) to include. Columns follow the order of the
        geography search result, not the order of this list.
    prod, prod_excl, vessel_class, vessel_class_excl, risk_rating,
    risk_rating_excl, age_min, age_max, freq
        Forwarded unchanged to `voyages_time_series_with_split`.
    title : str
        Chart title; gets a "(<ma_period>-<freq> MA)" suffix when smoothing.
    ma_period : int or None
        If not None, every location column is smoothed with
        `moving_average` over this many rows.
    plot : bool
        If true, show a plotly line chart.

    Returns
    -------
    pandas.DataFrame
        'Date' followed by one column per location, named after the
        geography. Empty if none of the ids matched a geography.
    """

    # Accept a single id as well as a list of ids
    if isinstance(locs_ids, str):
        locs_ids = [locs_ids]

    geogs_df = v.Geographies().search().to_df(columns='all')

    selected_geogs_df = geogs_df[geogs_df['id'].isin(locs_ids)].reset_index(drop=True)

    # One Date-indexed frame per location
    per_location = []

    for geog_id, geog_name in zip(selected_geogs_df['id'], selected_geogs_df['name']):

        temp_dist=voyages_time_series_with_split(start_y=start_y, start_m=start_m, start_d=start_d,
                                                 origin=None, destination=None, locs=geog_id,
                                                 prod=prod, prod_excl=prod_excl,
                                                 vessel_class=vessel_class, vessel_class_excl=vessel_class_excl,
                                                 risk_rating=risk_rating, risk_rating_excl=risk_rating_excl,
                                                 age_min=age_min, age_max=age_max,
                                                 freq=freq, option='vessel_count', operator='sum',
                                                 title=geog_name, split=None,
                                                 plot=False, plot_type='line', show_top_x=1000)

        per_location.append(temp_dist.set_index('Date'))

    if per_location:
        # Align locations on their dates rather than on row position / index labels
        data = pd.concat(per_location, axis=1).reset_index()
    else:
        data = pd.DataFrame()

    if ma_period is not None:
        data2 = moving_average(data=data, period=ma_period, option='multiple')
        title = title+f' ({ma_period}-{freq} MA)'

    else:
        data2 = data

    if plot: # plot data if desired

        fig = px.line(
            data2, # data to plot
            title=title, # title set as input
            x="Date",
            y=list(data2.columns)[1:],
            labels={
                "date":"Date",
                "value":'No. of vessels' # unit label
            }
            )
        fig.update_layout(xaxis_rangeslider_visible = True)
        fig.show()

    return data2

# Function for getting freight data
def voyages_time_series(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, unit, operator):
    """
    Download a voyages time series from a start date up to now and add the
    calendar helper columns used for seasonal charts.

    The period is split with `get_search_blocks`; one
    `v.VoyagesTimeseries().search(...)` call is made per block and the
    'key' / 'value' columns of each block are stacked.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the query start.
    origin, origin_excl, destination, destination_excl
        Passed as `origins`, `origins_excluded`, `destinations`,
        `destinations_excluded`.
    prod, prod_excl
        Passed as `latest_products` / `latest_products_excluded`.
    vessel_class, vessel_class_excl
        Passed as `vessels` / `vessels_excluded`.
    status
        Passed as `voyage_status`.
    freq
        Passed as `breakdown_frequency`.
    unit
        Passed as `breakdown_property`.
    operator
        Passed as `breakdown_unit_operator`.

    Returns
    -------
    pandas.DataFrame
        Columns: date, week_end_timestamp, string_date ('%d-%m-%Y'),
        dd_mmm ('%d-%b'), week_number (ISO week), month ('%b'), year, value.
        Missing values are filled with 0 and the index runs 0..n-1.
    """

    today = datetime.today()

    result_dfs = pd.DataFrame()

    for time_min, time_max in get_search_blocks(start_y, start_m, start_d, today):

        print(f"Downloading freight data for period: {time_min} to {time_max}")

        block = v.VoyagesTimeseries().search(
            time_min=time_min,
            time_max=time_max,
            origins=origin,
            origins_excluded=origin_excl,
            destinations=destination,
            destinations_excluded=destination_excl,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            vessels=vessel_class,
            vessels_excluded=vessel_class_excl,
            voyage_status=status,
            breakdown_frequency=freq,
            breakdown_property=unit,
            breakdown_unit_operator=operator
            ).to_df(columns='all')

        # Keep only the date key and the value, under their working names
        block_values = block[['key', 'value']].rename(columns={'key': 'date'})

        result_dfs = pd.concat([result_dfs, block_values])

    # Parse the dates once, then derive every calendar column from them
    enriched = result_dfs.copy()
    enriched['date'] = pd.to_datetime(enriched['date'])
    parsed = enriched['date']

    enriched = enriched.assign(
        string_date=parsed.dt.strftime('%d-%m-%Y'),
        dd_mmm=parsed.dt.strftime('%d-%b'),
        month=parsed.dt.strftime('%b'),
        week_end_timestamp=parsed + pd.offsets.Week(weekday=6),
        week_number=parsed.dt.isocalendar().week,
        year=round(pd.to_numeric(parsed.dt.strftime('%Y')), 0),
    ).fillna(0)

    column_order = ['date', 'week_end_timestamp', 'string_date', 'dd_mmm', 'week_number', 'month', 'year', 'value']

    return enriched[column_order].reset_index(drop=True)


# function for obtaining seasonal chart data
def seasonal_charts(data, freq, start_y):
    """Build the table behind a seasonal chart from a time series.

    For every calendar slot (each day of a non-leap year when freq is 'day',
    each month when freq is 'month') the result holds the minimum, the range
    (max - min) and the mean of 'value' over the years start_y up to and
    including last calendar year, plus last year's value and the current
    year's value for that slot.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'dd_mmm', 'year' and 'value', and 'month'
        when freq is 'month'. Rows whose 'dd_mmm' is '29-Feb' are ignored.
    freq : str
        'day' or 'month'.
    start_y : int
        First year included in the min / range / average statistics.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'Min.', 'Range <start_y>-<last year>',
        'Average <start_y>-<last year>', 'Last year' and 'Current year', one
        row per slot. Slots without history are NaN in the statistics and in
        'Last year'; slots without data in the current year hold ''.

    Raises
    ------
    ValueError
        If freq is neither 'day' nor 'month'.
    """
    # Column of `data` that identifies the slot, the matching label format and
    # the date_range frequency that enumerates the slots of one year.
    if freq == 'day':
        key_col, label_format, slot_freq = 'dd_mmm', '%d-%b', 'D'
    elif freq == 'month':
        # 'MS' (month start) yields the same 12 '%b' labels as a month-end
        # frequency and avoids the deprecated 'M' alias.
        key_col, label_format, slot_freq = 'month', '%b', 'MS'
    else:
        raise ValueError(f"freq must be 'day' or 'month', got {freq!r}")

    # Remove leap days for daily time series
    df = data[data['dd_mmm'] != '29-Feb'].reset_index(drop=True)

    # Set constants
    this_year = datetime.today().year
    last_year = this_year - 1
    stats_start_y = start_y
    stats_end_y = last_year

    # Define stats calculating data set, last year's and current year's data sets
    stats_df = df[(df['year'] >= stats_start_y) & (df['year'] <= stats_end_y)]
    last_year_rows = stats_df[stats_df['year'] == last_year]
    this_year_rows = df[df['year'] == this_year]

    # Slot labels - use a non-leap year
    labels = pd.date_range(start=datetime(2023, 1, 1), end=datetime(2023, 12, 31), freq=slot_freq).strftime(label_format)

    # Statistics per slot; reindexing leaves NaN for slots that have no history
    grouped = stats_df.groupby(key_col)['value']
    mins = grouped.min().reindex(labels)
    maxs = grouped.max().reindex(labels)
    avgs = grouped.mean().reindex(labels)

    # Last year's values are matched to their slot by label, not by row
    # position, so a late start or a gap in last year's data cannot shift the
    # values onto the wrong dates.
    last_year_vals = last_year_rows.drop_duplicates(subset=key_col).set_index(key_col)['value'].reindex(labels)

    # Current year values; slots with no data yet get a blank
    current_first = this_year_rows.drop_duplicates(subset=key_col).set_index(key_col)['value']
    this_year_vals = [current_first.loc[label] if label in current_first.index else '' for label in labels]

    # Compile in desired order
    range_col = f'Range {stats_start_y}-{stats_end_y}'
    average_col = f'Average {stats_start_y}-{stats_end_y}'

    seasonal_df = pd.DataFrame({
        'Date': list(labels),
        'Min.': mins.to_numpy(),
        range_col: (maxs - mins).to_numpy(),
        average_col: avgs.to_numpy(),
        'Last year': last_year_vals.to_numpy(),
        'Current year': this_year_vals,
    })

    return seasonal_df


# Function to plot seasonal chart
def plot_seasonal(y_min, y_max, data, title):
    """Draw a seasonal chart from a six-column seasonal table and show it.

    The 2nd and 3rd columns of `data` are drawn as an area chart against the
    'Date' column, and the 4th to 6th columns are added on top as lines.
    The y-axis is fixed to [y_min, y_max]. Nothing is returned.
    """
    column_names = list(data.columns)
    range_column = column_names[2]
    average_column = column_names[3]

    # Colour per series; the range and average column names vary, so they are looked up by position
    colors = {
        'Min.': 'white',
        range_column: 'lightblue',
        average_column: 'blue',
        'Last year': 'yellow',
        'Current year': 'red',
    }

    fig = px.area(
        data,
        x='Date',
        y=column_names[1:3],
        title=title,
        color_discrete_map=colors,
    )

    # Line traces for the remaining three columns
    for line_column in column_names[3:6]:
        fig.add_scatter(
            x=data['Date'],
            y=data[line_column],
            mode='lines',
            name=line_column,
            line=dict(color=colors[line_column]),
        )

    fig.update_yaxes(range=[y_min, y_max])
    fig.show()
    
    
# Function to plot and extract seasonal voyages data
def complete_seasonal_voyages(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, unit, operator, ma_period, plot, title, y_min, y_max):
    """Query a voyages time series, convert it to seasonal-chart data and optionally plot it.

    Parameters
    ----------
    start_y, start_m, start_d, origin, origin_excl, destination,
    destination_excl, prod, prod_excl, vessel_class, vessel_class_excl,
    status, freq, unit, operator :
        Forwarded unchanged to voyages_time_series. freq and start_y are also
        passed to seasonal_charts.
    ma_period : int or None
        None uses the raw series. Otherwise the series is first passed through
        moving_average(period=ma_period, option='single', col='value') and
        ' (<ma_period>-<freq> MA)' is appended to the title.
    plot : bool
        When truthy, the result is drawn with plot_seasonal.
    title : str
        Chart title.
    y_min, y_max :
        Y-axis limits handed to plot_seasonal.

    Returns
    -------
    The table returned by seasonal_charts.
    """
    # Query voyages data (daily or monthly, depending on freq)
    voyages_ts = voyages_time_series(start_y=start_y, start_m=start_m, start_d=start_d,
                                     prod=prod, prod_excl=prod_excl,
                                     vessel_class=vessel_class, vessel_class_excl=vessel_class_excl,
                                     status=status,
                                     freq=freq, unit=unit, operator=operator,
                                     origin=origin, origin_excl=origin_excl,
                                     destination=destination, destination_excl=destination_excl)

    if ma_period is None:
        data = seasonal_charts(data=voyages_ts, freq=freq, start_y=start_y)

    else:
        # Calculate moving averages before computing the seasonal statistics
        voyages_ts_ma = moving_average(data=voyages_ts, period=ma_period, option='single', col='value')
        data = seasonal_charts(data=voyages_ts_ma, freq=freq, start_y=start_y)
        title = title + f' ({ma_period}-{freq} MA)'

    if plot:
        plot_seasonal(y_min=y_min, y_max=y_max,
                      data=data,
                      title=title)

    return data
            
# Helper function for renaming columns
def select_rename(df,status):
    """Return the 'count' column of df, indexed by its 'key' column and renamed to status.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'key' and 'count'. It is not modified.
    status : str
        Name given to the single column of the result.

    Returns
    -------
    pandas.DataFrame
        One column named status, indexed by the values of 'key' (the index
        is named 'key').
    """
    # set_index returns a new frame, so the caller's DataFrame keeps its own index
    return df.set_index('key')[['count']].rename(columns={'count': status})

# Function for obtaining tanker activity statuses
def prop_per_status(start_y, start_m, start_d, vc, prod, prod_excl, plot_laden, plot_ballast, export, option):
    """Break the daily vessel count of a fleet down by operational status.

    Ten daily 'vessel_count' VoyagesTimeseries queries are run between
    (start_y, start_m, start_d) and now, each with a different combination of
    voyage / movement / location / cargo status filters, and combined into a
    laden table and a ballast table.

    Parameters
    ----------
    start_y, start_m, start_d : start of the period.
    vc : forwarded as vessels.
    prod, prod_excl : forwarded as latest_products / latest_products_excluded.
    plot_laden, plot_ballast : bool
        Whether to draw an area chart of the laden / ballast table.
    export : bool
        When truthy, both tables are written to 'Fleet activity.xlsx'
        (sheets 'laden' and 'ballast').
    option : str
        'percent' plots every status as a share of the row total,
        'count' plots the raw counts. Any other value plots nothing.

    Returns
    -------
    (laden_df, ballast_df) : tuple of pandas.DataFrame
        Indexed by date with the timezone removed. laden_df has the columns
        'laden sailing', 'Loading', 'Discharging', 'Waiting to discharge' and
        'Floating storage'; ballast_df has 'ballast sailing',
        'Waiting to load', 'In shipyard' and 'Waiting on the sea'.
    """
    # Evaluate the window once so that all queries cover exactly the same period
    time_min = datetime(start_y, start_m, start_d)
    time_max = datetime.today()

    def status_counts(label, **status_filters):
        # Daily vessel count for one status combination, as a single column named `label`
        result = v.VoyagesTimeseries().search(
            breakdown_frequency='day',
            breakdown_property='vessel_count',
            time_min=time_min,
            time_max=time_max,
            vessels=vc,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            **status_filters
        ).to_df(columns='all')
        return select_rename(result, label)

    # BALLAST STATIONARY
    waiting_to_load = status_counts(
        'Waiting to load',
        movement_status=['congestion', 'waiting', 'stationary'],
        voyage_status='ballast',
        location_status=['anchorage-zone', 'berth'],
    )
    in_shipyard = status_counts(
        'In shipyard',
        movement_status=['congestion', 'waiting', 'stationary'],
        voyage_status='ballast',
        location_status='dry-dock',
    )
    waiting_on_sea = status_counts(
        'Waiting on the sea',
        movement_status=['congestion', 'waiting', 'stationary'],
        voyage_status='ballast',
        location_status='on-the-sea',
    )
    ballast_stationary = pd.concat([waiting_to_load, in_shipyard, waiting_on_sea], axis=1)

    # LADEN STATIONARY
    loading = status_counts(
        'Loading',
        voyage_status='laden',
        location_status=['anchorage-zone', 'berth'],
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        cargo_status='loading',
    )
    discharging = status_counts(
        'Discharging',
        voyage_status='laden',
        location_status=['anchorage-zone', 'berth'],
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        cargo_status='discharging',
    )
    waiting_to_discharge = status_counts(
        'Waiting to discharge',
        voyage_status='laden',
        location_status=['anchorage-zone', 'berth'],
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        cargo_status='in-transit',
    )
    floating_storage = status_counts(
        'Floating storage',
        voyage_status='laden',
        location_status=['anchorage-zone', 'berth', 'on-the-sea'],
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        cargo_status='floating-storage',
    )
    laden_stationary = pd.concat([loading, discharging, waiting_to_discharge, floating_storage], axis=1)

    # BALLAST SAILING
    ballast_sailing = status_counts(
        'ballast sailing',
        movement_status=['moving'],
        voyage_status='ballast',
    )

    # LADEN SAILING: vessels on the sea plus vessels outside every listed location status
    laden_sailing1 = status_counts(
        'laden sailing 1',
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        voyage_status='laden',
        location_status=['on-the-sea'],
        cargo_status=['loading', 'discharging', 'in-transit'],
    )
    laden_sailing2 = status_counts(
        'laden sailing 2',
        movement_status=['moving', 'stationary', 'congestion', 'waiting'],
        voyage_status='laden',
        location_status_excluded=['dry-dock', 'berth', 'anchorage-zone', 'on-the-sea'],
    )
    laden_sailing = (
        pd.concat([laden_sailing1, laden_sailing2], axis=1)
        .sum(axis=1)
        .to_frame('laden sailing')
    )

    laden_df = pd.concat([laden_sailing, laden_stationary], axis=1)
    ballast_df = pd.concat([ballast_sailing, ballast_stationary], axis=1)

    # PLOTTING
    if plot_laden:
        _plot_fleet_activity(laden_df, option, 'Laden')

    if plot_ballast:
        _plot_fleet_activity(ballast_df, option, 'Ballast')

    # Drop the timezone from the date index (done before the optional Excel export)
    laden_df.index = laden_df.index.tz_localize(None)
    ballast_df.index = ballast_df.index.tz_localize(None)

    if export:

        with pd.ExcelWriter('Fleet activity.xlsx', engine='xlsxwriter') as writer:
            # Write each DataFrame to a different sheet
            laden_df.to_excel(writer, sheet_name='laden')
            ballast_df.to_excel(writer, sheet_name='ballast')

    return laden_df, ballast_df


def _plot_fleet_activity(status_df, option, fleet_label):
    """Show an area chart of a status table as percentages or counts; other options plot nothing."""
    if option == 'percent':
        plot_data = status_df.div(status_df.sum(axis=1), axis=0) * 100
        value_label = "Proportion of fleet (%)"
        title = f'{fleet_label} fleet activity (%)'
    elif option == 'count':
        plot_data = status_df
        value_label = "No. of vessels"
        title = f'{fleet_label} fleet activity (count)'
    else:
        return

    # reset_index() returns a copy: the caller's table must keep its date
    # index, which is still needed after plotting.
    plot_data = plot_data.reset_index()

    fig = px.area(
        plot_data,
        x="key",
        y=list(plot_data.columns)[1:],
        labels={
            "key":"Date",
            "value":value_label
        },
        title=title
    )
    fig.update_layout(xaxis_rangeslider_visible = True)
    fig.show()
        
    


# Example 1: Fleet distribution per location
Track changes in tanker numbers in a custom set of Vortexa locations. In this example we monitor the supply of MR2 tankers in each basin (Atlantic and Pacific). This can indicate where supply side pressure might come into play for freight rates in various regions, as well as highlight changes in tanker migration patterns.

In [None]:
# `basins` is the [atlantic_basin, pacific_basin] list saved in the ID search cell,
# so one series per basin is requested here.
# NOTE(review): age_max=20 is presumably a vessel age in years - confirm against
# fleet_distribution_per_location.
dist_per_loc_example=fleet_distribution_per_location(start_y=2018, start_m=1, start_d=1, 
                                                  locs_ids=basins, prod=cpp, prod_excl=lpg, 
                                                  vessel_class='oil_mr2', vessel_class_excl=None,
                                                  risk_rating=None, risk_rating_excl=['sanctioned', 'high'],
                                                  age_min=None, age_max=20,
                                                  freq='day', ma_period=7, plot=True, 
                                                  title='MR2 distribution per basin')



# Example 2: Post-ballast distribution
Using our voyages dataset and our powerful destination model, we can use this function to track tanker behaviour after discharging on a specified laden route. Here, we look at where MR2s head after they discharge on the NWE-to-USAC (TC2) trade route. This can help anticipate future tanker supply in a region as well as show where commodity demand might increase.

**Note:** *try changing the 'option' argument to 'proportions' to see the % of the trading fleet ballasting to a region, rather than the count of vessels.*

In [None]:
# Both ends of the period are fixed dates here (2021-01-01 and 2024-07-30),
# unlike the other examples, which pass only a start date.
# nwe, usac, cpp and lpg are the IDs saved in the ID search cell.
pb_dist_example=post_ballast_distribution(origin=nwe, origin_excl=None, 
                                       destination=usac, destination_excl=None, 
                                       vessel_class='oil_mr2', 
                                       product=cpp, product_excl=lpg, 
                                       start_y=2021, start_m=1, start_d=1, 
                                       end_y=2024, end_m=7, end_d=30, 
                                       show_top_x=3, plot=True, option='counts', 
                                       title='MR2 NWE-to-USAC (TC2) post-ballast distribution')




# Example 3: Vessel availability & Freight Pricing
Use this function to plot Vortexa's vessel availability data in a specified region for a particular vessel class against selected freight rates. The right combination of vessel availability laycan and moving averages can be used as a leading indicator for freight rates.

In [8]:
# laycan_min / laycan_max and va_ma_period are the laycan window and moving-average
# settings that the text above suggests tuning; rates=['TC2_37'] selects the freight rate.
va_vs_freight_rates=fr_va_combination(start_y=2023, start_m=12, start_d=1, 
                                      region=nwe, port=None, 
                                      prod=cpp, prod_excl=lpg, 
                                      vessel_class='oil_mr2', vessel_class_excl=None, 
                                      heading_to=None, heading_to_excl=None, 
                                      status=None, laycan_min=7, laycan_max=20, va_ma_period=7,
                                      rates=['TC2_37'], unit='cost', plot=True, 
                                      title='TC2 freight rate vs MR2 availability in the NWE')





Downloading vessel availabiity for period: 2023-12-01 00:00:00 to 2024-08-01 13:54:04.605311


In [9]:
# Display the table returned by fr_va_combination (Date, TC2_37 and Vessel Availability columns)
va_vs_freight_rates

Unnamed: 0,Date,TC2_37,Vessel Availability
0,2023-12-01,36.330002,437.000000
1,2023-12-04,33.669998,446.500000
2,2023-12-05,33.369999,444.200000
3,2023-12-06,33.500000,445.500000
4,2023-12-07,33.320000,445.571429
...,...,...,...
159,2024-07-25,31.620001,182.142857
160,2024-07-26,31.820000,123.000000
161,2024-07-29,32.220001,0.000000
162,2024-07-30,33.500000,60.714286


# Example 4: Seasonal freight metrics
Some data - when viewed as a plain time series - can be difficult to analyse by eye. The examples below help to contextualise freight data by comparing current values to the previous year's values as well as to the average, minimum and maximum over several years.

### Seasonal ballast speeds of MR2s heading to the Gulf of Mexico

In [None]:
# With freq='day' and ma_period=14, complete_seasonal_voyages smooths the series
# before computing the seasonal statistics and appends ' (14-day MA)' to the title.
seasonal_ballast_speed_example=complete_seasonal_voyages(start_y=2018, start_m=1, start_d=1, 
                                                      origin=None, origin_excl=None, 
                                                      destination=gom, destination_excl=None, 
                                                      prod=cpp, prod_excl=lpg, 
                                                      vessel_class='oil_mr2', vessel_class_excl=None, 
                                                      status='ballast', freq='day', unit='avg_speed', 
                                                      operator='avg', ma_period=14, plot=True, 
                                                      title='Seasonal MR2 ballast speeds towards Gulf of Mexico', 
                                                      y_min=10, y_max=12.5)





### Comparing seasonal utilisation of various tanker classes out of a region

VLCCs out of the Middle Eastern Gulf

In [None]:
# Laden VLCCs carrying crude out of the MEG; y_min / y_max fix the y-axis of the chart
# and are chosen per vessel class.
seasonal_utilisation_vlcc=complete_seasonal_voyages(start_y=2018, start_m=1, start_d=1, 
                                                    origin=meg, origin_excl=None, 
                                                    destination=None, destination_excl=None, 
                                                    prod=crude, prod_excl=None, 
                                                    vessel_class='oil_vlcc', vessel_class_excl=None, 
                                                    status='laden', freq='day', unit='utilisation', 
                                                    operator='avg', ma_period=7, plot=True, 
                                                    title='Seasonal MEG VLCC utilisation', 
                                                    y_min=150, y_max=300)

Suezmaxes out of the Middle Eastern Gulf

In [None]:
# Same query as the VLCC example but for Suezmaxes: origin=meg restricts it to voyages
# out of the Middle Eastern Gulf, so the chart title names MEG rather than a global fleet.
seasonal_utilisation_suezmax=complete_seasonal_voyages(start_y=2018, start_m=1, start_d=1, 
                                                    origin=meg, origin_excl=None, 
                                                    destination=None, destination_excl=None, 
                                                    prod=crude, prod_excl=None, 
                                                    vessel_class='oil_suezmax', vessel_class_excl=None, 
                                                    status='laden', freq='day', unit='utilisation', 
                                                    operator='avg', ma_period=7, plot=True, 
                                                    title='Seasonal MEG Suezmax utilisation', 
                                                    y_min=30, y_max=80)

# Example 5: Fleet per operational status
Sometimes, the status of tankers in a fleet can give insight into whether they are likely to participate in the market. Here, we look at the statuses of laden and ballast LR1 tankers. Of particular interest might be the % of the fleet which is in a shipyard. These tankers are likely to be out of the market, and can be subtracted from total fleet supply to give an indication of the actual trading fleet, more accurately reflecting tanker supply.

**Note:** *try changing the 'option' argument to 'count' to see the number of vessels per status instead of the proportion of tankers in each status.*

In [None]:
# prop_per_status returns a (laden_df, ballast_df) tuple, so fleet_activity_example holds
# both tables; export=False skips writing 'Fleet activity.xlsx'.
fleet_activity_example=prop_per_status(start_y=2023, start_m=1, start_d=1, 
                                    vc='oil_lr1', prod=cpp, prod_excl=lpg, 
                                    plot_ballast=True, plot_laden=True,
                                    export=False, option='percent')