# Import libraries

In [7]:
import vortexasdk as v
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import dateutil.relativedelta
import time
import plotly.express as px

# Search Product IDs

In [8]:
# To look up product ids, uncomment the two lines below and run this cell.

# product_search = v.Products().search(term=['Dirty Petr']).to_df()
# print (product_search.to_string(index=False))

# Product ids referenced by the queries further down

lpg = '364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
cpp = 'b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
dpp = '5de0b00094e0fd7542c10f9f8a71b4008d55750f21dc905cda9b0f7f5f76bc08'
crude = '54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'

# One id per grade; passed as the product filter in Example 1
russian_grades = [
    'c8803c073c2980d9849784c16bcd41d7f7b7561d2c8c1799a16166527b120242',
    '04c4ea46fc6fccfe8388717f399b40e5e348f9a98c451d99e7366caa998191a7',
    'a6c0a31f361b480bddfb93b1b93f38f4989013ae54b2b147ab18116fdfbebdb7',
    '7cab5376bbb441feabdafc7b9f6272c2f9cbd10deb604bed8fa7f9986b2a39a9',
    '20c87d7477f8cfe103904d9af911668c9a69bd03690e74e52e4386ad34e054ee',
    'fef83ff1194c77d969693ee17c08e1112878e184e3449400e5904bc47b0119fe',
    '0fff12441aa3bf28aa568134ef527d09f741c543a49c0db8c03a195a0bbc481e',
    '4ff810b966104a658d22e7155596c11bedcd1e3e81594e6187ab88ec816d95d6',
    '4aebc156649b4c9691cdf3a9c81758426d10ffc944ba441f907f9fbe0964e879',
    '0c703e0e14d99572c62f15a26fdf2c9f0c00e111e625cd40b8b337154e81d46d',
    '1dfa78c965d56ab9d7b953e424d9160e9f79d0275e460c74994193a328f51537',
    '054b79baa9a54171be2582c9b4f8fa82d0386d41c1914700a498dc5b95e8daba',
    '74a8bc823d4742318d38072f81068fac1ac02ac66e0d491ea4366007bb95c602',
]

# Search Geography IDs

In [10]:
# To look up geography ids, uncomment the two lines below and run this cell.

# full_length_df = v.Geographies().search(term=["Russia"]).to_df()
# print(full_length_df.to_string(index=False))

# Geography ids referenced by the queries further down

russia = 'b996521be9c996db3560ca234a56286ac38f798d34be229437555fab4f12a6a5'

# Functions

In [33]:
# Helper function to make time blocks of 4 years from a specified start date
def get_search_blocks(start_y, start_m, start_d, today):
    """
    Split the period from a start date up to `today` into consecutive blocks of at most 4 years.

    Vortexa's API maximum search is 4 years and starts in 2016, so a longer
    period has to be downloaded as several shorter searches.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day of the first instant to cover (midnight of that day).
    today : datetime
        Last instant to cover; the final block is cut off here.

    Returns
    -------
    list of (datetime, datetime)
        `(block_start, block_end)` tuples in chronological order. Each block
        ends one second before the next one starts, and the last one ends at `today`.

    Raises
    ------
    ValueError
        If the start date is later than `today` (there is nothing to search).
    """

    four_years = dateutil.relativedelta.relativedelta(years=4)
    one_second = dateutil.relativedelta.relativedelta(seconds=1)

    start = datetime(start_y, start_m, start_d)

    # An inverted (start > end) block would otherwise be handed to the API
    if start > today:
        raise ValueError(f"Start date {start} is after the end of the search period {today}")

    blocks = []

    # Comparing on the block start (not the previous end) means a `today` that
    # falls inside the one-second gap between two blocks adds no inverted block
    while start <= today:
        next_start = start + four_years
        blocks.append((start, min(next_start - one_second, today)))
        start = next_start

    return blocks


# Function for getting flows data and splitting
def _breakdown_to_wide(result):
    """
    Reshape one block of broken-down API output to one row per date and one column per label.

    `result` is expected to hold a `key` column plus, for every breakdown slot i,
    the columns `breakdown.<i>.label` and `breakdown.<i>.value`.
    NOTE(review): `key` is presumably the period timestamp returned by the SDK - confirm.

    Returns a data frame whose first column is `date`, followed by one column per
    label ordered by the label's total value (largest first). A label with no
    record on a date gets 0.0 for that date.
    """

    # Count the slots by column name rather than by column position
    n_slots = sum(
        1 for col in result.columns
        if str(col).startswith('breakdown.') and str(col).endswith('.label')
    )
    if n_slots == 0:
        raise ValueError("No breakdown columns found in the API response; cannot split the data")

    # Stack the slots on top of each other as (date, label, value) records
    stacked = pd.concat(
        [
            result[['key', f'breakdown.{i}.label', f'breakdown.{i}.value']].rename(
                columns={
                    'key': 'date',
                    f'breakdown.{i}.label': 'label',
                    f'breakdown.{i}.value': 'value',
                }
            )
            for i in range(n_slots)
        ],
        ignore_index=True,
    )

    # Remove rows with blank labels; the dates they belong to are zero-filled below
    stacked = stacked[stacked['label'] != ''].copy()
    stacked['date'] = pd.to_datetime(stacked['date'])
    stacked['value'] = pd.to_numeric(stacked['value'])

    # Every remaining date is kept, including dates whose rows carry no label at all
    all_dates = pd.DatetimeIndex(stacked['date'].drop_duplicates(), name='date').sort_values()

    labelled = stacked.dropna(subset=['label'])

    # Rank the labels by their total to obtain the column order
    ranked_labels = (
        labelled.groupby('label')['value'].sum()
        .sort_values(ascending=False)
        .index.tolist()
    )

    # If a label appears twice on one date, the first record (lowest slot) is used
    wide = (
        labelled.drop_duplicates(subset=['date', 'label'])
        .pivot(index='date', columns='label', values='value')
        .reindex(index=all_dates, columns=ranked_labels)
        .fillna(0.0)
        .rename_axis(index='date', columns=None)
        .reset_index()
    )

    return wide


def voyages_time_series_with_split_v2(start_y, start_m, start_d, origin, destination, locs, prod, prod_excl, vessel_class, vessel_class_excl, status, freq, option, title, split, plot, plot_type, show_top_x, timestamp='active'):
    """
    Download a voyages time series from a start date until now, optionally split
    by a breakdown property, optionally plot it, and return it as a data frame.

    The period is cut into blocks with `get_search_blocks` and one
    `v.VoyagesTimeseriesV2` search is run per block; the blocks are then stacked.

    Parameters
    ----------
    start_y, start_m, start_d : int
        Year, month and day the series starts on.
    origin, destination, locs
        Passed to the search as `origins`, `destinations` and `locations`.
    prod, prod_excl
        Passed as `latest_products` and `latest_products_excluded`.
    vessel_class, vessel_class_excl
        Passed as `vessels` and `vessels_excluded`.
    status
        Passed as `voyage_status`.
    freq
        Passed as `breakdown_frequency`.
    option : str
        Passed as `metric`. 'voyage-count', 'tonne-miles-sum', 'distance-avg' and
        'distance-voyage-avg' have a dedicated y axis label; any other metric is
        labelled with its own name.
    title : str
        Plot title; also the name of the value column when `split` is None.
    split : str or None
        Passed as `breakdown_property`. None returns a single value column.
    plot : bool
        Whether to draw a plot.
    plot_type : str
        'area', 'line' or 'bar'. Any other value draws nothing.
    show_top_x : int
        Number of split columns to keep; the remaining ones are summed into 'other'.
        Set it to a large number to keep every column.
    timestamp : str
        Passed as `voyage_date_range_activity`.

    Returns
    -------
    pandas.DataFrame
        A 'Date' column formatted as dd-mm-YYYY followed by the value column(s),
        with missing values replaced by 0.
    """

    today = datetime.today()
    search_blocks = get_search_blocks(start_y, start_m, start_d, today)

    # One data frame per block, stacked once after the loop
    block_frames = []

    for time_min, time_max in search_blocks:

        print(f"Downloading {option} for period: {time_min} to {time_max}")

        result = v.VoyagesTimeseriesV2(
            metric=option,
            breakdown_property=split
        ).search(
            time_min=time_min,
            time_max=time_max,
            origins=origin,
            destinations=destination,
            locations=locs,
            latest_products=prod,
            latest_products_excluded=prod_excl,
            vessels=vessel_class,
            vessels_excluded=vessel_class_excl,
            breakdown_frequency=freq,
            voyage_date_range_activity=timestamp,
            voyage_status=status
        ).to_df(columns='all')

        # If you wish to split, turn the breakdown columns into one column per label
        if split is not None:
            block_frames.append(_breakdown_to_wide(result))

        # If no split, just select and rename relevant columns
        else:
            block_frames.append(
                result[['key', 'value']]
                .rename(columns={'key': 'date'})
                .assign(date=lambda frame: pd.to_datetime(frame['date']))
            )

    result_dfs = pd.concat(block_frames, ignore_index=True)

    # Each block is ranked on its own, so with several blocks the column order would
    # follow the first block only; rank again over the whole period instead.
    # mergesort is stable, so ties keep the order they already had.
    if split is not None and len(block_frames) > 1:
        value_cols = [col for col in result_dfs.columns if col != 'date']
        ranked_cols = (
            result_dfs[value_cols].sum()
            .sort_values(ascending=False, kind='mergesort')
            .index.tolist()
        )
        result_dfs = result_dfs[['date'] + ranked_cols]

    # If you wish to only show the top x split properties, put the rest into 'other'
    all_cols = list(result_dfs.columns)
    if len(all_cols) > show_top_x + 1:  # +1 accounts for the date column
        kept_cols = all_cols[:show_top_x + 1]
        rest_cols = all_cols[show_top_x + 1:]
        result_dfs2 = result_dfs[kept_cols].assign(other=result_dfs[rest_cols].sum(axis=1))

    # Otherwise all split properties are shown and no 'other' category is made
    else:
        result_dfs2 = result_dfs

    if plot:  # plot data if desired

        plotters = {'area': px.area, 'line': px.line, 'bar': px.bar}
        plotter = plotters.get(plot_type)

        if plotter is not None:

            # Units for the y axis; an unlisted metric falls back to its own name
            y_axis_labels = {
                'voyage-count': 'No. of voyages',
                'tonne-miles-sum': "Tonne-miles",
                'distance-avg': "Mileage (nmi)",
                'distance-voyage-avg': "Mileage (nmi)",
            }
            y_axis_label = y_axis_labels.get(option, option)

            fig = plotter(
                result_dfs2,  # data to plot
                title=title,  # title set as input
                x="date",
                y=list(result_dfs2.columns)[1:],
                labels={
                    "date": "Date",
                    "value": y_axis_label  # unit label
                },
            )
            fig.update_layout(xaxis_rangeslider_visible=True)
            fig.show()

    # Reformat dates and rename date column
    result_dfs2 = result_dfs2.copy()
    result_dfs2['date'] = result_dfs2['date'].dt.strftime('%d-%m-%Y')
    result_dfs2 = result_dfs2.rename(columns={'date': 'Date'})

    if split is None:
        result_dfs2 = result_dfs2.rename(columns={'value': title})

    # A label that is absent from a whole block is missing there; report it as 0
    result_dfs2 = result_dfs2.fillna(0)

    return result_dfs2



## Example 1 - Russian crude mileage

In [36]:
# Monthly 'distance-voyage-avg' for laden voyages carrying any of the Russian
# grades, from 1 Jan 2021, split by status and drawn as a bar chart
example1 = voyages_time_series_with_split_v2(
    # period
    start_y=2021,
    start_m=1,
    start_d=1,
    timestamp='arrivals',
    freq='month',
    # filters
    origin=None,
    destination=None,
    locs=None,
    prod=russian_grades,
    prod_excl=None,
    vessel_class=None,
    vessel_class_excl=None,
    status='laden',
    # metric and split
    option='distance-voyage-avg',
    split='status',
    show_top_x=10,
    # plot
    plot=True,
    plot_type='bar',
    title='Russian crude average voyage mileage',
)





Downloading distance-voyage-avg for period: 2021-01-01 00:00:00 to 2024-09-17 11:20:15.779014


## Example 2 - Russian CPP mileage

In [37]:
# Monthly 'distance-voyage-avg' for laden voyages out of Russia carrying CPP
# (LPG excluded), from 1 Jan 2021, split by status and drawn as a bar chart
example2 = voyages_time_series_with_split_v2(
    # period
    start_y=2021,
    start_m=1,
    start_d=1,
    timestamp='arrivals',
    freq='month',
    # filters
    origin=russia,
    destination=None,
    locs=None,
    prod=cpp,
    prod_excl=lpg,
    vessel_class=None,
    vessel_class_excl=None,
    status='laden',
    # metric and split
    option='distance-voyage-avg',
    split='status',
    show_top_x=10,
    # plot
    plot=True,
    plot_type='bar',
    title='Russian CPP average voyage mileage',
)

Downloading distance-voyage-avg for period: 2021-01-01 00:00:00 to 2024-09-17 11:21:03.415340
