## Import libraries
The first step is to import the libraries required to query the data. To do this, run the cell below.

In [1]:
from vortexasdk import CargoMovements, CargoTimeSeries, OnshoreInventoriesTimeseries, Geographies, Products, StorageTerminals
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import time
import plotly.express as px
import dateutil.relativedelta

## Search for IDs
In the Vortexa SDK, we cannot refer to products, vessels or geographies by name. Each product, vessel or geography in our data has its own unique ID. These IDs are used to refer to the products, vessels or geographies in our database when making queries.

Once you have found the ID, you can copy it and assign it to a variable. This way, you can refer to the product or geography by the name you have given it. Some examples are shown below.

In [2]:
# Search for geography IDs (uncomment the lines below to run a search).
# Each search prints the full, untruncated result table so the ID can be copied.

# full_length_df = Geographies().search(term=["Wider"]).to_df()
# print(full_length_df.to_string(index=False))

# full_length_df = StorageTerminals().search(term=["Cushing"]).to_df()
# print(full_length_df.to_string(index=False))

# Geography IDs stored under readable names.
# NOTE(review): the names are the only documentation of what each ID refers to - confirm against a search if in doubt.

us_gulf='e0d68b7a4ac37c97e3387471644d8b5c2a4be16a50092676ec3bec08408a2ebb'
united_states='2d92cc08f22524dba59f6a7e340f132a9da0ce9573cca968eb8e3752ef17a963'
europe='f39d455f5d38907394d6da3a91da4e391f9a34bd6a17e826d6042761067e88f4'
wider_arabian_sea='8fc803bba1e888808f954f5fc025b489e729a23d6f957f8991b4d71c572c3fd1'
china='934c47f36c16a58d68ef5e007e62a23f5f036ee3f3d1f5f85a48c572b90ad8b2'
nwe='c5460c5a4ece7b64ffc0cc280aeade60d364423e8e062ef4a11494352fe6fdbb'
india='70425373a1836d6d0390dc6fef838a468717fba7776d66065a5043488f9f041b'
gom='37c8c4eeb730d1cd41f90ca6bf95c923222b0734b1b0336a475acce821f87ebd'
cushing='cde783a902c7837b814dfa5988ed74fb14841c2999d6984952c7cbfc543d5073'
# Unlike the entries above, wider_europe is a list of two IDs rather than a single ID string.
wider_europe=['7f97abda5311b51c2463779249f9a9b62478ed87f8e24ce04dddf58fe077bae5', 'e21ed02b282c14bec4fec50cec6ce09ee4795f0dc332c4cf6d787f9bc2b2bad6']

# Search for product IDs (uncomment the lines below to run a search).

# product_search = Products().search(term=['Gasoline']).to_df()
# print (product_search.to_string(index=False))

# Product IDs stored under readable names.
crude='54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
cpp='b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg='364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
gasoline='9256907ba7e4ed11ff03aa297a7e62e14484ce5a85c8118c7495b9120ad0e268'

# Main functions
Run the cell below to define the required functions.

In [3]:
# Helper function to make time blocks of 4 years from a specified start date
def get_search_blocks(start_y, start_m, start_d, today):
    """
    Split the period from a start date up to `today` into consecutive 4-year blocks.

    Vortexa's API maximum search is 4 years and starts in 2016, so longer
    periods have to be queried block by block.

    Args:
        start_y, start_m, start_d: Year, month and day of the first block's start.
        today: datetime at which the last block is cut off.

    Returns:
        A list of (start, end) datetime tuples in chronological order. Each block
        ends one second before the next one starts; the last block ends at `today`
        (or at its natural 4-year end if that is not later than `today`).

    Raises:
        ValueError: if the start date is later than `today`.
    """

    four_years = dateutil.relativedelta.relativedelta(years=4)
    one_second = dateutil.relativedelta.relativedelta(seconds=1)

    start = datetime(start_y, start_m, start_d)

    if start > today:
        raise ValueError(f"Start date {start} is after the end of the search period {today}")

    blocks = []

    # Looping on the block START (rather than the previous block's end) guarantees
    # that no block is ever emitted whose start lies after `today`.
    while start <= today:
        end = min(start + four_years - one_second, today)
        blocks.append((start, end))
        start += four_years

    return blocks

# Function for getting inventories
def inventories_time_series(start_y, start_m, start_d, locs, locs_excl, company, freq, unit, option, storage_type, confidence):
    """
    Download an onshore inventories time series from a start date up to now.

    The period is split with get_search_blocks and one
    OnshoreInventoriesTimeseries search is run per block; the results are
    stacked into a single data frame and enriched with date helper columns.

    Args:
        start_y, start_m, start_d: Year, month and day of the first data point requested.
        locs: Passed to the search as `location_ids`.
        locs_excl: Passed to the search as `exclude_location_ids`.
        company: Passed to the search as `corporate_entity_ids`.
        freq: Passed to the search as `timeseries_frequency`.
        unit: Passed to the search as `timeseries_unit`.
        option: Passed to the search as `timeseries_unit_operator` (fill or capacity).
        storage_type: Passed to the search as `storage_types`.
        confidence: If truthy, the search is restricted to
            crude_confidence=['confirmed', 'probable']; otherwise no confidence filter is sent.

    Returns:
        DataFrame with columns 'date', 'week_end_timestamp', 'string_date',
        'dd_mmm', 'week_number', 'month', 'year', 'value' and a fresh 0..n-1 index.
    """

    today = datetime.today()
    search_blocks = get_search_blocks(start_y, start_m, start_d, today)

    conf = ['confirmed', 'probable'] if confidence else None

    # Collect one frame per block and concatenate once at the end; seeding
    # pd.concat with an empty DataFrame is deprecated in recent pandas.
    frames = []

    for time_min, time_max in search_blocks:

        print(f"Downloading inventories for period: {time_min} to {time_max}")

        result = OnshoreInventoriesTimeseries().search(
            crude_confidence=conf,
            time_min=time_min,
            time_max=time_max,
            corporate_entity_ids=company,
            location_ids=locs,
            exclude_location_ids=locs_excl,
            timeseries_unit=unit,
            timeseries_frequency=freq,
            storage_types=storage_type,
            timeseries_unit_operator=option, # fill or capacity
            ).to_df(columns='all')

        # Keep only the timestamp ('key') and the measurement, renaming the former to 'date'
        frames.append(result[['key', 'value']].rename(columns={'key': 'date'}))

    if frames:
        result_dfs = pd.concat(frames)
    else:
        result_dfs = pd.DataFrame(columns=['date', 'value'])

    # Reformat dates and add the helper columns used by the seasonal functions
    result_dfs['date'] = pd.to_datetime(result_dfs['date'])
    result_dfs['string_date'] = result_dfs['date'].dt.strftime('%d-%m-%Y')
    result_dfs['dd_mmm'] = result_dfs['date'].dt.strftime('%d-%b')
    result_dfs['month'] = result_dfs['date'].dt.strftime('%b')
    result_dfs['week_end_timestamp'] = result_dfs['date'] + pd.offsets.Week(weekday=6)  # Sunday-anchored weekly offset
    result_dfs['week_number'] = result_dfs['date'].dt.isocalendar().week
    result_dfs['year'] = result_dfs['date'].dt.year.astype('int64')

    # NOTE(review): missing values are replaced by 0, which will show up as real
    # zeros in any min/average computed downstream - confirm this is intended.
    result_dfs = result_dfs.fillna(0)

    result_dfs = result_dfs[['date', 'week_end_timestamp', 'string_date', 'dd_mmm', 'week_number', 'month', 'year', 'value']]

    return result_dfs.reset_index(drop=True)


# function for obtaining seasonal chart data
def seasonal_charts(data, freq, start_y):
    """
    Reshape a time series into a seasonal (year-over-year) table.

    For every period of a calendar year (each day, or each month) the minimum,
    range (max - min) and average are computed over the years start_y..last year,
    alongside last year's value and the current year's value for that period.

    Args:
        data: DataFrame with at least the columns 'dd_mmm', 'month', 'year' and 'value'.
        freq: 'day' for daily statistics (keyed on 'dd_mmm') or 'month' for
            monthly statistics (keyed on 'month').
        start_y: First year included in the min / range / average statistics.

    Returns:
        DataFrame with one row per period and the columns
        'Date', 'Min.', 'Range {start_y}-{last year}', 'Average {start_y}-{last year}',
        'Last year', 'Current year'. Periods without history hold NaN in the
        statistics and 'Last year' columns; periods with no current-year data
        yet hold an empty string in 'Current year'.

    Raises:
        ValueError: if freq is neither 'day' nor 'month'.
    """

    # Column identifying a period within the year, its label format, and the
    # pandas frequency used to enumerate the periods of a reference year.
    if freq == 'day':
        key_col, label_format, range_freq = 'dd_mmm', '%d-%b', 'D'
    elif freq == 'month':
        # 'MS' (month start) yields the same 12 month labels as month-end
        # without relying on the deprecated 'M' alias.
        key_col, label_format, range_freq = 'month', '%b', 'MS'
    else:
        raise ValueError(f"Unsupported freq {freq!r}: expected 'day' or 'month'")

    # Remove leap days so every year has the same set of daily periods
    df = data[data['dd_mmm'] != '29-Feb'].reset_index(drop=True)

    # Set constants
    this_year = datetime.today().year
    last_year = this_year - 1
    stats_start_y = start_y
    stats_end_y = last_year

    # Define stats calculating data set and current year dataset
    stats_df = df[(df['year'] >= stats_start_y) & (df['year'] <= stats_end_y)]
    this_year_df = df[df['year'] == this_year]

    # Period labels, generated from a non-leap reference year
    labels = pd.date_range(start=datetime(2023, 1, 1), end=datetime(2023, 12, 31), freq=range_freq).strftime(label_format)

    # Per-period statistics; reindexing leaves NaN for periods without any history
    grouped = stats_df.groupby(key_col)['value']
    mins = grouped.min().reindex(labels)
    maxs = grouped.max().reindex(labels)
    avgs = grouped.mean().reindex(labels)

    def first_value_per_period(frame):
        # First value found for each period, indexed by the period label
        return frame.drop_duplicates(subset=key_col).set_index(key_col)['value']

    # Last year's values are matched to periods by label (not by row position),
    # so a partial previous year still lands on the right dates.
    last_year_vals = first_value_per_period(stats_df[stats_df['year'] == last_year]).reindex(labels)

    # Current year's values; periods with no data yet are left blank
    this_year_lookup = first_value_per_period(this_year_df)
    this_year_vals = [this_year_lookup[label] if label in this_year_lookup.index else '' for label in labels]

    range_col = f'Range {stats_start_y}-{stats_end_y}'
    average_col = f'Average {stats_start_y}-{stats_end_y}'

    # Compile in the column order expected by the plotting function
    seasonal_df = pd.DataFrame({
        'Date': list(labels),
        'Min.': mins.to_numpy(),
        range_col: (maxs - mins).to_numpy(),
        average_col: avgs.to_numpy(),
        'Last year': last_year_vals.to_numpy(),
        'Current year': this_year_vals,
    })

    return seasonal_df

# function to create a moving average
def moving_average(data, period, option):
    """
    Smooth a data frame with a trailing moving average.

    The rolling window uses min_periods=1, so the first rows are averaged over
    however many observations are available instead of being NaN.

    Args:
        data: Input DataFrame; it is not modified.
        period: Rolling window size, in rows.
        option: 'multiple' averages every column except the first one, which is
            carried over unchanged. 'single' averages only the 'value' column
            and keeps all other columns unchanged, with 'value' placed last.

    Returns:
        A new DataFrame in which the averaged column(s) replace the originals.

    Raises:
        ValueError: if option is neither 'multiple' nor 'single'.
    """

    if option == 'multiple':

        # calculate moving avg of everything but the leading (date) column
        smoothed = data.iloc[:, 1:].rolling(window=period, min_periods=1).mean()

        # put the leading column back in front of the averages
        moving_avg_df = pd.concat([data.iloc[:, 0:1], smoothed], axis=1)
        moving_avg_df.columns = list(data.columns)

    elif option == 'single':

        # calculate moving avg of the value column only
        smoothed = data['value'].rolling(window=period, min_periods=1).mean()

        # keep all columns except value, then append the averaged value column
        moving_avg_df = data.drop(columns='value').assign(value=smoothed)

    else:
        raise ValueError(f"Unsupported option {option!r}: expected 'multiple' or 'single'")

    return moving_avg_df


# Function to plot seasonal chart

def plot_seasonal(y_min, y_max, data, title):
    """
    Draw a seasonal chart: stacked areas for the second and third columns of
    `data`, with the fourth to sixth columns overlaid as lines.

    Args:
        y_min, y_max: Lower and upper bound of the y-axis.
        data: DataFrame with a 'Date' column used as the x-axis. Columns are
            addressed by position, and the fifth and sixth columns must be named
            'Last year' and 'Current year' for their line colours to resolve.
        title: Chart title.

    The figure is displayed with fig.show(); nothing is returned.
    """
    column_names = list(data.columns)
    range_col, average_col = column_names[2], column_names[3]

    # The 'Min.' area is white so that only the range band stacked on top of it stands out
    palette = {
        'Min.': 'white',
        range_col: 'lightblue',
        average_col: 'blue',
        'Last year': 'yellow',
        'Current year': 'red',
    }

    fig = px.area(
        data,
        x='Date',
        y=column_names[1:3],
        title=title,
        color_discrete_map=palette,
    )

    # Overlay Average, Last year and Current year as line traces
    for line_name in column_names[3:6]:
        fig.add_scatter(
            x=data['Date'],
            y=data[line_name],
            mode='lines',
            name=line_name,
            line={'color': palette[line_name]},
        )

    # Fix the visible y-axis window
    fig.update_yaxes(range=[y_min, y_max])

    fig.show()

# combined function for seasonal onshore crude inventories
def complete_seasonal_inventories(start_y, start_m, start_d, locs, locs_excl, company, freq, unit, option, ma_period, storage_type, confidence, title, plot, y_min, y_max):
    """
    Download inventories, optionally smooth them, convert them to seasonal
    format and optionally plot the result.

    Args:
        start_y, start_m, start_d: Start date of the download; start_y is also
            the first year of the seasonal statistics.
        locs, locs_excl, company, freq, unit, option, storage_type, confidence:
            Forwarded unchanged to inventories_time_series.
        ma_period: Moving-average window in rows, or None for no smoothing.
            When set, ' ({ma_period}-{freq} MA)' is appended to the title.
        title: Chart title.
        plot: If truthy, the seasonal chart is drawn with plot_seasonal.
        y_min, y_max: y-axis bounds of the chart.

    Returns:
        The seasonal DataFrame produced by seasonal_charts, so that callers
        assigning the result get the data behind the chart.
    """

    # Query inventories data
    inventories = inventories_time_series(start_y=start_y, start_m=start_m, start_d=start_d,
                                          locs=locs, locs_excl=locs_excl, company=company,
                                          freq=freq, unit=unit,
                                          option=option, storage_type=storage_type, confidence=confidence)

    if ma_period is not None:

        # Calculate moving averages
        data = moving_average(data=inventories, period=ma_period, option='single')

        title = title + f' ({ma_period}-{freq} MA)'

    else:
        data = inventories

    # Convert data to seasonal format
    inventories_seasonal = seasonal_charts(data=data, freq=freq, start_y=start_y)

    if plot:
        plot_seasonal(y_min=y_min, y_max=y_max,
                      data=inventories_seasonal,
                      title=title)

    return inventories_seasonal

# Total US onshore crude inventories (seasonal)

In [6]:
# Inventories: daily series for the United States from 1 Jan 2020, no moving average, plotted
us_inv = complete_seasonal_inventories(
    start_y=2020,
    start_m=1,
    start_d=1,
    locs=[united_states],
    locs_excl=None,
    company=None,
    freq='day',
    unit='b',
    option='fill',
    ma_period=None,
    storage_type=None,
    confidence=True,
    title='Seasonal US onshore crude inventories',
    plot=True,
    y_min=250_000_000,
    y_max=450_000_000,
)




Downloading inventories for period: 2020-01-01 00:00:00 to 2023-12-31 23:59:59
Downloading inventories for period: 2024-01-01 00:00:00 to 2024-09-17 10:22:05.201146
