## KPLER API - Floating Storage

In [14]:
import requests
import pandas as pd
import io
from numpy import int64
from datetime import date
from dateutil.relativedelta import relativedelta

## Floating storage data by continent & vessel type

### Authentication

In [2]:
import os
import warnings

# The Basic-auth token is read from the environment so that no credential is stored in the notebook.
# KPLER_BASIC_AUTH must contain the base64 encoding of "user:password".
# NOTE(review): the credential that used to be hard-coded here should be treated as exposed and rotated.
_kpler_basic_auth = os.environ.get("KPLER_BASIC_AUTH", "")
if not _kpler_basic_auth:
    warnings.warn("KPLER_BASIC_AUTH is not set; requests to the Kpler API will not be authenticated.")
headers = {"Authorization": f"Basic {_kpler_basic_auth}"}


###  Get vessels data from '/v1/fleet-metrics/vessels'

For floating storage, this endpoint returns the list of vessels with their cargo and their location for each day of a given period (at most 31 days).

parameters: 
- "metric": "floating_storage",
- "zones":"world",
- "floatingStorageDurationMin":"12", 
- "floatingStorageDurationMax":"Inf", 
- "period": "daily", 
- "products":"crude/co", 
- "unit":"kb"


Each query can retrieve only 31 days of data, so to get the full time series starting from 2016 we need to loop over the time range.

In [8]:
from dateutil.relativedelta import relativedelta
from datetime import date
import pandas as pd

# Scratch check of the month-end dates that will be used as query end dates.
today = date.today()
last_year_date = today - relativedelta(months=12)
starting_date = '2016-01-01'

# Same value as last_year_date (twelve months before today).
ending_date = last_year_date
# Last calendar day of the current month: first day of the month, plus one month, minus one day.
end_of_month = date(today.year, today.month, 1) + relativedelta(months=1, days=-1)

print(end_of_month)

# pd.offsets.MonthEnd() gives the same month-end frequency as the '1M' alias,
# which is deprecated in recent pandas releases.
pd.date_range(start=starting_date, end=end_of_month, freq=pd.offsets.MonthEnd()).tolist()

2021-10-31


[Timestamp('2016-01-31 00:00:00', freq='M'),
 Timestamp('2016-02-29 00:00:00', freq='M'),
 Timestamp('2016-03-31 00:00:00', freq='M'),
 Timestamp('2016-04-30 00:00:00', freq='M'),
 Timestamp('2016-05-31 00:00:00', freq='M'),
 Timestamp('2016-06-30 00:00:00', freq='M'),
 Timestamp('2016-07-31 00:00:00', freq='M'),
 Timestamp('2016-08-31 00:00:00', freq='M'),
 Timestamp('2016-09-30 00:00:00', freq='M'),
 Timestamp('2016-10-31 00:00:00', freq='M'),
 Timestamp('2016-11-30 00:00:00', freq='M'),
 Timestamp('2016-12-31 00:00:00', freq='M'),
 Timestamp('2017-01-31 00:00:00', freq='M'),
 Timestamp('2017-02-28 00:00:00', freq='M'),
 Timestamp('2017-03-31 00:00:00', freq='M'),
 Timestamp('2017-04-30 00:00:00', freq='M'),
 Timestamp('2017-05-31 00:00:00', freq='M'),
 Timestamp('2017-06-30 00:00:00', freq='M'),
 Timestamp('2017-07-31 00:00:00', freq='M'),
 Timestamp('2017-08-31 00:00:00', freq='M'),
 Timestamp('2017-09-30 00:00:00', freq='M'),
 Timestamp('2017-10-31 00:00:00', freq='M'),
 Timestamp

In [18]:

# Returns the list of calendar month-end dates inside a time range (one query end date per month)
def date_time(starting_date,ending_date):
    """Return every calendar month-end between two dates as a list of Timestamps.

    Parameters
    ----------
    starting_date, ending_date : str, datetime.date or anything accepted by
        ``pandas.date_range`` as ``start`` / ``end``.

    Returns
    -------
    list of pandas.Timestamp
        Month-end dates that fall inside ``[starting_date, ending_date]``.
        A month whose last day is after ``ending_date`` is not included, so the
        list is empty when the range contains no month-end.
    """
    # pd.offsets.MonthEnd() is the offset behind the '1M' alias; using the object
    # avoids the alias, which is deprecated in recent pandas releases.
    month_ends = pd.date_range(start=starting_date, end=ending_date, freq=pd.offsets.MonthEnd())
    return month_ends.tolist()
    
# Preview the month-end dates between starting_date and ending_date.
list(pd.date_range(start=starting_date, end=ending_date, freq='1M'))

[Timestamp('2016-01-31 00:00:00', freq='M'),
 Timestamp('2016-02-29 00:00:00', freq='M'),
 Timestamp('2016-03-31 00:00:00', freq='M'),
 Timestamp('2016-04-30 00:00:00', freq='M'),
 Timestamp('2016-05-31 00:00:00', freq='M'),
 Timestamp('2016-06-30 00:00:00', freq='M'),
 Timestamp('2016-07-31 00:00:00', freq='M'),
 Timestamp('2016-08-31 00:00:00', freq='M'),
 Timestamp('2016-09-30 00:00:00', freq='M'),
 Timestamp('2016-10-31 00:00:00', freq='M'),
 Timestamp('2016-11-30 00:00:00', freq='M'),
 Timestamp('2016-12-31 00:00:00', freq='M'),
 Timestamp('2017-01-31 00:00:00', freq='M'),
 Timestamp('2017-02-28 00:00:00', freq='M'),
 Timestamp('2017-03-31 00:00:00', freq='M'),
 Timestamp('2017-04-30 00:00:00', freq='M'),
 Timestamp('2017-05-31 00:00:00', freq='M'),
 Timestamp('2017-06-30 00:00:00', freq='M'),
 Timestamp('2017-07-31 00:00:00', freq='M'),
 Timestamp('2017-08-31 00:00:00', freq='M'),
 Timestamp('2017-09-30 00:00:00', freq='M'),
 Timestamp('2017-10-31 00:00:00', freq='M'),
 Timestamp

In [2]:
# Get a data frame with vessels data for a specific period of time

def vessel_data(date_range):
    """Download floating-storage vessel data for each end date and stack the results.

    One GET request is sent to the Kpler ``/v1/fleet-metrics/vessels`` endpoint
    per entry of ``date_range``, with that entry as ``endDate``. Each
    semicolon-separated CSV response is parsed into a DataFrame and all frames
    are concatenated. The notebook-level ``headers`` dict supplies the
    Authorization header.

    Parameters
    ----------
    date_range : sequence
        End dates to query; every item must provide a ``.date()`` method
        (e.g. ``pandas.Timestamp``), as returned by ``date_time``.

    Returns
    -------
    pandas.DataFrame
        The responses concatenated in request order (each response keeps its
        own row index), with the ``IMO`` column cast to int64.

    Raises
    ------
    ValueError
        If ``date_range`` is empty.
    requests.HTTPError
        If the API answers with an error status code.
    """
    if len(date_range) == 0:
        # pd.concat would fail with a less explicit ValueError further down.
        raise ValueError("date_range is empty: there is no end date to query")

    endpoint = "https://api.kpler.com/v1/fleet-metrics/vessels"
    frames = []
    for period_end in date_range:
        end_date = period_end.date()
        print(end_date)
        params_vesseldata = {"metric": "floating_storage",
                             "zones": "world",
                             "floatingStorageDurationMin": "12",
                             "floatingStorageDurationMax": "Inf",
                             "period": "daily",
                             "products": "crude/co",
                             "unit": "kb",
                             # A plain ISO string; the original passed a set literal ({end_date}).
                             "endDate": end_date.strftime('%Y-%m-%d')}

        # NOTE(review): verify=False disables TLS certificate verification; it is kept
        # because the notebook relied on it, but it should be removed if the
        # environment's CA bundle allows it.
        response = requests.get(endpoint, params=params_vesseldata, headers=headers, verify=False)
        # Fail loudly instead of parsing an error body as if it were CSV data.
        response.raise_for_status()
        frames.append(
            pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=';',
                        parse_dates=[1], infer_datetime_format=True)
        )

    combined = pd.concat(frames)
    combined['IMO'] = combined['IMO'].astype(str).astype(int64)
    return combined

    

In [12]:
from datetime import date

# Endpoint and query parameters for one floating-storage request ending today.
endpoint = "https://api.kpler.com/v1/fleet-metrics/vessels"
end_date = date.today()
params_vesseldata = dict(
    metric="floating_storage",
    zones="world",
    floatingStorageDurationMin="12",
    floatingStorageDurationMax="Inf",
    period="daily",
    products="crude/co",
    unit="kb",
    endDate=end_date.isoformat(),  # YYYY-MM-DD
)

def get_params(param_list):
    """Serialise a parameter mapping into a URL query string (without the leading '?').

    Keys and values are percent-encoded so that characters such as '&', '=' or
    spaces cannot corrupt the query string. '/' and ',' are left as they are,
    so values like "crude/co" keep their literal form.

    Parameters
    ----------
    param_list : mapping
        Parameter names mapped to their values, in the order they should appear.

    Returns
    -------
    str
        ``key=value`` pairs joined with '&'; an empty string for an empty mapping.
    """
    from urllib.parse import urlencode

    return urlencode(param_list, safe="/,")


# Full request URL: endpoint, '?', then the serialised parameters.
endpoint + "?" + get_params(params_vesseldata)
    

'https://api.kpler.com/v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-10-13'

In [6]:
# vessel_data.dtypes

In [7]:
# Convert IMO column from object to int64
##vessel_data.IMO.astype(int64)
#vessel_data['IMO']=vessel_data['IMO'].astype(str).astype(int64)
#vessel_data.dtypes

### Get vessel details  from '/v1/vessels'

The Vessels query returns a snapshot of the current status of the fleet, including details such as vessel status, IMO and vessel type.

parameters
- "columns": "vessel_status,vessel_type,vessel_imo"

In [8]:
# Get a data frame with vessels details, names & types
def vessel():
    """Download the vessel details table from the Kpler ``/v1/vessels`` endpoint.

    The request asks for the ``vessel_status``, ``vessel_type`` and
    ``vessel_imo`` columns and authenticates with the notebook-level
    ``headers`` dict instead of a credential hard-coded inside the function.

    Returns
    -------
    pandas.DataFrame
        The semicolon-separated CSV response parsed into a DataFrame.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    endpoint = "https://api.kpler.com/v1/vessels"
    params_vesseldetails = {"columns": "vessel_status,vessel_type,vessel_imo"}

    # NOTE(review): verify=False disables TLS certificate verification; it is kept
    # because the notebook relied on it, but it should be removed if possible.
    response = requests.get(endpoint, params=params_vesseldetails, headers=headers, verify=False)
    # Fail loudly instead of parsing an error body as if it were CSV data.
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=';')

In [9]:
#response_vessel.headers

In [10]:
#response_vessel_content = response_vessel.content
#vessel_df = pd.read_csv(io.StringIO(response_vessel_content.decode('utf-8')), sep=';')
#vessel_df.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\output_response_vessel_details.csv', index = False, header=True)

In [11]:
#vessel_df.dtypes

In [12]:
#result = pd.merge(vessel_data, vessel_df, how="left", on="IMO")

In [13]:
#result.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\output_reult.csv', index = False, header=True)

# Get Historical data (2016 - six months ago)

In [14]:
# Historical window: from the start of 2016 up to six months before today.
today = date.today()
starting_date = '2016-01-01'
last_year_date = today + relativedelta(months=-12)
ending_date = today + relativedelta(months=-6)

In [15]:
# Build the month-end dates of the historical window, download the vessel data for
# each of them (one request per date, the end date is printed as progress) and
# save the raw result as CSV.
# NOTE(review): the output path is an absolute, user-specific Windows path.
date_range_his=date_time(starting_date,ending_date)
vessel_data_hist=vessel_data(date_range_his)
vessel_data_hist.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\output_response_vessel_hist.csv')

2016-01-31




2016-02-29




2016-03-31




2016-04-30




2016-05-31




2016-06-30




2016-07-31




2016-08-31




2016-09-30




2016-10-31




2016-11-30




2016-12-31




2017-01-31




2017-02-28




2017-03-31




2017-04-30




2017-05-31




2017-06-30




2017-07-31




2017-08-31




2017-09-30




2017-10-31




2017-11-30




2017-12-31




2018-01-31




2018-02-28




2018-03-31




2018-04-30




2018-05-31




2018-06-30




2018-07-31




2018-08-31




2018-09-30




2018-10-31




2018-11-30




2018-12-31




2019-01-31




2019-02-28




2019-03-31




2019-04-30




2019-05-31




2019-06-30




2019-07-31




2019-08-31




2019-09-30




2019-10-31




2019-11-30




2019-12-31




2020-01-31




2020-02-29




2020-03-31




2020-04-30




2020-05-31




2020-06-30




2020-07-31




2020-08-31




2020-09-30




In [16]:
# Download the vessel details table; it is joined to the vessel data on "IMO" below.
vessel_df= vessel()



In [17]:
# Left join: keep every historical vessel-data row and attach the vessel details by IMO.
result_hist = vessel_data_hist.merge(vessel_df, how="left", on="IMO")

In [18]:
# Save the merged historical data without the row index.
# NOTE(review): the output path is an absolute, user-specific Windows path.
result_hist.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\floating_storage_hist.csv', index = False, header=True)

# Get Current data (last six months - today)

In [19]:
# Current window: the last six months up to today.
# Both bounds come from a single fresh date.today() call, so the cell no longer
# depends on the `today` variable left over from an earlier cell.
ending_date = date.today()
starting_date = ending_date - relativedelta(months=6)

In [20]:
# Build the month-end dates of the current window, download the vessel data for
# each of them and save the raw result as CSV.
# NOTE(review): the second line rebinds the name `vessel_data` from the function to
# the DataFrame it returns, so this cell cannot be run twice (and vessel_data()
# cannot be called again) without re-running the cell that defines the function.
# NOTE(review): the output path is an absolute, user-specific Windows path.
date_range=date_time(starting_date,ending_date)
vessel_data=vessel_data(date_range)
vessel_data.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\output_response_vessel.csv')

2020-10-31




2020-11-30




2020-12-31




2021-01-31




2021-02-28




2021-03-31




2021-04-30




2021-05-31




2021-06-30




2021-07-31




2021-08-31




2021-09-30




In [21]:
# Download the vessel details table again; it is joined to the current vessel data on "IMO" below.
vessel_df= vessel()



In [22]:
# Left join: keep every current vessel-data row and attach the vessel details by IMO.
result = vessel_data.merge(vessel_df, how="left", on="IMO")

In [23]:
# Save the merged current data without the row index.
# NOTE(review): the output path is an absolute, user-specific Windows path.
result.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\floating_storage.csv', index = False, header=True)

## Test the scraper

In [10]:
cd ..

C:\Users\ROSA_L\PycharmProjects\scraper


In [11]:
# Reload imported modules automatically so that edits to the project code are picked up.
%load_ext autoreload
%autoreload 2

In [12]:
import logging

# Show DEBUG messages from every logger: configure the root handler first,
# then force the root logger itself to DEBUG.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [26]:
from scraper.core import factory

# Full-load run of the Kpler floating-storage job (the log below reports 70 sources to load).
# NOTE(review): download=False presumably skips re-downloading the source files - confirm in scraper.core.job.
job = factory.get_scraper_job('com_kpler', 'floating_storage', full_load=True)
job.run(download=False)
# job.get_sources()

DEBUG:scraper.core.factory:Loading module scraper.jobs.com_kpler.floating_storage
DEBUG:scraper.core.factory:Getting class FloatingStorageJob
INFO:scraper.core.job:Temporary table name: #floating_storage_temp, final table name: floating_storage_data
INFO:scraper.jobs.com_kpler.floating_storage:Getting sources...
INFO:scraper.jobs.com_kpler.floating_storage:70 sources to load.
INFO:scraper.core.utils:download_and_get_checksum: 36.02027893066406 ms
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_fs_data_2021-10-31
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): vipenta.iea.org:8000
DEBUG:urllib3.connectionpool:http://vipenta.iea.org:8000 "GET /dimension/source?code=com_kpler_fs_data_2021-10-31 HTTP/1.1" 200 846
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_fs_data_2021-09-30
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): vipenta.iea.org:8000
DEBUG:urllib3.connectionpool:http://vipenta.iea.org:8000 "GET /dimension/sour

In [29]:
from scraper.core import factory

# Same job without full_load (the log below reports 6 sources to load instead of 70).
# NOTE(review): download=False presumably skips re-downloading the source files - confirm in scraper.core.job.
job = factory.get_scraper_job('com_kpler', 'floating_storage')
job.run(download=False)

DEBUG:scraper.core.factory:Loading module scraper.jobs.com_kpler.floating_storage
DEBUG:scraper.core.factory:Getting class FloatingStorageJob
INFO:scraper.core.job:Temporary table name: #floating_storage_temp, final table name: floating_storage_data
INFO:scraper.jobs.com_kpler.floating_storage:Getting sources...
INFO:scraper.jobs.com_kpler.floating_storage:6 sources to load.
INFO:scraper.core.utils:download_and_get_checksum: 4.000663757324219 ms
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_fs_data_2021-10-31
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): vipenta.iea.org:8000
DEBUG:urllib3.connectionpool:http://vipenta.iea.org:8000 "GET /dimension/source?code=com_kpler_fs_data_2021-10-31 HTTP/1.1" 200 869
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_fs_data_2021-09-30
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): vipenta.iea.org:8000
DEBUG:urllib3.connectionpool:http://vipenta.iea.org:8000 "GET /dimension/sourc

In [27]:
# Inspect the columns, non-null counts and dtypes of the data loaded by the job.
job.data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 187954 entries, 0 to 1605
Data columns (total 19 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Date                     187954 non-null  object        
 1   IMO                      187954 non-null  int64         
 2   Name                     187954 non-null  object        
 3   Dead Weight Tonnage      187954 non-null  int64         
 4   Quantity (kb)            187954 non-null  int64         
 5   Family                   187954 non-null  object        
 6   Group                    187954 non-null  object        
 7   Product                  159257 non-null  object        
 8   Grade                    154376 non-null  object        
 9   Current Continent        185235 non-null  object        
 10  Current Subcontinent     185214 non-null  object        
 11  Current Country          185298 non-null  object        
 12  Current Sea       

In [24]:
# List the distinct values of the 'Floating Since' column.
job.data['Floating Since'].drop_duplicates().tolist()

[Timestamp('2015-12-20 00:00:00+0000', tz='UTC'),
 Timestamp('2015-05-28 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-18 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-05 00:00:00+0000', tz='UTC'),
 Timestamp('2015-10-14 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-02 00:00:00+0000', tz='UTC'),
 Timestamp('2015-07-22 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-06 00:00:00+0000', tz='UTC'),
 Timestamp('2015-11-04 00:00:00+0000', tz='UTC'),
 Timestamp('2015-02-08 00:00:00+0000', tz='UTC'),
 Timestamp('2014-11-16 00:00:00+0000', tz='UTC'),
 Timestamp('2014-09-22 00:00:00+0000', tz='UTC'),
 Timestamp('2015-08-03 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-03 00:00:00+0000', tz='UTC'),
 Timestamp('2015-05-25 00:00:00+0000', tz='UTC'),
 Timestamp('2015-06-28 00:00:00+0000', tz='UTC'),
 Timestamp('2015-12-16 00:00:00+0000', tz='UTC'),
 Timestamp('2015-10-18 00:00:00+0000', tz='UTC'),
 Timestamp('2015-10-24 00:00:00+0000', tz='UTC'),
 Timestamp('2015-05-12 00:00:00+0000', tz='UTC'),


In [14]:
# Show the attributes of every source of the job (code, url, path, long_name in the output below).
[vars(s) for s in job.sources]

[{'code': 'com_kpler_fs_data_2021-05-31',
  'url': 'http://api-oil.kpler.com/v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-05-31',
  'path': 'com_kpler_fs_data_2021-05-31.csv',
  'long_name': 'KPLER - vessel data for 2021-05-31'},
 {'code': 'com_kpler_fs_data_2021-06-30',
  'url': 'http://api-oil.kpler.com/v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-06-30',
  'path': 'com_kpler_fs_data_2021-06-30.csv',
  'long_name': 'KPLER - vessel data for 2021-06-30'},
 {'code': 'com_kpler_fs_data_2021-07-31',
  'url': 'http://api-oil.kpler.com/v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-07-31',
  'path': 'com_kpler_fs

In [6]:
# Download the sources with parallel downloading switched off (the log below shows them fetched one after another).
job.download_and_get_checksum(download=True, parallel_download=False)

DEBUG:scraper.core.job:download: True, parallel download: False
DEBUG:scraper.jobs.com_kpler.floating_storage:Downloading com_kpler_fs_data_2021-05-31
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): proxy.iea.org:8080
DEBUG:urllib3.connectionpool:http://proxy.iea.org:8080 "GET http://api-oil.kpler.com/v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-05-31 HTTP/1.1" 301 134
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api-oil.kpler.com:443
DEBUG:urllib3.connectionpool:https://api-oil.kpler.com:443 "GET /v1/fleet-metrics/vessels?metric=floating_storage&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&products=crude/co&unit=kb&endDate=2021-05-31 HTTP/1.1" 200 None
DEBUG:scraper.jobs.com_kpler.floating_storage:Downloading com_kpler_fs_data_2021-06-30
DEBUG:urllib3.connectionpool:Starting new HTTP 

## Test vessel detail job

We've just separated this into a separate scraper.

In [8]:
from scraper.core import factory

# Full-load run of the separate vessel-details job (the log below reports 1 source to load).
job = factory.get_scraper_job('com_kpler', 'vessel_details', full_load=True)
job.run()

DEBUG:scraper.core.factory:Loading module scraper.jobs.com_kpler.vessel_details
DEBUG:scraper.core.factory:Getting class VesselDetailsJob
INFO:scraper.core.job:Temporary table name: #vessel_temp, final table name: vessel_data
INFO:scraper.jobs.com_kpler.vessel_details:Getting sources...
INFO:scraper.jobs.com_kpler.vessel_details:1 sources to load.
DEBUG:scraper.core.job:download: True, parallel download: True
DEBUG:scraper.jobs.com_kpler.vessel_details:Downloading com_kpler_fs_detail
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): proxy.iea.org:8080
DEBUG:urllib3.connectionpool:http://proxy.iea.org:8080 "GET http://api-oil.kpler.com/v1/vessels?columns=vessel_status,vessel_type,vessel_imo HTTP/1.1" 301 134
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api-oil.kpler.com:443
DEBUG:urllib3.connectionpool:https://api-oil.kpler.com:443 "GET /v1/vessels?columns=vessel_status,vessel_type,vessel_imo HTTP/1.1" 200 None
INFO:scraper.core.utils:download_and_get_che

In [5]:
from datetime import date

# Scratch check: the start date written as a datetime.date object instead of the string below.
# '2016-01-01'
date(2016, 1, 1)

datetime.date(2016, 1, 1)