# KPLER API - oil in transit

In [8]:
import requests
import pandas as pd
import io
from numpy import int64
from datetime import date
from dateutil.relativedelta import relativedelta

### Authentication

In [9]:
headers = {"Authorization": "Basic ZGlvbnlzaWEubHluZ29wb3Vsb3VAaWVhLm9yZzpHbHJ6dDB6bg=="}

In [3]:
# Returns a list with the the time range of equally spaced time points (every 31 days)
def date_time(starting_date,ending_date):
    date_time_index=pd.date_range(start=starting_date, end=ending_date, freq='1M')
    date_range=date_time_index.tolist()
    return date_range
    # print(type(date_range))
    # print ( date_range)

In [32]:
# Get a data frame with vessels data for a specific period of time

def loaded_vessels(date_range):
    vessel_data = []
    for n in range(len(date_range)):
        end_date=date_range[n].date()
        print(f"Iteration {n}: {end_date}")
        endpoint = "https://api.kpler.com/v1/fleet-metrics/vessels"
        payload={}
        params_vesseldata = {"metric": "loaded_vessels",
                         "zones":"world",
                         "floatingStorageDurationMin":"12", 
                         "floatingStorageDurationMax":"Inf", 
                         "period": "daily", 
                         "unit":"kb",
                         "endDate":end_date }
        

        response_vessel_data = requests.get(endpoint, params=params_vesseldata, headers=headers, data=payload, verify=False)
        response_vessel_data_content = response_vessel_data.content
        vessel_data_df = pd.read_csv(io.StringIO(response_vessel_data_content.decode('utf-8')), sep=';', parse_dates=[1], infer_datetime_format=True)
        vessel_data.append(vessel_data_df)
    vessel_data = pd.concat(vessel_data)
    return vessel_data


### Get Historical data (2016- previous year)

In [5]:
today = date.today()
starting_date ='2016-01-01'
ending_date = today - relativedelta(months=12)

In [6]:
date_range_his=date_time(starting_date,ending_date)
loaded_vessels_hist=loaded_vessels(date_range_his)
loaded_vessels_hist.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\oil_transit_hist.csv')

2016-01-31




KeyboardInterrupt: 

### Get Current data ( lastyear - today)Â¶

In [7]:
today = date.today()
starting_date = today - relativedelta(months=12)
ending_date = today

In [8]:
date_range=date_time(starting_date,ending_date)
loaded_vessels=loaded_vessels(date_range)
loaded_vessels.to_csv(r'C:\Users\LYNGOPOULOU_D\PycharmProjects\scraper\filestore\oil_transit.csv')

2021-08-31




2021-09-30




## Test the scraper

In [4]:
cd ..

C:\Users\ROSA_L\PycharmProjects\scraper


In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
import logging
logger = logging.getLogger()
logging.basicConfig(level=logging.DEBUG)
logger.setLevel(logging.DEBUG)

In [21]:
from scraper.core import factory

#job = factory.get_scraper_job('com_kpler', 'oil_in_transit', full_load=True)
job = factory.get_scraper_job('com_kpler', 'oil_in_transit')
#job.run()
# job.get_sources()
job.run(download=False)

DEBUG:scraper.core.factory:Loading module scraper.jobs.com_kpler.oil_in_transit
DEBUG:scraper.core.factory:Getting class OilInTransitJob
INFO:scraper.core.job:Temporary table name: #oil_in_transit_temp, final table name: oil_in_transit_data
DEBUG:scraper.jobs.com_kpler.oil_in_transit:Assuming parallel_download=False as default to avoid Kpler overload.
INFO:scraper.jobs.com_kpler.oil_in_transit:Getting sources...
INFO:scraper.jobs.com_kpler.oil_in_transit:6 sources to load.
INFO:scraper.core.utils:download_and_get_checksum: 84.5029354095459 ms
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_transit_2021-10-31
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): vipenta.iea.org:8000
DEBUG:urllib3.connectionpool:http://vipenta.iea.org:8000 "GET /dimension/source?code=com_kpler_transit_2021-10-31 HTTP/1.1" 200 825
DEBUG:scraper.core.job:rm_sources_up_to_date: processing com_kpler_transit_2021-09-30
DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): 

In [16]:
from scraper.settings import FILE_STORE_PATH

for file in FILE_STORE_PATH.glob('com_kpler_transit_data*'):
    print(file)
    name = file.name
    suffix = name.split('data')[1]
    new_name = f'com_kpler_transit{suffix}'
    print(f'Renaming {name} to {new_name}')
    file.rename(new_name)
    print('done!')
    

C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-01-31.csv
Renaming com_kpler_transit_data_2016-01-31.csv to com_kpler_transit_2016-01-31.csv
done!
C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-02-29.csv
Renaming com_kpler_transit_data_2016-02-29.csv to com_kpler_transit_2016-02-29.csv
done!
C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-03-31.csv
Renaming com_kpler_transit_data_2016-03-31.csv to com_kpler_transit_2016-03-31.csv
done!
C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-04-30.csv
Renaming com_kpler_transit_data_2016-04-30.csv to com_kpler_transit_2016-04-30.csv
done!
C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-05-31.csv
Renaming com_kpler_transit_data_2016-05-31.csv to com_kpler_transit_2016-05-31.csv
done!
C:\Users\ROSA_L\PycharmProjects\scraper\filestore\com_kpler_transit_data_2016-06-30.csv
Renaming com_kpler_transit_

In [7]:

url = "http://api-oil.kpler.com/v1/fleet-metrics/vessels?metric=loaded_vessels&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&unit=kb&endDate=2020-10-31"

import requests
import pandas as pd

print('Testing method 1:')
r = requests.get(url, headers=headers)

r.raise_for_status()

from pathlib import Path

f = Path('test1.csv')

with f.open(mode='wb') as fp:
    fp.write(r.content)
    
print('Testing method 2:')    
r = requests.get(url, headers=headers)

r.raise_for_status()

f2 = Path('test2.csv')
f2.write_bytes(r.content)

chunk_size = 2048
print('Testing method 3:')  
r = requests.get(url, headers=headers, stream=True)
f3 = Path('test3.csv')
with f3.open(mode='wb') as fd:
    print('reading chunk...')
    for chunk in r.iter_content(chunk_size):
        fd.write(chunk)


DEBUG:urllib3.connectionpool:Starting new HTTP connection (1): api-oil.kpler.com:80
DEBUG:urllib3.connectionpool:http://api-oil.kpler.com:80 "GET /v1/fleet-metrics/vessels?metric=loaded_vessels&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&unit=kb&endDate=2021-01-31 HTTP/1.1" 301 134
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api-oil.kpler.com:443


Testing method 1:
Testing method 2:
Testing method 3:
reading chunk...


In [8]:
df1 = pd.read_csv(f, sep=';')
print(f"file 1: {f} \n {df1['Family'].drop_duplicates()}")
df2 = pd.read_csv(f2, sep=';')
print(f"file 2: {f2}\n {df2['Family'].drop_duplicates()}")
df3 = pd.read_csv(f3, sep=';')
print(f"file 3: {f3}\n {df3['Family'].drop_duplicates()}")

file 1: test1.csv 
 0    Dirty
2      DPP
Name: Family, dtype: object
file 2: test2.csv
 0    Dirty
2      DPP
Name: Family, dtype: object
file 3: test3.csv
 0    Dirty
2      DPP
Name: Family, dtype: object


In [9]:
df1 = pd.read_csv(f, sep=';')
print(f"file 1: {f} \n {df1['Product'].drop_duplicates()}")
df2 = pd.read_csv(f2, sep=';')
print(f"file 2: {f2}\n {df2['Product'].drop_duplicates()}")
df3 = pd.read_csv(f3, sep=';')
print(f"file 3: {f3}\n {df3['Product'].drop_duplicates()}")

file 1: test1.csv 
 0                  Crude
2        Bitumen/Asphalt
4                     FO
6                    NaN
61            Condensate
101                  VGO
151                 SRFO
515                 CBFS
1357              Slurry
13704       Specialities
Name: Product, dtype: object
file 2: test2.csv
 0                  Crude
2        Bitumen/Asphalt
4                     FO
6                    NaN
61            Condensate
101                  VGO
151                 SRFO
515                 CBFS
1357              Slurry
13704       Specialities
Name: Product, dtype: object
file 3: test3.csv
 0                  Crude
2        Bitumen/Asphalt
4                     FO
6                    NaN
61            Condensate
101                  VGO
151                 SRFO
515                 CBFS
1357              Slurry
13704       Specialities
Name: Product, dtype: object


In [7]:
# test inline params x params parameter
import requests
from pathlib import Path


headers = {"Authorization": "Basic ZGlvbnlzaWEubHluZ29wb3Vsb3VAaWVhLm9yZzpHbHJ6dDB6bg=="}
end_date = "2020-10-31"
base_url = "http://api-oil.kpler.com/v1/fleet-metrics/vessels"

params_vesseldata = {"metric": "loaded_vessels",
                     "zones":"world",
                     "floatingStorageDurationMin":"12", 
                     "floatingStorageDurationMax":"Inf", 
                     "period": "daily", 
                     "unit":"kb",
                     "endDate": {end_date} }

print(params_vesseldata)

r = requests.get(base_url, params=params_vesseldata, headers=headers)
file1 = Path('get_params.csv')
r.raise_for_status()
file1.write_bytes(r.content)
print(f'{file1}: {len(r.content)} bytes written.')
    
url = ("http://api-oil.kpler.com/v1/fleet-metrics/vessels?"
       "metric=loaded_vessels&"
       "zones=world&"
       "floatingStorageDurationMin=12&"
       "floatingStorageDurationMax=Inf&"
       "period=daily&"
       "unit=kb&endDate=2020-10-31")


r = requests.get(url, headers=headers)
file2 = Path('get_inline.csv')
r.raise_for_status()
file2.write_bytes(r.content)
print(f'{file2}: {len(r.content)} bytes written.')



{'metric': 'loaded_vessels', 'zones': 'world', 'floatingStorageDurationMin': '12', 'floatingStorageDurationMax': 'Inf', 'period': 'daily', 'unit': 'kb', 'endDate': {'2020-10-31'}}
get_params.csv: 6222631 bytes written.
get_inline.csv: 6222631 bytes written.


In [34]:
from datetime import datetime

date = datetime(2021, 10, 31)

df = loaded_vessels([date])

print(f'{file2}: {len(df)} rows written.')

df.to_csv('file_load_vessels.csv', index=False)

Iteration 0: 2021-10-31




test_text.csv: 118258 rows written.


In [16]:
# Test using decode('utf-8') and text
url = ("http://api-oil.kpler.com/v1/fleet-metrics/vessels?"
       "metric=loaded_vessels&"
       "zones=world&"
       "floatingStorageDurationMin=12&"
       "floatingStorageDurationMax=Inf&"
       "period=daily&"
       "unit=kb&endDate=2020-10-31")


r = requests.get(url, headers=headers)
file2 = Path('test_binary.csv')
r.raise_for_status()
print(f'Encoding: {r.encoding}')
print(f'Apparent Encoding: {r.apparent_encoding}')
file2.write_bytes(r.content)
print(f'{file2}: {len(r.content)} bytes written.')


file2 = Path('test_text.csv')
r.raise_for_status()
print(f'Encoding: {r.encoding}')
print(f'Apparent Encoding: {r.apparent_encoding}')
file2.write_text(r.text)
print(f'{file2}: {len(r.text)} bytes written.')

file3 = Path('test_encode.csv')
r.raise_for_status()
print(f'Encoding: {r.encoding}')
print(f'Apparent Encoding: {r.apparent_encoding}')
file3.write_text(r.content.decode('utf-8'))
print(f'{file3}: {len(r.text)} bytes written.')


Encoding: None
Apparent Encoding: ascii
test_binary.csv: 6222600 bytes written.
Encoding: None
Apparent Encoding: ascii
test_text.csv: 6222600 bytes written.
Encoding: None
Apparent Encoding: ascii
test_encode.csv: 6222600 bytes written.


In [22]:
import io
file3 = Path('binary_encode.csv')
r.raise_for_status()
print(f'Encoding: {r.encoding}')
print(f'Apparent Encoding: {r.apparent_encoding}')

decoded = 

file3.write_text(io.StringIO(r.content.decode('utf-8')))
print(f'{file3}: {len(r.text)} bytes written.')

Encoding: None
Apparent Encoding: ascii


TypeError: data must be str, not StringIO

In [24]:
import io
file3 = Path('binary_encode.csv')
r.raise_for_status()
print(f'Encoding: {r.encoding}')
print(f'Apparent Encoding: {r.apparent_encoding}')

file3.write_text(io.StringIO(r.content.decode('utf-8')).getvalue())
print(f'{file3}: {len(r.text)} bytes written.')

Encoding: None
Apparent Encoding: ascii
binary_encode.csv: 6222600 bytes written.


In [25]:
# with pandas

vessel_data_df = pd.read_csv(io.StringIO(r.content.decode('utf-8')), sep=';', parse_dates=[1], infer_datetime_format=True)

In [27]:
vessel_data_df['Family'].drop_duplicates()

0    Dirty
2      DPP
Name: Family, dtype: object

In [62]:
import logging
logger = logging.getLogger()
logging.basicConfig(level=logging.DEBUG)

# url = ("https://api-oil.kpler.com/v1/fleet-metrics/vessels?"
url = ("https://api.kpler.com/v1/fleet-metrics/vessels?"
       "metric=loaded_vessels&"
       "zones=world&"
       "floatingStorageDurationMin=12&"
       "floatingStorageDurationMax=Inf&"
       "period=daily&"
       "unit=kb&endDate=2020-10-31")


end_date = datetime(2021, 10, 31).date()
#end_date=date_range[n].date()
print(f"Date: {end_date}")
endpoint = "https://api.kpler.com/v1/fleet-metrics/vessels"
payload={}
params_vesseldata = {"metric": "loaded_vessels",
                 "zones":"world",
                 "floatingStorageDurationMin":"12", 
                 "floatingStorageDurationMax":"Inf", 
                 "period": "daily", 
                 "unit":"kb",
                 "endDate":end_date}

# This is NOK
#r = requests.get(url, headers=headers, verify=False)

# This one is OK
print('This works:')
r = requests.get(endpoint, params=params_vesseldata, headers=headers, data=payload, verify=False)
r.raise_for_status()
df = pd.read_csv(io.StringIO(r.text), sep=';', encoding="utf-8", parse_dates=[1], infer_datetime_format=True)

display(df['Family'].drop_duplicates())

# This doesn't work
print('This does not work:')
r = requests.get(url, headers=headers, data=payload, verify=False)
r.raise_for_status()
df = pd.read_csv(io.StringIO(r.text), sep=';', encoding="utf-8", parse_dates=[1], infer_datetime_format=True)

display(df['Family'].drop_duplicates())




DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.kpler.com:443


Date: 2021-10-31
This works:


DEBUG:urllib3.connectionpool:https://api.kpler.com:443 "GET /v1/fleet-metrics/vessels?metric=loaded_vessels&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&unit=kb&endDate=2021-10-31 HTTP/1.1" 200 None


0                  Dirty
1                    DPP
2     Middle Distillates
9             Light Ends
11                 Clean
12                   NPC
Name: Family, dtype: object

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.kpler.com:443


This does not work:


DEBUG:urllib3.connectionpool:https://api.kpler.com:443 "GET /v1/fleet-metrics/vessels?metric=loaded_vessels&zones=world&floatingStorageDurationMin=12&floatingStorageDurationMax=Inf&period=daily&unit=kb&endDate=2020-10-31 HTTP/1.1" 200 None


0                  Dirty
2                    DPP
7     Middle Distillates
10            Light Ends
12                   NPC
14                 Clean
Name: Family, dtype: object

## So the culprit for those differences was... the endpoint!

We should use https://api.kpler.com instead https://api-oil.kpler.com, unless we want only oil data.

In [50]:
from pathlib import Path

file = Path('binary_encode_test.csv')
print(f"{len(r.content)} bytes written.")
file.write_bytes(r.content)


13250826 bytes written.


13250826

In [51]:
pd.read_csv(file, sep=';')['Family'].drop_duplicates()

0                  Dirty
1                    DPP
2     Middle Distillates
9             Light Ends
11                 Clean
12                   NPC
Name: Family, dtype: object