In [1]:
import os
import json
import re
import pandas as pd
import numpy as np

In [2]:
# Notebooks have no __file__, so every path is anchored on the kernel's
# current working directory (the notebook is expected to be run from its own
# folder). The joined paths are deliberately left un-normalised.
DIRNAME = os.path.abspath("")

# --- inputs -----------------------------------------------------------------
THETIS_CSV_PATH = os.path.join(
    DIRNAME, "../files_computed/thetis_all_with_computed.csv"
)
WIKIDATA_SHIPS_CSV_PATH = os.path.join(
    DIRNAME, "../files_original/original.wikidata.ships.csv"
)
WIKIDATA_URLS_CSV_PATH = os.path.join(
    DIRNAME, "../files_original/original.wikidata.urls.csv"
)

# --- outputs (inside the website source tree) -------------------------------
WWW_DATA_SHIP_TYPES_PATH = os.path.join(
    DIRNAME, "../../www/views/doc/ship_types.11tydata.json"
)
WWW_SHIPS_DATA_PATH = os.path.join(
    DIRNAME, "../../www/views/ships"
)

In [3]:
# Load the THETIS export and collapse it to one row per IMO number.
df_thetis = (
    pd.read_csv(
        THETIS_CSV_PATH,
        usecols=["imo", "name", "ship_type"],
        dtype={"imo": str},  # read IMO numbers as strings, not integers
    )
    .replace({np.nan: None})  # missing values become None instead of NaN
    .groupby("imo")
    .agg("first")  # keep the first available value of each column per IMO
)

In [4]:
# How many THETIS ships fall under each ship type, most frequent first.
ship_types = df_thetis.ship_type.value_counts()
ship_types

Bulk carrier                  4871
Oil tanker                    2333
Container ship                2109
Chemical tanker               1615
General cargo ship            1347
Vehicle carrier                488
Gas carrier                    416
Ro-pax ship                    384
Ro-ro ship                     289
LNG carrier                    282
Passenger ship                 190
Refrigerated cargo carrier     163
Other ship types               153
Container/ro-ro cargo ship      83
Combination carrier             12
Name: ship_type, dtype: int64

In [5]:
# Keep only the two THETIS ship types that this notebook counts as
# passenger ships.
is_passenger_ship = df_thetis["ship_type"].isin(["Ro-pax ship", "Passenger ship"])
df_thetis_filtered = df_thetis[is_passenger_ship]
thetis_passenger_ships_count = len(df_thetis_filtered)
print(f"found {thetis_passenger_ships_count} passenger ships in THETIS ")

found 574 passenger ships in THETIS 


In [6]:
# Load both Wikidata exports, reading IMO numbers as strings.
df_wikidata_ships = pd.read_csv(WIKIDATA_SHIPS_CSV_PATH, dtype={"imo": str})
df_wikidata_urls = pd.read_csv(WIKIDATA_URLS_CSV_PATH, dtype={"imo": str})

# Inner-join ships with their URL rows, then collapse to one row per IMO where
# every column holds the *set* of values seen for that ship.
# Both inputs carry a `wikidataUrl` column, hence the `_x` / `_y` suffixes in
# the result. Missing values stay in the sets as NaN (possibly several times).
# NOTE(review): because the join is inner, ships without any row in the URLs
# file are not counted below - confirm this is intended.
df_wikidata = (
    df_wikidata_ships
    .merge(df_wikidata_urls, on="imo")
    .groupby("imo")
    .agg(set)
)
wikidata_ships_count = len(df_wikidata)
print(f"found {wikidata_ships_count} ships in wikidata")
df_wikidata.head()

found 6498 ships in wikidata


Unnamed: 0_level_0,wikidataUrl_x,mmsi,shipTypes,countryCode,imageUrl,beam,draft,maximumCapacity,length,width,...,operatorUrl,operatorCountryCode,operatorName,manufacturerUrl,manufacturerCountryCode,manufacturerName,wikidataUrl_y,item,wikipediaUrl,wikipediaLang
imo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
516298,{http://www.wikidata.org/entity/Q28803281},{nan},{cargo ship},{nan},{nan},{nan},{nan},{nan},{nan},{nan},...,{nan},{nan},{nan},{nan},{nan},{nan},{13162},{http://www.wikidata.org/entity/Q28803281},{https://en.wikipedia.org/wiki/SS_Hazelbank},{en}
896800,{http://www.wikidata.org/entity/Q2123498},{nan},{tugboat},{nan},{http://commons.wikimedia.org/wiki/Special:Fil...,{nan},{nan},{nan},{35.26},{7.45},...,{nan},{nan},{nan},{nan},{nan},{nan},{16405},{http://www.wikidata.org/entity/Q2123498},"{https://nl.wikipedia.org/wiki/Hudson_(schip,_...",{nl}
1000150,{http://www.wikidata.org/entity/Q58623059},{232398000.0},{three-masted schooner},{nan},{http://commons.wikimedia.org/wiki/Special:Fil...,{nan},{4.09},{nan},{65.0},{nan},...,{nan},{nan},{nan},{nan},{nan},{nan},{18422},{http://www.wikidata.org/entity/Q58623059},{https://fr.wikipedia.org/wiki/Adix},{fr}
1000356,{http://www.wikidata.org/entity/Q1511990},"{nan, nan}",{motor ship},{nan},{nan},{13.0},{3.6},{nan},{82.0},"{nan, nan}",...,{nan},{nan},{nan},{nan},{nan},{nan},"{9672, 9671}",{http://www.wikidata.org/entity/Q1511990},"{https://fr.wikipedia.org/wiki/Basrah_Breeze, ...","{fr, de}"
1000447,{http://www.wikidata.org/entity/Q18888436},{nan},"{motor yacht, luxury yacht}",{nan},{http://commons.wikimedia.org/wiki/Special:Fil...,{nan},{nan},{nan},{43.8},{nan},...,{nan},{nan},{nan},{nan},{nan},{nan},{11542},{http://www.wikidata.org/entity/Q18888436},{https://en.wikipedia.org/wiki/Va_Bene_(yacht)},{en}


In [7]:
# Passenger ships known to both sources: inner join of the filtered THETIS
# frame and the Wikidata frame on their shared `imo` key.
df_all = df_thetis_filtered.merge(df_wikidata, on="imo")
merged_ships_count = len(df_all)
print(f"intersection of thetis and wikidata gave {merged_ships_count} ships.")

intersection of thetis and wikidata gave 497 ships.


In [8]:
# Gather the summary figures computed above and export them as JSON into the
# website tree (presumably picked up as an Eleventy data file - TODO confirm).
data = {
    "shipTypesValueCounts": ship_types.to_dict(),
    "thetisPassengerShipsCount": thetis_passenger_ships_count,
    "wikidataShipsCount": wikidata_ships_count,
    "mergedShipsCount": merged_ships_count,
}
with open(WWW_DATA_SHIP_TYPES_PATH, 'w') as json_file:
    json.dump(data, json_file, indent=2)
print(f"wrote file to {WWW_DATA_SHIP_TYPES_PATH}")

wrote file to /Users/adipasquale/dev/greenferries/data/notebooks/../../www/views/doc/ship_types.11tydata.json
