In [9]:
import os, sys, re, frontmatter
import sqlite3
import pandas as pd

In [45]:
REPO_ROOT = os.path.join(os.path.abspath(''), "..")
sys.path.append(REPO_ROOT)
    
def get_df(dirname):
    dirpath = os.path.join(REPO_ROOT, "www/11ty_input", dirname)
    items = []
    for filename in os.listdir(dirpath):
        # print(f"Processing {filename}")
        match_data = re.match(r".*\.md", filename)
        if not match_data:
            continue
        item = frontmatter.load(os.path.join(dirpath, filename)).metadata
        items.append(item)
    return pd.DataFrame(items).drop(columns=["tags", "layout"])

In [57]:
df_routes = get_df("routes") \
  .set_index("slug") \
  .rename(columns={"cityA": "city_a_slug", "cityB": "city_b_slug", "distanceKm": "distance_km", "distanceNms": "distance_nms"})
df_routes

Unnamed: 0_level_0,city_a_slug,city_b_slug,distance_km,distance_nms
slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
chios-gr-karlovassi-gr,gr-chios-259973,gr-karlovassi-256492,81,43.851103
guernesey-gg-saint-malo-fr,gg-guernesey-3042287,fr-saint-malo-2978640,98,53.114364
savona-it-bastia-fr,it-savona-3167022,fr-bastia-3034640,192,103.776676
oinousses-gr-le-piree-gr,gr-oinousses-256329,gr-le-piree-255274,234,126.412493
genes-it-barcelone-es,it-genes-3176219,es-barcelone-3128760,646,349.306620
...,...,...,...,...
arrecife-es-cadiz-es,es-arrecife-2521570,es-cadiz-2520600,1081,583.837047
barcelone-es-ibiza-es,es-barcelone-3128760,es-ibiza-2516479,282,152.749601
heraklion-gr-le-piree-gr,gr-heraklion-261745,gr-le-piree-255274,320,172.924542
kaskinen-fi-harnosand-se,fi-kaskinen-653758,se-harnosand-2707684,171,92.357407


In [142]:
df_companies = get_df("companies") \
    .set_index("slug") \
    .rename(columns={"officialUrl": "official_url", "wikipediaUrl": "wikipedia_url", "outOfScope": "out_of_scope"})
df_companies["out_of_scope"] = df_companies.out_of_scope.apply(lambda x: x == True)
df_companies["out_of_scope"] = df_companies.out_of_scope.astype(bool)
# reorder columns
df_companies = df_companies[["name", "country", "out_of_scope", "imo", "logo", "official_url", "wikipedia_url"]]
df_companies.dtypes

FileNotFoundError: [Errno 2] No such file or directory: '/Users/adipasquale/dev/greenferries/notebooks/../www/11ty_input/companies'

In [80]:
df_cities = get_df("cities") \
    .set_index("slug") \
    .rename(columns = {"geonamesId": "geonames_id", "targetAirportCode": "target_airport_code"})

df_cities = df_cities[["name", "country", "geonames_id", "latitude", "longitude", "target_airport_code"]]
df_cities

Unnamed: 0_level_0,name,country,geonames_id,latitude,longitude,target_airport_code
slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
dk-frederikshavn-2621927,Frederikshavn,DK,2621927,57.44073,10.53661,CNL
se-trelleborg-2667402,Trelleborg,SE,2667402,55.37514,13.15691,MMX
dk-grena-2621230,Grenå,DK,2621230,56.41578,10.87825,AAR
ee-paldiski-589663,Paldiski,EE,589663,59.35667,24.05306,TLL
fr-le-havre-3003796,Le Havre,FR,3003796,49.48510,0.11440,LEH
...,...,...,...,...,...,...
gb-lerwick-2644605,Lerwick,GB,2644605,60.15339,-1.14427,LSI
se-grisslehamn-2710606,Grisslehamn,SE,2710606,60.10048,18.80819,ARN
ax-lumparland-647455,Lumparland,AX,647455,60.11695,20.25819,
gb-brodick-2654604,Brodick,GB,2654604,55.57539,-5.14717,PIK


In [138]:
df_ships = get_df("ships") \
    .set_index("imo") \
    .rename(columns={"outOfScope": "out_of_scope", "wikipediaUrl": "wikipedia_url", "unknownRoutes": "unknown_routes", "capacityPax": "capacity_pax", "company": "company_slug"})
df_ships["out_of_scope"] = df_ships.out_of_scope.apply(lambda x: x == True)
df_ships["out_of_scope"] = df_ships.out_of_scope.astype(bool)
df_ships["unknown_routes"] = df_ships.unknown_routes.apply(lambda x: x == True)
df_ships["unknown_routes"] = df_ships.unknown_routes.astype(bool)
df_ships_routes = df_ships["routes"].explode().dropna().reset_index().rename(columns={"routes": "route_slug"})
df_ships_routes
df_ships = df_ships[["name", "slug", "company_slug", "out_of_scope", "photo", "wikipedia_url", "unknown_routes", "capacity_pax"]]
df_ships

Unnamed: 0_level_0,name,slug,company_slug,out_of_scope,photo,wikipedia_url,unknown_routes,capacity_pax
imo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9332559,MS Stena Horizon,ms-stena-horizon-9332559,stena-line-se,False,/img/300px-Stena_Horizon_approaching_Rosslare_...,https://en.wikipedia.org/wiki/MS_Stena_Horizon,False,
9783576,Mein Schiff 2,mein-schiff-2-9783576,tui-cruises-de,True,/img/mein-schiff-2-9783576.jpg,,False,
9215490,Caribbean Princess,caribbean-princess-9215490,princess-cruises-us,True,/img/caribbean-princess-9215490.jpg,https://fr.wikipedia.org/wiki/Caribbean_Princess,False,
9364722,MS Star,ms-star-9364722,tallink-ee,False,/img/300px-190407_Star_Helsinki.jpg,https://en.wikipedia.org/wiki/MS_Star,False,1900.0
9813072,ROALD AMUNDSEN,roald-amundsen-9813072,hurtigruten-group-no,False,/img/roald-amundsen-9813072.jpg,https://en.wikipedia.org/wiki/MS_Roald_Amundsen,False,
...,...,...,...,...,...,...,...,...
7361324,MS Moby Otta,ms-moby-otta-7361324,moby-lines-it,False,/img/300px-Princess_of_Scandinavia_Gothenburg.jpg,https://en.wikipedia.org/wiki/MS_Moby_Otta,False,
9479864,Costa Fascinosa,costa-fascinosa-9479864,costa-cruises-it,True,/img/300px-Costa_Fascinosa_close_to_Corfu.jpg,https://en.wikipedia.org/wiki/Costa_Fascinosa,False,3800.0
9767091,CARNIVAL HORIZON,carnival-horizon-9767091,carnival-cruise-line-us,True,/img/carnival-horizon-9767091.jpg,https://en.wikipedia.org/wiki/Carnival_Horizon,False,
9221346,HSC Volcan de Teno,hsc-volcan-de-teno-9221346,naviera-armas-es,False,/img/300px-Highspeed_6_-_HSW_-_Heraklion_-_Por...,https://en.wikipedia.org/wiki/HSC_Volcan_de_Teno,False,


In [143]:
db_con = sqlite3.connect(os.path.join(REPO_ROOT, "dbs/www.db"))

df_ships.to_sql("ships", db_con, if_exists="replace")
df_routes.to_sql("routes", db_con, if_exists="replace")
df_companies.to_sql("companies", db_con, if_exists="replace")
df_ships_routes.to_sql("ships_routes", db_con, if_exists="replace")
df_cities.to_sql("cities", db_con, if_exists="replace")

187