In [1]:
import pandas as pd
import bamboolib as bam
import requests
import json

## Travels data

In [60]:
# Load the raw travels export; `df` is cleaned step by step below and is the
# frame the following cells work on.
df = pd.read_json("../static/data/travels_init.json")

# Step: Keep rows where (departure_admiralty is not missing) and (destination_admiralty is not missing)
# .copy() makes the filtered frame independent of the one it was sliced from,
# so the column assignments below do not trigger SettingWithCopyWarning.
has_both_admiralties = df['departure_admiralty'].notna() & df['destination_admiralty'].notna()
df = df.loc[has_both_admiralties].copy()

# Step: Replace missing values with 0
numeric_columns = ['distance_dep_dest_miles', 'tonnage']
df[numeric_columns] = df[numeric_columns].fillna(0)

# Step: Change data type of ['distance_dep_dest_miles', 'tonnage'] to Float
for column_name in numeric_columns:
    df[column_name] = df[column_name].astype('float')

# Step: Keep rows where outdate_fixed starts with 1787
# (na=False: rows with a missing outdate_fixed are treated as non-matching)
df = df.loc[df['outdate_fixed'].str.startswith('1787', na=False)].copy()

# Step: Change data type of outdate_fixed to Datetime
df['outdate_fixed_datetime'] = pd.to_datetime(df['outdate_fixed'], format='%Y-%m-%d')

# Step: Extract datetime attribute(s) month number from 'outdate_fixed_datetime'
df['outdate_fixed_datetime_month_number'] = df['outdate_fixed_datetime'].dt.month

# Step: Extract datetime attribute(s) month name from 'outdate_fixed_datetime'
df['outdate_fixed_datetime_month_name'] = df['outdate_fixed_datetime'].dt.month_name()

# Step: Extract datetime attribute(s) week from 'outdate_fixed_datetime'
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week is its documented replacement (ISO week number).
df['outdate_fixed_datetime_week'] = df['outdate_fixed_datetime'].dt.isocalendar().week

# Step: Change data type of ['outdate_fixed_datetime_month_number', 'outdate_fixed_datetime_week'] to String/Text
for column_name in ['outdate_fixed_datetime_month_number', 'outdate_fixed_datetime_week']:
    df[column_name] = df[column_name].astype('string')

# Step: Drop columns
# The raw text date is no longer needed once the datetime column exists.
df = df.drop(columns=['outdate_fixed'])

In [58]:
# render
# Imports are kept in this cell (once instead of twice) because cells outside
# this view may rely on `np` being imported here.
import numpy as np
import pandas as pd

# Step: Change data type of commodity_id to String/Text
df['commodity_id'] = df['commodity_id'].astype('string')

# Step: Manipulate strings of 'commodity_id': remove a trailing '.0'
# The pattern is anchored to the end of the string so that only the float
# suffix is stripped, never a '.0' occurring elsewhere in the value.
df['commodity_id'] = df['commodity_id'].str.replace(r'\.0$', '', regex=True)

# Step: Left-pad commodity_id with zeros to 8 characters
# The join key `record_id` in df1 is an 8-character zero-padded string.
# .str.zfill leaves missing values missing.
df['commodity_id'] = df['commodity_id'].str.zfill(8)

# Step: Left Join with df1 where commodity_id=record_id
# NOTE: df1 is the products frame built in the "Products data" section, which
# appears further down in this notebook; that cell must be run before this one.
product_columns = ['record_id', 'commodity_standardized_en', 'category_portic_fr']
df = pd.merge(
    df,
    df1[product_columns],
    how='left',
    left_on=['commodity_id'],
    right_on=['record_id'],
)

# Step: Drop columns
# 'commodity_standardized_fr' is not brought in by the merge above, so it is
# only present if the travels data already carried it; errors='ignore' keeps
# the drop from raising KeyError when it is absent.
df = df.drop(columns=['record_id', 'commodity_standardized_fr'], errors='ignore')

df

          travel_id  distance_dep_dest_miles        departure  \
0      0000076N- 29                     20.0        Granville   
4      0005962N- 07                     20.0  Tonnay-Charente   
9      0000624N- 05                     32.0         Le Havre   
11     0000359N- 04                     51.0         Honfleur   
12     0000624N- 16                     20.0         Le Havre   
...             ...                      ...              ...   
39445  0000625N- 16                     56.0            Rouen   
39447  0021805N- 08                     48.0           Toulon   
39448  0000590N- 18                     58.0         Le Havre   
39449  0000591N- 15                    142.0         Le Havre   
39450  0000600N- 03                     56.0            Rouen   

       departure_latitude  departure_longitude departure_admiralty  \
0               48.838204            -1.597853           Granville   
4               45.950000            -0.900000            Marennes   
9        

## Products data

In [3]:
portic_products_api_url = "http://data.portic.fr/api/cargo_categories/"


def get_portic_products_api_data(url=None, timeout=30):
    """Download the Portic cargo categories and return them as a DataFrame.

    Parameters
    ----------
    url : str, optional
        Endpoint to query. Defaults to the module-level
        ``portic_products_api_url``, looked up at call time.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that an
        unresponsive API cannot hang the kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        A frame built from the JSON payload returned by the API.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    if url is None:
        url = portic_products_api_url
    response = requests.get(
        url, headers={'Accept': 'application/json'}, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return pd.DataFrame(response.json())


df1 = get_portic_products_api_data()

df1

      pkid record_id commodity_standardized_en commodity_standardized_fr  \
0        3  00000003                  Military                 Militaire   
1        4  00000004                  Prisoner                Prisonnier   
2        5  00000005                      Crew                  Equipage   
3        6  00000006                     Greek                      Grec   
4        7  00000007                   Genoese                    Génois   
...    ...       ...                       ...                       ...   
1064  1059  00001699      Coffee (West Indian)           Café (Antilles)   
1065  1061  00001701        Planks (cork wood)         Planches de liège   
1066  1063  00001703                     Manna                     Manne   
1067  1067  00001707             Oil (Vitriol)          Huile de vitriol   
1068  1068  00001708              Seeds (hemp)          Graine (chanvre)   

                                    category_portic_fr  \
0                            

## Debug