In [106]:
import pandas as pd
import numpy as np

## Load the data into a DataFrame

In [27]:
# Read the logistics CSV, keeping every column except 'register_id'.
synergy_logistics_info_df = pd.read_csv(
    'synergy_logistics_database.csv',
    usecols=lambda column_name: column_name != 'register_id',
)
synergy_logistics_info_df

Unnamed: 0,direction,origin,destination,year,date,product,transport_mode,company_name,total_value
0,Exports,Japan,China,2015,31/01/15,Cars,Sea,Honda,33000000
1,Exports,Japan,China,2015,01/02/15,Cars,Sea,Honda,16000000
2,Exports,Japan,China,2015,02/02/15,Cars,Sea,Honda,29000000
3,Exports,Japan,China,2015,03/02/15,Cars,Sea,Honda,14000000
4,Exports,Japan,China,2015,04/02/15,Cars,Sea,Honda,17000000
...,...,...,...,...,...,...,...,...,...
19051,Imports,Japan,Singapore,2020,27/06/20,Gas turbines,Sea,Union Energy Co,1000000
19052,Imports,Malaysia,Singapore,2020,28/06/20,Gas turbines,Sea,Union Energy Co,2000000
19053,Imports,Malaysia,Singapore,2020,29/06/20,Gas turbines,Sea,Union Energy Co,33000000
19054,Imports,Malaysia,Singapore,2020,30/06/20,Gas turbines,Sea,Union Energy Co,13000000


# Option 1 - Import and export routes
## Obtain the 10 most used routes

In [195]:
# Work on a reduced frame: `drop` returns a new DataFrame without the columns
# that the route analysis does not use, leaving the loaded data untouched.
option_1_df = synergy_logistics_info_df.drop(
    columns=['product', 'transport_mode', 'date', 'company_name'])

# Split by trade direction into one frame for exports and one for imports
option_1_df_exports = option_1_df[option_1_df['direction'].eq('Exports')]
option_1_df_imports = option_1_df[option_1_df['direction'].eq('Imports')]


In [196]:
# Summarise every export route (origin -> destination).
#
# The summary is rebuilt from `option_1_df` instead of from the previous value
# of `option_1_df_exports`, so this cell can be re-run safely: it no longer
# fails with a KeyError on the already-dropped 'direction' column.
# sort=False keeps the routes in order of first appearance in the data.
option_1_df_exports = (
    option_1_df[option_1_df['direction'] == 'Exports']
    # Yearly figures per route: summed total_value and number of records
    .groupby(['origin', 'destination', 'year'], sort=False)
    .agg(total_value=('total_value', 'sum'),
         total_sales=('total_value', 'count'))
    .reset_index()
    # Per route: mean of the yearly sums and the record count over all years
    .groupby(['origin', 'destination'], sort=False)
    .agg(avg_total_value=('total_value', 'mean'),
         total_sales=('total_sales', 'sum'))
    .reset_index()
)


In [197]:
# Ten export routes with the highest average yearly value
option_1_df_exports.sort_values('avg_total_value', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales
11,China,Mexico,4083333000.0,330
26,USA,Canada,2412000000.0,136
23,USA,Brazil,2384000000.0,101
6,Germany,Italy,2270500000.0,130
9,China,Germany,2045000000.0,142
58,France,Belgium,1846023000.0,223
56,France,United Kingdom,1809000000.0,147
46,South Korea,Vietnam,1719252000.0,497
53,France,USA,1701000000.0,103
59,Canada,Mexico,1690000000.0,261


In [198]:
# Ten export routes with the highest number of recorded sales
option_1_df_exports.sort_values('total_sales', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales
46,South Korea,Vietnam,1719252000.0,497
51,Netherlands,Belgium,809535500.0,437
48,USA,Netherlands,258046800.0,436
11,China,Mexico,4083333000.0,330
41,Japan,Brazil,842038800.0,306
57,Germany,France,483702300.0,299
45,South Korea,Japan,1531333000.0,279
89,Australia,Singapore,98600000.0,273
59,Canada,Mexico,1690000000.0,261
37,China,Spain,931000000.0,250


In [199]:
# Summarise every import route (origin -> destination).
#
# The summary is rebuilt from `option_1_df` instead of from the previous value
# of `option_1_df_imports`, so this cell can be re-run safely: it no longer
# fails with a KeyError on the already-dropped 'direction' column.
# sort=False keeps the routes in order of first appearance in the data.
option_1_df_imports = (
    option_1_df[option_1_df['direction'] == 'Imports']
    # Yearly figures per route: summed total_value and number of records
    .groupby(['origin', 'destination', 'year'], sort=False)
    .agg(total_value=('total_value', 'sum'),
         total_sales=('total_value', 'count'))
    .reset_index()
    # Per route: mean of the yearly sums and the record count over all years
    .groupby(['origin', 'destination'], sort=False)
    .agg(avg_total_value=('total_value', 'mean'),
         total_sales=('total_sales', 'sum'))
    .reset_index()
)

In [202]:
# Ten import routes with the highest average yearly value
option_1_df_imports.sort_values('avg_total_value', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales
12,China,United Arab Emirates,1119000000.0,114
13,Japan,United Arab Emirates,1119000000.0,76
17,Singapore,Thailand,1004250000.0,273
18,China,Thailand,957750000.0,200
42,Australia,Japan,920000000.0,102
9,Germany,Mexico,902000000.0,70
14,South Korea,United Arab Emirates,856500000.0,51
27,Russia,India,851000000.0,42
4,USA,Germany,810000000.0,58
0,Mexico,USA,788333300.0,122


In [203]:
# Ten import routes with the highest number of recorded sales
option_1_df_imports.sort_values('total_sales', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales
17,Singapore,Thailand,1004250000.0,273
39,Germany,China,332000000.0,233
23,China,Japan,647400000.0,210
10,Japan,Mexico,783600000.0,206
18,China,Thailand,957750000.0,200
16,Malaysia,Thailand,696400000.0,195
5,Spain,Germany,681333300.0,142
0,Mexico,USA,788333300.0,122
12,China,United Arab Emirates,1119000000.0,114
40,Brazil,China,206500000.0,113


In [209]:
# Rank the export routes with a single combined score: min-max scale
# avg_total_value and total_sales, then take their equally weighted
# (arithmetic) mean. Sorting by that score in descending order gives the ranking.

# Get the min and max values of each column
max_avg_total_value = option_1_df_exports['avg_total_value'].max()
max_total_sales = option_1_df_exports['total_sales'].max()
min_avg_total_value = option_1_df_exports['avg_total_value'].min()
min_total_sales = option_1_df_exports['total_sales'].min()

# Min-max scale each column using these statistics.
# If every route shares the same value the range is zero; `or 1` then divides
# by 1, so the scaled column is all zeros instead of 0/0 = NaN.
scaled_avg_total_value = (
    (option_1_df_exports['avg_total_value'] - min_avg_total_value)
    / ((max_avg_total_value - min_avg_total_value) or 1))
scaled_total_sales = (
    (option_1_df_exports['total_sales'] - min_total_sales)
    / ((max_total_sales - min_total_sales) or 1))

# The score is the arithmetic mean of the two scaled columns. It is stored in a
# regular column named 'index' (not the DataFrame's row index).
option_1_df_exports['index'] = (scaled_avg_total_value + scaled_total_sales) / 2

In [211]:
# Ten export routes with the highest combined score
option_1_df_exports.sort_values('index', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales,index
11,China,Mexico,4083333000.0,330,0.829592
46,South Korea,Vietnam,1719252000.0,497,0.71052
51,Netherlands,Belgium,809535500.0,437,0.537902
48,USA,Netherlands,258046800.0,436,0.469352
59,Canada,Mexico,1690000000.0,261,0.466122
45,South Korea,Japan,1531333000.0,279,0.465061
58,France,Belgium,1846023000.0,223,0.446451
26,USA,Canada,2412000000.0,136,0.426979
41,Japan,Brazil,842038800.0,306,0.408208
6,Germany,Italy,2270500000.0,130,0.40353
