In [1]:
import pandas as pd

## Load the data into a DataFrame

In [7]:
# Read the logistics records, keeping every CSV column except 'register_id'.
synergy_logistics_info_df = pd.read_csv(
    'synergy_logistics_database.csv',
    usecols=lambda column_name: column_name != 'register_id',
)
synergy_logistics_info_df

Unnamed: 0,direction,origin,destination,year,date,product,transport_mode,company_name,total_value
0,Exports,Japan,China,2015,31/01/15,Cars,Sea,Honda,33000000
1,Exports,Japan,China,2015,01/02/15,Cars,Sea,Honda,16000000
2,Exports,Japan,China,2015,02/02/15,Cars,Sea,Honda,29000000
3,Exports,Japan,China,2015,03/02/15,Cars,Sea,Honda,14000000
4,Exports,Japan,China,2015,04/02/15,Cars,Sea,Honda,17000000
...,...,...,...,...,...,...,...,...,...
19051,Imports,Japan,Singapore,2020,27/06/20,Gas turbines,Sea,Union Energy Co,1000000
19052,Imports,Malaysia,Singapore,2020,28/06/20,Gas turbines,Sea,Union Energy Co,2000000
19053,Imports,Malaysia,Singapore,2020,29/06/20,Gas turbines,Sea,Union Energy Co,33000000
19054,Imports,Malaysia,Singapore,2020,30/06/20,Gas turbines,Sea,Union Energy Co,13000000


# Option 1 - Import and export routes
## Obtain the 10 most used routes

In [8]:
# Build a trimmed frame for the route analysis; `drop` returns a new
# DataFrame, so the loaded data is left untouched.
option_1_df = synergy_logistics_info_df.drop(
    columns=['product', 'transport_mode', 'date', 'company_name']
)

# Split the records by trade direction: one frame for exports, one for imports.
option_1_df_exports = option_1_df.loc[option_1_df['direction'].eq('Exports')]
option_1_df_imports = option_1_df.loc[option_1_df['direction'].eq('Imports')]

def get_routes(dataframe):
    """Summarise the records per (origin, destination) route.

    The 'direction' column is dropped first (a KeyError is raised if it is
    missing). Records are then aggregated in two passes:

    1. per route and year: the sum of 'total_value' and the number of records;
    2. per route: the mean of the yearly sums ('avg_total_value') and the
       overall number of records ('total_sales').

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'direction', 'origin', 'destination',
        'year' and 'total_value'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per route with the columns 'origin', 'destination',
        'avg_total_value' and 'total_sales', in order of first appearance
        (the groupings are not sorted).
    """
    route_keys = ['origin', 'destination']

    # First pass: yearly totals and record counts for every route.
    yearly_totals = (
        dataframe
        .drop(columns='direction')
        .groupby(route_keys + ['year'], sort=False)
        .agg(
            total_value=('total_value', 'sum'),
            total_sales=('total_value', 'count'),
        )
        .reset_index()
        .drop(columns='year')
    )

    # Second pass: average the yearly totals and add up the record counts.
    per_route = yearly_totals.groupby(route_keys, sort=False).agg(
        avg_total_value=('total_value', 'mean'),
        total_sales=('total_sales', 'sum'),
    )

    return per_route.reset_index()

def _min_max_scale(series):
    """Rescale ``series`` linearly so its minimum maps to 0 and its maximum to 1.

    A column whose values are all identical has a zero range; dividing by it
    would produce NaN, so every entry is scored 0.0 instead.
    """
    lowest = series.min()
    value_range = series.max() - lowest
    if value_range == 0:
        return pd.Series(0.0, index=series.index)
    return (series - lowest) / value_range


def sort_routes_by_index(dataframe):
    """Rank routes by a combined value/volume score.

    'avg_total_value' and 'total_sales' are each min-max scaled, and their
    arithmetic mean (equal weights) is stored in an 'index' column. Rows are
    returned sorted by that column in descending order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'avg_total_value' and 'total_sales'.
        It is not modified; an existing 'index' column is replaced in the
        returned copy only.

    Returns
    -------
    pandas.DataFrame
        A new frame with the added 'index' column, sorted from the highest
        to the lowest score. The original row labels are kept.
    """
    scaled_avg_total_value = _min_max_scale(dataframe['avg_total_value'])
    scaled_total_sales = _min_max_scale(dataframe['total_sales'])

    # `assign` returns a new frame, so the caller's DataFrame is never
    # written to (no side effects, no SettingWithCopyWarning on slices).
    ranked = dataframe.assign(
        index=(scaled_avg_total_value + scaled_total_sales) / 2
    )

    return ranked.sort_values(by='index', ascending=False)

In [9]:
# Rank the routes of each direction. Both frames are derived from
# `option_1_df` every time, so this cell can be re-run safely: it never feeds
# an already-aggregated frame (which has no 'direction' column) back into
# `get_routes`.
option_1_df_exports = (
    option_1_df[option_1_df['direction'] == 'Exports']
    .pipe(get_routes)
    .pipe(sort_routes_by_index)  # one pass is enough; sorting twice added nothing
    .reset_index(drop=True)
)
option_1_df_imports = (
    option_1_df[option_1_df['direction'] == 'Imports']
    .pipe(get_routes)
    .pipe(sort_routes_by_index)
    .reset_index(drop=True)
)

# Shift the row labels so they read as ranks starting at 1.
option_1_df_exports.index = option_1_df_exports.index + 1
option_1_df_imports.index = option_1_df_imports.index + 1

# Show the ten best-ranked routes for each direction.
print(option_1_df_exports.head(10))
print('\n')
print(option_1_df_imports.head(10))

         origin  destination  avg_total_value  total_sales     index
1         China       Mexico     4.083333e+09          330  0.829592
2   South Korea      Vietnam     1.719252e+09          497  0.710520
3   Netherlands      Belgium     8.095355e+08          437  0.537902
4           USA  Netherlands     2.580468e+08          436  0.469352
5        Canada       Mexico     1.690000e+09          261  0.466122
6   South Korea        Japan     1.531333e+09          279  0.465061
7        France      Belgium     1.846023e+09          223  0.446451
8           USA       Canada     2.412000e+09          136  0.426979
9         Japan       Brazil     8.420388e+08          306  0.408208
10      Germany        Italy     2.270500e+09          130  0.403530


       origin           destination  avg_total_value  total_sales     index
1   Singapore              Thailand     1.004250e+09          273  0.944990
2       China              Thailand     9.577500e+08          200  0.787011
3       Jap

In [209]:
# Score every export route: min-max (feature) scale 'avg_total_value' and
# 'total_sales', then take their plain arithmetic mean as the ranking index.
# NOTE: this is the same computation that sort_routes_by_index performs.

# Extremes of each column
min_avg_total_value, max_avg_total_value = (
    option_1_df_exports['avg_total_value'].min(),
    option_1_df_exports['avg_total_value'].max(),
)
min_total_sales, max_total_sales = (
    option_1_df_exports['total_sales'].min(),
    option_1_df_exports['total_sales'].max(),
)

# Rescale both columns using those extremes
scaled_avg_total_value = (
    (option_1_df_exports['avg_total_value'] - min_avg_total_value)
    / (max_avg_total_value - min_avg_total_value)
)
scaled_total_sales = (
    (option_1_df_exports['total_sales'] - min_total_sales)
    / (max_total_sales - min_total_sales)
)

# The index is the mean of the two scaled columns
option_1_df_exports['index'] = (scaled_avg_total_value + scaled_total_sales) / 2

In [211]:
# Ten export routes with the highest index, best first.
option_1_df_exports.sort_values(by='index', ascending=False).head(10)

Unnamed: 0,origin,destination,avg_total_value,total_sales,index
11,China,Mexico,4083333000.0,330,0.829592
46,South Korea,Vietnam,1719252000.0,497,0.71052
51,Netherlands,Belgium,809535500.0,437,0.537902
48,USA,Netherlands,258046800.0,436,0.469352
59,Canada,Mexico,1690000000.0,261,0.466122
45,South Korea,Japan,1531333000.0,279,0.465061
58,France,Belgium,1846023000.0,223,0.446451
26,USA,Canada,2412000000.0,136,0.426979
41,Japan,Brazil,842038800.0,306,0.408208
6,Germany,Italy,2270500000.0,130,0.40353
