# Inicio de código

In [1]:
import pandas as pd

In [2]:
pd.options.display.float_format = '${:,.2f}'.format

In [3]:
df = pd.read_csv('/Users/danielmartinez/Desktop/EMTECH/Proyecto_final_2/synergy_logistics_database.csv')
df

Unnamed: 0,register_id,direction,origin,destination,year,date,product,transport_mode,company_name,total_value
0,1,Exports,Japan,China,2015,31/01/15,Cars,Sea,Honda,33000000
1,2,Exports,Japan,China,2015,01/02/15,Cars,Sea,Honda,16000000
2,3,Exports,Japan,China,2015,02/02/15,Cars,Sea,Honda,29000000
3,4,Exports,Japan,China,2015,03/02/15,Cars,Sea,Honda,14000000
4,5,Exports,Japan,China,2015,04/02/15,Cars,Sea,Honda,17000000
...,...,...,...,...,...,...,...,...,...,...
19051,19052,Imports,Japan,Singapore,2020,27/06/20,Gas turbines,Sea,Union Energy Co,1000000
19052,19053,Imports,Malaysia,Singapore,2020,28/06/20,Gas turbines,Sea,Union Energy Co,2000000
19053,19054,Imports,Malaysia,Singapore,2020,29/06/20,Gas turbines,Sea,Union Energy Co,33000000
19054,19055,Imports,Malaysia,Singapore,2020,30/06/20,Gas turbines,Sea,Union Energy Co,13000000


# Cambio de tipos de variable

In [4]:
df.dtypes

register_id        int64
direction         object
origin            object
destination       object
year               int64
date              object
product           object
transport_mode    object
company_name      object
total_value        int64
dtype: object

In [5]:
df['total_value'] = df['total_value'].astype('float')

In [6]:
df[['direction', 'origin', 'destination', 'product', 'transport_mode', 'company_name']] = df[['direction', 'origin', 'destination', 'product', 'transport_mode', 'company_name']].astype('category')

In [7]:
df['year'] = pd.to_datetime(
    df['year'],
    errors='coerce',
    format = '%m/%d/%Y %H:%M:%S %p'
)

In [8]:
df['date'] = pd.to_datetime(
    df['date'],
    errors='coerce',
    format = '%m/%d/%Y %H:%M:%S %p'
)

In [9]:
df.dtypes

register_id                int64
direction               category
origin                  category
destination             category
year              datetime64[ns]
date              datetime64[ns]
product                 category
transport_mode          category
company_name            category
total_value              float64
dtype: object

# Resolución de problemas

## Opción 1) Rutas de importación y exportación. 

* Synergy logistics está considerando la posibilidad de enfocar sus esfuerzos en las 10 rutas más demandadas. Acorde a los flujos de importación y exportación, ¿cuáles son esas 10 rutas? 

* ¿le conviene implementar esa estrategia? ¿porqué?

In [10]:
df_grouped = df.groupby(['origin', 'destination']).count()
df_grouped_sorted = df_grouped.sort_values('product', ascending=False)
df_grouped_sorted['register_id'].head(10)

origin       destination
South Korea  Vietnam        497
Netherlands  Belgium        437
USA          Netherlands    436
Japan        Mexico         385
China        Mexico         351
             Japan          343
Germany      China          328
Japan        Brazil         306
Germany      France         299
South Korea  Japan          294
Name: register_id, dtype: int64

In [11]:
df_grouped = df.groupby(['origin', 'destination']).sum()
df_grouped_sorted = df_grouped.sort_values('total_value', ascending=False)
df_grouped_sorted['total_value'].head(10)


origin       destination   
China        Mexico           $12,494,000,000.00
Canada       Mexico            $8,450,000,000.00
South Korea  Vietnam           $6,877,007,000.00
China        Japan             $5,891,000,000.00
Japan        Mexico            $5,829,000,000.00
France       Belgium           $5,538,069,000.00
             United Kingdom    $5,427,000,000.00
China        South Korea       $4,790,000,000.00
South Korea  Japan             $4,741,000,000.00
USA          Mexico            $4,710,000,000.00
Name: total_value, dtype: float64

## Opción 2) Medio de transporte utilizado. 
+ ¿Cuáles son los 3 medios de transporte más importantes para Synergy logistics considerando el valor de las importaciones y exportaciones? 
+ ¿Cuál es medio de transporte que podrían reducir?

In [12]:
df.groupby(['transport_mode']).sum().sort_values('total_value', ascending=False)

Unnamed: 0_level_0,register_id,total_value
transport_mode,Unnamed: 1_level_1,Unnamed: 2_level_1
Sea,107111433,"$100,530,622,000.00"
Rail,32521472,"$43,628,043,000.00"
Air,18686086,"$38,262,147,000.00"
Road,23256105,"$33,270,486,000.00"


## Opción 3) Valor total de importaciones y exportaciones. 

Si Synergy Logistics quisiera enfocarse en los países que le generan el 80% del valor de las exportaciones e importaciones 
* ¿en qué grupo de países debería enfocar sus esfuerzos?

In [13]:
df_countries = df.groupby(['origin', 'destination', 'direction']).sum()
df_countries_sorted = df_countries.sort_values('total_value', ascending=False)

In [14]:
df_countries_sorted['cumsum'] = df_countries_sorted['total_value'].cumsum()

In [15]:
df_countries_sorted['percentage'] = df_countries_sorted['cumsum'] / df_countries_sorted['total_value'].sum() * 100

In [16]:
pd.options.display.float_format = '{:,.2f}%'.format
df_80 = df_countries_sorted[['percentage', 'total_value']].head(62).reset_index()

In [17]:
df_80

Unnamed: 0,origin,destination,direction,percentage,total_value
0,China,Mexico,Exports,5.68%,"12,250,000,000.00%"
1,Canada,Mexico,Exports,9.60%,"8,450,000,000.00%"
2,South Korea,Vietnam,Exports,12.79%,"6,877,007,000.00%"
3,France,Belgium,Exports,15.35%,"5,538,069,000.00%"
4,France,United Kingdom,Exports,17.87%,"5,427,000,000.00%"
...,...,...,...,...,...
57,USA,India,Imports,78.29%,"1,133,000,000.00%"
58,Spain,Russia,Exports,78.79%,"1,085,000,000.00%"
59,India,United Arab Emirates,Exports,79.27%,"1,037,000,000.00%"
60,USA,Netherlands,Exports,79.75%,"1,032,187,000.00%"


In [18]:
df_80[['origin','destination']]

Unnamed: 0,origin,destination
0,China,Mexico
1,Canada,Mexico
2,South Korea,Vietnam
3,France,Belgium
4,France,United Kingdom
...,...,...
57,USA,India
58,Spain,Russia
59,India,United Arab Emirates
60,USA,Netherlands


In [19]:
df_80_destination = df_80[['destination']].value_counts().to_frame(name='count_destination').reset_index()
df_80_destination.rename(columns={'destination':'countries'}, inplace=True)

Unnamed: 0,countries,count_destination
0,Mexico,7
1,Germany,6
2,USA,5
3,China,5
4,Japan,4
5,United Arab Emirates,4
6,Thailand,4
7,Belgium,3
8,Spain,3
9,Argentina,2


In [20]:
df_80_origin = df_80[['origin']].value_counts().to_frame(name='count_origin').reset_index()
df_80_origin.rename(columns={'origin':'countries'}, inplace=True)

Unnamed: 0,countries,count_origin
0,China,11
1,USA,10
2,Japan,7
3,South Korea,6
4,France,6
5,Russia,5
6,Germany,5
7,Spain,2
8,Mexico,2
9,Singapore,1


In [21]:
pd.options.display.float_format = '{:,.2f}'.format

In [None]:
df_top_contries = pd.merge(df_80_origin, df_80_destination, how='outer', on='countries')

In [23]:
df_top_contries['count_origin'] = df_top_contries['count_origin'].cumsum() / df_top_contries['count_origin'].sum() *100
df_top_contries['count_destination'] = df_top_contries['count_destination'].cumsum() / df_top_contries['count_destination'].sum() *100
df_top_contries.head(19)

Unnamed: 0,countries,count_origin,count_destination
0,China,17.74,8.06
1,USA,33.87,16.13
2,Japan,45.16,22.58
3,South Korea,54.84,25.81
4,France,64.52,27.42
5,Russia,72.58,29.03
6,Germany,80.65,38.71
7,Spain,83.87,43.55
8,Mexico,87.1,54.84
9,Singapore,88.71,58.06



A partir del análisis de las opciones anteriores, ¿Cuál opción u opciones es conveniente implementar como base en la estrategia?
Justifica a la dirección tu recomendación en forma de un reporte respaldado por datos y análisis en una extensión máxima de 1.5 cuartillas.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=26d139f0-46cd-48ab-bde1-cce8316500a3' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>

In [None]:
try:   
    !jupyter nbconvert --to python file_name.ipynb
    # Python se convierte a .py, el script se convierte a .html
         # file_name.ipynb es el nombre del archivo del módulo actual
except:
    pass
