<a href="https://colab.research.google.com/github/jmelendezgeo/Exploratory-analysis-/blob/main/ImportExportColombia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Raw inputs are fetched straight from GitHub. Two file layouts are in use:
# '|'-delimited tables, and comma-delimited tables whose first four lines
# must be skipped before the header row.
def _read_pipe_csv(csv_url):
  """Load the '|'-delimited CSV found at *csv_url* into a DataFrame."""
  return pd.read_csv(csv_url, sep='|')


def _read_skip4_csv(csv_url):
  """Load the CSV found at *csv_url*, ignoring its first four lines."""
  return pd.read_csv(csv_url, skiprows=4)


# ---- '|'-delimited tables ----
# Colombia exports
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/colombia_exports.csv'
colombia_exports = _read_pipe_csv(url)
# Colombia imports
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/colombia_imports.csv'
colombia_imports = _read_pipe_csv(url)
# Country names
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/country_names.csv'
country_names = _read_pipe_csv(url)
# SITC groups
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/groups_sitc_rev2.csv'
groups_sitc = _read_pipe_csv(url)
# SITC products
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/products_sitc_rev2.csv'
products_sitc = _read_pipe_csv(url)

# ---- tables with four leading lines to skip ----
# Current GDP
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/GDP_current.csv'
GDP_current = _read_skip4_csv(url)
# GDP growth
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/GDP_growth.csv'
GDP_growth = _read_skip4_csv(url)
# Access to electricity
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/elec_accs.csv'
elec_access = _read_skip4_csv(url)
# Electricity use
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/elec_use.csv'
elec_use = _read_skip4_csv(url)
# Population
url = 'https://raw.githubusercontent.com/jmelendezgeo/Data-Triathlon/main/population.csv'
population = _read_skip4_csv(url)


In [None]:
# Two-letter prefix of a country acronym -> continent name.
_CONTINENT_BY_PREFIX = {
  'af': 'Africa',
  'as': 'Asia',
  'eu': 'Europe',
  'na': 'North America',
  'oc': 'Oceania',
  'sa': 'South America',
}


def create_continent(acr):
  """Return the continent name encoded in a country acronym.

  The first two characters of ``acr`` are looked up in a fixed table
  ('af', 'as', 'eu', 'na', 'oc', 'sa'). Matching is case-sensitive.

  Parameters:
    acr: the country acronym. Expected to be a string; any other value
      (for example ``None`` or a NaN float coming from a missing cell
      in a DataFrame column) is treated as unknown instead of raising.

  Returns:
    The continent name, or 'Other' when the prefix is not recognised or
    ``acr`` is not a string.
  """
  # Missing values reach this function as None / float NaN when it is used
  # with Series.apply; slicing those would raise TypeError.
  if not isinstance(acr, str):
    return 'Other'
  return _CONTINENT_BY_PREFIX.get(acr[:2], 'Other')

In [None]:
# Trim every table down to the columns used by the rest of the notebook.
columns_to_keep = ['year','origin','dest','sitc4','export_val']

# Trade tables: same column subset for both; 'sitc4' becomes 'sitc', and the
# value column of the imports table is relabelled 'import_val'.
colombia_exports = (colombia_exports[columns_to_keep]
                    .rename(columns={'sitc4':'sitc'}))
colombia_imports = (colombia_imports[columns_to_keep]
                    .rename(columns={'sitc4':'sitc', 'export_val':'import_val'}))

# Lookup tables.
country_names = country_names[['id','id_3char','name']]
products_sitc = products_sitc[['id','sitc','name']]
# Group codes are stored as 2-character, zero-padded strings.
groups_sitc = (groups_sitc[['id','category']]
               .assign(id=lambda table: table['id'].astype(str).str.zfill(2)))

In [None]:
# Joining information
def _build_trade_df(flows, continent_column):
  """Enrich a trade-flow table with country, product and SITC group info.

  Reads the module-level lookup tables ``country_names``, ``products_sitc``
  and ``groups_sitc`` and the function ``create_continent``.

  Parameters:
    flows: DataFrame with at least the columns 'dest' and 'sitc'.
    continent_column: name of the column that receives the continent
      derived from the matched country's 'id'.

  Returns:
    A new DataFrame: ``flows`` inner-joined with the three lookup tables,
    with 'country name', ``continent_column``, 'product name' and
    'category' added and the helper columns 'id_3char', 'id' and 'sitc'
    removed. Rows without a match in any lookup table are dropped
    (inner joins).
  """
  # Country lookup: match the 3-character code held in 'dest'.
  trade = (pd.merge(flows, country_names, how='inner',
                    left_on='dest', right_on='id_3char')
             .drop(columns='id_3char'))
  trade[continent_column] = trade['id'].apply(create_continent)
  trade = (trade
           .rename(columns={'name': 'country name'})
           .drop(columns='id'))

  # Product lookup on the full product code.
  trade = (pd.merge(trade, products_sitc[['sitc', 'name']], how='inner', on='sitc')
             .rename(columns={'name': 'product name'}))

  # Zero-pad the product code to 4 characters; its first 2 digits are the
  # group code. expand=False makes extract return a Series.
  trade['sitc'] = (trade['sitc'].astype(str).str.zfill(4)
                   .str.extract(r'(^\d{2})', expand=False))

  # Group lookup; the join keys are not needed afterwards.
  return (pd.merge(trade, groups_sitc, how='inner', left_on='sitc', right_on='id')
            .drop(columns=['sitc', 'id']))


def _colombia_rows(indicator):
  """Keep only the 'Colombia' rows of *indicator* and drop its metadata columns.

  The columns removed are those listed in the module-level ``columns_to_drop``.
  """
  is_colombia = indicator['Country Name'] == 'Colombia'
  return indicator[is_colombia].drop(columns=columns_to_drop)


# -------------------------- EXPORTS DF-------------------------------------
exports_df = _build_trade_df(colombia_exports, 'to continent')

# --------------------------- IMPORTS DF ---------------------------------------
# NOTE(review): imports are matched to countries through 'dest', exactly like
# exports. If the imports table lists Colombia itself in 'dest' (with the
# partner in 'origin'), this join key should be 'origin' -- confirm against
# the data.
imports_df = _build_trade_df(colombia_imports, 'from continent')

#----------------------------- WORLD BANK -------------------------------------
columns_to_drop = ['Country Code', 'Indicator Name', 'Indicator Code']
# NOTE(review): unlike the other indicators, population keeps columns that
# contain NaN -- confirm this difference is intentional.
population = _colombia_rows(population)
# For the remaining indicators, columns with a missing value are discarded.
GDP_current = _colombia_rows(GDP_current).dropna(axis=1)
GDP_growth = _colombia_rows(GDP_growth).dropna(axis=1)
elec_access = _colombia_rows(elec_access).dropna(axis=1)
elec_use = _colombia_rows(elec_use).dropna(axis=1)


In [None]:
# Number of export rows (non-null 'year' values) per destination continent,
# largest first.
exports_df.groupby('to continent')['year'].count().sort_values(ascending=False)