# Table of Contents

1. [Importing Libraries](#1.-Importing-Libraries)
2. [Getting the data](#2.-Getting-the-data)
3. [Config and Functions](#3.-Config-and-Functions) \
    3.1 [Config](#3.1-Config) \
    3.2 [Functions](#3.2-Functions)
4. [Visualization](#4.-Visualization) \
    4.1 [Largest producers / consumer per commodity](#4.1-Largest-producers-/-consumer-per-commodity) \
    4.2 [Balance between production and consumption in respective origin countries](#4.2--Balance-between-production-and-consumption-in-respective-origin-countries) \
    4.3 [Stock level balance throughout the years](#4.3-Stock-level-balance-throughout-the-years)

# 1. Importing Libraries

In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

# 2. Getting the data

In [2]:
# Upstream download link, kept for reference:
#   https://apps.fas.usda.gov/psdonline/downloads/psd_alldata_csv.zip
# The notebook reads the zipped CSV from a local path instead.
commodities_file = 'data/psd_alldata_csv.zip'

# keep_default_na=False: do not let pandas convert its default NA markers
# into NaN while parsing.
all_commodities = pd.read_csv(
    commodities_file,
    keep_default_na=False,
    compression='zip',
)

In [3]:
# Commodities analysed in this notebook.
commodities_to_explore = [
    'Grapefruit, Fresh',
    'Lemons/Limes, Fresh',
    'Oranges, Fresh',
    'Tangerines/Mandarins, Fresh',
    'Coffee, Green',
]

# Keep only the rows describing those commodities, as an independent copy.
# reset_index() is called without drop=True, so the previous row labels are
# preserved in an "index" column.
commodities = (
    all_commodities
    .loc[all_commodities['Commodity_Description'].isin(commodities_to_explore)]
    .copy()
    .reset_index()
)

# 3. Config and Functions 

## 3.1 Config

In [5]:
# Table styles for rendered DataFrames: one font size for header cells (th)
# and another for data cells (td).
heading_properties = [('font-size', '16px')]
cell_properties = [('font-size', '15px')]
dfstyle = [
    {'selector': 'th', 'props': heading_properties},
    {'selector': 'td', 'props': cell_properties},
]


## 3.2 Functions

In [55]:
# manipulation

def filter_by_values(df: pd.DataFrame, column: str, values: list):
    """Return a copy of the rows of ``df`` whose ``column`` is in ``values``.

    Args:
        df: Frame to filter; it is not modified.
        column: Name of the column to test.
        values: Accepted values for ``column``.

    Returns:
        A new DataFrame holding the matching rows with their original
        index labels.
    """
    matches = df[column].isin(values)
    return df.loc[matches].copy()


def groupby_and_sum_values(df: pd.DataFrame, group: list):
    """Sum the ``Value`` column of ``df`` within each group.

    The ``Value`` column is selected before aggregating, so only that column
    is summed. The previous form, ``.sum('Value')``, passed ``'Value'``
    positionally as the ``numeric_only`` flag and summed every numeric
    column before discarding all but ``Value``.

    Args:
        df: Frame containing a ``Value`` column and the grouping columns.
        group: Column names to group by.

    Returns:
        A single-column DataFrame (``Value``) indexed by the group keys.
    """
    return df.groupby(group)[['Value']].sum()


def get_top_countries_per_commodity(df_attr: pd.DataFrame, column_title: str):
    """Find, for each commodity, the country with the largest summed value.

    Values are first summed per (commodity, country); the per-commodity
    maximum of those totals is then matched back to the country that
    produced it. Countries tied for the maximum are all kept.

    Args:
        df_attr: Frame with ``Commodity_Description``, ``Country_Name`` and
            ``Value`` columns.
        column_title: Name given to the country column in the result.

    Returns:
        A DataFrame with columns ``Value``, ``Commodity`` and
        ``column_title``, sorted by ``column_title``.
    """
    commodity_key = 'Commodity_Description'
    country_key = 'Country_Name'

    totals_per_country = (
        groupby_and_sum_values(df_attr, [commodity_key, country_key])
        .reset_index()
    )

    # Keep the commodity as a regular column so it can take part in the join.
    max_per_commodity = (
        totals_per_country
        .groupby(commodity_key, as_index=False)['Value']
        .max()
    )

    # Join on commodity AND value. Joining on the value alone would also
    # match a country of a different commodity whose total happens to equal
    # this commodity's maximum.
    top_rows = max_per_commodity.merge(totals_per_country,
                                       on=[commodity_key, 'Value'])

    # Column order is kept as Value, commodity, country.
    return (top_rows[['Value', commodity_key, country_key]]
                .rename(columns={commodity_key: 'Commodity',
                                 country_key: column_title})
                .sort_values(column_title))

In [45]:
#visualization

def stylize_df_top_country_per_commodity(df: pd.DataFrame, highlight_columns: list):
    """Build a styled view of ``df`` with highlighted columns and no index.

    Applies the notebook-wide ``dfstyle`` table styles, gives the columns in
    ``highlight_columns`` a light-yellow background with left-aligned text,
    and hides the index.

    Args:
        df: Frame to display.
        highlight_columns: Column names to highlight.

    Returns:
        The configured pandas Styler.
    """
    highlight = {'background-color': '#ffffb3', 'text-align': 'left'}
    styler = (df.style
                .set_table_styles(dfstyle)
                .set_properties(subset=highlight_columns, **highlight))

    # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
    # Styler.hide(axis='index') replaces it. Fall back to the old method on
    # pandas versions that predate Styler.hide().
    if hasattr(styler, 'hide'):
        return styler.hide(axis='index')
    return styler.hide_index()

# 4. Visualization

## 4.1 Largest producers / consumer per commodity

##### Largest Producers per Commodity

In [56]:
# Rows of the explored commodities whose attribute is 'Production'.
commodities_production = filter_by_values(
    df=commodities,
    column='Attribute_Description',
    values=['Production'],
)

# commodities_production.head()

- Checking that each commodity is reported in a single unit, so that values can be summed and compared across countries fairly:

In [57]:
# Distinct unit descriptions reported for each commodity.
(commodities
     .groupby('Commodity_Description')['Unit_Description']
     .unique())

Commodity_Description
Coffee, Green                  [(1000 60 KG BAGS)]
Grapefruit, Fresh                      [(1000 MT)]
Lemons/Limes, Fresh                    [(1000 MT)]
Oranges, Fresh                         [(1000 MT)]
Tangerines/Mandarins, Fresh            [(1000 MT)]
Name: Unit_Description, dtype: object

In [58]:
# Top producing country for each commodity.
largest_producers_per_commodity = get_top_countries_per_commodity(
    df_attr=commodities_production,
    column_title='Largest Producer',
)

# largest_producers_per_commodity

##### Largest Consumer per Commodity

In [62]:
# Consumption rows are matched under either of the two attribute labels below.
commodities_consumption = filter_by_values(
    df=commodities,
    column='Attribute_Description',
    values=['Domestic Consumption', 'Fresh Dom. Consumption'],
)

# commodities_consumption.head()

In [60]:
# Top consuming country for each commodity.
largest_consumers_per_commodity = get_top_countries_per_commodity(
    df_attr=commodities_consumption,
    column_title='Largest Consumer',
)

# largest_consumers_per_commodity

##### Largest producers / consumer per commodity

In [61]:
# Put the top producer and top consumer side by side, one row per commodity.
# The summed values are dropped first so only the country names remain.
largest_consumers_producers_per_commodity = pd.merge(
    largest_producers_per_commodity.drop(columns='Value'),
    largest_consumers_per_commodity.drop(columns='Value'),
    on='Commodity',
)

stylize_df_top_country_per_commodity(
    df=largest_consumers_producers_per_commodity,
    highlight_columns=['Largest Producer', 'Largest Consumer'],
)

Commodity,Largest Producer,Largest Consumer
"Coffee, Green",Brazil,European Union
"Oranges, Fresh",Brazil,Brazil
"Tangerines/Mandarins, Fresh",China,China
"Lemons/Limes, Fresh",Mexico,European Union
"Grapefruit, Fresh",United States,China


## 4.2  Balance between production and consumption in respective origin countries

## 4.3 Stock level balance throughout the years