# Fish, fish fillets, other fish meat and fish livers and roes, frozen

http://data.un.org/Data.aspx?d=ICS&f=cmID%3a21220-0

The United Nations Industrial Commodity Statistics Database provides annual statistics on the production of major industrial commodities by country. Data are provided in terms of physical quantities as well as monetary value. The online database covers the years 1995 to 2016

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

path = os.getcwd()                                  # get current working directory

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12                                       
warnings.simplefilter('ignore')

# Import Data

In [3]:
filepath = os.path.join(path, 'datasets', 'Fish, fish fillets, other fish meat and fish livers and roes, frozen.csv')

df = pd.read_csv(filepath)
df

Unnamed: 0,Country or Area,Year,Unit,Value,Value Footnotes
0,Albania,2003,Thousand metric tons,0.000,
1,Albania,2002,Thousand metric tons,0.000,
2,Albania,2001,Thousand metric tons,0.200,12
3,Albania,2000,Thousand metric tons,0.206,12
4,Albania,1999,Thousand metric tons,0.450,12
...,...,...,...,...,...
2145,12,"Including salted, dried or smoked fish.",,,
2146,13,Excluding national product list code 10.20.13.60.,,,
2147,14,Excluding Prodcom 2002 code 15.20.12.50.,,,
2148,15,Excluding Prodcom 2002 code 15.20.12.30.,,,


# Head and Tail

In [4]:
df = df[:2133]                                   # select index position 0-2133
df

Unnamed: 0,Country or Area,Year,Unit,Value,Value Footnotes
0,Albania,2003,Thousand metric tons,0.000,
1,Albania,2002,Thousand metric tons,0.000,
2,Albania,2001,Thousand metric tons,0.200,12
3,Albania,2000,Thousand metric tons,0.206,12
4,Albania,1999,Thousand metric tons,0.450,12
...,...,...,...,...,...
2128,Zimbabwe,1999,Thousand metric tons,1.250,12
2129,Zimbabwe,1998,Thousand metric tons,1.600,12
2130,Zimbabwe,1997,Thousand metric tons,1.800,12
2131,Zimbabwe,1996,Thousand metric tons,1.600,12


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2133 entries, 0 to 2132
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Country or Area  2133 non-null   object 
 1   Year             2133 non-null   object 
 2   Unit             2133 non-null   object 
 3   Value            2133 non-null   float64
 4   Value Footnotes  784 non-null    object 
dtypes: float64(1), object(4)
memory usage: 83.4+ KB


In [6]:
df.rename(columns={'Country or Area': 'country_or_area'}, inplace=True) 

df.columns = [col.lower() for col in df.columns]
df.head()

Unnamed: 0,country_or_area,year,unit,value,value footnotes
0,Albania,2003,Thousand metric tons,0.0,
1,Albania,2002,Thousand metric tons,0.0,
2,Albania,2001,Thousand metric tons,0.2,12.0
3,Albania,2000,Thousand metric tons,0.206,12.0
4,Albania,1999,Thousand metric tons,0.45,12.0


In [7]:
df_sub = df[['country_or_area', 'year', 'unit', 'value']]
df_sub.head()

Unnamed: 0,country_or_area,year,unit,value
0,Albania,2003,Thousand metric tons,0.0
1,Albania,2002,Thousand metric tons,0.0
2,Albania,2001,Thousand metric tons,0.2
3,Albania,2000,Thousand metric tons,0.206
4,Albania,1999,Thousand metric tons,0.45


In [8]:
df_sub['value'] = np.round(df_sub['value'], 2)
df_sub

Unnamed: 0,country_or_area,year,unit,value
0,Albania,2003,Thousand metric tons,0.00
1,Albania,2002,Thousand metric tons,0.00
2,Albania,2001,Thousand metric tons,0.20
3,Albania,2000,Thousand metric tons,0.21
4,Albania,1999,Thousand metric tons,0.45
...,...,...,...,...
2128,Zimbabwe,1999,Thousand metric tons,1.25
2129,Zimbabwe,1998,Thousand metric tons,1.60
2130,Zimbabwe,1997,Thousand metric tons,1.80
2131,Zimbabwe,1996,Thousand metric tons,1.60


# Descriptive Statistics

In [9]:
df_sub['value'].describe()

count    2133.000000
mean      174.956803
std       580.866022
min         0.000000
25%         1.730000
50%        15.460000
75%        68.400000
max      5675.670000
Name: value, dtype: float64

# Dataframe Grouping

In [10]:
cnt_area_grp = df_sub.groupby('country_or_area')
cnt_area_grp.head()

Unnamed: 0,country_or_area,year,unit,value
0,Albania,2003,Thousand metric tons,0.00
1,Albania,2002,Thousand metric tons,0.00
2,Albania,2001,Thousand metric tons,0.20
3,Albania,2000,Thousand metric tons,0.21
4,Albania,1999,Thousand metric tons,0.45
...,...,...,...,...
2124,Zimbabwe,2003,Thousand metric tons,1.30
2125,Zimbabwe,2002,Thousand metric tons,1.30
2126,Zimbabwe,2001,Thousand metric tons,1.30
2127,Zimbabwe,2000,Thousand metric tons,1.30


In [11]:
cnt_area_grp_stats = cnt_area_grp.agg(
    min = ('value', 'min'),
    avg = ('value', 'mean'),
    total = ('value', 'max')
)

cnt_area_grp_stats

Unnamed: 0_level_0,min,avg,total
country_or_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Albania,0.00,0.324444,0.68
Algeria,0.00,0.000000,0.00
Angola,1.19,16.372273,56.20
Antigua and Barbuda,1.02,1.110000,1.20
Argentina,223.51,288.665556,373.83
...,...,...,...
Venezuela (Bolivarian Republic of),10.60,22.685556,41.51
Viet Nam,4.53,701.351667,1763.10
Yemen,2.00,2.600000,4.00
Zambia,0.00,0.000000,0.00


In [12]:
cnt_area_grp_stats['total'].sort_values(ascending=False).head(10)      # top 10 country/area with the highest commodities

country_or_area
China                       5675.67
United States of America    4947.00
Russian Federation          4028.66
Japan                       2518.11
Viet Nam                    1763.10
Chile                       1464.43
Indonesia                   1411.55
Norway                      1282.27
Iceland                     1068.74
Canada                       940.26
Name: total, dtype: float64

In [13]:
cnt_area_grp.get_group('China')

Unnamed: 0,country_or_area,year,unit,value
337,China,2003,Thousand metric tons,5675.67
338,China,2002,Thousand metric tons,5167.42
339,China,2001,Thousand metric tons,4434.19
340,China,2000,Thousand metric tons,4035.12
341,China,1999,Thousand metric tons,3733.63
342,China,1998,Thousand metric tons,3540.03
343,China,1997,Thousand metric tons,3380.23
344,China,1996,Thousand metric tons,3345.87
345,China,1995,Thousand metric tons,3205.31


In [14]:
cnt_area_grp.get_group('United States of America').head()

Unnamed: 0,country_or_area,year,unit,value
2025,United States of America,2005,Mil. USD,4947.0
2026,United States of America,2005,Thousand metric tons,1404.44
2027,United States of America,2004,Mil. USD,4142.0
2028,United States of America,2004,Thousand metric tons,1187.04
2029,United States of America,2003,Mil. USD,4589.0


In [15]:
cnt_area_grp.get_group('Russian Federation').head()

Unnamed: 0,country_or_area,year,unit,value
1569,Russian Federation,2016,Thousand metric tons,4028.66
1570,Russian Federation,2015,Thousand metric tons,3829.43
1571,Russian Federation,2014,Thousand metric tons,3725.38
1572,Russian Federation,2013,Thousand metric tons,3788.76
1573,Russian Federation,2012,Thousand metric tons,3689.06


In [16]:
cnt_area_grp.get_group('Japan').head()

Unnamed: 0,country_or_area,year,unit,value
922,Japan,2016,Thousand metric tons,1401.66
923,Japan,2015,Thousand metric tons,1416.23
924,Japan,2014,Thousand metric tons,1485.41
925,Japan,2013,Thousand metric tons,1382.6
926,Japan,2012,Thousand metric tons,1257.11


In [17]:
cnt_area_grp.get_group('Viet Nam').head()

Unnamed: 0,country_or_area,year,unit,value
2088,Viet Nam,2016,Mil. USD,6.31
2089,Viet Nam,2016,Thousand metric tons,1763.1
2090,Viet Nam,2015,Mil. USD,4.53
2091,Viet Nam,2015,Thousand metric tons,1666.0
2092,Viet Nam,2014,Thousand metric tons,1586.67


In [18]:
cnt_area_grp.get_group('Chile').head()

Unnamed: 0,country_or_area,year,unit,value
304,Chile,2016,Mil. USD,1008.96
305,Chile,2016,Thousand metric tons,210.78
306,Chile,2015,Mil. USD,769.19
307,Chile,2015,Thousand metric tons,123.14
308,Chile,2014,Mil. USD,176.84


In [19]:
cnt_area_grp.get_group('Indonesia').head()

Unnamed: 0,country_or_area,year,unit,value
874,Indonesia,2012,Mil. USD,257.76
875,Indonesia,2003,Thousand metric tons,555.12
876,Indonesia,2002,Thousand metric tons,1411.55
877,Indonesia,2001,Thousand metric tons,1051.67
878,Indonesia,2000,Thousand metric tons,418.12


In [20]:
cnt_area_grp.get_group('Norway').head()

Unnamed: 0,country_or_area,year,unit,value
1353,Norway,2006,Mil. USD,1191.42
1354,Norway,2006,Thousand metric tons,918.21
1355,Norway,2005,Mil. USD,1282.27
1356,Norway,2005,Thousand metric tons,1021.42
1357,Norway,2004,Mil. USD,1096.35


In [21]:
cnt_area_grp.get_group('Iceland').head()

Unnamed: 0,country_or_area,year,unit,value
822,Iceland,2016,Mil. USD,834.93
823,Iceland,2016,Thousand metric tons,256.51
824,Iceland,2015,Mil. USD,810.54
825,Iceland,2015,Thousand metric tons,272.17
826,Iceland,2014,Mil. USD,969.14


In [22]:
cnt_area_grp.get_group('Canada').head()

Unnamed: 0,country_or_area,year,unit,value
287,Canada,2014,Mil. USD,778.86
288,Canada,2013,Mil. USD,852.3
289,Canada,2012,Mil. USD,609.4
290,Canada,2011,Mil. USD,741.42
291,Canada,2010,Mil. USD,940.26
