# Trade of goods , US$, HS 1992, 01 Live animals

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = os.getcwd()                                         # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
# Location of the CSV, relative to the directory the kernel is running in
filepath = os.path.join(
    'datasets',
    'Trade of goods , US$, HS 1992, 01 Live animals.csv',
)

# Read the whole trade table into a DataFrame
df = pd.read_csv(filepath)

# Head and Tail

In [4]:
# Display the loaded frame; a long frame is rendered truncated to its leading and trailing rows
df

Unnamed: 0,Country or Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity Name,Quantity
0,Afghanistan,2018,"Horses, live pure-bred breeding",Import,15561458.59,,Number of items,28859.00
1,Afghanistan,2018,"Sheep, live",Export,3064.44,1.125916e+03,Number of items,14.98
2,Afghanistan,2018,"Fowls, live domestic > 185 grams",Import,16338543.11,2.190237e+07,Number of items,12297334.30
3,Afghanistan,2018,"Animals, live, except farm animals",Import,13186.60,,No Quantity,0.00
4,Afghanistan,2016,"Sheep, live",Export,6088.00,2.339000e+03,Number of items,51.00
...,...,...,...,...,...,...,...,...
62306,Zimbabwe,1995,"Fowls, live domestic > 185 grams",Export,444.00,0.000000e+00,Number of items,1920.00
62307,Zimbabwe,1995,"Poultry, live except domestic fowls, > 185 grams",Import,25541.00,0.000000e+00,Number of items,1690.00
62308,Zimbabwe,1995,"Poultry, live except domestic fowls, > 185 grams",Export,3482722.00,0.000000e+00,Number of items,1005516.00
62309,Zimbabwe,1995,"Animals, live, except farm animals",Import,111642.00,0.000000e+00,Number of items,2381.00


In [5]:
# (rows, columns) of the loaded frame
df.shape

(62311, 8)

In [6]:
# Per-column dtype and non-null count -- a first look at where values are missing
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62311 entries, 0 to 62310
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Country or Area  62311 non-null  object 
 1   Year             62311 non-null  int64  
 2   Commodity        62311 non-null  object 
 3   Flow             62311 non-null  object 
 4   Trade (USD)      62311 non-null  float64
 5   Weight (kg)      58567 non-null  float64
 6   Quantity Name    62311 non-null  object 
 7   Quantity         53164 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 3.8+ MB


In [7]:
# Give the country column an underscore name (no spaces).
# rename() returns a new frame that is bound back to `df`, instead of mutating the
# loaded frame with inplace=True; a key that is already renamed is simply ignored,
# so the cell stays safe to re-run.
df = df.rename(columns={'Country or Area': 'Country_or_Area'})

df.head()

Unnamed: 0,Country_or_Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity Name,Quantity
0,Afghanistan,2018,"Horses, live pure-bred breeding",Import,15561458.59,,Number of items,28859.0
1,Afghanistan,2018,"Sheep, live",Export,3064.44,1125.916,Number of items,14.98
2,Afghanistan,2018,"Fowls, live domestic > 185 grams",Import,16338543.11,21902370.0,Number of items,12297334.3
3,Afghanistan,2018,"Animals, live, except farm animals",Import,13186.6,,No Quantity,0.0
4,Afghanistan,2016,"Sheep, live",Export,6088.0,2339.0,Number of items,51.0


In [8]:
# Keep the columns used in the analysis ('Quantity Name' is left out).
# .copy() gives df1 its own data, so assigning to df1 later cannot trigger
# SettingWithCopyWarning -- a warning the global warnings filter would hide.
df1 = df[['Country_or_Area', 'Year', 'Commodity', 'Flow', 'Trade (USD)',
          'Weight (kg)', 'Quantity']].copy()

df1.head()

Unnamed: 0,Country_or_Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity
0,Afghanistan,2018,"Horses, live pure-bred breeding",Import,15561458.59,,28859.0
1,Afghanistan,2018,"Sheep, live",Export,3064.44,1125.916,14.98
2,Afghanistan,2018,"Fowls, live domestic > 185 grams",Import,16338543.11,21902370.0,12297334.3
3,Afghanistan,2018,"Animals, live, except farm animals",Import,13186.6,,0.0
4,Afghanistan,2016,"Sheep, live",Export,6088.0,2339.0,51.0


In [9]:
# Summarise the non-numeric columns (count, unique, top, freq)
df1.describe(exclude='number')

Unnamed: 0,Country_or_Area,Commodity,Flow
count,62311,62311,62311
unique,207,15,4
top,Canada,"Animals, live, except farm animals",Import
freq,1046,7662,34773


In [10]:
# Summary statistics for the three numeric measures; a `count` below the row total
# means the column has missing data (the case for 'Weight (kg)' and 'Quantity')
df1[['Trade (USD)', 'Weight (kg)', 'Quantity']].describe()

Unnamed: 0,Trade (USD),Weight (kg),Quantity
count,62311.0,58567.0,53164.0
mean,13241960.0,11832900.0,273606900.0
std,73023690.0,742885800.0,19004310000.0
min,0.93,0.0,0.0
25%,18000.0,1103.5,100.0
50%,231788.0,21376.22,3761.0
75%,2690106.0,330383.0,114299.5
max,2532308000.0,120104200000.0,2833182000000.0


# Missing Values Exploration

In [11]:
# Flag the columns that contain at least one missing value
df1.isna().any()

Country_or_Area    False
Year               False
Commodity          False
Flow               False
Trade (USD)        False
Weight (kg)         True
Quantity            True
dtype: bool

In [12]:
# Number of missing values in each column
df1.isna().sum()

Country_or_Area       0
Year                  0
Commodity             0
Flow                  0
Trade (USD)           0
Weight (kg)        3744
Quantity           9147
dtype: int64

In [13]:
# Row counts of present (False) versus missing (True) weights
df1['Weight (kg)'].isna().value_counts()

False    58567
True      3744
Name: Weight (kg), dtype: int64

In [14]:
# Same split as a share of all rows: normalize=True returns proportions instead of counts
df1['Weight (kg)'].isna().value_counts(normalize=True)

False    0.939914
True     0.060086
Name: Weight (kg), dtype: float64

In [15]:
# Index labels of every row whose weight is missing
wt_idx = df1.loc[df1['Weight (kg)'].isna()].index

# Preview the first few affected rows
df1.loc[wt_idx].head()

Unnamed: 0,Country_or_Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity
0,Afghanistan,2018,"Horses, live pure-bred breeding",Import,15561458.59,,28859.0
3,Afghanistan,2018,"Animals, live, except farm animals",Import,13186.6,,0.0
126,Albania,2007,"Horses, live pure-bred breeding",Import,97402.0,,
127,Albania,2007,"Horses, live except pure-bred breeding",Import,8753.0,,
131,Albania,2007,"Swine, live except pure-bred breeding < 50 kg",Import,103493.0,,


In [16]:
# Row counts of present (False) versus missing (True) quantities
df1['Quantity'].isna().value_counts()

False    53164
True      9147
Name: Quantity, dtype: int64

In [17]:
# Index labels of every row whose quantity is missing
qty_idx = df1.loc[df1['Quantity'].isna()].index

# Preview the first few affected rows
df1.loc[qty_idx].head()

Unnamed: 0,Country_or_Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity
43,Albania,2014,"Animals, live, except farm animals",Import,17544.0,4948.0,
56,Albania,2013,"Animals, live, except farm animals",Import,816312.0,108525.0,
57,Albania,2013,"Animals, live, except farm animals",Export,846588.0,99329.0,
70,Albania,2012,"Animals, live, except farm animals",Import,1401366.0,208478.0,
71,Albania,2012,"Animals, live, except farm animals",Export,895347.0,119590.0,


In [18]:
# Replace every NaN with 0, producing a new frame (df1 is left untouched).
# NOTE(review): both 'Weight (kg)' and 'Quantity' already contain genuine zeros, so
# after this fill an unknown value can no longer be told apart from a reported 0 --
# confirm that downstream aggregates tolerate this.
df2 = df1.fillna(value=0)

# Spot-check the filled frame
df2.head()

Unnamed: 0,Country_or_Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity
0,Afghanistan,2018,"Horses, live pure-bred breeding",Import,15561458.59,0.0,28859.0
1,Afghanistan,2018,"Sheep, live",Export,3064.44,1125.916,14.98
2,Afghanistan,2018,"Fowls, live domestic > 185 grams",Import,16338543.11,21902370.0,12297334.3
3,Afghanistan,2018,"Animals, live, except farm animals",Import,13186.6,0.0,0.0
4,Afghanistan,2016,"Sheep, live",Export,6088.0,2339.0,51.0


In [19]:
# Re-count NaNs per column after the fill; every count is expected to be zero
df2.isna().sum()

Country_or_Area    0
Year               0
Commodity          0
Flow               0
Trade (USD)        0
Weight (kg)        0
Quantity           0
dtype: int64