In [1]:
# Libraries for data manipulation and visualization
import pandas as pd # for data manipulation 
import numpy as np # for data manipulation 
import seaborn as sns # for data visualization
import matplotlib.pyplot as plt # for data visualization

# Libraries for clustering
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer,SilhouetteVisualizer
from sklearn.metrics import silhouette_score

# Success metrics for the models
from sklearn.metrics import accuracy_score,precision_score,recall_score, confusion_matrix,f1_score
from sklearn.preprocessing import MinMaxScaler # to scale the numeric features 
from sklearn.model_selection import train_test_split,cross_val_score #splitting the dataset into test-train
from sklearn.model_selection import GridSearchCV #GridSearchCV parameter optimization

# Statistical libraries
import researchpy as rp #!pip install researchpy
from scipy.stats import shapiro,kruskal
import pylab

# Algorithms for using supervised learning methods
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from warnings import filterwarnings
# Install a global "ignore" filter so no warnings are shown by later cells.
# NOTE(review): this also hides library deprecation/future warnings — confirm
# that a blanket filter is really wanted.
filterwarnings('ignore')

Creating the Dataset from CSVs

Part 1: Animal Products Datasets

In [3]:
# Read the fish and seafood production CSV and preview its first five rows.
df_fish_seafood_production = pd.read_csv(
    filepath_or_buffer="fish-seafood-production.csv"
)
df_fish_seafood_production.head(n=5)

Unnamed: 0,Entity,Code,Year,Fish and seafood | 00002960 || Production | 005511 || tonnes
0,Afghanistan,AFG,1961,300.0
1,Afghanistan,AFG,1962,300.0
2,Afghanistan,AFG,1963,300.0
3,Afghanistan,AFG,1964,300.0
4,Afghanistan,AFG,1965,300.0


In [4]:
# Read the meat-production-by-livestock-type CSV and preview its first five rows.
df_global_meat_production = pd.read_csv(
    filepath_or_buffer="global-meat-production-by-livestock-type.csv"
)
df_global_meat_production.head(n=5)

Unnamed: 0,Entity,Code,Year,"Meat, game | 00001163 || Production | 005510 || tonnes","Meat, duck | 00001069 || Production | 005510 || tonnes","Meat, horse | 00001097 || Production | 005510 || tonnes","Meat, camel | 00001127 || Production | 005510 || tonnes","Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes","Meat, sheep and goat | 00001807 || Production | 005510 || tonnes","Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes","Meat, pig | 00001035 || Production | 005510 || tonnes","Meat, poultry | 00001808 || Production | 005510 || tonnes"
0,Afghanistan,AFG,1961,4000.0,,,3600.0,,73220.0,43000.0,,5600.0
1,Afghanistan,AFG,1962,4000.0,,,4031.0,,72375.0,45800.0,,6000.0
2,Afghanistan,AFG,1963,5000.0,,,5031.0,,75530.0,47250.0,,6160.0
3,Afghanistan,AFG,1964,5000.0,,,4680.0,,79750.0,48000.0,,6400.0
4,Afghanistan,AFG,1965,6000.0,,,4320.0,,84375.0,48700.0,,6800.0


Part 2: Grain Products Datasets

In [5]:
# Read the barley production CSV and preview its first five rows.
df_barley_production = pd.read_csv(
    filepath_or_buffer="barley-production.csv"
)
df_barley_production.head(n=5)

Unnamed: 0,Entity,Code,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,378000
1,Afghanistan,AFG,1962,378000
2,Afghanistan,AFG,1963,378000
3,Afghanistan,AFG,1964,380000
4,Afghanistan,AFG,1965,380000


In [6]:
# Read the cereal production CSV and preview its first five rows.
df_cereal_production = pd.read_csv(
    filepath_or_buffer="cereal-production.csv"
)
df_cereal_production.head(n=5)

Unnamed: 0,Entity,Code,Year,Cereals | 00001717 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,3695000
1,Afghanistan,AFG,1962,3696000
2,Afghanistan,AFG,1963,3378000
3,Afghanistan,AFG,1964,3732000
4,Afghanistan,AFG,1965,3785000


In [7]:
# Read the wheat production CSV and preview its first five rows.
df_wheat_production = pd.read_csv(
    filepath_or_buffer="wheat-production.csv"
)
df_wheat_production.head(n=5)

Unnamed: 0,Entity,Code,Year,Wheat | 00000015 || Production | 005510 || tonnes
0,Afghanistan,AFG,1961,2279000
1,Afghanistan,AFG,1962,2279000
2,Afghanistan,AFG,1963,1947000
3,Afghanistan,AFG,1964,2230000
4,Afghanistan,AFG,1965,2282000


In [8]:
# Read the rye production CSV and preview its first five rows.
df_rye_production = pd.read_csv(
    filepath_or_buffer="rye-production.csv"
)
df_rye_production.head(n=5)

Unnamed: 0,Entity,Code,Year,Rye | 00000071 || Production | 005510 || tonnes
0,Africa,,1961,10290
1,Africa,,1962,9100
2,Africa,,1963,13800
3,Africa,,1964,16100
4,Africa,,1965,13900


Filtering Data for Ireland and Other Countries

Merging Animal Dataframes from Filtered Ireland Data

In [10]:
# Keep only the fish/seafood rows whose `Code` is one of the nine selected
# countries, then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (missing codes are excluded either way).
df_fish_seafood_production_ireland = df_fish_seafood_production[
    df_fish_seafood_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)

In [11]:
# Keep only the meat-production rows whose `Code` is one of the nine selected
# countries, then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (missing codes are excluded either way).
df_global_meat_production_ireland = df_global_meat_production[
    df_global_meat_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)
df_global_meat_production_ireland.head()

Unnamed: 0,Entity,Code,Year,"Meat, game | 00001163 || Production | 005510 || tonnes","Meat, duck | 00001069 || Production | 005510 || tonnes","Meat, horse | 00001097 || Production | 005510 || tonnes","Meat, camel | 00001127 || Production | 005510 || tonnes","Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes","Meat, sheep and goat | 00001807 || Production | 005510 || tonnes","Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes","Meat, pig | 00001035 || Production | 005510 || tonnes","Meat, poultry | 00001808 || Production | 005510 || tonnes"
0,Denmark,DNK,1961,3300.0,5000.0,3600.0,,600.0,1200.0,140500.0,614300.0,83350.0
1,Denmark,DNK,1962,3600.0,3200.0,2100.0,,600.0,1100.0,177800.0,632100.0,88990.0
2,Denmark,DNK,1963,3200.0,2000.0,2300.0,,400.0,1100.0,180100.0,645400.0,81220.0
3,Denmark,DNK,1964,4000.0,2400.0,2100.0,,400.0,1200.0,156500.0,688000.0,95150.0
4,Denmark,DNK,1965,4500.0,3000.0,1700.0,,600.0,1400.0,154400.0,744000.0,83800.0


In [12]:
# Attach the fish/seafood column to the meat table by matching
# (Entity, Code, Year) instead of gluing the frames together by row position.
# A positional concat silently pairs values from different countries or years
# as soon as either source is missing a row or is ordered differently.
animal_keys = ["Entity", "Code", "Year"]
fish_column = "Fish and seafood | 00002960 || Production | 005511 || tonnes"

# Single-column fish frame, kept under its original name.
fish_df = df_fish_seafood_production_ireland[[fish_column]].reset_index(drop=True)

merged_animal_df = df_global_meat_production_ireland.merge(
    df_fish_seafood_production_ireland[animal_keys + [fish_column]],
    on=animal_keys,
    how="left",             # keep every meat row, in its existing order
    validate="one_to_one",  # raise if an (Entity, Code, Year) key is duplicated
).reset_index(drop=True)
merged_animal_df.head()

Unnamed: 0,Entity,Code,Year,"Meat, game | 00001163 || Production | 005510 || tonnes","Meat, duck | 00001069 || Production | 005510 || tonnes","Meat, horse | 00001097 || Production | 005510 || tonnes","Meat, camel | 00001127 || Production | 005510 || tonnes","Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes","Meat, sheep and goat | 00001807 || Production | 005510 || tonnes","Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes","Meat, pig | 00001035 || Production | 005510 || tonnes","Meat, poultry | 00001808 || Production | 005510 || tonnes",Fish and seafood | 00002960 || Production | 005511 || tonnes
0,Denmark,DNK,1961,3300.0,5000.0,3600.0,,600.0,1200.0,140500.0,614300.0,83350.0,637490.0
1,Denmark,DNK,1962,3600.0,3200.0,2100.0,,600.0,1100.0,177800.0,632100.0,88990.0,785100.0
2,Denmark,DNK,1963,3200.0,2000.0,2300.0,,400.0,1100.0,180100.0,645400.0,81220.0,847900.0
3,Denmark,DNK,1964,4000.0,2400.0,2100.0,,400.0,1200.0,156500.0,688000.0,95150.0,871060.0
4,Denmark,DNK,1965,4500.0,3000.0,1700.0,,600.0,1400.0,154400.0,744000.0,83800.0,840830.0


In [13]:
# Number of rows per country code in the merged animal table.
merged_animal_df["Code"].value_counts()

DNK    60
FRA    60
DEU    60
IRL    60
ITA    60
NLD    60
PRT    60
ESP    60
GBR    60
Name: Code, dtype: int64

Merging Grain Dataframes from Filtered Ireland Data

In [14]:
# Keep only the rye rows whose `Code` is one of the nine selected countries,
# then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (rows with a missing code, such as the
# "Africa" aggregate, are excluded either way).
df_rye_production_ireland = df_rye_production[
    df_rye_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)
df_rye_production_ireland.head()

Unnamed: 0,Entity,Code,Year,Rye | 00000071 || Production | 005510 || tonnes
0,Denmark,DNK,1961,513876
1,Denmark,DNK,1962,512262
2,Denmark,DNK,1963,319871
3,Denmark,DNK,1964,291633
4,Denmark,DNK,1965,264340


In [15]:
# Keep only the barley rows whose `Code` is one of the nine selected
# countries, then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (missing codes are excluded either way).
df_barley_production_ireland = df_barley_production[
    df_barley_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)
df_barley_production_ireland.head()

Unnamed: 0,Entity,Code,Year,Barley | 00000044 || Production | 005510 || tonnes
0,Denmark,DNK,1961,2808149
1,Denmark,DNK,1962,3299515
2,Denmark,DNK,1963,3398857
3,Denmark,DNK,1964,3899590
4,Denmark,DNK,1965,4125804


In [16]:
# Keep only the cereal rows whose `Code` is one of the nine selected
# countries, then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (missing codes are excluded either way).
df_cereal_production_ireland = df_cereal_production[
    df_cereal_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)
df_cereal_production_ireland.head()

Unnamed: 0,Entity,Code,Year,Cereals | 00001717 || Production | 005510 || tonnes
0,Denmark,DNK,1961,5198900
1,Denmark,DNK,1962,5783409
2,Denmark,DNK,1963,5502930
3,Denmark,DNK,1964,6213090
4,Denmark,DNK,1965,6213087


In [17]:
# Keep only the wheat rows whose `Code` is one of the nine selected
# countries, then renumber the index from 0.
# Despite the "_ireland" suffix, the result holds all nine countries.
# Series.isin builds a single membership mask equivalent to the chained
# `== ... | == ...` comparisons (missing codes are excluded either way).
df_wheat_production_ireland = df_wheat_production[
    df_wheat_production["Code"].isin(
        ["IRL", "FRA", "ITA", "GBR", "ESP", "NLD", "DEU", "PRT", "DNK"]
    )
].reset_index(drop=True)
df_wheat_production_ireland.head()

Unnamed: 0,Entity,Code,Year,Wheat | 00000015 || Production | 005510 || tonnes
0,Denmark,DNK,1961,434057
1,Denmark,DNK,1962,643992
2,Denmark,DNK,1963,494060
3,Denmark,DNK,1964,541454
4,Denmark,DNK,1965,563769


In [18]:
# Combine the four grain tables by matching (Entity, Code, Year) instead of
# gluing them together by row position. A positional concat silently pairs
# values from different countries or years as soon as one source is missing
# a row or is ordered differently.
grain_keys = ["Entity", "Code", "Year"]
wheat_column = "Wheat | 00000015 || Production | 005510 || tonnes"
cereal_column = "Cereals | 00001717 || Production | 005510 || tonnes"
barley_column = "Barley | 00000044 || Production | 005510 || tonnes"

# Single-column frames, kept under their original names.
wheat_df = df_wheat_production_ireland[[wheat_column]].reset_index(drop=True)
cereal_df = df_cereal_production_ireland[[cereal_column]].reset_index(drop=True)
barley_df = df_barley_production_ireland[[barley_column]].reset_index(drop=True)

# Start from the rye table and add one grain column per keyed left merge, so
# the column order stays rye, wheat, cereals, barley.
merged_grain_df = df_rye_production_ireland
for grain_source, grain_column in (
    (df_wheat_production_ireland, wheat_column),
    (df_cereal_production_ireland, cereal_column),
    (df_barley_production_ireland, barley_column),
):
    merged_grain_df = merged_grain_df.merge(
        grain_source[grain_keys + [grain_column]],
        on=grain_keys,
        how="left",             # keep every rye row, in its existing order
        validate="one_to_one",  # raise if an (Entity, Code, Year) key is duplicated
    )
merged_grain_df = merged_grain_df.reset_index(drop=True)
merged_grain_df.head()

Unnamed: 0,Entity,Code,Year,Rye | 00000071 || Production | 005510 || tonnes,Wheat | 00000015 || Production | 005510 || tonnes,Cereals | 00001717 || Production | 005510 || tonnes,Barley | 00000044 || Production | 005510 || tonnes
0,Denmark,DNK,1961,513876,434057,5198900,2808149
1,Denmark,DNK,1962,512262,643992,5783409,3299515
2,Denmark,DNK,1963,319871,494060,5502930,3398857
3,Denmark,DNK,1964,291633,541454,6213090,3899590
4,Denmark,DNK,1965,264340,563769,6213087,4125804


In [19]:
# Number of rows per country name in the merged grain table.
merged_grain_df["Entity"].value_counts()

Denmark           60
France            60
Germany           60
Ireland           60
Italy             60
Netherlands       60
Portugal          60
Spain             60
United Kingdom    60
Name: Entity, dtype: int64

Merging Grain and Animal Dataframes

In [20]:
# Join the grain columns onto the animal table by matching
# (Entity, Code, Year) instead of gluing the frames together by row position,
# so values from different countries or years can never be paired by accident.
final_keys = ["Entity", "Code", "Year"]
grain_columns = [
    "Rye | 00000071 || Production | 005510 || tonnes",
    "Wheat | 00000015 || Production | 005510 || tonnes",
    "Cereals | 00001717 || Production | 005510 || tonnes",
    "Barley | 00000044 || Production | 005510 || tonnes",
]

# Grain-only frame (no key columns), kept under its original name.
grain_df_filtered = merged_grain_df[grain_columns].reset_index(drop=True)

final_merged_df = merged_animal_df.merge(
    merged_grain_df[final_keys + grain_columns],
    on=final_keys,
    how="left",             # keep every animal row, in its existing order
    validate="one_to_one",  # raise if an (Entity, Code, Year) key is duplicated
).reset_index(drop=True)
final_merged_df.head()

Unnamed: 0,Entity,Code,Year,"Meat, game | 00001163 || Production | 005510 || tonnes","Meat, duck | 00001069 || Production | 005510 || tonnes","Meat, horse | 00001097 || Production | 005510 || tonnes","Meat, camel | 00001127 || Production | 005510 || tonnes","Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes","Meat, sheep and goat | 00001807 || Production | 005510 || tonnes","Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes","Meat, pig | 00001035 || Production | 005510 || tonnes","Meat, poultry | 00001808 || Production | 005510 || tonnes",Fish and seafood | 00002960 || Production | 005511 || tonnes,Rye | 00000071 || Production | 005510 || tonnes,Wheat | 00000015 || Production | 005510 || tonnes,Cereals | 00001717 || Production | 005510 || tonnes,Barley | 00000044 || Production | 005510 || tonnes
0,Denmark,DNK,1961,3300.0,5000.0,3600.0,,600.0,1200.0,140500.0,614300.0,83350.0,637490.0,513876,434057,5198900,2808149
1,Denmark,DNK,1962,3600.0,3200.0,2100.0,,600.0,1100.0,177800.0,632100.0,88990.0,785100.0,512262,643992,5783409,3299515
2,Denmark,DNK,1963,3200.0,2000.0,2300.0,,400.0,1100.0,180100.0,645400.0,81220.0,847900.0,319871,494060,5502930,3398857
3,Denmark,DNK,1964,4000.0,2400.0,2100.0,,400.0,1200.0,156500.0,688000.0,95150.0,871060.0,291633,541454,6213090,3899590
4,Denmark,DNK,1965,4500.0,3000.0,1700.0,,600.0,1400.0,154400.0,744000.0,83800.0,840830.0,264340,563769,6213087,4125804


In [21]:
# Print the column names, non-null counts and dtypes of the merged table.
final_merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 17 columns):
 #   Column                                                                   Non-Null Count  Dtype  
---  ------                                                                   --------------  -----  
 0   Entity                                                                   540 non-null    object 
 1   Code                                                                     540 non-null    object 
 2   Year                                                                     540 non-null    int64  
 3   Meat, game | 00001163 || Production | 005510 || tonnes                   314 non-null    float64
 4   Meat, duck | 00001069 || Production | 005510 || tonnes                   348 non-null    float64
 5   Meat, horse | 00001097 || Production | 005510 || tonnes                  525 non-null    float64
 6   Meat, camel | 00001127 || Production | 005510 || tonnes                  0

In [22]:
# Summary statistics for the numeric columns, one row per column.
final_merged_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Year,540.0,1990.5,17.33416,1961.0,1975.75,1990.5,2005.25,2020.0
"Meat, game | 00001163 || Production | 005510 || tonnes",314.0,11305.14,18169.46,0.0,3000.0,4708.0,10000.0,90000.0
"Meat, duck | 00001069 || Production | 005510 || tonnes",348.0,37687.73,67153.84,2.0,3884.5,15000.0,33517.5,290902.0
"Meat, horse | 00001097 || Production | 005510 || tonnes",525.0,10939.95,18822.82,49.0,1200.0,3173.0,8008.0,108000.0
"Meat, camel | 00001127 || Production | 005510 || tonnes",0.0,,,,,,,
"Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes",311.0,2708.675,3371.214,18.0,500.0,1222.0,3000.0,14750.0
"Meat, sheep and goat | 00001807 || Production | 005510 || tonnes",540.0,90293.45,96090.58,363.0,20923.0,46306.5,134347.75,402000.0
"Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes",540.0,742614.8,549431.4,45597.0,240915.75,593526.0,1143430.0,2180707.0
"Meat, pig | 00001035 || Production | 005510 || tonnes",540.0,1451521.0,1288475.0,75419.0,431379.0,1166683.0,1725486.75,5616074.0
"Meat, poultry | 00001808 || Production | 005510 || tonnes",540.0,688520.1,598407.0,14715.0,164207.0,556650.0,1055012.5,3247205.0


In [23]:
# Skewness of each numeric column, most skewed first.
# numeric_only=True is passed explicitly: the frame also holds the text
# columns `Entity` and `Code`, which recent pandas versions no longer drop
# silently and instead raise on.
final_merged_df.skew(numeric_only=True).sort_values(ascending=False)

Meat, game | 00001163 || Production | 005510 || tonnes                     3.004394
Meat, duck | 00001069 || Production | 005510 || tonnes                     2.714708
Rye | 00000071 || Production | 005510 || tonnes                            2.612416
Meat, horse | 00001097 || Production | 005510 || tonnes                    2.609793
Meat, goose and guinea fowl | 00001073 || Production | 005510 || tonnes    1.778854
Wheat | 00000015 || Production | 005510 || tonnes                          1.643500
Meat, pig | 00001035 || Production | 005510 || tonnes                      1.401369
Meat, sheep and goat | 00001807 || Production | 005510 || tonnes           1.326737
Cereals | 00001717 || Production | 005510 || tonnes                        1.275547
Meat, poultry | 00001808 || Production | 005510 || tonnes                  1.109284
Fish and seafood | 00002960 || Production | 005511 || tonnes               0.885435
Meat, beef and buffalo | 00001806 || Production | 005510 || tonnes         0