In [174]:
# libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import sklearn as scikit_learn

In [175]:
# importing dataset
from google.colab import drive

# Location inside the Colab VM where Google Drive is mounted.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# defining dataframe
# The CSV path is built from the mount point so that the read does not
# depend on the notebook's current working directory being /content.
df = pd.read_csv(f'{DRIVE_MOUNT_POINT}/My Drive/AI-ML/Learning/countries.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [176]:
# Quick look at the dataset: show the first five rows as a rich table
df.head(n=5)

Unnamed: 0,Rank,ID,Country,Continent,Population,IMF_GDP,UN_GDP,GDP_per_capita
0,1,840,United States,North America,339996.56,26695150000000.0,18624480000000.0,78515.94
1,2,156,China,Asia,1425671.35,21865480000000.0,11218280000000.0,15336.97
2,3,392,Japan,Asia,123294.51,5291351000000.0,4936212000000.0,42916.35
3,4,276,Germany,Europe,83294.63,4564778000000.0,3477796000000.0,54802.79
4,5,356,India,Asia,1428627.66,3893670000000.0,2259642000000.0,2725.46


In [177]:
# List every column label present in the dataframe
column_labels = df.columns
print(column_labels)

Index(['Rank', 'ID', 'Country', 'Continent', 'Population', 'IMF_GDP', 'UN_GDP',
       'GDP_per_capita'],
      dtype='object')


In [178]:
# Plain-text dump of the first 5 entries
first_rows = df.head(5)
print(first_rows)

# Row count comes from the first dimension of the frame's shape
row_count = df.shape[0]
print("\nTotal number of rows is ", row_count)

   Rank   ID        Country      Continent  Population       IMF_GDP  \
0     1  840  United States  North America   339996.56  2.669515e+13   
1     2  156          China           Asia  1425671.35  2.186548e+13   
2     3  392          Japan           Asia   123294.51  5.291351e+12   
3     4  276        Germany         Europe    83294.63  4.564778e+12   
4     5  356          India           Asia  1428627.66  3.893670e+12   

         UN_GDP  GDP_per_capita  
0  1.862448e+13        78515.94  
1  1.121828e+13        15336.97  
2  4.936212e+12        42916.35  
3  3.477796e+12        54802.79  
4  2.259642e+12         2725.46  

Total number of rows is  212


In [179]:
# Display the dataframe as the cell's output (a bare last expression is
# rendered by the notebook; the recorded output below shows head and tail
# rows with an elided middle).
df

Unnamed: 0,Rank,ID,Country,Continent,Population,IMF_GDP,UN_GDP,GDP_per_capita
0,1,840,United States,North America,339996.56,2.669515e+13,1.862448e+13,78515.94
1,2,156,China,Asia,1425671.35,2.186548e+13,1.121828e+13,15336.97
2,3,392,Japan,Asia,123294.51,5.291351e+12,4.936212e+12,42916.35
3,4,276,Germany,Europe,83294.63,4.564778e+12,3.477796e+12,54802.79
4,5,356,India,Asia,1428627.66,3.893670e+12,2.259642e+12,2725.46
...,...,...,...,...,...,...,...,...
207,208,729,Sudan,Africa,48109.01,0.000000e+00,8.288740e+10,1722.91
208,209,760,Syria,Asia,23227.01,0.000000e+00,2.216308e+10,954.19
209,210,788,Tunisia,Africa,12458.22,0.000000e+00,4.170356e+10,3347.47
210,211,796,Turks and Caicos Islands,North America,46.06,0.000000e+00,9.175505e+08,19919.90


# Prerequisites

- GDP (Gross Domestic Product)

    Gross Domestic Product (GDP) is the sum of consumption expenditure (of households, NPISHs, and general government), gross fixed capital formation, changes in inventories, and exports of goods and services, less the value of imports of goods and services.

# What is meant by specific fields?

1. GDP_per_capita

    Gross Domestic Product (GDP) Per Capita is an economic metric used to determine how prosperous countries are based on their economic growth. It is calculated by dividing the GDP of a nation by its total population.

2. UN_GDP

    Gross Domestic Product (GDP) as reported by the United Nations (UN).

3. IMF_GDP

    Gross Domestic Product (GDP) as reported by the International Monetary Fund (IMF).

In [180]:
# Countries with the highest and lowest GDP per capita in the whole dataset.
# idxmax()/idxmin() give the row label of the extreme value; that label is
# then used to look up the matching 'Country' cell.
gdp_per_capita = df['GDP_per_capita']
row_of_max = gdp_per_capita.idxmax()
row_of_min = gdp_per_capita.idxmin()
country_max_gdp = df.at[row_of_max, 'Country']
country_min_gdp = df.at[row_of_min, 'Country']

print("Max GDP per capita is ", country_max_gdp)
print("---------------------------------------")
print("Min GDP per capita is ", country_min_gdp)

Max GDP per capita is  Monaco
---------------------------------------
Min GDP per capita is  Burundi


In [181]:
# Summary statistics for GDP per capita across every row of the dataset
gdp_values = df['GDP_per_capita']

# Note: the value printed as "mean of difference" is half of the
# max-min range, i.e. (max - min) / 2.
gdp_range = gdp_values.max() - gdp_values.min()
mean_diff = gdp_range / 2
print("Mean of difference of GDP per capita: ", mean_diff)

# Arithmetic mean of the column
mean_gdp = gdp_values.mean()
print("Mean GDP_per_capita is ", mean_gdp)

Mean of difference of GDP per capita:  88959.07
Mean GDP_per_capita is  19669.11547169811


In [182]:
# Per-continent summary of GDP per capita.
# Missing continent labels are dropped before iterating: a NaN label never
# compares equal to anything, so its row selection below would be empty and
# idxmax()/idxmin() would raise ValueError on the empty column.
continents = df['Continent'].dropna().drop_duplicates()

for continent in continents:
  # Rows belonging to the current continent
  countries = df[df['Continent'] == continent]
  # Read the column once and reuse it for every statistic below
  gdp = countries['GDP_per_capita']
  country_max_gdp = countries.at[gdp.idxmax(), 'Country']
  country_min_gdp = countries.at[gdp.idxmin(), 'Country']
  print("Region: ", continent)
  print("-------------------------------------------------")
  print("Max GDP per Capita: ", country_max_gdp)
  print("Min GDP per Capita: ", country_min_gdp)
  print("Mean GDP per Capita: ", gdp.mean())
  # Half of the max-min range within the continent
  print("Mean Difference of GDP per Capita: ", ((gdp.max() - gdp.min())/2))
  print("-------------------------------------------------\n")

Region:  North America
-------------------------------------------------
Max GDP per Capita:  Bermuda
Min GDP per Capita:  Haiti
Mean GDP per Capita:  22679.46352941177
Mean Difference of GDP per Capita:  46914.439999999995
-------------------------------------------------

Region:  Asia
-------------------------------------------------
Max GDP per Capita:  Qatar
Min GDP per Capita:  Afghanistan
Mean GDP per Capita:  18767.680888888888
Mean Difference of GDP per Capita:  41793.45
-------------------------------------------------

Region:  Europe
-------------------------------------------------
Max GDP per Capita:  Monaco
Min GDP per Capita:  Ukraine
Mean GDP per Capita:  40568.229400000004
Mean Difference of GDP per Capita:  87829.115
-------------------------------------------------

Region:  South America
-------------------------------------------------
Max GDP per Capita:  Guyana
Min GDP per Capita:  Venezuela
Mean GDP per Capita:  9769.385833333336
Mean Difference of GDP per Capi