In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import geopandas as gpd
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
input_root = '/kaggle/input'
for folder, _subdirs, names in os.walk(input_root):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Let pandas show up to 500 rows and 500 columns before truncating output.
for option_name in ("display.max_rows", "display.max_columns"):
    pd.set_option(option_name, 500)

<h3>GDP: <a href="https://worldpopulationreview.com/countries/countries-by-gdp">Source of Data</a></h3>

In [None]:
# Load one CSV per continent; each becomes its own DataFrame.
(africa,
 asia,
 europe,
 north_america,
 south_america,
 oceania) = map(pd.read_csv, [
    '../input/countries/africa.csv',
    '../input/countries/asia.csv',
    '../input/countries/europe.csv',
    '../input/countries/north_america.csv',
    '../input/countries/south_america.csv',
    '../input/countries/oceania.csv',
])

In [None]:
# Tag every row with the continent its source frame represents.
labelled_frames = [
    (africa, 'Africa'),
    (asia, 'Asia'),
    (europe, 'Europe'),
    (north_america, 'North America'),
    (south_america, 'South America'),
    (oceania, 'Oceania'),
]
for frame, continent_label in labelled_frames:
    frame['Continent'] = continent_label

In [None]:
# Stack the six continent frames into one table.
# ignore_index=True gives every row a unique 0..n-1 label; without it each
# row keeps the label it had in its own frame, so labels repeat.
continent_frames = [africa, asia, europe, north_america, south_america, oceania]
world = pd.concat(continent_frames, ignore_index=True)

# Drop the 'rank' column read from the source files; a fresh 'Rank' is
# computed after sorting in a later cell.
world = world.drop(columns='rank')

# Save to CSV without the row index, which is purely positional here and
# would otherwise appear as an extra unnamed column when the file is reloaded.
world.to_csv('world_gdp.csv', index=False)

In [None]:
# Map the raw column names to display-friendly labels.
column_labels = {
    'country': 'Country',
    'imfGDP': 'IMF GDP',
    'unGDP': 'UN GDP',
    'gdpPerCapita': 'GDP Capita',
    'pop': 'Population',
}
world.rename(columns=column_labels, inplace=True)

In [None]:
# Order countries from largest to smallest IMF GDP.
# drop=True is required for the cell to be re-runnable: after the first run
# the index is named 'Rank', so a plain reset_index() would produce a 'Rank'
# column (not 'index') and a follow-up drop('index') would raise KeyError.
world = world.sort_values(by='IMF GDP', ascending=False).reset_index(drop=True)

# Use the 1-based position in the sorted table as the row index, named 'Rank'.
world.index = pd.RangeIndex(start=1, stop=len(world) + 1, name='Rank')

# Cast UN GDP to float.
world['UN GDP'] = world['UN GDP'].astype('float')

In [None]:
# Total IMF GDP per continent, largest first, held in a single column named 'sum'.
continent_totals = world.groupby('Continent')['IMF GDP'].sum().to_frame('sum')
gdp_continent = continent_totals.sort_values('sum', ascending=False)

# Show the table.
display(gdp_continent)

# Bar chart of the totals via the pandas plotting accessor.
# NOTE(review): the y label says trillions but the summed values are plotted
# without any scaling here — confirm the units of 'IMF GDP'.
ax = gdp_continent.plot.bar(figsize=(10, 5))
ax.set_title('Total GDP By Continent', fontsize=20)
ax.set_xlabel('Continent', fontsize=20)
ax.set_ylabel('GDP (Trillions)', fontsize=20)
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=20)
plt.show()

In [None]:
# Bar chart of total IMF GDP per continent using seaborn.
fig, ax = plt.subplots(figsize=(10, 5))

# barplot aggregates each category with the mean unless told otherwise;
# estimator=np.sum makes the bars show continent totals, as the title states.
# ci=None turns off the error bars.
sns.barplot(x='Continent', y='IMF GDP', data=world,
            estimator=np.sum, ci=None, ax=ax)

# TODO: add the GDP value as a label on top of each bar.

ax.set_title('Total GDP By Continent', fontsize=20)
ax.set_xlabel('Continent', fontsize=20)
ax.set_ylabel('GDP (Trillions)', fontsize=20)
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=20)
plt.show()

In [None]:
# Load the two sample layers that geopandas exposes through gpd.datasets.
# NOTE(review): geopandas.datasets appears to be deprecated/removed in recent
# geopandas releases — confirm the environment's version still provides it.
countries_path = gpd.datasets.get_path('naturalearth_lowres')
cities_path = gpd.datasets.get_path('naturalearth_cities')
cities = gpd.read_file(cities_path)

# Align column names with the GDP table, then leave out the rows whose
# continent is Antarctica or the open-ocean placeholder.
excluded_continents = ['Seven seas (open ocean)', 'Antarctica']
world_visual = (
    gpd.read_file(countries_path)
    .rename(columns={'name': 'Country',
                     'pop_est': 'Population',
                     'continent': 'Continent'})
    .loc[lambda frame: ~frame['Continent'].isin(excluded_continents)]
)

display(world_visual.head())

# Map coloured by the Population column.
world_visual.plot(column='Population', figsize=(20, 10))
plt.show()

<h3>GeoPandas: <a href='https://geopandas.org/docs/user_guide/mapping.html#pandas-plots'>Data Source</a></h3>

In [None]:
# Compare row counts of the two tables before matching them on Country.
n_visual = len(world_visual['Country'])
n_world = len(world['Country'])
print('Length of world_visual: {} Countries'.format(n_visual))
print('Length of world: {} Countries'.format(n_world))

In [None]:
# Keep only the four columns needed for the join and the maps, and use the
# spelling 'United States' for the USA in the Country column.
kept_columns = ['Country', 'iso_a3', 'gdp_md_est', 'geometry']
world_visual = world_visual[kept_columns].assign(
    Country=lambda frame: frame['Country'].replace(
        'United States of America', 'United States')
)

In [None]:
# Restrict each table to the countries whose name also appears in the other.
present_in_visual = world['Country'].isin(world_visual['Country'])
present_in_world = world_visual['Country'].isin(world['Country'])

update_world = world[present_in_visual]
update_world_visual = world_visual[present_in_world]

In [None]:
# Row counts of the two filtered tables, printed for comparison.
n_visual_matched = len(update_world_visual['Country'])
n_world_matched = len(update_world['Country'])
print('Length of world_visual: {} Countries'.format(n_visual_matched))
print('Length of world: {} Countries'.format(n_world_matched))

<h1>Merge 2 DataFrames on Country column</h1>

In [None]:
# Join the map geometries with the GDP table.
# The geometry table goes first (order of merge matters!).
# on='Country' states the join key explicitly; without it pandas joins on
# every column name the two frames happen to share, which silently changes
# the result if either frame gains another common column.
merge_world = pd.merge(update_world_visual, update_world,
                       how='outer', on='Country')

# Drop every row that has a missing value in any column. Together with the
# outer join this keeps only countries that are present, and complete, in
# both tables.
merge_world = merge_world.dropna()

display(merge_world.head())

# Save to CSV
merge_world.to_csv('merge_world_gdp.csv')

In [None]:
# World map with each country coloured by its continent.
# legend=True already makes the plot call draw the category legend; a
# following plt.legend() would replace it with an empty legend because no
# artist on the axes carries a label, so that call is omitted.
ax = merge_world.plot(column='Continent', legend=True, figsize=(20, 10))
ax.set_title('World', fontsize=20)
ax.set_xlabel('Longitude', fontsize=20)
ax.set_ylabel('Latitude', fontsize=20)
plt.show()

In [None]:
# World map coloured by IMF GDP.
# legend=True is the only legend request needed; plt.legend() is omitted
# because there are no labelled artists for it to list.
ax = merge_world.plot(column='IMF GDP', legend=True, figsize=(20, 10))
ax.set_title('GDP', fontsize=20)
ax.set_xlabel('Longitude', fontsize=20)
ax.set_ylabel('Latitude', fontsize=20)
plt.show()

In [None]:
# World map coloured by GDP per capita.
# legend=True is the only legend request needed; plt.legend() is omitted
# because there are no labelled artists for it to list.
ax = merge_world.plot(column='GDP Capita', legend=True, figsize=(20, 10))
ax.set_title('GDP Capita', fontsize=20)
ax.set_xlabel('Longitude', fontsize=20)
ax.set_ylabel('Latitude', fontsize=20)
plt.show()

In [None]:
# Preview the first rows of the merged table.
merge_world.head()

<h1>Select a Continent</h1>

In [None]:
def continent_plot(continent='Asia', column='IMF GDP'):
    """Draw a map of one continent, colouring countries by a column.

    Parameters
    ----------
    continent : str, default 'Asia'
        Value to match against ``merge_world['Continent']``.
    column : str, default 'IMF GDP'
        Column of ``merge_world`` used to colour the countries.

    Raises
    ------
    ValueError
        If no row of ``merge_world`` has the requested continent.
    """
    subset = merge_world[merge_world['Continent'] == continent]
    if subset.empty:
        known = sorted(merge_world['Continent'].unique())
        raise ValueError(
            'No data for continent {!r}; expected one of {}'.format(continent, known)
        )

    ax = subset.sort_values(column).plot(column=column, legend=True, figsize=(20, 10))
    # Title names the column actually drawn (it used to read 'GDP Capita'
    # while the map showed 'IMF GDP').
    ax.set_title('{} - {}'.format(column, continent), fontsize=20)
    ax.set_xlabel('Longitude', fontsize=20)
    ax.set_ylabel('Latitude', fontsize=20)
    plt.show()

continent_plot()

In [None]:
# TODO: pie charts?