# PDS: Analyzing countries
## Import and show data

In [8]:
import numpy as np
import pandas as pd

url = 'https://raw.githubusercontent.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv'
countries = pd.read_csv(url, index_col=0)
countriesDataFrame = pd.DataFrame(countries)
countriesDataFrame

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN
Canada,36503097,9984670,1529,CAD
Italy,60501718,301338,1850,EUR
Brazilia,208360000,8515770,1798,REAL


## Basic information

In [371]:
countriesDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Germany to Brazilia
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   People    5 non-null      int64 
 1   Area      5 non-null      int64 
 2   BIP       5 non-null      int64 
 3   Currency  5 non-null      object
dtypes: int64(3), object(1)
memory usage: 200.0+ bytes


## Last 4 rows of countries data frame

In [372]:
countriesDataFrame.tail(4)

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Japan,126045000,377835,4938,YEN
Canada,36503097,9984670,1529,CAD
Italy,60501718,301338,1850,EUR
Brazilia,208360000,8515770,1798,REAL


## Show all the row of countries who have the EURO

In [373]:
countriesEUR = countriesDataFrame.loc[countriesDataFrame["Currency"] == "EUR"]
countriesEUR

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Italy,60501718,301338,1850,EUR


## Show only name and Currency in a new data frame

In [383]:
countriesCurrencies = pd.DataFrame(countriesDataFrame['Currency'])
countriesCurrencies

Unnamed: 0_level_0,Currency
Name,Unnamed: 1_level_1
Germany,EUR
Japan,YEN
Canada,CAD
Italy,EUR
Brazilia,REAL


## Show only the rows/countries that have more than 2000 billions USD GDP 

In [329]:
# https://stackoverflow.com/questions/46227944/pandas-dataframe-currency-conversion
def convertToUSD(args):  # placeholder for your fancy conversion function
    people, area, bip, currency = args
    # Conversion rates as of 2021-05-16
    conversion = {'EUR': 0.82, 'YEN': 109, 'REAL': 5.27, 'CAD': 1.21}
    return conversion[currency] * bip

countriesDataFrame
countriesWithGDP = countriesDataFrame.assign(**{'GDP in USD': countriesDataFrame.apply(convertToUSD, axis=1)})
countriesWithGDP.loc[countriesWithGDP['GDP in USD'] > 2000]

Unnamed: 0_level_0,People,Area,BIP,Currency,GDP in USD
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Germany,82521653,357385,3466,EUR,2842.12
Japan,126045000,377835,4938,YEN,538242.0
Brazilia,208360000,8515770,1798,REAL,9475.46


## Select all countries where with inhabitants between 50 and 150 Mio

In [330]:
countriesPopulation = countriesDataFrame.loc[(countriesDataFrame["People"] >= 50000000) & (countriesDataFrame["People"] <= 150000000)]
countriesPopulation

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN
Italy,60501718,301338,1850,EUR


## Change BIP to Bip

In [15]:
countriesBip = countriesDataFrame.rename(columns={
        'BIP': 'Bip', 
    }, inplace=False)
countriesBip

Unnamed: 0_level_0,People,Area,Bip,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN
Canada,36503097,9984670,1529,CAD
Italy,60501718,301338,1850,EUR
Brazilia,208360000,8515770,1798,REAL


## Calculate the Bip sum

In [332]:
countriesBipSum = countriesBip['Bip'].sum()
print("The GDP sum is", int(countriesBipSum),".")

The GDP sum is 13581 .


## Calculate the average people of all countries

In [64]:
countriesPopMean = countriesDataFrame['People'].mean()
print("There's an average of", int(countriesPopMean), "people in all given countries.")

There's an average of 102786293 people in all given countries.


## Sort by name alphabetically

In [58]:
countriesDataFrame.sort_index()

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brazilia,208360000,8515770,1798,REAL
Canada,36503097,9984670,1529,CAD
Germany,82521653,357385,3466,EUR
Italy,60501718,301338,1850,EUR
Japan,126045000,377835,4938,YEN


## All countries with > 1000000 get BIG and <= 1000000 get SMALL in the cell replaced!

In [388]:
countriesAreaLabels = countriesDataFrame.copy(deep=True)
countriesAreaLabels['Area'] = np.where((countriesAreaLabels['Area'] <= 1000000), 'SMALL', 'BIG')
countriesAreaLabels

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,SMALL,3466,EUR
Japan,126045000,SMALL,4938,YEN
Canada,36503097,BIG,1529,CAD
Italy,60501718,SMALL,1850,EUR
Brazilia,208360000,BIG,1798,REAL
