In [65]:
import pandas as pd

# Load data to pandas dataframe
url = "country.csv"
countries = pd.read_csv(url)
print(countries)

        Name  Population     Area     GDP Currency
0    Germany    82521653   357385  3466.0      EUR
1      Japan   126045000   377835  4938.0      YEN
2     Canada    36503097  9984670  1529.0      CAD
3      Italy    60501718   301338  1850.0      EUR
4   Brazilia   208360000  8515770  1798.0     REAL
5     Taiwan    23938272    36197   744.0      NTD
6  Venezuela    28208977   912050     NaN      VED


In [66]:
# Display descriptive statistics for the numerical columns
numerical_stats = countries.describe()
print(numerical_stats)

         Population          Area          GDP
count  7.000000e+00  7.000000e+00     6.000000
mean   8.086839e+07  2.926464e+06  2387.500000
std    6.670195e+07  4.348507e+06  1532.497537
min    2.393827e+07  3.619700e+04   744.000000
25%    3.235604e+07  3.293615e+05  1596.250000
50%    6.050172e+07  3.778350e+05  1824.000000
75%    1.042833e+08  4.713910e+06  3062.000000
max    2.083600e+08  9.984670e+06  4938.000000


In [67]:
# Show last 4 rows
last_four_rows = countries.tail(4)
print(last_four_rows)

        Name  Population     Area     GDP Currency
3      Italy    60501718   301338  1850.0      EUR
4   Brazilia   208360000  8515770  1798.0     REAL
5     Taiwan    23938272    36197   744.0      NTD
6  Venezuela    28208977   912050     NaN      VED


In [68]:
# Show all the rows of countries that have the EURO
countries_with_euro = countries[countries['Currency'] == 'EUR']
print(countries_with_euro)

      Name  Population    Area     GDP Currency
0  Germany    82521653  357385  3466.0      EUR
3    Italy    60501718  301338  1850.0      EUR


In [69]:
# Show only the rows/countries that have more than 2000 GDP
countries_high_gdp = countries[countries['GDP'] > 2000]
print(countries_high_gdp)

      Name  Population    Area     GDP Currency
0  Germany    82521653  357385  3466.0      EUR
1    Japan   126045000  377835  4938.0      YEN


In [70]:
# Select all countries with inhabitants between 50 and 150 Mio
countries_population_in_specific_range = countries[(countries['Population'] >= 50000000) & (countries['Population'] <= 150000000)]
print(countries_population_in_specific_range)

      Name  Population    Area     GDP Currency
0  Germany    82521653  357385  3466.0      EUR
1    Japan   126045000  377835  4938.0      YEN
3    Italy    60501718  301338  1850.0      EUR


In [71]:
# Calculate the GDP average (ignore the missing value)
average_gdp_nan_ignored = countries['GDP'].mean()
print(average_gdp_nan_ignored)

2387.5


In [73]:
# Task: Calculate the GDP average (missing value treated as 0)
countries['GDP'] = countries['GDP'].fillna(0)
average_gdp_with_fillna = countries['GDP'].mean()
print(average_gdp_with_fillna)

2046.4285714285713


In [74]:
# Calculate the population density (population/area) of all countries and add as new column
countries['Population Density'] = countries['Population'] / countries['Area']
print(countries)

        Name  Population     Area     GDP Currency  Population Density
0    Germany    82521653   357385  3466.0      EUR          230.904075
1      Japan   126045000   377835  4938.0      YEN          333.597999
2     Canada    36503097  9984670  1529.0      CAD            3.655914
3      Italy    60501718   301338  1850.0      EUR          200.776928
4   Brazilia   208360000  8515770  1798.0     REAL           24.467547
5     Taiwan    23938272    36197   744.0      NTD          661.333039
6  Venezuela    28208977   912050     0.0      VED           30.929200


In [75]:
# Sort by country name alphabetically
countries_sorted_by_name = countries.sort_values('Name')
print(countries_sorted_by_name)

        Name  Population     Area     GDP Currency  Population Density
4   Brazilia   208360000  8515770  1798.0     REAL           24.467547
2     Canada    36503097  9984670  1529.0      CAD            3.655914
0    Germany    82521653   357385  3466.0      EUR          230.904075
3      Italy    60501718   301338  1850.0      EUR          200.776928
1      Japan   126045000   377835  4938.0      YEN          333.597999
5     Taiwan    23938272    36197   744.0      NTD          661.333039
6  Venezuela    28208977   912050     0.0      VED           30.929200


In [76]:
# Create a new data frame where the area is changed: all countries with > 1000000 get "BIG" and <= 1000000 get "SMALL"
area_size_df = countries.copy()
area_size_df['Area'] = area_size_df['Area'].apply(lambda x: 'BIG' if x > 1000000 else 'SMALL')

print(area_size_df)

        Name  Population   Area     GDP Currency  Population Density
0    Germany    82521653  SMALL  3466.0      EUR          230.904075
1      Japan   126045000  SMALL  4938.0      YEN          333.597999
2     Canada    36503097    BIG  1529.0      CAD            3.655914
3      Italy    60501718  SMALL  1850.0      EUR          200.776928
4   Brazilia   208360000    BIG  1798.0     REAL           24.467547
5     Taiwan    23938272  SMALL   744.0      NTD          661.333039
6  Venezuela    28208977  SMALL     0.0      VED           30.929200
