In [None]:
import pandas as pd
import numpy as np

# Load the attached "countries.csv" into a pandas data frame.

In [1]:
# Location of the input data, relative to the notebook's working directory.
countries_csv_path = 'DSC/countries.csv'

# Parse the CSV into the data frame used by every later cell.
countries_df = pd.read_csv(countries_csv_path)

# Display some basic information about the data frame:

# (i) the number of rows and columns

In [2]:
# .shape is a (rows, columns) tuple; name both parts, then display the pair.
n_rows, n_cols = countries_df.shape
(n_rows, n_cols)

(5, 5)

# (ii) for every column: the min, max and mean value.

In [3]:
# Column-wise minimum (axis=0 reduces over the rows); string columns are
# compared lexicographically, so they appear in the result as well.
countries_df.min(axis=0)

Name        Brazilia
People      36503097
Area          301338
BIP             1529
Currency         CAD
dtype: object

In [4]:
# Column-wise maximum (axis=0 reduces over the rows); string columns are
# compared lexicographically, so they appear in the result as well.
countries_df.max(axis=0)

Name            Japan
People      208360000
Area          9984670
BIP              4938
Currency          YEN
dtype: object

In [5]:
# Column-wise mean of the numeric columns only. "Name" and "Currency" hold
# strings, for which a mean is undefined: pandas >= 2.0 raises a TypeError
# unless they are excluded explicitly, and pandas 1.x emits a FutureWarning.
countries_df.mean(numeric_only=True)

People    102786293.6
Area        3907399.6
BIP            2716.2
dtype: float64

# Show the last 4 rows of the data frame.

In [6]:
# Positional slice from the fourth-to-last row through the end of the frame.
countries_df.iloc[-4:]

Unnamed: 0,Name,People,Area,BIP,Currency
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


# Show all rows for countries that have "EUR" as their currency.

In [7]:
# Boolean mask: True for every row whose currency is exactly "EUR".
uses_euro = countries_df['Currency'].eq("EUR")

# Keep only the rows selected by the mask.
countries_df.loc[uses_euro]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
3,Italy,60501718,301338,1850,EUR


# Show only "Name" and "Currency" in a new data frame.

In [8]:
# Keep every row but only the two requested columns, in this order.
wanted_columns = ['Name', 'Currency']
new_name_df = countries_df.loc[:, wanted_columns]
new_name_df

Unnamed: 0,Name,Currency
0,Germany,EUR
1,Japan,YEN
2,Canada,CAD
3,Italy,EUR
4,Brazilia,REAL


# Show only the rows/countries with a BIP greater than 2000 (BIP = Bruttoinlandsprodukt, i.e. gross domestic product, given in billions of USD).

In [9]:
# Strict comparison: only countries whose BIP exceeds 2000 qualify, so a
# country with a BIP of exactly 2000 is excluded.
countries_df[countries_df['BIP'] > 2000]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN


# Select all countries with between 50 and 150 million inhabitants.

In [10]:
# Population bounds; both ends are included in the selection.
population = countries_df['People']
in_range = (population >= 50000000) & (population <= 150000000)

# Keep only the countries whose population lies within the bounds.
countries_df[in_range]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
3,Italy,60501718,301338,1850,EUR


# Change the column name "BIP" to "Bip".

In [11]:
# Map the old column label to the new one; rename returns a new frame, which
# replaces countries_df for all following cells.
column_renames = {"BIP": "Bip"}
countries_df = countries_df.rename(column_renames, axis="columns")
countries_df

Unnamed: 0,Name,People,Area,Bip,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


# Calculate the "Bip" sum over all rows.

In [12]:
# Add up the "Bip" column across all rows and display the total.
bip_total = countries_df['Bip'].sum()
bip_total

13581

# Calculate the average number of people (population) across all countries.

In [13]:
# Arithmetic mean of the "People" column across all rows.
mean_population = countries_df['People'].mean()
mean_population

102786293.6

# Sort by "Name" alphabetically.

In [14]:
# Ascending (A to Z) order by country name; returns a sorted copy and leaves
# countries_df itself unchanged.
countries_df.sort_values(by='Name', ascending=True)

Unnamed: 0,Name,People,Area,Bip,Currency
4,Brazilia,208360000,8515770,1798,REAL
2,Canada,36503097,9984670,1529,CAD
0,Germany,82521653,357385,3466,EUR
3,Italy,60501718,301338,1850,EUR
1,Japan,126045000,377835,4938,YEN


# Create a new data frame from the original in which the "Area" values are replaced as follows: countries with an area > 1000000 get "BIG", and countries with an area <= 1000000 get "SMALL".

In [15]:
# Label each country by area: strictly above 1000000 is 'BIG', otherwise 'SMALL'.
area_labels = np.where(countries_df['Area'] > 1000000, 'BIG', 'SMALL')

# assign() returns a new frame with "Area" replaced in place (same column
# position); countries_df itself is not modified.
new_bigsmall_df = countries_df.assign(Area=area_labels)
new_bigsmall_df

Unnamed: 0,Name,People,Area,Bip,Currency
0,Germany,82521653,SMALL,3466,EUR
1,Japan,126045000,SMALL,4938,YEN
2,Canada,36503097,BIG,1529,CAD
3,Italy,60501718,SMALL,1850,EUR
4,Brazilia,208360000,BIG,1798,REAL
