In [1]:
# Read countries.csv straight from its URL into a pandas data frame.
import pandas as pd

url = 'https://raw.github.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv'
# The first CSV column is used as the row index of the frame.
df = pd.read_csv(filepath_or_buffer=url, index_col=0)


In [2]:
# Basic overview of the data (rows, columns, summary statistics), starting
# with the first five rows. The frame is left as the cell's last expression
# so the notebook renders it with its rich display instead of printed text.
df.head(5)

             People     Area   BIP Currency
Name                                       
Germany    82521653   357385  3466      EUR
Japan     126045000   377835  4938      YEN
Canada     36503097  9984670  1529      CAD
Italy      60501718   301338  1850      EUR
Brazilia  208360000  8515770  1798     REAL


In [3]:
# Dimensions of the data frame as a (rows, columns) tuple.
df.shape

(5, 4)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns. Left as the cell's last expression so the notebook renders the
# result with its rich display instead of printed text.
df.describe()

             People          Area         BIP
count  5.000000e+00  5.000000e+00     5.00000
mean   1.027863e+08  3.907400e+06  2716.20000
std    6.759970e+07  4.904957e+06  1457.86083
min    3.650310e+07  3.013380e+05  1529.00000
25%    6.050172e+07  3.573850e+05  1798.00000
50%    8.252165e+07  3.778350e+05  1850.00000
75%    1.260450e+08  8.515770e+06  3466.00000
max    2.083600e+08  9.984670e+06  4938.00000


In [5]:
# Show the last four rows of the data frame (positional slice from the end).
df.iloc[-4:]

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Japan,126045000,377835,4938,YEN
Canada,36503097,9984670,1529,CAD
Italy,60501718,301338,1850,EUR
Brazilia,208360000,8515770,1798,REAL


In [6]:
# Show the rows of all countries whose currency is the euro ("EUR").
# head() caps the display at the first five matching rows.
df.loc[df['Currency'] == "EUR"].head()

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Italy,60501718,301338,1850,EUR


In [7]:
# New data frame holding only the country name (the index) and the Currency
# column; every other column is dropped.
df2 = df.drop(columns=['People', 'Area', 'BIP'])
df2

Unnamed: 0_level_0,Currency
Name,Unnamed: 1_level_1
Germany,EUR
Japan,YEN
Canada,CAD
Italy,EUR
Brazilia,REAL


In [8]:
# Show only the countries with a BIP of more than 2000. BIP is the gross
# domestic product (German: Bruttoinlandsprodukt) in billions of USD.
# Strict '>' is used because the task asks for "more than 2000", so a value
# of exactly 2000 must not be included.
df.loc[df['BIP'] > 2000]

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN


In [9]:
# Select all countries with between 50 and 150 million inhabitants;
# both bounds are included in the range.
df.loc[df['People'].between(50000000, 150000000)]

Unnamed: 0_level_0,People,Area,BIP,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN
Italy,60501718,301338,1850,EUR


In [10]:
# Rename the column "BIP" to "Bip"; df is rebound to the renamed frame,
# so later cells must refer to the column as "Bip".
df = df.rename({"BIP": "Bip"}, axis="columns")
df

Unnamed: 0_level_0,People,Area,Bip,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,357385,3466,EUR
Japan,126045000,377835,4938,YEN
Canada,36503097,9984670,1529,CAD
Italy,60501718,301338,1850,EUR
Brazilia,208360000,8515770,1798,REAL


In [11]:
# Total of the Bip column over all countries.
df.loc[:, 'Bip'].sum()

13581

In [12]:
# Average number of inhabitants (People column) across all countries.
df.loc[:, "People"].mean()

102786293.6

In [13]:
# Sort the countries alphabetically by name. The names are the row index
# (not a regular column), so the frame is sorted on its index directly;
# this stays unambiguous even if a column called "Name" is added later.
df.sort_index()

Unnamed: 0_level_0,People,Area,Bip,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Brazilia,208360000,8515770,1798,REAL
Canada,36503097,9984670,1529,CAD
Germany,82521653,357385,3466,EUR
Italy,60501718,301338,1850,EUR
Japan,126045000,377835,4938,YEN


In [17]:
# Create a new data frame from the original in which the numeric Area is
# replaced by a size label: 'BIG' for areas greater than 1000000 and
# 'SMALL' for areas of 1000000 or less.
df3 = df.copy()
# Build the whole label column from the untouched numeric values in one step
# and assign it as a new column. This keeps the two cases mutually exclusive
# (strict '>' for BIG, so exactly 1000000 is SMALL regardless of statement
# order) and avoids writing strings into the integer column in place via
# .loc, an upcast that pandas has deprecated. Rows with a missing area keep
# a missing value instead of receiving a label.
df3['Area'] = (
    df['Area']
    .gt(1000000)
    .map({True: 'BIG', False: 'SMALL'})
    .where(df['Area'].notna())
)
df3

Unnamed: 0_level_0,People,Area,Bip,Currency
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Germany,82521653,SMALL,3466,EUR
Japan,126045000,SMALL,4938,YEN
Canada,36503097,BIG,1529,CAD
Italy,60501718,SMALL,1850,EUR
Brazilia,208360000,BIG,1798,REAL
