In [1]:
import numpy as np
import pandas as pd

In [2]:
g7_pop= pd.Series({
    'Canada': 35.467,
    'France': 63.951,
    'Germany': 80.940,
    'Italy': 60.665,
    'Japan': 127.061,
    'United Kingdom': 64.511,
    'United States': 318.523    
}, name= 'G7 populations in millions')

In [3]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

# Conditional selection(Boolean arrays)

In [4]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

In [5]:
g7_pop>70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 populations in millions, dtype: bool

In [6]:
g7_pop[g7_pop>70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 populations in millions, dtype: float64

In [7]:
g7_pop.mean()

107.30257142857144

In [9]:
g7_pop[g7_pop>g7_pop.mean()]

Japan            127.061
United States    318.523
Name: G7 populations in millions, dtype: float64

In [10]:
g7_pop.std()

97.24996987121581

## Operations and methods

In [11]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

In [12]:
g7_pop+1_000_000

Canada            1000035.467
France            1000063.951
Germany           1000080.940
Italy             1000060.665
Japan             1000127.061
United Kingdom    1000064.511
United States     1000318.523
Name: G7 populations in millions, dtype: float64

In [13]:
g7_pop.mean()

107.30257142857144

In [14]:
np.log(g7_pop)

Canada            3.568603
France            4.158117
Germany           4.393708
Italy             4.105367
Japan             4.844667
United Kingdom    4.166836
United States     5.763695
Name: G7 populations in millions, dtype: float64

In [15]:
g7_pop['France' : 'Italy'].mean()

68.51866666666666

## Boolean Arrays

In [16]:
g7_pop

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

In [17]:
g7_pop>70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: G7 populations in millions, dtype: bool

In [18]:
g7_pop[g7_pop>70]

Germany           80.940
Japan            127.061
United States    318.523
Name: G7 populations in millions, dtype: float64

In [19]:
g7_pop[(g7_pop>80) | (g7_pop<90)]

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

In [20]:
g7_pop[(g7_pop>80) & (g7_pop<90)]

Germany    80.94
Name: G7 populations in millions, dtype: float64

## Modifying series

In [21]:
g7_pop['Canada']= 40.5

In [22]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: G7 populations in millions, dtype: float64

In [23]:
g7_pop.iloc[-1]=500

In [24]:
g7_pop

Canada             40.500
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     500.000
Name: G7 populations in millions, dtype: float64

In [25]:
g7_pop[g7_pop<70]=99.99

In [26]:
g7_pop

Canada             99.990
France             99.990
Germany            80.940
Italy              99.990
Japan             127.061
United Kingdom     99.990
United States     500.000
Name: G7 populations in millions, dtype: float64

# DATA FRAMES

In [31]:
df= pd.DataFrame({
    'Population': [35.487, 63.951, 80.94, 60.685, 127.061, 64.511, 318.523],
    'GDP': [1785387, 2833687, 3874437, 2167744, 4602387, 2950039, 17348075],
    'Surface': [9964670, 640679, 357114, 301336, 377930, 242495, 9525067],
    'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915], 
    'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America']
}, columns= ['Population', 'GDP', 'Surface', 'HDI', 'Continent'])

In [32]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
0,35.487,1785387,9964670,0.913,America
1,63.951,2833687,640679,0.888,Europe
2,80.94,3874437,357114,0.916,Europe
3,60.685,2167744,301336,0.873,Europe
4,127.061,4602387,377930,0.891,Asia
5,64.511,2950039,242495,0.907,Europe
6,318.523,17348075,9525067,0.915,America


In [33]:
df.index=[
    'Canada',
    'France', 
    'Germany',
    'Italy',
    'Japan',
    'United Kingdom',
    'United States'
]

In [34]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [35]:
df.columns

Index(['Population', 'GDP', 'Surface', 'HDI', 'Continent'], dtype='object')

In [36]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Population  7 non-null      float64
 1   GDP         7 non-null      int64  
 2   Surface     7 non-null      int64  
 3   HDI         7 non-null      float64
 4   Continent   7 non-null      object 
dtypes: float64(2), int64(2), object(1)
memory usage: 336.0+ bytes


In [38]:
df.size

35

In [39]:
df.shape

(7, 5)

In [40]:
df.describe()

Unnamed: 0,Population,GDP,Surface,HDI
count,7.0,7.0,7.0,7.0
mean,107.308286,5080251.0,3058470.0,0.900429
std,97.245909,5494020.0,4571147.0,0.016592
min,35.487,1785387.0,242495.0,0.873
25%,62.318,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238412.0,5082873.0,0.914
max,318.523,17348080.0,9964670.0,0.916


In [41]:
df.dtypes

Population    float64
GDP             int64
Surface         int64
HDI           float64
Continent      object
dtype: object

In [46]:
df.dtypes.values

array([dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'),
       dtype('O')], dtype=object)

In [44]:
df.dtypes.value_counts()

int64      2
float64    2
object     1
dtype: int64

## Indexing, Selection and Slicing

In [48]:
df['Population']         #individual column can be selected,  returns series

Canada             35.487
France             63.951
Germany            80.940
Italy              60.685
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [49]:
df['Population'].to_frame()          #to view as data frame format

Unnamed: 0,Population
Canada,35.487
France,63.951
Germany,80.94
Italy,60.685
Japan,127.061
United Kingdom,64.511
United States,318.523


In [51]:
df[['Population', 'GDP']]                #selecting multiple columns

Unnamed: 0,Population,GDP
Canada,35.487,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.685,2167744
Japan,127.061,4602387
United Kingdom,64.511,2950039
United States,318.523,17348075


In [53]:
df[0:3]    #slicing acts at row level

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [54]:
df.loc['Canada']           #to select individual row by index values, returns series


Population     35.487
GDP           1785387
Surface       9964670
HDI             0.913
Continent     America
Name: Canada, dtype: object

In [55]:
df.iloc[-1]                     #select rows by sequential position, returns series

Population     318.523
GDP           17348075
Surface        9525067
HDI              0.915
Continent      America
Name: United States, dtype: object

In [56]:
df.loc['Italy']

Population     60.685
GDP           2167744
Surface        301336
HDI             0.873
Continent      Europe
Name: Italy, dtype: object

In [57]:
df.loc['France': 'Italy']     #upper limit is included

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe


In [59]:
df.loc['France': 'Italy', 'Population']        #second index denotes which column u want to select

France     63.951
Germany    80.940
Italy      60.685
Name: Population, dtype: float64

In [65]:
df.loc['France': 'Italy', ['Population', 'GDP']] 

Unnamed: 0,Population,GDP
France,63.951,2833687
Germany,80.94,3874437
Italy,60.685,2167744


In [60]:
df.iloc[0]

Population     35.487
GDP           1785387
Surface       9964670
HDI             0.913
Continent     America
Name: Canada, dtype: object

In [61]:
df.iloc[-1]

Population     318.523
GDP           17348075
Surface        9525067
HDI              0.915
Continent      America
Name: United States, dtype: object

In [63]:
df.iloc[[0, 1, -1]]

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
United States,318.523,17348075,9525067,0.915,America


In [64]:
df.iloc[1:3]          #upper limit is not included

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe


In [66]:
df.iloc[1:3, 3]                #column at index 3

France     0.888
Germany    0.916
Name: HDI, dtype: float64

In [67]:
df.iloc[1:3, [0, 3]]   

Unnamed: 0,Population,HDI
France,63.951,0.888
Germany,80.94,0.916


In [68]:
df.iloc[1:3, 1:3]   

Unnamed: 0,GDP,Surface
France,2833687,640679
Germany,3874437,357114


In [69]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


# Conditional Selection(Boolean Arrays)

In [70]:
#Data frame is a collection of series so conditional selection works in similar way
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [71]:
df['Population']>70

Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Name: Population, dtype: bool

In [72]:
df.loc[df['Population']>70]    #the boolean matching is done at index level, so u can filter by any row as long as 
#it contains the right indexes

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602387,377930,0.891,Asia
United States,318.523,17348075,9525067,0.915,America


In [73]:
df.loc[df['Population']>70, 'Population'] 

Germany           80.940
Japan            127.061
United States    318.523
Name: Population, dtype: float64

In [74]:
df.loc[df['Population']>70, ['Population', 'GDP']] 

Unnamed: 0,Population,GDP
Germany,80.94,3874437
Japan,127.061,4602387
United States,318.523,17348075


## Dropping stuff

In [76]:
#instead of selecting u can point which ones you'd like to drop, but they donot change the underlying dataframe
#these are immutable operations, a new dataframe is created to store the result
df.drop('Canada')

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [100]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [77]:
df.drop(['Canada', 'Japan'])

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [78]:
df.drop(columns=['Population', 'HDI'])

Unnamed: 0,GDP,Surface,Continent
Canada,1785387,9964670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602387,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [79]:
df.drop(['Italy', 'Canada'], axis=0)

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [80]:
df.drop(['Population', 'HDI'], axis=1)

Unnamed: 0,GDP,Surface,Continent
Canada,1785387,9964670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602387,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [81]:
df.drop(['Population', 'HDI'], axis='columns')

Unnamed: 0,GDP,Surface,Continent
Canada,1785387,9964670,America
France,2833687,640679,Europe
Germany,3874437,357114,Europe
Italy,2167744,301336,Europe
Japan,4602387,377930,Asia
United Kingdom,2950039,242495,Europe
United States,17348075,9525067,America


In [82]:
df.drop(['Canada', 'Japan'], axis= 'rows')

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


## Operations

In [83]:
df[['Population', 'GDP']]

Unnamed: 0,Population,GDP
Canada,35.487,1785387
France,63.951,2833687
Germany,80.94,3874437
Italy,60.685,2167744
Japan,127.061,4602387
United Kingdom,64.511,2950039
United States,318.523,17348075


In [84]:
df[['Population', 'GDP']]/100

Unnamed: 0,Population,GDP
Canada,0.35487,17853.87
France,0.63951,28336.87
Germany,0.8094,38744.37
Italy,0.60685,21677.44
Japan,1.27061,46023.87
United Kingdom,0.64511,29500.39
United States,3.18523,173480.75


In [85]:
#Operation with series works at column level, broadcasting down the rows

In [90]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [97]:
crisis= pd.Series([-1_000_000, -0.3], index=['GDP', 'HDI'])
crisis

GDP   -1000000.0
HDI         -0.3
dtype: float64

In [98]:
df[['GDP', 'HDI']]

Unnamed: 0,GDP,HDI
Canada,1785387,0.913
France,2833687,0.888
Germany,3874437,0.916
Italy,2167744,0.873
Japan,4602387,0.891
United Kingdom,2950039,0.907
United States,17348075,0.915


In [99]:
df[['GDP', 'HDI']] + crisis

Unnamed: 0,GDP,HDI
Canada,785387.0,0.613
France,1833687.0,0.588
Germany,2874437.0,0.616
Italy,1167744.0,0.573
Japan,3602387.0,0.591
United Kingdom,1950039.0,0.607
United States,16348075.0,0.615


# Modifying Dataframes

## Adding a new column

In [147]:
langs= pd.Series(
    ['French', 'German', 'Italian'],
    index= ['France', 'Germany', 'Italy'],
    name= 'Languages'
)
langs

France      French
Germany     German
Italy      Italian
Name: Languages, dtype: object

In [148]:
df['Language']= langs

In [135]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
Canada,35.487,1785387,9964670,0.913,America,
France,63.951,2833687,640679,0.888,Europe,French
Germany,80.94,3874437,357114,0.916,Europe,German
Italy,60.685,2167744,301336,0.873,Europe,Italian
Japan,127.061,4602387,377930,0.891,Asia,
United Kingdom,64.511,2950039,242495,0.907,Europe,
United States,318.523,17348075,9525067,0.915,America,


## Replacing values per column

In [136]:
df['Language']= 'English'    #when we modify the underlying dataframe we have to use an '=' symbols

In [137]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
Canada,35.487,1785387,9964670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.685,2167744,301336,0.873,Europe,English
Japan,127.061,4602387,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


## Renaming columns

In [146]:
df.rename(
    columns={'HDI': 'Human Development Index'}
, index= {'United Kingdom': 'UK',
         'United States': 'US',
         'Argentina': 'AR'}
    )      #Argentina is not present in data frame but it doesn't effect the operation

Unnamed: 0,Population,GDP,Surface,Human Development Index,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
UK,64.511,2950039,242495,0.907,Europe
US,318.523,17348075,9525067,0.915,America


In [139]:
df              #the original dataframe is not changed

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
Canada,35.487,1785387,9964670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.685,2167744,301336,0.873,Europe,English
Japan,127.061,4602387,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [120]:
df.rename(index= str.upper)

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
CANADA,35.487,1785387,9964670,0.913,America,English
FRANCE,63.951,2833687,640679,0.888,Europe,English
GERMANY,80.94,3874437,357114,0.916,Europe,English
ITALY,60.685,2167744,301336,0.873,Europe,English
JAPAN,127.061,4602387,377930,0.891,Asia,English
UNITED KINGDOM,64.511,2950039,242495,0.907,Europe,English
UNITED STATES,318.523,17348075,9525067,0.915,America,English


In [140]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
Canada,35.487,1785387,9964670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.685,2167744,301336,0.873,Europe,English
Japan,127.061,4602387,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [141]:
df.rename(index=lambda x: x.lower())

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
canada,35.487,1785387,9964670,0.913,America,English
france,63.951,2833687,640679,0.888,Europe,English
germany,80.94,3874437,357114,0.916,Europe,English
italy,60.685,2167744,301336,0.873,Europe,English
japan,127.061,4602387,377930,0.891,Asia,English
united kingdom,64.511,2950039,242495,0.907,Europe,English
united states,318.523,17348075,9525067,0.915,America,English


In [142]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,Language
Canada,35.487,1785387,9964670,0.913,America,English
France,63.951,2833687,640679,0.888,Europe,English
Germany,80.94,3874437,357114,0.916,Europe,English
Italy,60.685,2167744,301336,0.873,Europe,English
Japan,127.061,4602387,377930,0.891,Asia,English
United Kingdom,64.511,2950039,242495,0.907,Europe,English
United States,318.523,17348075,9525067,0.915,America,English


In [149]:
df.drop(columns='Language', inplace= True)                 #dropping column

In [144]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


# Adding values

In [150]:
df.append(pd.Series({
    'Population': 3, 
    'GDP': 5
}, name= 'China'))             #append retuens a new dataframe

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387.0,9964670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.685,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602387.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America
China,3.0,5.0,,,


In [151]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387,9964670,0.913,America
France,63.951,2833687,640679,0.888,Europe
Germany,80.94,3874437,357114,0.916,Europe
Italy,60.685,2167744,301336,0.873,Europe
Japan,127.061,4602387,377930,0.891,Asia
United Kingdom,64.511,2950039,242495,0.907,Europe
United States,318.523,17348075,9525067,0.915,America


In [152]:
#directly set the new index and value to the dataframe
df.loc['China']= pd.Series({'Population': 14_000_000, 'Continent':'Asia'})

In [153]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387.0,9964670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.685,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602387.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America
China,14000000.0,,,,Asia


In [154]:
df.drop('China', inplace= True)

In [155]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.487,1785387.0,9964670.0,0.913,America
France,63.951,2833687.0,640679.0,0.888,Europe
Germany,80.94,3874437.0,357114.0,0.916,Europe
Italy,60.685,2167744.0,301336.0,0.873,Europe
Japan,127.061,4602387.0,377930.0,0.891,Asia
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe
United States,318.523,17348075.0,9525067.0,0.915,America


## More radical index change

In [156]:
df.set_index('Population')

Unnamed: 0_level_0,GDP,Surface,HDI,Continent
Population,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
35.487,1785387.0,9964670.0,0.913,America
63.951,2833687.0,640679.0,0.888,Europe
80.94,3874437.0,357114.0,0.916,Europe
60.685,2167744.0,301336.0,0.873,Europe
127.061,4602387.0,377930.0,0.891,Asia
64.511,2950039.0,242495.0,0.907,Europe
318.523,17348075.0,9525067.0,0.915,America


# Creating column from other columns

In [157]:
df[['Population', 'GDP']]

Unnamed: 0,Population,GDP
Canada,35.487,1785387.0
France,63.951,2833687.0
Germany,80.94,3874437.0
Italy,60.685,2167744.0
Japan,127.061,4602387.0
United Kingdom,64.511,2950039.0
United States,318.523,17348075.0


In [158]:
df['Population']/df['GDP']            #result is series

Canada            0.000020
France            0.000023
Germany           0.000021
Italy             0.000028
Japan             0.000028
United Kingdom    0.000022
United States     0.000018
dtype: float64

In [159]:
df['GDP per Capita']= df['Population']/df['GDP']

In [160]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDP per Capita
Canada,35.487,1785387.0,9964670.0,0.913,America,2e-05
France,63.951,2833687.0,640679.0,0.888,Europe,2.3e-05
Germany,80.94,3874437.0,357114.0,0.916,Europe,2.1e-05
Italy,60.685,2167744.0,301336.0,0.873,Europe,2.8e-05
Japan,127.061,4602387.0,377930.0,0.891,Asia,2.8e-05
United Kingdom,64.511,2950039.0,242495.0,0.907,Europe,2.2e-05
United States,318.523,17348075.0,9525067.0,0.915,America,1.8e-05


# Statistical Info

In [161]:
df.head()

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDP per Capita
Canada,35.487,1785387.0,9964670.0,0.913,America,2e-05
France,63.951,2833687.0,640679.0,0.888,Europe,2.3e-05
Germany,80.94,3874437.0,357114.0,0.916,Europe,2.1e-05
Italy,60.685,2167744.0,301336.0,0.873,Europe,2.8e-05
Japan,127.061,4602387.0,377930.0,0.891,Asia,2.8e-05


In [162]:
df.describe()

Unnamed: 0,Population,GDP,Surface,HDI,GDP per Capita
count,7.0,7.0,7.0,7.0,7.0
mean,107.308286,5080251.0,3058470.0,0.900429,2.3e-05
std,97.245909,5494020.0,4571147.0,0.016592,4e-06
min,35.487,1785387.0,242495.0,0.873,1.8e-05
25%,62.318,2500716.0,329225.0,0.8895,2e-05
50%,64.511,2950039.0,377930.0,0.907,2.2e-05
75%,104.0005,4238412.0,5082873.0,0.914,2.5e-05
max,318.523,17348080.0,9964670.0,0.916,2.8e-05


In [164]:
population= df['Population']

In [165]:
population.min(), population.max()

(35.487, 318.523)

In [166]:
population.sum()

751.158

In [167]:
population.sum()/len(population)

107.30828571428572

In [168]:
population.mean()

107.30828571428572

In [169]:
population.std()    #standard deviation

97.24590949874496

In [170]:
population.median()

64.511

In [171]:
population.describe()

count      7.000000
mean     107.308286
std       97.245909
min       35.487000
25%       62.318000
50%       64.511000
75%      104.000500
max      318.523000
Name: Population, dtype: float64

In [173]:
population.quantile(.25)

62.318

In [175]:
population.quantile([.2, .4, .6, .8, 1])

0.2     61.3382
0.4     64.1750
0.6     74.3684
0.8    117.8368
1.0    318.5230
Name: Population, dtype: float64