# DataFrame

In [2]:
import numpy as np
import pandas as pd

In [3]:
df = pd.DataFrame({
    'Population': [35.467, 63.951, 80.94, 60.665, 127.061, 64.511, 318.523],
    'GDP': [1785387.00, 2833687.00, 3874437.00, 2167744.00, 4602367.00, 2950039.00, 17348075.00],
    'Surface': [9984670, 640679, 357114, 301336, 377930, 242495, 9525067],
    'HDI': [0.913, 0.888, 0.916, 0.873, 0.891, 0.907, 0.915],
    'Continent': ['America', 'Europe', 'Europe', 'Europe', 'Asia', 'Europe', 'America']
})

In [4]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
0,35.467,1785387.0,9984670,0.913,America
1,63.951,2833687.0,640679,0.888,Europe
2,80.94,3874437.0,357114,0.916,Europe
3,60.665,2167744.0,301336,0.873,Europe
4,127.061,4602367.0,377930,0.891,Asia
5,64.511,2950039.0,242495,0.907,Europe
6,318.523,17348075.0,9525067,0.915,America


In [5]:
df.index = [
    'Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States'
]

In [6]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Canada,35.467,1785387.0,9984670,0.913,America
France,63.951,2833687.0,640679,0.888,Europe
Germany,80.94,3874437.0,357114,0.916,Europe
Italy,60.665,2167744.0,301336,0.873,Europe
Japan,127.061,4602367.0,377930,0.891,Asia
United Kingdom,64.511,2950039.0,242495,0.907,Europe
United States,318.523,17348075.0,9525067,0.915,America


In [7]:
df.index

Index(['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom',
       'United States'],
      dtype='object')

In [8]:
df.columns

Index(['Population', 'GDP', 'Surface', 'HDI', 'Continent'], dtype='object')

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Population  7 non-null      float64
 1   GDP         7 non-null      float64
 2   Surface     7 non-null      int64  
 3   HDI         7 non-null      float64
 4   Continent   7 non-null      object 
dtypes: float64(3), int64(1), object(1)
memory usage: 336.0+ bytes


In [10]:
df.describe()

Unnamed: 0,Population,GDP,Surface,HDI
count,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,0.900429
std,97.24997,5494020.0,4576187.0,0.016592
min,35.467,1785387.0,242495.0,0.873
25%,62.308,2500716.0,329225.0,0.8895
50%,64.511,2950039.0,377930.0,0.907
75%,104.0005,4238402.0,5082873.0,0.914
max,318.523,17348080.0,9984670.0,0.916


In [11]:
df.shape

(7, 5)

In [12]:
df.size

35

In [13]:
df.dtypes

Population    float64
GDP           float64
Surface         int64
HDI           float64
Continent      object
dtype: object

In [14]:
df.dtypes.value_counts()

float64    3
int64      1
object     1
Name: count, dtype: int64

# **Indexing, Selection, Slicing**

In [15]:
df["Population"]

Canada             35.467
France             63.951
Germany            80.940
Italy              60.665
Japan             127.061
United Kingdom     64.511
United States     318.523
Name: Population, dtype: float64

In [16]:
df["Population"].to_frame()

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [17]:
df[["Population", "GDP"]]

Unnamed: 0,Population,GDP
Canada,35.467,1785387.0
France,63.951,2833687.0
Germany,80.94,3874437.0
Italy,60.665,2167744.0
Japan,127.061,4602367.0
United Kingdom,64.511,2950039.0
United States,318.523,17348075.0


In [18]:
df.loc["Canada"]

Population       35.467
GDP           1785387.0
Surface         9984670
HDI               0.913
Continent       America
Name: Canada, dtype: object

In [19]:
df.iloc[2]

Population        80.94
GDP           3874437.0
Surface          357114
HDI               0.916
Continent        Europe
Name: Germany, dtype: object

In [20]:
df.iloc[2].to_frame().T

Unnamed: 0,Population,GDP,Surface,HDI,Continent
Germany,80.94,3874437.0,357114,0.916,Europe


In [21]:
df.loc['France' : 'Japan']

Unnamed: 0,Population,GDP,Surface,HDI,Continent
France,63.951,2833687.0,640679,0.888,Europe
Germany,80.94,3874437.0,357114,0.916,Europe
Italy,60.665,2167744.0,301336,0.873,Europe
Japan,127.061,4602367.0,377930,0.891,Asia


In [22]:
df.loc['France' : 'Japan', 'GDP'].to_frame

<bound method Series.to_frame of France     2833687.0
Germany    3874437.0
Italy      2167744.0
Japan      4602367.0
Name: GDP, dtype: float64>

In [23]:
df.iloc[1:4, [0, 3]]

Unnamed: 0,Population,HDI
France,63.951,0.888
Germany,80.94,0.916
Italy,60.665,0.873


In [24]:
df["GDPPP"] = df["GDP"] / df["Population"]

In [25]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDPPP
Canada,35.467,1785387.0,9984670,0.913,America,50339.385908
France,63.951,2833687.0,640679,0.888,Europe,44310.284437
Germany,80.94,3874437.0,357114,0.916,Europe,47868.013343
Italy,60.665,2167744.0,301336,0.873,Europe,35733.025633
Japan,127.061,4602367.0,377930,0.891,Asia,36221.712406
United Kingdom,64.511,2950039.0,242495,0.907,Europe,45729.239975
United States,318.523,17348075.0,9525067,0.915,America,54464.12033


In [28]:
df1 = pd.DataFrame({
    'Name':[1, 2, 3],
    'Age':[10, 90, 30]
},index=[11, 22, 33])

In [29]:
df1

Unnamed: 0,Name,Age
11,1,10
22,2,90
33,3,30


In [30]:
df.loc[:, "HDI"] = 99.99

In [31]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDPPP
Canada,35.467,1785387.0,9984670,99.99,America,50339.385908
France,63.951,2833687.0,640679,99.99,Europe,44310.284437
Germany,80.94,3874437.0,357114,99.99,Europe,47868.013343
Italy,60.665,2167744.0,301336,99.99,Europe,35733.025633
Japan,127.061,4602367.0,377930,99.99,Asia,36221.712406
United Kingdom,64.511,2950039.0,242495,99.99,Europe,45729.239975
United States,318.523,17348075.0,9525067,99.99,America,54464.12033


# Statistical info

In [32]:
df.head()

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDPPP
Canada,35.467,1785387.0,9984670,99.99,America,50339.385908
France,63.951,2833687.0,640679,99.99,Europe,44310.284437
Germany,80.94,3874437.0,357114,99.99,Europe,47868.013343
Italy,60.665,2167744.0,301336,99.99,Europe,35733.025633
Japan,127.061,4602367.0,377930,99.99,Asia,36221.712406


In [33]:
df.describe()

Unnamed: 0,Population,GDP,Surface,HDI,GDPPP
count,7.0,7.0,7.0,7.0,7.0
mean,107.302571,5080248.0,3061327.0,99.99,44952.254576
std,97.24997,5494020.0,4576187.0,0.0,6954.983875
min,35.467,1785387.0,242495.0,99.99,35733.025633
25%,62.308,2500716.0,329225.0,99.99,40265.998421
50%,64.511,2950039.0,377930.0,99.99,45729.239975
75%,104.0005,4238402.0,5082873.0,99.99,49103.699626
max,318.523,17348080.0,9984670.0,99.99,54464.12033


In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, Canada to United States
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Population  7 non-null      float64
 1   GDP         7 non-null      float64
 2   Surface     7 non-null      int64  
 3   HDI         7 non-null      float64
 4   Continent   7 non-null      object 
 5   GDPPP       7 non-null      float64
dtypes: float64(4), int64(1), object(1)
memory usage: 692.0+ bytes


In [36]:
population = df["Population"]

In [38]:
population.max(), population.min()

(np.float64(318.523), np.float64(35.467))

In [40]:
population.sum()

np.float64(751.118)

In [41]:
population.std()

np.float64(97.24996987121581)

In [42]:
population.mean()

np.float64(107.30257142857144)

In [43]:
population.median()

np.float64(64.511)

In [45]:
population.to_frame()

Unnamed: 0,Population
Canada,35.467
France,63.951
Germany,80.94
Italy,60.665
Japan,127.061
United Kingdom,64.511
United States,318.523


In [49]:
df.loc['China'] = pd.Series([])

In [50]:
df

Unnamed: 0,Population,GDP,Surface,HDI,Continent,GDPPP
Canada,35.467,1785387.0,9984670.0,99.99,America,50339.385908
France,63.951,2833687.0,640679.0,99.99,Europe,44310.284437
Germany,80.94,3874437.0,357114.0,99.99,Europe,47868.013343
Italy,60.665,2167744.0,301336.0,99.99,Europe,35733.025633
Japan,127.061,4602367.0,377930.0,99.99,Asia,36221.712406
United Kingdom,64.511,2950039.0,242495.0,99.99,Europe,45729.239975
United States,318.523,17348075.0,9525067.0,99.99,America,54464.12033
China,,,,,,
