In [3]:
import pandas as pd
import numpy as np

In [4]:
ser1 = pd.Series( [10, 20, 30] )
ser1

0    10
1    20
2    30
dtype: int64

In [5]:
ser1.index = ['a', 'b', 'c']
ser1

a    10
b    20
c    30
dtype: int64

In [6]:
ser2 = pd.Series( {'a' : 100, 'b' : 200, 'c' : 300} )
ser2

a    100
b    200
c    300
dtype: int64

In [7]:
ser3 = pd.Series( data = [1,2,3,4], index = ['USA', 'Germany', 'Fance', 'Japan'])
ser3

USA        1
Germany    2
Fance      3
Japan      4
dtype: int64

In [8]:
ser4 = pd.Series( [1,2,5,4], index = ['USA', 'Germany', 'Italy', 'Japan'])
ser4

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [9]:
ser3 + ser4

Fance      NaN
Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
dtype: float64

In [10]:
df = pd.read_csv('gapminder.csv', index_col="Unnamed: 0")

In [11]:
df.head()

Unnamed: 0,year,continent,country,income,life_exp,population
0,2014,asia,Philippines,6598.0,70.7,100102249.0
1,2014,americas,Paraguay,8038.0,74.3,6552584.0
2,2014,asia,Palau,14078.0,,21094.0
3,2014,asia,Pakistan,4619.0,65.6,185546257.0
4,2014,americas,St.-Pierre-et-Miquelon,,,6277.0


In [12]:
df['income'][:5]

0     6598.0
1     8038.0
2    14078.0
3     4619.0
4        NaN
Name: income, dtype: float64

In [13]:
df['gross_income'] = df['income'] * df['population']
df

Unnamed: 0,year,continent,country,income,life_exp,population,gross_income
0,2014,asia,Philippines,6598.0,70.7,100102249.0,6.604746e+11
1,2014,americas,Paraguay,8038.0,74.3,6552584.0,5.266967e+10
2,2014,asia,Palau,14078.0,,21094.0,2.969613e+08
3,2014,asia,Pakistan,4619.0,65.6,185546257.0,8.570382e+11
4,2014,americas,St.-Pierre-et-Miquelon,,,6277.0,
...,...,...,...,...,...,...,...
54484,1800,americas,St.-Pierre-et-Miquelon,,,1782.0,
54485,1800,europe,Svalbard,,,50.0,
54486,1800,asia,Tokelau,,,1009.0,
54487,1800,asia,United Korea (former),,,13740000.0,


In [14]:
df.drop( labels='gross_income', axis = 1, inplace = True )
df

Unnamed: 0,year,continent,country,income,life_exp,population
0,2014,asia,Philippines,6598.0,70.7,100102249.0
1,2014,americas,Paraguay,8038.0,74.3,6552584.0
2,2014,asia,Palau,14078.0,,21094.0
3,2014,asia,Pakistan,4619.0,65.6,185546257.0
4,2014,americas,St.-Pierre-et-Miquelon,,,6277.0
...,...,...,...,...,...,...
54484,1800,americas,St.-Pierre-et-Miquelon,,,1782.0
54485,1800,europe,Svalbard,,,50.0
54486,1800,asia,Tokelau,,,1009.0
54487,1800,asia,United Korea (former),,,13740000.0


In [15]:
df.loc[3]

year                 2014
continent            asia
country          Pakistan
income               4619
life_exp             65.6
population    1.85546e+08
Name: 3, dtype: object

In [16]:
df.loc[4, 'country']

'St.-Pierre-et-Miquelon'

In [17]:
df.loc[ [10, 100, 1000], ['continent', 'country'] ]

Unnamed: 0,continent,country
10,asia,Papua New Guinea
100,africa,Namibia
1000,africa,Cape Verde


In [18]:
df[ df['income'] > 50000 ]

Unnamed: 0,year,continent,country,income,life_exp,population
6,2014,europe,Norway,64020.0,82.00,5140311.0
27,2014,asia,"Macao, China",142893.0,80.61,588781.0
53,2014,asia,"Hong Kong, China",52552.0,83.56,7194563.0
56,2014,europe,Luxembourg,88203.0,82.10,556316.0
77,2014,asia,Kuwait,83394.0,80.20,3782450.0
...,...,...,...,...,...,...
15196,1954,asia,Brunei,57771.0,58.83,60120.0
15455,1953,asia,Brunei,56876.0,58.22,56968.0
15629,1952,asia,Brunei,55994.0,57.60,53927.0
15951,1951,asia,Brunei,55126.0,56.99,50961.0


In [19]:
df[ (df['income'] > 50000) & (df['life_exp'] > 80) ].head()

Unnamed: 0,year,continent,country,income,life_exp,population
6,2014,europe,Norway,64020.0,82.0,5140311.0
27,2014,asia,"Macao, China",142893.0,80.61,588781.0
53,2014,asia,"Hong Kong, China",52552.0,83.56,7194563.0
56,2014,europe,Luxembourg,88203.0,82.1,556316.0
77,2014,asia,Kuwait,83394.0,80.2,3782450.0


In [20]:
df.set_index('year')

Unnamed: 0_level_0,continent,country,income,life_exp,population
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014,asia,Philippines,6598.0,70.7,100102249.0
2014,americas,Paraguay,8038.0,74.3,6552584.0
2014,asia,Palau,14078.0,,21094.0
2014,asia,Pakistan,4619.0,65.6,185546257.0
2014,americas,St.-Pierre-et-Miquelon,,,6277.0
...,...,...,...,...,...
1800,americas,St.-Pierre-et-Miquelon,,,1782.0
1800,europe,Svalbard,,,50.0
1800,asia,Tokelau,,,1009.0
1800,asia,United Korea (former),,,13740000.0


In [21]:
df.reset_index()

Unnamed: 0,index,year,continent,country,income,life_exp,population
0,0,2014,asia,Philippines,6598.0,70.7,100102249.0
1,1,2014,americas,Paraguay,8038.0,74.3,6552584.0
2,2,2014,asia,Palau,14078.0,,21094.0
3,3,2014,asia,Pakistan,4619.0,65.6,185546257.0
4,4,2014,americas,St.-Pierre-et-Miquelon,,,6277.0
...,...,...,...,...,...,...,...
54484,54484,1800,americas,St.-Pierre-et-Miquelon,,,1782.0
54485,54485,1800,europe,Svalbard,,,50.0
54486,54486,1800,asia,Tokelau,,,1009.0
54487,54487,1800,asia,United Korea (former),,,13740000.0


In [22]:
df['income'].fillna(value = 0).head()

0     6598.0
1     8038.0
2    14078.0
3     4619.0
4        0.0
Name: income, dtype: float64

In [23]:
by_year = df.groupby('year')
by_year

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001A570A3AB88>

In [24]:
by_year.mean()

Unnamed: 0_level_0,income,life_exp,population
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1800,946.288557,31.486020,4.149630e+06
1801,946.661692,31.448905,4.167524e+06
1802,949.452736,31.463483,4.185563e+06
1803,949.194030,31.377413,4.203777e+06
1804,950.751244,31.446318,4.222136e+06
...,...,...,...
2010,17559.950739,70.969904,2.948360e+07
2011,18019.333333,71.324375,3.048871e+07
2012,18127.674877,71.663077,3.085723e+07
2013,18305.502463,71.916106,3.122610e+07


In [25]:
by_year.describe().head()

Unnamed: 0_level_0,income,income,income,income,income,income,income,income,life_exp,life_exp,life_exp,life_exp,life_exp,population,population,population,population,population,population,population,population
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1800,201.0,946.288557,502.228769,350.0,608.0,850.0,1097.0,4235.0,201.0,31.48602,...,33.9,42.85,254.0,4149630.0,23208470.0,0.0,31750.0,396432.5,1989821.0,321675014.0
1801,201.0,946.661692,500.413744,350.0,608.0,852.0,1101.0,4161.0,201.0,31.448905,...,33.9,40.3,254.0,4167524.0,23369510.0,0.0,31750.0,396432.5,1996964.5,324408863.0
1802,201.0,949.452736,510.208133,350.0,608.0,853.0,1105.0,4391.0,201.0,31.463483,...,33.9,44.37,254.0,4185563.0,23532040.0,0.0,31750.0,396432.5,1998352.0,327165946.0
1803,201.0,949.19403,505.071586,350.0,609.0,854.0,1110.0,4297.0,201.0,31.377413,...,33.8,44.84,254.0,4203777.0,23696070.0,0.0,31750.0,396432.5,1999743.5,329946461.0
1804,201.0,950.751244,512.245476,350.0,609.0,854.0,1114.0,4502.0,201.0,31.446318,...,33.87,42.83,254.0,4222136.0,23861610.0,0.0,31750.0,396432.5,2003416.25,332750607.0


In [26]:
by_year.describe()['income']

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1800,201.0,946.288557,502.228769,350.0,608.00,850.0,1097.0,4235.0
1801,201.0,946.661692,500.413744,350.0,608.00,852.0,1101.0,4161.0
1802,201.0,949.452736,510.208133,350.0,608.00,853.0,1105.0,4391.0
1803,201.0,949.194030,505.071586,350.0,609.00,854.0,1110.0,4297.0
1804,201.0,950.751244,512.245476,350.0,609.00,854.0,1114.0,4502.0
...,...,...,...,...,...,...,...,...
2010,203.0,17559.950739,19685.759131,614.0,3394.00,10515.0,24702.0,127984.0
2011,204.0,18019.333333,20420.280357,614.0,3508.75,11049.0,25232.0,133734.0
2012,203.0,18127.674877,20504.649685,616.0,3675.00,11046.0,24876.0,130990.0
2013,203.0,18305.502463,20782.622473,584.0,3788.00,11405.0,25029.5,136540.0


In [27]:
df['continent'].unique()

array(['asia', 'americas', 'europe', 'africa'], dtype=object)

In [28]:
df['continent'].nunique()

4

In [29]:
df['continent'].value_counts()

asia        16304
europe      14335
africa      12679
americas    11171
Name: continent, dtype: int64

In [30]:
asia_df = df[ df['continent'] == 'asia' ]
europe_df = df[ df['continent'] == 'europe' ]
africa_df = df[ df['continent'] == 'africa' ]
america_df = df[ df['continent'] == 'america' ]

In [31]:
df.groupby('continent').describe()

Unnamed: 0_level_0,year,year,year,year,year,year,year,year,income,income,...,life_exp,life_exp,population,population,population,population,population,population,population,population
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
continent,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
africa,12679.0,1906.950548,62.039946,1800.0,1853.0,1907.0,1961.0,2014.0,11610.0,1690.989922,...,43.035,79.89,12679.0,4597465.0,10699910.0,1997.0,378231.5,1462023.0,3996569.5,176460500.0
americas,11171.0,1906.915406,62.02061,1800.0,1853.0,1907.0,1961.0,2014.0,8601.0,4785.90722,...,60.265,81.7,11170.0,5382707.0,23636450.0,100.0,29409.0,182820.5,1675228.75,317718800.0
asia,16304.0,1906.768523,61.9388,1800.0,1853.0,1907.0,1960.0,2014.0,12900.0,4964.615349,...,54.05,83.56,16304.0,19425100.0,95394400.0,0.0,34729.25,563606.0,4797171.75,1390110000.0
europe,14335.0,1906.489431,61.785256,1800.0,1853.0,1906.0,1960.0,2014.0,10325.0,7291.621308,...,67.945,84.8,14329.0,10514250.0,25318130.0,50.0,184610.0,2501071.0,8464787.0,291429600.0


In [32]:
df[ df['continent'] == 'europe' ].groupby('country').describe()['life_exp']

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Akrotiri and Dhekelia,0.0,,,,,,,
Albania,215.0,46.402558,16.117605,19.43,35.400,35.40,63.395,77.90
Andorra,25.0,83.652000,1.055194,81.70,82.800,84.10,84.600,84.80
Armenia,215.0,46.354140,16.408810,22.63,34.000,36.20,67.095,74.50
Austria,215.0,50.223256,17.193117,31.39,34.400,41.53,69.130,81.20
...,...,...,...,...,...,...,...,...
Ukraine,215.0,47.049256,15.262505,9.05,36.600,36.60,67.245,71.97
United Kingdom,215.0,55.329023,14.535273,37.32,41.685,49.11,70.790,80.90
West Germany,0.0,,,,,,,
Yugoslavia,0.0,,,,,,,


In [33]:
df.columns

Index(['year', 'continent', 'country', 'income', 'life_exp', 'population'], dtype='object')

In [34]:
df.sort_values('life_exp')

Unnamed: 0,year,continent,country,income,life_exp,population
24449,1918,asia,Samoa,2047.0,1.0,36589.0
35272,1875,asia,Fiji,949.0,1.0,128877.0
24487,1918,asia,French Polynesia,,1.0,34036.0
49628,1819,africa,Tunisia,718.0,1.5,874347.0
31424,1890,africa,Ethiopia,537.0,4.0,7911486.0
...,...,...,...,...,...,...
54484,1800,americas,St.-Pierre-et-Miquelon,,,1782.0
54485,1800,europe,Svalbard,,,50.0
54486,1800,asia,Tokelau,,,1009.0
54487,1800,asia,United Korea (former),,,13740000.0


In [35]:
df.sort_values('life_exp', ascending = False)

Unnamed: 0,year,continent,country,income,life_exp,population
373,2013,europe,Andorra,43735.0,84.8,80788.0
117,2014,europe,Andorra,44929.0,84.8,79223.0
959,2010,europe,Andorra,38982.0,84.7,84449.0
602,2012,europe,Andorra,41926.0,84.7,82431.0
724,2011,europe,Andorra,41958.0,84.7,83751.0
...,...,...,...,...,...,...
54484,1800,americas,St.-Pierre-et-Miquelon,,,1782.0
54485,1800,europe,Svalbard,,,50.0
54486,1800,asia,Tokelau,,,1009.0
54487,1800,asia,United Korea (former),,,13740000.0


In [38]:
df.pivot_table(values = 'income', index = ['year', 'continent'])

Unnamed: 0_level_0,Unnamed: 1_level_0,income
year,continent,Unnamed: 2_level_1
1800,africa,626.833333
1800,americas,1023.900000
1800,asia,853.183333
1800,europe,1366.127660
1801,africa,627.351852
...,...,...
2013,europe,29154.428571
2014,africa,5367.425926
2014,americas,18010.750000
2014,asia,22030.183333
