In [2]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# Small demo frame: two categorical key columns and two columns of
# standard-normal noise. A seeded generator is used so that a fresh
# "Restart & Run All" reproduces the same numbers every time.
rng = np.random.default_rng(42)
frame1 = pd.DataFrame({
    'key1': ['a', 'a', 'b', 'b', 'a'],
    'key2': ['one', 'two', 'one', 'two', 'one'],
    'data1': rng.standard_normal(5),
    'data2': rng.standard_normal(5),
})

In [4]:
# Show the demo frame built above.
frame1

Unnamed: 0,key1,key2,data1,data2
0,a,one,-0.468967,1.805104
1,a,two,0.659412,0.064954
2,b,one,-1.077594,-0.58308
3,b,two,-0.416233,-2.061643
4,a,one,0.55283,-1.680217


In [6]:
# data1 values bucketed by their key1 label (a SeriesGroupBy).
group1 = frame1.groupby('key1')['data1']

In [7]:
# The groupby object itself is lazy; displaying it only shows its repr.
group1

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000024D16DD1DF0>

In [8]:
# Mean of data1 within each key1 group.
group1.mean()

key1
a    0.247758
b   -0.746914
Name: data1, dtype: float64

In [9]:
# data1 values bucketed by the (key1, key2) pair.
group2 = frame1.groupby(['key1', 'key2'])['data1']

In [10]:
# Displaying the two-key groupby object only shows its repr.
group2

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000024D16E87760>

In [12]:
# Mean of data1 for every (key1, key2) combination present in the data.
mean2 = group2.mean()

In [14]:
# Grouping by two keys yields a two-level MultiIndex (key1, key2).
mean2.index

MultiIndex([('a', 'one'),
            ('a', 'two'),
            ('b', 'one'),
            ('b', 'two')],
           names=['key1', 'key2'])

In [15]:
# Pivot the innermost index level (key2) into columns.
mean2.unstack()

key2,one,two
key1,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0.041931,0.659412
b,-1.077594,-0.416233


In [16]:
# Number of rows in each (key1, key2) group.
frame1.groupby(['key1','key2']).size()

key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64

In [17]:
# Iterating a groupby yields (label, sub-frame) pairs; print each label
# followed by the rows that belong to it.
for label, rows in frame1.groupby('key1'):
    print(label, rows, sep="\n")

a
  key1 key2     data1     data2
0    a  one -0.468967  1.805104
1    a  two  0.659412  0.064954
4    a  one  0.552830 -1.680217
b
  key1 key2     data1     data2
2    b  one -1.077594 -0.583080
3    b  two -0.416233 -2.061643


In [18]:
# With two grouping keys each label is a (key1, key2) tuple; print it
# followed by the matching rows.
for key_pair, rows in frame1.groupby(['key1', 'key2']):
    print(key_pair, rows, sep="\n")

('a', 'one')
  key1 key2     data1     data2
0    a  one -0.468967  1.805104
4    a  one  0.552830 -1.680217
('a', 'two')
  key1 key2     data1     data2
1    a  two  0.659412  0.064954
('b', 'one')
  key1 key2     data1    data2
2    b  one -1.077594 -0.58308
('b', 'two')
  key1 key2     data1     data2
3    b  two -0.416233 -2.061643


In [19]:
# Materialise the key1 groups as a plain dict: label -> sub-frame.
pieces = {label: rows for label, rows in frame1.groupby('key1')}

In [20]:
# Show the label -> sub-frame mapping.
pieces

{'a':   key1 key2     data1     data2
 0    a  one -0.468967  1.805104
 1    a  two  0.659412  0.064954
 4    a  one  0.552830 -1.680217,
 'b':   key1 key2     data1     data2
 2    b  one -1.077594 -0.583080
 3    b  two -0.416233 -2.061643}

In [21]:
# One dict entry per distinct key1 value.
len(pieces)

2

In [22]:
# Rows whose key1 is 'a'.
pieces["a"]

Unnamed: 0,key1,key2,data1,data2
0,a,one,-0.468967,1.805104
1,a,two,0.659412,0.064954
4,a,one,0.55283,-1.680217


In [23]:
# Rows whose key1 is 'b'.
pieces["b"]

Unnamed: 0,key1,key2,data1,data2
2,b,one,-1.077594,-0.58308
3,b,two,-0.416233,-2.061643


In [24]:
# Load the country table; the path is resolved relative to the kernel's
# working directory.
countries = pd.read_json("countries.json")

# Keep only rows that have no missing values in any column.
countries2 = countries.dropna()

# Drop the columns not used in this analysis and expose the "_id" field
# under the name "code". Chained (no inplace=True) so this cell yields the
# same countries3 no matter how many times it is re-run.
countries3 = (
    countries2
    .drop(columns=["cities", "localName", "capital"])
    .rename(columns={"_id": "code"})
)

In [25]:
# Group the cleaned country table by its continent column.
countries_by_continent = countries3.groupby('continent')

In [27]:
# Print each continent label followed by the name column of its rows.
for region, members in countries_by_continent:
    print(region, members['name'], sep="\n")

Africa
2                                     Angola
17                                   Burundi
19                                     Benin
20                              Burkina Faso
35                                  Botswana
36                  Central African Republic
42                             Côte d’Ivoire
43                                  Cameroon
44     Congo, The Democratic Republic of the
45                                     Congo
48                                   Comoros
49                                Cape Verde
57                                  Djibouti
61                                   Algeria
63                                     Egypt
64                                   Eritrea
68                                  Ethiopia
75                                     Gabon
78                                     Ghana
80                                    Guinea
82                                    Gambia
83                             Guinea-Bissau
84 

In [29]:
# lifeExpectancy values bucketed by the (governmentForm, continent) pair.
le_by_government_and_continent = (
    countries3.groupby(["governmentForm", "continent"])["lifeExpectancy"]
)

In [30]:
# Each group label is a (governmentForm, continent) tuple; print it
# followed by the life-expectancy values in that group.
for group_key, life_exp in le_by_government_and_continent:
    print(group_key, life_exp, sep="\n")

('Constitutional Monarchy', 'Africa')
125    50.8
130    69.1
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy', 'Asia')
108    77.4
109    80.7
113    56.5
160    57.8
207    68.6
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy', 'Europe')
59     76.5
66     78.8
76     77.7
123    78.8
127    77.1
131    78.8
158    78.3
159    78.7
200    79.6
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy', 'North America')
13     70.5
24     71.1
27     70.9
31     73.0
86     64.5
107    75.2
115    70.7
122    72.3
226    72.3
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy', 'Oceania')
162    77.8
170    63.1
190    71.3
216    66.3
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy (Emirate)', 'Asia')
117    76.1
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy, Federation', 'Asia')
149    70.8
Name: lifeExpectancy, dtype: float64
('Constitutional Monarchy, Federation', 'Europe')
18    77.8
Name: lifeExpectan

In [33]:
# Mean life expectancy per (governmentForm, continent), with the continent
# level pivoted into columns; combinations with no rows show up as NaN.
le_by_government_and_continent.mean().unstack()

continent,Africa,Asia,Europe,North America,Oceania,South America
governmentForm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Constitutional Monarchy,59.95,68.2,78.255556,71.166667,69.625,
Constitutional Monarchy (Emirate),,76.1,,,,
"Constitutional Monarchy, Federation",,70.8,77.8,79.4,79.8,
Emirate Federation,,74.1,,,,
Federal Republic,53.3,62.7,73.24,74.3,68.6,70.366667
Federation,,,79.6,,,
Islamic Emirate,,45.9,,,,
Islamic Republic,56.6,69.7,,,,
Monarchy,40.4,64.2,,,67.9,
Monarchy (Emirate),,73.0,,,,


In [34]:
# Group the whole frame (all columns) by governmentForm and continent,
# passing the two key columns as Series.
countries_by_government_and_continent = countries3.groupby([countries3["governmentForm"],countries3["continent"]])

In [35]:
# Average every numeric column per (governmentForm, continent) and pivot the
# continent level into columns.
# numeric_only=True is needed because the frame still carries text columns
# (e.g. name): pandas >= 2.0 raises a TypeError when asked to average them
# instead of silently dropping them as older versions did.
countries_by_government_and_continent.mean(numeric_only=True).unstack()

Unnamed: 0_level_0,gnp,gnp,gnp,gnp,gnp,gnp,lifeExpectancy,lifeExpectancy,lifeExpectancy,lifeExpectancy,...,surfaceArea,surfaceArea,surfaceArea,surfaceArea,indepYear,indepYear,indepYear,indepYear,indepYear,indepYear
continent,Africa,Asia,Europe,North America,Oceania,South America,Africa,Asia,Europe,North America,...,Europe,North America,Oceania,South America,Africa,Asia,Europe,North America,Oceania,South America
governmentForm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Constitutional Monarchy,18592.5,784174.6,318625.222222,1704.0,14961.25,,59.95,68.2,78.255556,71.166667,...,178912.1,5573.222,190586.0,,1961.0,1271.6,1468.222222,1975.333333,1959.5,
Constitutional Monarchy (Emirate),,27037.0,,,,,,76.1,,,...,,,,,,1961.0,,,,
"Constitutional Monarchy, Federation",,69213.0,249704.0,598862.0,351182.0,,,70.8,77.8,79.4,...,30530.0,9970622.0,7741232.0,,,1957.0,1830.0,1867.0,1901.0,
Emirate Federation,,37966.0,,,,,,74.1,,,...,,,,,,1971.0,,,,
Federal Republic,34728.5,225620.5,528335.2,4462836.0,212.0,404000.0,53.3,62.7,73.24,74.3,...,3533940.0,5660872.0,714.0,4079963.0,1960.0,1969.0,1954.8,1793.0,1990.0,1816.333333
Federation,,,264478.0,,,,,,79.6,,...,41296.0,,,,,,1499.0,,,
Islamic Emirate,,5976.0,,,,,,45.9,,,...,,,,,,1919.0,,,,
Islamic Republic,10162.0,195746.0,,,,,56.6,69.7,,,...,,,,,1956.0,1906.0,,,,
Monarchy,1206.0,49159.666667,,,146.0,,40.4,64.2,,,...,,,662.0,,1968.0,1937.666667,,,1970.0,
Monarchy (Emirate),,6366.0,,,,,,73.0,,,...,,,,,,1971.0,,,,


In [36]:
# Row count of the cleaned table per continent.
countries3.groupby("continent").size()

continent
Africa           53
Asia             47
Europe           41
North America    23
Oceania          14
South America    12
dtype: int64