In [1]:
import pandas as pd

In [2]:
# Load the Oceania GDP-per-capita table; with no index_col, rows get the default integer index.
data = pd.read_csv('data/gapminder_gdp_oceania.csv')

In [3]:
# Plain-text rendering: the wide frame wraps across several blocks of columns.
print(data)

       country  gdpPercap_1952  gdpPercap_1957  gdpPercap_1962  \
0    Australia     10039.59564     10949.64959     12217.22686   
1  New Zealand     10556.57566     12247.39532     13175.67800   

   gdpPercap_1967  gdpPercap_1972  gdpPercap_1977  gdpPercap_1982  \
0     14526.12465     16788.62948     18334.19751     19477.00928   
1     14463.91893     16046.03728     16233.71770     17632.41040   

   gdpPercap_1987  gdpPercap_1992  gdpPercap_1997  gdpPercap_2002  \
0     21888.88903     23424.76683     26997.93657     30687.75473   
1     19007.19129     18363.32494     21050.41377     23189.80135   

   gdpPercap_2007  
0     34435.36744  
1     25185.00911  


That's not great looking. Let's use a Jupyter-notebook-specific way to create a better-looking table.

In [4]:
from IPython.display import display

In [5]:
display(data)
# Note: a bare DataFrame name as the last line of a cell renders the same rich
# table, with no display() or print() call needed.

Unnamed: 0,country,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
0,Australia,10039.59564,10949.64959,12217.22686,14526.12465,16788.62948,18334.19751,19477.00928,21888.88903,23424.76683,26997.93657,30687.75473,34435.36744
1,New Zealand,10556.57566,12247.39532,13175.678,14463.91893,16046.03728,16233.7177,17632.4104,19007.19129,18363.32494,21050.41377,23189.80135,25185.00911


Notice the index (0, 1). By default, this is how you refer to a row. Let's say we want the row that holds information about New Zealand. Use `iloc`, for integer-based location.

In [6]:
# Position 1 is the second row (New Zealand here); the result is a Series keyed by column name.
display(data.iloc[1])

country           New Zealand
gdpPercap_1952        10556.6
gdpPercap_1957        12247.4
gdpPercap_1962        13175.7
gdpPercap_1967        14463.9
gdpPercap_1972          16046
gdpPercap_1977        16233.7
gdpPercap_1982        17632.4
gdpPercap_1987        19007.2
gdpPercap_1992        18363.3
gdpPercap_1997        21050.4
gdpPercap_2002        23189.8
gdpPercap_2007          25185
Name: 1, dtype: object

But suppose we want to get information by country name (i.e., look up "Australia" rather than 0).

Use index_col to specify that a column’s values should be used as row headings. 
* Row headings are numbers (0 and 1 in this case).
* Really want to index by country.
* Pass the name of the column to read_csv as its index_col parameter to do this.

In [7]:
# Re-read the file, this time using the 'country' column as the row index.
data = pd.read_csv('data/gapminder_gdp_oceania.csv', index_col='country')
display(data)

Unnamed: 0_level_0,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Australia,10039.59564,10949.64959,12217.22686,14526.12465,16788.62948,18334.19751,19477.00928,21888.88903,23424.76683,26997.93657,30687.75473,34435.36744
New Zealand,10556.57566,12247.39532,13175.678,14463.91893,16046.03728,16233.7177,17632.4104,19007.19129,18363.32494,21050.41377,23189.80135,25185.00911


Now we can use **named location**, loc, rather than **integer location**, iloc

In [8]:
# Label-based lookup: select the row whose index label is "New Zealand".
display(data.loc["New Zealand"])

gdpPercap_1952    10556.57566
gdpPercap_1957    12247.39532
gdpPercap_1962    13175.67800
gdpPercap_1967    14463.91893
gdpPercap_1972    16046.03728
gdpPercap_1977    16233.71770
gdpPercap_1982    17632.41040
gdpPercap_1987    19007.19129
gdpPercap_1992    18363.32494
gdpPercap_1997    21050.41377
gdpPercap_2002    23189.80135
gdpPercap_2007    25185.00911
Name: New Zealand, dtype: float64

A review of some utilities on pandas dataframes

In [9]:
# (rows, columns) of the frame.
print(data.shape)

(2, 12)


In [10]:
# Column labels of the frame.
print(data.columns)

Index(['gdpPercap_1952', 'gdpPercap_1957', 'gdpPercap_1962', 'gdpPercap_1967',
       'gdpPercap_1972', 'gdpPercap_1977', 'gdpPercap_1982', 'gdpPercap_1987',
       'gdpPercap_1992', 'gdpPercap_1997', 'gdpPercap_2002', 'gdpPercap_2007'],
      dtype='object')


In [11]:
# Transpose: the year columns become rows and the countries become columns.
print(data.T)

country           Australia  New Zealand
gdpPercap_1952  10039.59564  10556.57566
gdpPercap_1957  10949.64959  12247.39532
gdpPercap_1962  12217.22686  13175.67800
gdpPercap_1967  14526.12465  14463.91893
gdpPercap_1972  16788.62948  16046.03728
gdpPercap_1977  18334.19751  16233.71770
gdpPercap_1982  19477.00928  17632.41040
gdpPercap_1987  21888.88903  19007.19129
gdpPercap_1992  23424.76683  18363.32494
gdpPercap_1997  26997.93657  21050.41377
gdpPercap_2002  30687.75473  23189.80135
gdpPercap_2007  34435.36744  25185.00911


In [12]:
# After transposing, the year columns are row labels, so .loc can pick one year for every country.
print(data.T.loc["gdpPercap_2002"])

country
Australia      30687.75473
New Zealand    23189.80135
Name: gdpPercap_2002, dtype: float64


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
data.describe()

Unnamed: 0,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
count,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
mean,10298.08565,11598.522455,12696.45243,14495.02179,16417.33338,17283.957605,18554.70984,20448.04016,20894.045885,24024.17517,26938.77804,29810.188275
std,365.560078,917.644806,677.727301,43.986086,525.09198,1485.263517,1304.328377,2037.668013,3578.979883,4205.533703,5301.85368,6540.991104
min,10039.59564,10949.64959,12217.22686,14463.91893,16046.03728,16233.7177,17632.4104,19007.19129,18363.32494,21050.41377,23189.80135,25185.00911
25%,10168.840645,11274.086022,12456.839645,14479.47036,16231.68533,16758.837652,18093.56012,19727.615725,19628.685413,22537.29447,25064.289695,27497.598692
50%,10298.08565,11598.522455,12696.45243,14495.02179,16417.33338,17283.957605,18554.70984,20448.04016,20894.045885,24024.17517,26938.77804,29810.188275
75%,10427.330655,11922.958888,12936.065215,14510.57322,16602.98143,17809.077557,19015.85956,21168.464595,22159.406358,25511.05587,28813.266385,32122.777857
max,10556.57566,12247.39532,13175.678,14526.12465,16788.62948,18334.19751,19477.00928,21888.88903,23424.76683,26997.93657,30687.75473,34435.36744


**Exercises**

Read the data in gapminder_gdp_americas.csv (which should be in the same directory as gapminder_gdp_oceania.csv) into a variable called americas and display its summary statistics.

After reading the data for the Americas, use help(americas.head) and help(americas.tail) to find out what DataFrame.head and DataFrame.tail do.
* What method call will display the first rows of this data?
* What method call will display the last columns of this data? (Hint: you may need to change your view of the data.)

In [14]:
# Exercise: load the Americas table, indexed by country name.
americas = pd.read_csv('data/gapminder_gdp_americas.csv', index_col='country')

In [15]:
# First rows of the frame (five are shown below).
display(americas.head())

Unnamed: 0_level_0,continent,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Argentina,Americas,5911.315053,6856.856212,7133.166023,8052.953021,9443.038526,10079.02674,8997.897412,9139.671389,9308.41871,10967.28195,8797.640716,12779.37964
Bolivia,Americas,2677.326347,2127.686326,2180.972546,2586.886053,2980.331339,3548.097832,3156.510452,2753.69149,2961.699694,3326.143191,3413.26269,3822.137084
Brazil,Americas,2108.944355,2487.365989,3336.585802,3429.864357,4985.711467,6660.118654,7030.835878,7807.095818,6950.283021,7957.980824,8131.212843,9065.800825
Canada,Americas,11367.16112,12489.95006,13462.48555,16076.58803,18970.57086,22090.88306,22898.79214,26626.51503,26342.88426,28954.92589,33328.96507,36319.23501
Chile,Americas,3939.978789,4315.622723,4519.094331,5106.654313,5494.024437,4756.763836,5095.665738,5547.063754,7596.125964,10118.05318,10778.78385,13171.63885


In [16]:
# Last rows of the frame (five are shown below).
display(americas.tail())

Unnamed: 0_level_0,continent,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Puerto Rico,Americas,3081.959785,3907.156189,5108.34463,6929.277714,9123.041742,9770.524921,10330.98915,12281.34191,14641.58711,16999.4333,18855.60618,19328.70901
Trinidad and Tobago,Americas,3023.271928,4100.3934,4997.523971,5621.368472,6619.551419,7899.554209,9119.528607,7388.597823,7370.990932,8792.573126,11460.60023,18008.50924
United States,Americas,13990.48208,14847.12712,16173.14586,19530.36557,21806.03594,24072.63213,25009.55914,29884.35041,32003.93224,35767.43303,39097.09955,42951.65309
Uruguay,Americas,5716.766744,6150.772969,5603.357717,5444.61962,5703.408898,6504.339663,6920.223051,7452.398969,8137.004775,9230.240708,7727.002004,10611.46299
Venezuela,Americas,7689.799761,9802.466526,8422.974165,9541.474188,10505.25966,13143.95095,11152.41011,9883.584648,10733.92631,10165.49518,8605.047831,11415.80569


In [17]:
# Transpose so the original columns become rows.
americas_t = americas.T

In [18]:
# tail() on the transposed frame shows what were the last columns of the original.
display(americas_t.tail())

country,Argentina,Bolivia,Brazil,Canada,Chile,Colombia,Costa Rica,Cuba,Dominican Republic,Ecuador,...,Mexico,Nicaragua,Panama,Paraguay,Peru,Puerto Rico,Trinidad and Tobago,United States,Uruguay,Venezuela
gdpPercap_1987,9139.67,2753.69,7807.1,26626.5,5547.06,4903.22,5629.92,7532.92,2899.84,6481.78,...,8688.16,2955.98,7034.78,3998.88,6360.94,12281.3,7388.6,29884.4,7452.4,9883.58
gdpPercap_1992,9308.42,2961.7,6950.28,26342.9,7596.13,5444.65,6160.42,5592.84,3044.21,7103.7,...,9472.38,2170.15,6618.74,4196.41,4446.38,14641.6,7370.99,32003.9,8137.0,10733.9
gdpPercap_1997,10967.3,3326.14,7957.98,28954.9,10118.1,6117.36,6677.05,5431.99,3614.1,7429.46,...,9767.3,2253.02,7113.69,4247.4,5838.35,16999.4,8792.57,35767.4,9230.24,10165.5
gdpPercap_2002,8797.64,3413.26,8131.21,33329.0,10778.8,5755.26,7723.45,6340.65,4563.81,5773.04,...,10742.4,2474.55,7356.03,3783.67,5909.02,18855.6,11460.6,39097.1,7727.0,8605.05
gdpPercap_2007,12779.4,3822.14,9065.8,36319.2,13171.6,7006.58,9645.06,8948.1,6025.37,6873.26,...,11977.6,2749.32,9809.19,4172.84,7408.91,19328.7,18008.5,42951.7,10611.5,11415.8


**Back to selection and working with dataframes**

Read in the data set for Europe.

In [19]:
# Rebind data to the Europe table (replacing the Oceania frame), indexed by country.
data = pd.read_csv('data/gapminder_gdp_europe.csv', index_col='country')
display(data)

Unnamed: 0_level_0,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972,gdpPercap_1977,gdpPercap_1982,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Albania,1601.056136,1942.284244,2312.888958,2760.196931,3313.422188,3533.00391,3630.880722,3738.932735,2497.437901,3193.054604,4604.211737,5937.029526
Austria,6137.076492,8842.59803,10750.72111,12834.6024,16661.6256,19749.4223,21597.08362,23687.82607,27042.01868,29095.92066,32417.60769,36126.4927
Belgium,8343.105127,9714.960623,10991.20676,13149.04119,16672.14356,19117.97448,20979.84589,22525.56308,25575.57069,27561.19663,30485.88375,33692.60508
Bosnia and Herzegovina,973.533195,1353.989176,1709.683679,2172.352423,2860.16975,3528.481305,4126.613157,4314.114757,2546.781445,4766.355904,6018.975239,7446.298803
Bulgaria,2444.286648,3008.670727,4254.337839,5577.0028,6597.494398,7612.240438,8224.191647,8239.854824,6302.623438,5970.38876,7696.777725,10680.79282
Croatia,3119.23652,4338.231617,5477.890018,6960.297861,9164.090127,11305.38517,13221.82184,13822.58394,8447.794873,9875.604515,11628.38895,14619.22272
Czech Republic,6876.14025,8256.343918,10136.86713,11399.44489,13108.4536,14800.16062,15377.22855,16310.4434,14297.02122,16048.51424,17596.21022,22833.30851
Denmark,9692.385245,11099.65935,13583.31351,15937.21123,18866.20721,20422.9015,21688.04048,25116.17581,26406.73985,29804.34567,32166.50006,35278.41874
Finland,6424.519071,7545.415386,9371.842561,10921.63626,14358.8759,15605.42283,18533.15761,21141.01223,20647.16499,23723.9502,28204.59057,33207.0844
France,7029.809327,8662.834898,10560.48553,12999.91766,16107.19171,18292.63514,20293.89746,22066.44214,24703.79615,25889.78487,28926.03234,30470.0167


To get a single value

In [20]:
# Row label first, column label second: returns a single value.
print(data.loc["Albania", "gdpPercap_1952"])

1601.056136


Use : on its own to mean all columns or all rows.
(this will do the same as getting Albania without specifying columns)

In [21]:
# ':' in the column position selects every column for the Albania row.
print(data.loc["Albania", :])

gdpPercap_1952    1601.056136
gdpPercap_1957    1942.284244
gdpPercap_1962    2312.888958
gdpPercap_1967    2760.196931
gdpPercap_1972    3313.422188
gdpPercap_1977    3533.003910
gdpPercap_1982    3630.880722
gdpPercap_1987    3738.932735
gdpPercap_1992    2497.437901
gdpPercap_1997    3193.054604
gdpPercap_2002    4604.211737
gdpPercap_2007    5937.029526
Name: Albania, dtype: float64


Selecting multiple columns with named columns and rows

Note that a `loc` slice includes both its start and end labels, unlike list slicing.
`iloc` slices, by contrast, exclude the end position, just like lists (why do you think the two behave differently?)

In [22]:
# Label slices include both endpoints: Poland and gdpPercap_1972 are part of the result.
print(data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972'])

             gdpPercap_1962  gdpPercap_1967  gdpPercap_1972
country                                                    
Italy           8243.582340    10022.401310    12269.273780
Montenegro      4649.593785     5907.850937     7778.414017
Netherlands    12790.849560    15363.251360    18794.745670
Norway         13450.401510    16361.876470    18965.055510
Poland          5338.752143     6557.152776     8006.506993


Result of slicing can be used in further operations.

In [23]:
# Name the Italy-to-Poland / 1962-to-1972 slice, then take the maximum of each column.
italy_to_poland = data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972']
print(italy_to_poland.max())

gdpPercap_1962    13450.40151
gdpPercap_1967    16361.87647
gdpPercap_1972    18965.05551
dtype: float64


**Use comparisons to select data based on value.**
* Comparison is applied element by element.
* Returns a similarly-shaped dataframe of True and False.

In [24]:
# Keep the slice in a variable so the following cells can reuse it.
subset = data.loc['Italy':'Poland', 'gdpPercap_1962':'gdpPercap_1972']

In [25]:
# Show the slice that the comparison cells below operate on.
display(subset)

Unnamed: 0_level_0,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Italy,8243.58234,10022.40131,12269.27378
Montenegro,4649.593785,5907.850937,7778.414017
Netherlands,12790.84956,15363.25136,18794.74567
Norway,13450.40151,16361.87647,18965.05551
Poland,5338.752143,6557.152776,8006.506993


In [26]:
# Element-wise comparison: yields a same-shaped frame of booleans.
display(subset > 10000)

Unnamed: 0_level_0,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Italy,False,True,True
Montenegro,False,False,False
Netherlands,True,True,True
Norway,True,True,True
Poland,False,False,False


That's useful, but suppose we want to operate on the actual numbers, not just know whether each one passes the comparison...

**Select values or NaN using a Boolean mask.**
* A frame full of Booleans is sometimes called a mask because of how it can be used.

In [27]:
# Build the boolean mask once and give it a name.
mask = subset > 10000
# Indexing with the mask keeps values where it is True and shows NaN everywhere else.
display(subset[mask])

Unnamed: 0_level_0,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Italy,,10022.40131,12269.27378
Montenegro,,,
Netherlands,12790.84956,15363.25136,18794.74567
Norway,13450.40151,16361.87647,18965.05551
Poland,,,


The mask version above is wordier, but may be easier to understand. The same selection can also be written in one line:

In [28]:
# Same selection with the comparison written inline instead of stored in a variable.
subset[subset > 10000]

Unnamed: 0_level_0,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Italy,,10022.40131,12269.27378
Montenegro,,,
Netherlands,12790.84956,15363.25136,18794.74567
Norway,13450.40151,16361.87647,18965.05551
Poland,,,


In [29]:
# NOTE(review): every value in subset is above 1000 (the minimum shown below is
# about 4650), so this mask keeps everything and count is 5 in every column.
# The cells above use 10000; confirm whether 1000 is intended here.
subset[subset > 1000].describe()

Unnamed: 0,gdpPercap_1962,gdpPercap_1967,gdpPercap_1972
count,5.0,5.0,5.0
mean,8894.635868,10842.506571,13162.799194
std,4093.410673,4855.106424,5517.298708
min,4649.593785,5907.850937,7778.414017
25%,5338.752143,6557.152776,8006.506993
50%,8243.58234,10022.40131,12269.27378
75%,12790.84956,15363.25136,18794.74567
max,13450.40151,16361.87647,18965.05551


Pandas **vectorizing methods and grouping operations** are features that provide users much flexibility to analyse their data.

For instance, let’s say we want a clearer view of how the European countries split according to their GDP.

* We may get a first impression by splitting the countries into two groups for each year surveyed: those with a GDP higher than the European average and those with a lower GDP.
* We then estimate a wealth score based on the historical (from 1962 to 2007) values, counting how many times a country has been in the lower-GDP or the higher-GDP group.

### Exercise

* GDP per capita for all countries in 1982.
* GDP per capita for Denmark for all years.
* GDP per capita for all countries for years after 1985.
* GDP per capita for each country in 2007 as a multiple of GDP per capita for that country in 1952.

In [30]:
# Single-bracket column selection: the 1982 values as a Series indexed by country.
data['gdpPercap_1982']

country
Albania                    3630.880722
Austria                   21597.083620
Belgium                   20979.845890
Bosnia and Herzegovina     4126.613157
Bulgaria                   8224.191647
Croatia                   13221.821840
Czech Republic            15377.228550
Denmark                   21688.040480
Finland                   18533.157610
France                    20293.897460
Germany                   22031.532740
Greece                    15268.420890
Hungary                   12545.990660
Iceland                   23269.607500
Ireland                   12618.321410
Italy                     16537.483500
Montenegro                11222.587620
Netherlands               21399.460460
Norway                    26298.635310
Poland                     8451.531004
Portugal                  11753.842910
Romania                    9605.314053
Serbia                    15181.092700
Slovak Republic           11348.545850
Slovenia                  17866.721750
Spain            

In [31]:
# All columns (every surveyed year) for the Denmark row.
data.loc['Denmark',:]

gdpPercap_1952     9692.385245
gdpPercap_1957    11099.659350
gdpPercap_1962    13583.313510
gdpPercap_1967    15937.211230
gdpPercap_1972    18866.207210
gdpPercap_1977    20422.901500
gdpPercap_1982    21688.040480
gdpPercap_1987    25116.175810
gdpPercap_1992    26406.739850
gdpPercap_1997    29804.345670
gdpPercap_2002    32166.500060
gdpPercap_2007    35278.418740
Name: Denmark, dtype: float64

In [36]:
# Notice there is no 'gdpPercap_1985' column. The open-ended label slice still
# works because the column labels are in sorted order: it starts at the first
# label that sorts after 'gdpPercap_1985' (gdpPercap_1987 in the output below).
data.loc[:,'gdpPercap_1985':]

Unnamed: 0_level_0,gdpPercap_1987,gdpPercap_1992,gdpPercap_1997,gdpPercap_2002,gdpPercap_2007
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albania,3738.932735,2497.437901,3193.054604,4604.211737,5937.029526
Austria,23687.82607,27042.01868,29095.92066,32417.60769,36126.4927
Belgium,22525.56308,25575.57069,27561.19663,30485.88375,33692.60508
Bosnia and Herzegovina,4314.114757,2546.781445,4766.355904,6018.975239,7446.298803
Bulgaria,8239.854824,6302.623438,5970.38876,7696.777725,10680.79282
Croatia,13822.58394,8447.794873,9875.604515,11628.38895,14619.22272
Czech Republic,16310.4434,14297.02122,16048.51424,17596.21022,22833.30851
Denmark,25116.17581,26406.73985,29804.34567,32166.50006,35278.41874
Finland,21141.01223,20647.16499,23723.9502,28204.59057,33207.0844
France,22066.44214,24703.79615,25889.78487,28926.03234,30470.0167


In [37]:
# 2007 GDP per capita as a multiple of the 1952 value, aligned country by country.
data['gdpPercap_2007'].div(data['gdpPercap_1952'])

country
Albania                   3.708196
Austria                   5.886596
Belgium                   4.038377
Bosnia and Herzegovina    7.648736
Bulgaria                  4.369697
Croatia                   4.686795
Czech Republic            3.320658
Denmark                   3.639808
Finland                   5.168805
France                    4.334402
Germany                   4.503060
Greece                    7.799725
Hungary                   3.421364
Iceland                   4.978308
Ireland                   7.806873
Italy                     5.793425
Montenegro                3.495221
Netherlands               4.115376
Norway                    4.889067
Poland                    3.819475
Portugal                  6.684325
Romania                   3.437140
Serbia                    2.732555
Slovak Republic           3.680703
Slovenia                  6.113405
Spain                     7.517163
Sweden                    3.970493
Switzerland               2.545529
Turkey      

In [38]:
# other operations
# corr

In [81]:
# Correlation between the Italy and France rows across the surveyed years.
italy = data.loc['Italy', :]
france = data.loc['France', :]
italy.corr(france)

0.9936652019196104

In [None]:
# Exercise - Find some countries with lower correlations, see what they are. 

In [44]:
#https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html

In [47]:
# Sometimes you want to see things side by side. 

51323684.26761847

In [50]:
# stacking, vertical and horizontal

In [108]:
# Take two row-wise slices of the Europe table to stack on top of each other below.
# .loc label slices include both endpoints.
df_IP = data.loc['Italy':'Poland', 'gdpPercap_1952':'gdpPercap_1962']
df_PS = data.loc['Portugal':'Spain', 'gdpPercap_1952':'gdpPercap_1962']

# The cells below refer to the slices by year range (df52_62, df62_72); define
# those names here so the notebook runs top to bottom on a fresh kernel.
df52_62 = df_IP
df62_72 = data.loc['Portugal':'Spain', 'gdpPercap_1962':'gdpPercap_1972']

In [109]:
# Inspect the Italy-to-Poland slice.
df_IP

Unnamed: 0_level_0,gdpPercap_1952,gdpPercap_1957,gdpPercap_1962
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Italy,4931.404155,6248.656232,8243.58234
Montenegro,2647.585601,3682.259903,4649.593785
Netherlands,8941.571858,11276.19344,12790.84956
Norway,10095.42172,11653.97304,13450.40151
Poland,4029.329699,4734.253019,5338.752143


In [97]:
# Transposed view: years as rows, countries as columns.
df62_72.T

country,Portugal,Romania,Serbia,Slovak Republic,Slovenia,Spain
gdpPercap_1962,4727.954889,4734.997586,6289.629157,7481.107598,7402.303395,5693.843879
gdpPercap_1967,6361.517993,6470.866545,7991.707066,8412.902397,9405.489397,7993.512294
gdpPercap_1972,9022.247417,8011.414402,10522.06749,9674.167626,12383.4862,10638.75131


In [104]:
# Stack the two transposed frames vertically; a country column that exists in
# only one of them is filled with NaN in the other frame's rows.
pd.concat([df52_62.T, df62_72.T], sort=False)

Unnamed: 0,Italy,Montenegro,Netherlands,Norway,Poland,Portugal,Romania,Serbia,Slovak Republic,Slovenia,Spain
gdpPercap_1952,4931.404155,2647.585601,8941.571858,10095.42172,4029.329699,,,,,,
gdpPercap_1957,6248.656232,3682.259903,11276.19344,11653.97304,4734.253019,,,,,,
gdpPercap_1962,8243.58234,4649.593785,12790.84956,13450.40151,5338.752143,,,,,,
gdpPercap_1962,,,,,,4727.954889,4734.997586,6289.629157,7481.107598,7402.303395,5693.843879
gdpPercap_1967,,,,,,6361.517993,6470.866545,7991.707066,8412.902397,9405.489397,7993.512294
gdpPercap_1972,,,,,,9022.247417,8011.414402,10522.06749,9674.167626,12383.4862,10638.75131


In [65]:
# Place two countries side by side: each single-row selection is a Series, and
# concatenating along axis=1 turns each one into a column of the result.
denmark = data.loc['Denmark', :]
portugal = data.loc['Portugal', :]
horizontalStack = pd.concat([denmark, portugal], axis=1)

In [66]:
# Show the combined frame: one column per country, one row per year.
horizontalStack

Unnamed: 0,Denmark,Portugal
gdpPercap_1952,9692.385245,3068.319867
gdpPercap_1957,11099.65935,3774.571743
gdpPercap_1962,13583.31351,4727.954889
gdpPercap_1967,15937.21123,6361.517993
gdpPercap_1972,18866.20721,9022.247417
gdpPercap_1977,20422.9015,10172.48572
gdpPercap_1982,21688.04048,11753.84291
gdpPercap_1987,25116.17581,13039.30876
gdpPercap_1992,26406.73985,16207.26663
gdpPercap_1997,29804.34567,17641.03156
