# Missing Values Overview

In [139]:
import pandas as pd
import numpy as np

# Read the raw CSV, treating the '..' placeholder as NaN.
WorldData = pd.read_csv("WorldMarketData.csv", na_values='..')
df_original = pd.DataFrame(WorldData)

# Render floats with thousands separators and two decimals in the tables below.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Four identifier columns followed by one column per year, 1994 through 2018.
id_columns = ['country', 'country_code', 'series', 'series_code']
year_columns = [str(year) for year in range(1994, 2019)]
df_original.columns = id_columns + year_columns

# Wide frame: drop the two code columns and every row from position 3800 onward.
df = df_original.drop(columns=['country_code', 'series_code'])
df = df.drop(index=df.index[3800:])

# Long frame: one row per (country, series, year) with the observation in 'value',
# without the 'World' and income-group rows.
excluded_rows = ['World', 'Low income', 'Middle income', 'High income']
df1 = (
    df.melt(id_vars=["country", 'series'], var_name="year", value_name="value")
    .set_index('country')
    .drop(index=excluded_rows)
    .reset_index()
)
df1.head()

Unnamed: 0,country,series,year,value
0,United States,Agricultural raw materials exports (% of merch...,1994,3.49
1,United States,Agricultural raw materials imports (% of merch...,1994,2.04
2,United States,Average time to clear exports through customs ...,1994,
3,United States,Bribery incidence (% of firms experiencing at ...,1994,
4,United States,Changes in inventories (current US$),1994,63785000000.0


In [140]:
# Copy the untidy (wide) WMDA frame `df` so the overview cells below
# can add and drop columns without modifying `df` itself.
df_overview = df.copy()

In [141]:
#Add a Total Missing Values Column
# Count the NaNs in each row with vectorised isna().sum(axis=1) instead of a
# per-row apply(lambda ...): same integer counts, without a Python call per row.
df_overview['Missing_Values'] = df_overview.isna().sum(axis=1)

In [142]:
# Keep only country, series and Missing_Values, index the rows by country,
# and list the rows with the most missing values first.
# (To hide complete rows, filter on Missing_Values != 0 before sorting.)
overview_columns = ['country', 'series', 'Missing_Values']
df_overview = (
    df_overview[overview_columns]
    .set_index('country')
    .sort_values(by="Missing_Values", ascending=False)
)
df_overview.head(10)

Unnamed: 0_level_0,series,Missing_Values
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Luxembourg,Average time to clear exports through customs ...,25
Italy,"Presence of peace keepers (number of troops, p...",25
Zimbabwe,Research and development expenditure (% of GDP),25
Zimbabwe,Researchers in R&D (per million people),25
Italy,Public private partnerships investment in ener...,25
Italy,Public private partnerships investment in ICT ...,25
Zimbabwe,Technicians in R&D (per million people),25
Belgium,Public private partnerships investment in ICT ...,25
Belgium,Public private partnerships investment in ener...,25
Belgium,"Presence of peace keepers (number of troops, p...",25


# Find the Years with the Fewest NaN Values (Fewest Missing Entries)

In [143]:
# Count the NaNs in every column of the wide frame and list the columns from
# fewest to most missing; the year columns near the top are the best covered.
missing_per_column = df.isna().sum()
missing_per_column.sort_values().to_frame()

Unnamed: 0,0
country,0
series,0
2014,546
2016,560
2010,966
2015,974
2012,981
2007,997
2017,1159
2013,1372


# Finding the Series with the Fewest NaN Values

In [144]:
# Total missing values per series.
# The series-indexed frame gets its own name so `df` is rebound only once, at the end.
df_by_series = df.set_index('series')

# One NaN count per row (labelled by its series), then the sum over each series.
series_counts = df_by_series.isna().sum(axis=1).sort_values(ascending=True).to_frame()
series_totals = series_counts.groupby(level='series').sum()
series_totals_sorted = series_totals.sort_values(by=0, ascending=True)

# Hand `df` back with 'series' as an ordinary column again.
df = df_by_series.reset_index()
series_totals_sorted.head(20)

Unnamed: 0_level_0,0
series,Unnamed: 1_level_1
GDP per capita (current US$),0
GDP (current US$),0
GNI (current US$),6
GDP per capita growth (annual %),7
GDP growth (annual %),7
"GNI, Atlas method (current US$)",14
"GNI per capita, Atlas method (current US$)",14
Trade (% of GDP),18
Imports of goods and services (% of GDP),18
Exports of goods and services (% of GDP),18


# Finding the Countries with the Fewest NaN Values


In [145]:
# Total missing values per country.
# The country-indexed frame gets its own name so `df` is rebound only once, at the end.
df_by_country = df.set_index('country')

# One NaN count per row (labelled by its country), then the sum over each country.
country_counts = df_by_country.isna().sum(axis=1).sort_values(ascending=True).to_frame()
country_totals = country_counts.groupby(level='country').sum()
country_totals_sorted = country_totals.sort_values(by=0, ascending=True)

# Hand `df` back with 'country' as an ordinary column again.
df = df_by_country.reset_index()
country_totals_sorted.head(20)

Unnamed: 0_level_0,0
country,Unnamed: 1_level_1
Argentina,612
Colombia,654
"Korea, Rep.",663
Germany,664
Singapore,668
United Kingdom,668
Costa Rica,669
Peru,669
Spain,670
Mexico,672


# Missing Values (by Years) Overview

In [146]:
# Country x series matrix of Missing_Values: one row per country, one column per series.
# pivot_table already returns the frame indexed by 'country', so no extra
# reset_index / set_index round-trip is needed.
df_mv = (
    df_overview
    .reset_index()
    .pivot_table(index=['country'], columns='series', values='Missing_Values')
)
df_mv.head()

series,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Average time to clear exports through customs (days),Bribery incidence (% of firms experiencing at least one bribe payment request),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)","Computer, communications and other services (% of commercial service imports)",Cost of business start-up procedures (% of GNI per capita),...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Argentina,0,0,22,22,0,0,0,0,0,9,...,2,2,5,11,9,20,20,20,20,0
Australia,0,0,25,25,0,0,0,0,0,9,...,3,3,18,11,9,20,20,20,20,0
Bahrain,5,5,25,25,1,1,1,1,1,13,...,6,6,24,13,13,20,20,20,20,0
Belgium,0,0,25,25,0,8,8,8,8,9,...,1,1,9,11,9,20,20,20,20,0
Brazil,1,1,24,24,0,0,0,0,0,19,...,1,1,18,19,19,20,20,20,20,0


I had previously tried:
>`df_mv.reindex(df_mv.mean().sort_values(ascending = False).index, axis=1 )`

However, that did not work because it dropped the country labels.

In [147]:
# Each cell is the Missing_Values count for one (country, series) pair.
# Order the rows (countries) and the columns (series) by their mean count, worst first,
# so the series that are missing for almost every country end up on the left;
# series missing more than 20 years for nearly all countries should not be included.
row_means = df_mv.mean(axis=1)
column_means = df_mv.mean()

rows_index = row_means.sort_values(ascending=False).index
col_index = column_means.sort_values(ascending=False).index

dfmv = df_mv.loc[rows_index, col_index]
dfmv.head()

series,"Presence of peace keepers (number of troops, police, and military observers in mandate)",Public private partnerships investment in ICT (current US$),Firms that spend on R&D (% of firms),Ease of doing business index (1=most business-friendly regulations),Bribery incidence (% of firms experiencing at least one bribe payment request),Average time to clear exports through customs (days),"Lead time to import, median case (days)","Lead time to export, median case (days)",Logistics performance index: Quality of trade and transport-related infrastructure (1=low to 5=high),Logistics performance index: Ability to track and trace consignments (1=low to 5=high),...,Exports of goods and services (% of GDP),Imports of goods and services (% of GDP),Trade (% of GDP),"GNI per capita, Atlas method (current US$)","GNI, Atlas method (current US$)",GDP growth (annual %),GDP per capita growth (annual %),GNI (current US$),GDP (current US$),GDP per capita (current US$)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Low income,25,25,24,25,24,24,20,20,20,20,...,4,4,4,0,0,0,0,0,0,0
United Arab Emirates,25,25,25,24,25,25,20,20,20,20,...,7,7,7,6,6,0,0,6,0,0
Qatar,25,25,25,24,25,25,20,20,20,20,...,1,1,1,8,8,7,7,0,0,0
"Congo, Dem. Rep.",16,24,24,24,22,22,23,22,21,21,...,0,0,0,0,0,0,0,0,0,0
Middle income,25,25,24,25,24,24,20,20,20,20,...,0,0,0,0,0,0,0,0,0,0


In [148]:
#Drop first 10 columns because they are missing roughly all the years of data for that series
# Rebuild dfmv from df_mv on every run and slice by position, instead of dropping
# in place: the in-place drop removed ten MORE columns each time the cell was
# re-run, whereas this cell gives the same frame no matter how often it runs.
N_SPARSEST_SERIES = 10
dfmv = df_mv.loc[rows_index, col_index].iloc[:, N_SPARSEST_SERIES:]
dfmv.head()

series,Logistics performance index: Ease of arranging competitively priced shipments (1=low to 5=high),Logistics performance index: Efficiency of customs clearance process (1=low to 5=high),Logistics performance index: Frequency with which shipments reach consignee within scheduled or expected time (1=low to 5=high),Logistics performance index: Overall (1=low to 5=high),Logistics performance index: Competence and quality of logistics services (1=low to 5=high),"Time to import, documentary compliance (hours)","Cost to export, border compliance (US$)","Cost to export, documentary compliance (US$)","Cost to import, documentary compliance (US$)","Cost to import, border compliance (US$)",...,Exports of goods and services (% of GDP),Imports of goods and services (% of GDP),Trade (% of GDP),"GNI per capita, Atlas method (current US$)","GNI, Atlas method (current US$)",GDP growth (annual %),GDP per capita growth (annual %),GNI (current US$),GDP (current US$),GDP per capita (current US$)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Low income,20,20,20,20,20,20,20,20,20,20,...,4,4,4,0,0,0,0,0,0,0
United Arab Emirates,20,20,20,20,20,20,20,20,20,20,...,7,7,7,6,6,0,0,6,0,0
Qatar,20,20,20,20,20,20,20,20,20,20,...,1,1,1,8,8,7,7,0,0,0
"Congo, Dem. Rep.",21,21,21,21,21,20,20,20,20,20,...,0,0,0,0,0,0,0,0,0,0
Middle income,20,20,20,20,20,20,20,20,20,20,...,0,0,0,0,0,0,0,0,0,0


# Stacked Data for Visibility and Formatting Flexibility

In [149]:
# Turn every series into its own column with pivot_table: one row per
# (country, year), with the 'value' observations as the cell contents.
df_stack = df1.set_index(['series', 'country'])
df_stack_all = df_stack

dfsa = (
    df_stack_all
    .reset_index()
    .pivot_table(index=['country', 'year'], columns='series', values='value')
    .reset_index()
)
dfsa.head()

series,country,year,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Average time to clear exports through customs (days),Bribery incidence (% of firms experiencing at least one bribe payment request),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)",...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
0,Argentina,1994,3.36,1.62,,,68602700.0,3180600000.0,6856100000.0,10.34,...,,,,,,,,,,18.13
1,Argentina,1995,4.32,2.03,,,1451852800.0,3664500000.0,6961200000.0,11.16,...,12.7,11.41,,,,,,,,19.77
2,Argentina,1996,3.81,1.91,,,4195708700.0,4239554642.49,7623604066.22,12.05,...,14.45,13.4,,,,,,,,21.51
3,Argentina,1997,2.84,1.55,,,4263816100.0,4430771471.36,8675186708.79,10.91,...,14.44,13.93,159.12,,,,,,,23.34
4,Argentina,1998,2.11,1.5,,,3096528400.0,4694769562.61,9047117536.8,10.48,...,16.73,15.67,169.89,,,,,,,23.35


Many series have too many NaN values, so the code below drops every series (column) that is missing more than 1,000 of its country-year observations.

In [150]:
# Keep a column only if it has at least len(dfsa) - 1000 non-NaN entries,
# i.e. drop every column with more than 1000 NaNs.
max_nan_per_column = 1000
min_non_nan = len(dfsa) - max_nan_per_column
dfsa = dfsa.dropna(axis=1, thresh=min_non_nan)
dfsa.head()

series,country,year,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)","Computer, communications and other services (% of commercial service imports)",Cost of business start-up procedures (% of GNI per capita),...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
0,Argentina,1994,3.36,1.62,68602700.0,3180600000.0,6856100000.0,10.34,15.34,,...,,,,,,,,,,18.13
1,Argentina,1995,4.32,2.03,1451852800.0,3664500000.0,6961200000.0,11.16,15.46,,...,12.7,11.41,,,,,,,,19.77
2,Argentina,1996,3.81,1.91,4195708700.0,4239554642.49,7623604066.22,12.05,15.61,,...,14.45,13.4,,,,,,,,21.51
3,Argentina,1997,2.84,1.55,4263816100.0,4430771471.36,8675186708.79,10.91,15.94,,...,14.44,13.93,159.12,,,,,,,23.34
4,Argentina,1998,2.11,1.5,3096528400.0,4694769562.61,9047117536.8,10.48,16.9,,...,16.73,15.67,169.89,,,,,,,23.35


In [151]:
# Allow up to 1000 displayed rows so the missing-value totals for all series are visible
pd.options.display.max_rows = 1000
series_totals_sorted

Unnamed: 0_level_0,0
series,Unnamed: 1_level_1
GDP per capita (current US$),0
GDP (current US$),0
GNI (current US$),6
GDP per capita growth (annual %),7
GDP growth (annual %),7
"GNI, Atlas method (current US$)",14
"GNI per capita, Atlas method (current US$)",14
Trade (% of GDP),18
Imports of goods and services (% of GDP),18
Exports of goods and services (% of GDP),18


In [152]:
# Restore the 'display.max_rows' option to its pandas default
pd.reset_option('display.max_rows')

Now that we have removed some of the series that were missing a critical amount of data, we will be even more selective.

## 2014 Isolation

MCMC and Linear Interpolation

In [170]:
# 2014 is the year with the fewest missing values: keep only its rows,
# drop the now-constant 'year' column, and index the result by country.
# 'year' holds strings here, hence the comparison with '2014'.
dfsa14 = (
    dfsa.loc[dfsa['year'] == '2014']
    .drop(columns=['year'])
    .set_index('country')
)
dfsa14.head()

series,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)","Computer, communications and other services (% of commercial service imports)",Cost of business start-up procedures (% of GNI per capita),"Cost to export, border compliance (US$)","Cost to export, documentary compliance (US$)",...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Argentina,1.07,0.89,6752296832.26,13189837804.76,17628881981.13,43.11,37.89,17.7,150.0,60.0,...,12.58,11.5,318.81,342.5,24.5,21.0,30.0,60.0,336.0,28.41
Australia,2.58,0.68,-2035251996.7,55610991575.62,66749264533.55,22.6,28.32,0.7,749.0,264.0,...,2.84,2.69,,112.0,2.5,36.0,7.0,37.0,3.0,42.51
Bahrain,0.03,1.04,414893617.02,8571276595.74,6764414893.62,25.26,15.41,0.8,47.0,100.0,...,3.75,3.99,17.21,174.0,9.3,79.0,24.0,54.0,84.0,175.56
Belgium,1.39,1.27,1714139993.32,123026103755.97,117578947480.15,59.37,52.46,5.2,0.0,0.0,...,2.58,2.81,,212.0,4.0,0.0,1.0,0.0,1.0,164.7
Brazil,4.01,0.99,16587335316.62,39046701888.6,85915546867.55,62.78,50.47,5.5,862.0,226.4,...,13.74,9.92,,434.0,83.6,61.0,30.0,51.1,146.1,24.69


In [171]:
# The non-null counts printed by info() show that many series still have
# several NaN values across the countries,
# and this is for the year 2014 alone.
dfsa14.info()

<class 'pandas.core.frame.DataFrame'>
Index: 46 entries, Argentina to Zimbabwe
Data columns (total 70 columns):
Agricultural raw materials exports (% of merchandise exports)                                                                      45 non-null float64
Agricultural raw materials imports (% of merchandise imports)                                                                      45 non-null float64
Changes in inventories (current US$)                                                                                               43 non-null float64
Commercial service exports (current US$)                                                                                           45 non-null float64
Commercial service imports (current US$)                                                                                           45 non-null float64
Computer, communications and other services (% of commercial service exports)                                                      45

> Locate the columns that are missing a certain percentage of the data and consider whether, and how, they should be imputed.

In [172]:
# Percentage of missing data
# Move 'country' from the index into a column only while it is still the index.
# An unconditional reset_index() added a spurious 'index' column every time this
# cell was re-run, and that column then showed up in the percentages below.
if dfsa14.index.name == 'country':
    dfsa14 = dfsa14.reset_index()

# Share of NaN entries per column, expressed in percent of the number of rows.
dfsa14_missing = dfsa14.isnull().sum() * 100 / len(dfsa14)
dfsa14_missing.head(50)

series
country                                                                                                                            0.00
Agricultural raw materials exports (% of merchandise exports)                                                                      2.17
Agricultural raw materials imports (% of merchandise imports)                                                                      2.17
Changes in inventories (current US$)                                                                                               6.52
Commercial service exports (current US$)                                                                                           2.17
Commercial service imports (current US$)                                                                                           2.17
Computer, communications and other services (% of commercial service exports)                                                      2.17
Computer, communications and other servic

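Goal: locate the columns ('series') in which the number of NaN values is greater than or equal to 25% of that column's entries.

Pseudo-code (not runnable):

    dfsa14.loc[dfsa14['series'] == nan >= 25% ]

    if count(nan) in 'series' is >= (.25 * (count(x)))
        return loc[x]

A runnable pandas equivalent is `dfsa14.loc[:, dfsa14.isnull().mean() >= 0.25]`.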

In [159]:
# Preview the first rows of dfsa14
dfsa14.head()

series,country,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)","Computer, communications and other services (% of commercial service imports)",Cost of business start-up procedures (% of GNI per capita),"Cost to export, border compliance (US$)",...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
0,Argentina,1.07,0.89,6752296832.26,13189837804.76,17628881981.13,43.11,37.89,17.7,150.0,...,12.58,11.5,318.81,342.5,24.5,21.0,30.0,60.0,336.0,28.41
1,Australia,2.58,0.68,-2035251996.7,55610991575.62,66749264533.55,22.6,28.32,0.7,749.0,...,2.84,2.69,,112.0,2.5,36.0,7.0,37.0,3.0,42.51
2,Bahrain,0.03,1.04,414893617.02,8571276595.74,6764414893.62,25.26,15.41,0.8,47.0,...,3.75,3.99,17.21,174.0,9.3,79.0,24.0,54.0,84.0,175.56
3,Belgium,1.39,1.27,1714139993.32,123026103755.97,117578947480.15,59.37,52.46,5.2,0.0,...,2.58,2.81,,212.0,4.0,0.0,1.0,0.0,1.0,164.7
4,Brazil,4.01,0.99,16587335316.62,39046701888.6,85915546867.55,62.78,50.47,5.5,862.0,...,13.74,9.92,,434.0,83.6,61.0,30.0,51.1,146.1,24.69


In [160]:
# Number of NaN entries in each row of dfsa14, summed across the columns
dfsa14.isna().sum(axis='columns')

0      2
1      5
2      5
3      3
4      1
5      4
6      4
7      7
8      1
9     14
10     1
11     6
12     6
13     2
14     2
15     2
16     7
17     2
18     5
19     4
20     7
21     3
22     3
23     3
24     2
25     2
26     3
27     2
28    10
29     3
30     4
31     3
32     5
33     6
34     2
35     8
36     7
37     0
38     6
39     2
40     2
41     5
42     9
43     2
44     5
45     9
dtype: int64

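My initial thought process was the following:
> `dfsa14.dropna(axis= 1, thresh= (.75 * dfsa14.apply(lambda x: x.count(), axis=0)), inplace = True)`

But the lambda call is neither necessary nor computationally efficient here: `thresh` expects a single integer (the minimum number of non-NaN values a column must have to be kept), so `thresh=int(.75 * len(dfsa14))` is sufficient.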

In [173]:
# Preview the first rows of dfsa14
dfsa14.head()

series,country,Agricultural raw materials exports (% of merchandise exports),Agricultural raw materials imports (% of merchandise imports),Changes in inventories (current US$),Commercial service exports (current US$),Commercial service imports (current US$),"Computer, communications and other services (% of commercial service exports)","Computer, communications and other services (% of commercial service imports)",Cost of business start-up procedures (% of GNI per capita),"Cost to export, border compliance (US$)",...,"Tariff rate, applied, simple mean, all products (%)","Tariff rate, most favored nation, weighted mean, all products (%)",Technicians in R&D (per million people),Time required to build a warehouse (days),Time required to start a business (days),"Time to export, border compliance (hours)","Time to export, documentary compliance (hours)","Time to import, border compliance (hours)","Time to import, documentary compliance (hours)",Trade (% of GDP)
0,Argentina,1.07,0.89,6752296832.26,13189837804.76,17628881981.13,43.11,37.89,17.7,150.0,...,12.58,11.5,318.81,342.5,24.5,21.0,30.0,60.0,336.0,28.41
1,Australia,2.58,0.68,-2035251996.7,55610991575.62,66749264533.55,22.6,28.32,0.7,749.0,...,2.84,2.69,,112.0,2.5,36.0,7.0,37.0,3.0,42.51
2,Bahrain,0.03,1.04,414893617.02,8571276595.74,6764414893.62,25.26,15.41,0.8,47.0,...,3.75,3.99,17.21,174.0,9.3,79.0,24.0,54.0,84.0,175.56
3,Belgium,1.39,1.27,1714139993.32,123026103755.97,117578947480.15,59.37,52.46,5.2,0.0,...,2.58,2.81,,212.0,4.0,0.0,1.0,0.0,1.0,164.7
4,Brazil,4.01,0.99,16587335316.62,39046701888.6,85915546867.55,62.78,50.47,5.5,862.0,...,13.74,9.92,,434.0,83.6,61.0,30.0,51.1,146.1,24.69
