In [76]:
#imports libraries and file
import pandas as pd
#loads the raw dataset from a csv file given by relative path
df = pd.read_csv(filepath_or_buffer='countries_of_the_world.csv')
#raises the pandas row display limit to 5000
pd.options.display.max_rows = 5000

Data from https://www.kaggle.com/fernandol/countries-of-the-world

## Assess

#### Column changes
- Rename columns whose names contain spaces or capital letters
- GDP column: is it per capita? (the raw header is `GDP ($ per capita)`)
- Convert the columns below to a numeric datatype once their values are fixed
- Columns that use a comma instead of a decimal point: 
  - Pop. Density (per sq. mi.)
  - Coastline (coast/area ratio)
  - Net migration
  - Infant mortality (per 1000 births)
  - Literacy (%)
  - Phones (per 1000)
  - Arable (%)
  - Crops (%)
  - Other (%)
  - Birthrate
  - Deathrate
  - Agriculture
  - Industry
  - Service

#### Other Changes
- Double entry for Dem. Repub. of the Congo with different values

## Clean
#### Deletes double entry for Democratic Republic of the Congo

In [77]:
#removes the row labelled 46, identified during assessment as the extra Congo entry
#NOTE(review): the label is hard-coded and depends on the csv's row order -- confirm
#it still points at the intended row, and that the two Congo rows really are a
#duplicate rather than two different countries -- TODO confirm
df = df.drop(index=46)

### Column Changes
#### Change Column Names

In [78]:
#maps every raw csv header to its snake_case replacement
#note: the 'sqm' suffix comes from the '(sq. mi.)' headers, i.e. square miles
column_renames = {
    'Country': 'country',
    'Region': 'region',
    'Population': 'population',
    'Area (sq. mi.)': 'area_sqm',
    'Pop. Density (per sq. mi.)': 'pop_density_sqm',
    'Coastline (coast/area ratio)': 'coastline_ratio',
    'Net migration': 'net_migration',
    'Infant mortality (per 1000 births)': 'infant_mortality_per_thousand',
    'GDP ($ per capita)': 'gdp',
    'Literacy (%)': 'literacy',
    'Phones (per 1000)': 'phones_per_thousand',
    'Arable (%)': 'arable',
    'Crops (%)': 'crops',
    'Other (%)': 'other',
    'Climate': 'climate',
    'Birthrate': 'birthrate',
    'Deathrate': 'deathrate',
    'Agriculture': 'agriculture',
    'Industry': 'industry',
    'Service': 'service',
}
#keeps the two name tuples available, in the same order as the mapping
old_names = tuple(column_renames)
new_names = tuple(column_renames.values())
#renames the columns on the existing frame
df.rename(columns=column_renames, inplace=True)

#### Change Commas and Datatypes

In [79]:
#changes decimal commas to periods and converts the columns to float
#one loop over the column list replaces fourteen copy-pasted statements
comma_decimal_columns = [
    'pop_density_sqm',
    'coastline_ratio',
    'net_migration',
    'infant_mortality_per_thousand',
    'literacy',
    'phones_per_thousand',
    'arable',
    'crops',
    'other',
    'birthrate',
    'deathrate',
    'agriculture',
    'industry',
    'service',
]
for column in comma_decimal_columns:
    values = df[column]
    #only text columns need the comma fix; a column that is already numeric
    #(e.g. when this cell is re-run) has no .str accessor and is just cast
    if not pd.api.types.is_numeric_dtype(values):
        #regex=False treats ',' as a literal character on every pandas version
        values = values.str.replace(',', '.', regex=False)
    df[column] = values.astype(float)

#### Creates new column for net population change

In [80]:
#adds net population change: birthrate minus deathrate (migration is not included)
df['net_pop_change'] = df['birthrate'].sub(df['deathrate'])

In [88]:
#mean of the population column over all rows of df
df['population'].mean()

28851071.845132742

## Analysis
Do countries with the highest infant mortality also have the highest birthrates?

In [81]:
#mean birthrate for each country label
df['birthrate'].groupby(df['country']).mean()

country
Afghanistan                          46.60
Albania                              15.11
Algeria                              17.14
American Samoa                       22.46
Andorra                               8.71
Angola                               45.11
Anguilla                             14.17
Antigua & Barbuda                    16.93
Argentina                            16.73
Armenia                              12.07
Aruba                                11.03
Australia                            12.14
Austria                               8.74
Azerbaijan                           20.74
Bahamas, The                         17.57
Bahrain                              17.80
Bangladesh                           29.80
Barbados                             12.71
Belarus                              11.16
Belgium                              10.38
Belize                               28.84
Benin                                38.85
Bermuda                              11.40
Bhu

In [82]:
#mean infant mortality (per thousand births) for each country label
df['infant_mortality_per_thousand'].groupby(df['country']).mean()

country
Afghanistan                          163.07
Albania                               21.52
Algeria                               31.00
American Samoa                         9.27
Andorra                                4.05
Angola                               191.19
Anguilla                              21.03
Antigua & Barbuda                     19.46
Argentina                             15.18
Armenia                               23.28
Aruba                                  5.89
Australia                              4.69
Austria                                4.66
Azerbaijan                            81.74
Bahamas, The                          25.21
Bahrain                               17.27
Bangladesh                            62.60
Barbados                              12.50
Belarus                               13.37
Belgium                                4.68
Belize                                25.69
Benin                                 85.00
Bermuda                 

In [83]:
#writes the cleaned data to csv (the row index is written as the first column)
df.to_csv(path_or_buf='country_data.csv')

### References

- https://www.kaggle.com/fernandol/countries-of-the-world
- https://en.wikipedia.org/wiki/Birth_rate
- https://stackoverflow.com/questions/38101009/changing-multiple-column-names-but-not-all-of-them-panda-python
- https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.groupby.html
- https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sort_values.html
- https://erikrood.com/Python_References/dropping_rows_cols_pandas.html
- https://www.google.com/publicdata/explore?ds=d5bncppjof8f9_&met_y=sp_pop_totl&hl=en&dl=en#!ctype=l&strail=false&bcs=d&nselm=h&met_y=sp_pop_totl&scale_y=lin&ind_y=false&rdim=region&idim=country:COG&ifdim=region&tdim=true&hl=en_US&dl=en&ind=false
- https://countryeconomy.com/demography/literacy-rate/democratic-republic-congo
- https://en.wikipedia.org/wiki/Democratic_Republic_of_the_Congo