# Convert ISO3 codes to country names - Unemployment rates in the EU

## Import libraries, install and import `country_converter`

Useful links:
https://notebook.community/konstantinstadler/country_converter/doc/country_converter_examples

In [1]:
import pandas as pd
import numpy as np
import country_converter as coco

In [2]:
# load the unemployment CSV into a data frame
csv_path = "Unemployment_EU.csv"
unemp_df = pd.read_csv(csv_path)

In [3]:
# preview the first 10 rows of the loaded data
unemp_df.head(10)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes
0,AUT,HUR,TOT,PC_LF,M,2002-11,4.4,
1,AUT,HUR,TOT,PC_LF,M,2002-12,4.5,
2,AUT,HUR,TOT,PC_LF,M,2003-01,4.5,
3,AUT,HUR,TOT,PC_LF,M,2003-02,4.5,
4,AUT,HUR,TOT,PC_LF,M,2003-03,4.6,
5,AUT,HUR,TOT,PC_LF,M,2003-04,4.7,
6,AUT,HUR,TOT,PC_LF,M,2003-05,4.8,
7,AUT,HUR,TOT,PC_LF,M,2003-06,4.9,
8,AUT,HUR,TOT,PC_LF,M,2003-07,4.8,
9,AUT,HUR,TOT,PC_LF,M,2003-08,4.8,


In [4]:
# summarize the columns: dtypes and non-null counts
unemp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15972 entries, 0 to 15971
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   LOCATION    15972 non-null  object 
 1   INDICATOR   15972 non-null  object 
 2   SUBJECT     15972 non-null  object 
 3   MEASURE     15972 non-null  object 
 4   FREQUENCY   15972 non-null  object 
 5   TIME        15972 non-null  object 
 6   Value       15972 non-null  float64
 7   Flag Codes  36 non-null     object 
dtypes: float64(1), object(7)
memory usage: 998.4+ KB


## Convert ISO3 to Country names

In [5]:
# create one CountryConverter instance to reuse for the code-to-name lookup
converter = coco.CountryConverter()

In [6]:
# the LOCATION column holds the ISO3 country codes to be converted
iso3_codes = unemp_df["LOCATION"]

In [7]:
# inspect the extracted codes
iso3_codes

0        AUT
1        AUT
2        AUT
3        AUT
4        AUT
        ... 
15967    SVN
15968    SVN
15969    SVN
15970    SVN
15971    SVN
Name: LOCATION, Length: 15972, dtype: object

In [8]:
# get country names and add as new column
# The same codes repeat across many rows, so each distinct code is converted
# once and the result is mapped back onto the column, instead of running the
# converter on every row. Passing a single code returns a single name.
code_to_name = {
    code: converter.convert(names=code, src="ISO3", to="name_short")
    for code in iso3_codes.unique()
}
unemp_df["Country_name"] = iso3_codes.map(code_to_name)

In [9]:
# check that the Country_name column was added
unemp_df.head(10)

Unnamed: 0,LOCATION,INDICATOR,SUBJECT,MEASURE,FREQUENCY,TIME,Value,Flag Codes,Country_name
0,AUT,HUR,TOT,PC_LF,M,2002-11,4.4,,Austria
1,AUT,HUR,TOT,PC_LF,M,2002-12,4.5,,Austria
2,AUT,HUR,TOT,PC_LF,M,2003-01,4.5,,Austria
3,AUT,HUR,TOT,PC_LF,M,2003-02,4.5,,Austria
4,AUT,HUR,TOT,PC_LF,M,2003-03,4.6,,Austria
5,AUT,HUR,TOT,PC_LF,M,2003-04,4.7,,Austria
6,AUT,HUR,TOT,PC_LF,M,2003-05,4.8,,Austria
7,AUT,HUR,TOT,PC_LF,M,2003-06,4.9,,Austria
8,AUT,HUR,TOT,PC_LF,M,2003-07,4.8,,Austria
9,AUT,HUR,TOT,PC_LF,M,2003-08,4.8,,Austria


## Data transformation and new data frame

### Select columns to create new data frame

In [10]:
# create new data frame with selected columns
# .copy() makes unemp_final an independent frame, so the column assignments
# made later act on its own data rather than on a selection of unemp_df
# (the situation pandas reports with SettingWithCopyWarning).
unemp_final = unemp_df[["Country_name", "SUBJECT", "TIME", "Value"]].copy()

In [11]:
# preview the first 10 rows of the reduced data frame
unemp_final.head(10)

Unnamed: 0,Country_name,SUBJECT,TIME,Value
0,Austria,TOT,2002-11,4.4
1,Austria,TOT,2002-12,4.5
2,Austria,TOT,2003-01,4.5
3,Austria,TOT,2003-02,4.5
4,Austria,TOT,2003-03,4.6
5,Austria,TOT,2003-04,4.7
6,Austria,TOT,2003-05,4.8
7,Austria,TOT,2003-06,4.9
8,Austria,TOT,2003-07,4.8
9,Austria,TOT,2003-08,4.8


### Rename columns

In [12]:
# give the columns reader-friendly names (old name -> new name)
column_labels = {
    "Country_name": "Country",
    "SUBJECT": "Subject",
    "TIME": "Date",
    "Value": "Percentage",
}
unemp_final = unemp_final.rename(columns=column_labels)

In [13]:
# check the renamed columns
unemp_final.head()

Unnamed: 0,Country,Subject,Date,Percentage
0,Austria,TOT,2002-11,4.4
1,Austria,TOT,2002-12,4.5
2,Austria,TOT,2003-01,4.5
3,Austria,TOT,2003-02,4.5
4,Austria,TOT,2003-03,4.6


### Check and replace values in Subject column

In [14]:
# list the distinct values in the Subject column before replacing them
unemp_final["Subject"].unique()

array(['TOT', 'MEN', 'WOMEN'], dtype=object)

In [15]:
# swap the subject codes for readable labels (code -> label)
subject_labels = {"TOT": "Total", "MEN": "Men", "WOMEN": "Women"}
unemp_final["Subject"] = unemp_final["Subject"].replace(subject_labels)

In [16]:
# confirm the Subject values after the replacement
unemp_final["Subject"].unique()

array(['Total', 'Men', 'Women'], dtype=object)

### Convert to datetime

In [17]:
# parse the Date strings into pandas datetime values
date_strings = unemp_final["Date"]
unemp_final["Date"] = pd.to_datetime(date_strings)

In [18]:
# check the dtypes after the datetime conversion
unemp_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15972 entries, 0 to 15971
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Country     15972 non-null  object        
 1   Subject     15972 non-null  object        
 2   Date        15972 non-null  datetime64[ns]
 3   Percentage  15972 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 499.2+ KB


In [19]:
# preview the first 10 rows of the transformed data frame
unemp_final.head(10)

Unnamed: 0,Country,Subject,Date,Percentage
0,Austria,Total,2002-11-01,4.4
1,Austria,Total,2002-12-01,4.5
2,Austria,Total,2003-01-01,4.5
3,Austria,Total,2003-02-01,4.5
4,Austria,Total,2003-03-01,4.6
5,Austria,Total,2003-04-01,4.7
6,Austria,Total,2003-05-01,4.8
7,Austria,Total,2003-06-01,4.9
8,Austria,Total,2003-07-01,4.8
9,Austria,Total,2003-08-01,4.8


## Write final data frame to csv

In [20]:
#unemp_final.to_csv("Unemployment_EU_final.csv", index = False)