In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

In [4]:
df = pd.read_excel('./datasets/WDIEXCEL.xlsx')

In [3]:
df.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,17.40141,17.911234,18.463874,18.924037,19.437054,20.026254,20.647969,21.165877,21.863139,
1,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,,,,,...,6.728819,7.005877,7.308571,7.547226,7.875917,8.243018,8.545483,8.906711,9.26132,
2,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,,,,,...,38.080931,38.422282,38.722108,38.993157,39.337872,39.695279,40.137847,40.522209,41.011132,
3,Africa Eastern and Southern,AFE,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,31.860474,33.9038,38.854624,40.199898,43.017148,44.381259,46.264875,48.100862,48.711995,
4,Africa Eastern and Southern,AFE,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,,,,,...,17.619475,16.500171,24.605861,25.396929,27.037528,29.137914,31.001049,32.77791,33.747907,


In [5]:
def final_categorization(indicator_name):
    categories = {
        'Clean Fuels': ['clean fuels', 'cooking'],
        'Electricity': ['electricity'],
        'Demographics': ['population', 'demographic', 'birth', 'death', 'age', 'fertility', 'life expectancy'],
        'Education': ['education', 'school', 'literacy', 'student', 'teacher', 'enrollment', 'tertiary'],
        'Employment': ['employment', 'job', 'work', 'labor', 'unemployment', 'workforce'],
        'Income': ['income', 'earnings', 'wage', 'salary', 'gni', 'gross national income'],
        'GDP': ['gdp', 'gross domestic product', 'economic growth'],
        'Health': ['health', 'medical', 'hospital', 'disease', 'mortality', 'immunization', 'nutrition', 'tuberculosis'],
        'Water': ['water', 'sanitation', 'hygiene'],
        'Energy': ['energy', 'power', 'fuel', 'renewable'],
        'Transport': ['transport', 'vehicle', 'road', 'traffic', 'logistics'],
        'Trade': ['trade', 'export', 'import', 'commerce', 'tariff'],
        'Poverty': ['poverty', 'poor', 'inequality', 'vulnerable'],
        'Urban Development': ['urban', 'city', 'infrastructure', 'slum'],
        'Social Development': ['social', 'community', 'welfare', 'gender', 'equality'],
        'Environment': ['environment', 'emission', 'pollution', 'climate', 'biodiversity', 'forest', 'co2'],
        'Agriculture': ['agriculture', 'farming', 'crop', 'livestock', 'land'],
        'Technology': ['technology', 'internet', 'digital', 'innovation', 'ict'],
        'Finance': ['finance', 'bank', 'investment', 'credit', 'debt', 'tax', 'interest rate', 'financial flows', 'assets', 'liabilities'],
        'Business': ['business', 'enterprise', 'company', 'firm', 'startup'],
        'Government': ['government', 'public sector', 'administration', 'policy'],
        'Research': ['research', 'development', 'r&d', 'scientific'],
        'International Relations': ['bilateral', 'multilateral', 'aid', 'donor'],
        'Economic Sectors': ['industry', 'manufacturing', 'services', 'value added', 'production'],
        'Human Development': ['human capital', 'welfare', 'social protection'],
        'Governance': ['regulatory', 'voice and accountability', 'rule of law', 'corruption'],
        'Time Metrics': ['time required', 'time to'],
        'Natural Resources': ['fisheries', 'reserves', 'gold'],
        'Price Indices': ['price index', 'inflation'],
        'Gender Issues': ['women', 'gender'],
        'Other': []  # Default category
    }
    
    indicator_name_lower = indicator_name.lower()
    for category, keywords in categories.items():
        if any(keyword in indicator_name_lower for keyword in keywords):
            return category
    return 'Other'

In [4]:
df[df['Country Code'] == 'GBR' ].head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
377952,United Kingdom,GBR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,
377953,United Kingdom,GBR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,
377954,United Kingdom,GBR,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,
377955,United Kingdom,GBR,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,
377956,United Kingdom,GBR,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,,,,,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,


In [5]:
df_uk = df[df['Country Code'] == 'GBR' ]

In [8]:
df_uk.columns

Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
       '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',
       '2023'],
      dtype='object')

In [10]:
df_uk_short = df_uk[['Country Code','Indicator Name']]

In [11]:
df_uk_short.to_csv('df_uk_short.csv')

In [18]:
df_uk_short['Category'] = df_uk_short['Indicator Name'].apply(final_categorization)

# Check the updated category counts
df_uk_short['Category'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_uk_short['Category'] = df_uk_short['Indicator Name'].apply(final_categorization)


Category
Demographics               471
Other                      178
Trade                      108
Education                   95
Income                      90
GDP                         82
Finance                     72
Employment                  47
Economic Sectors            37
Environment                 29
Transport                   29
International Relations     29
Energy                      28
Agriculture                 24
Governance                  24
Health                      21
Business                    20
Government                  17
Electricity                 14
Water                       11
Gender Issues               10
Human Development            9
Research                     7
Technology                   6
Time Metrics                 6
Social Development           6
Poverty                      5
Urban Development            4
Price Indices                3
Natural Resources            3
Clean Fuels                  3
Name: count, dtype: int64

In [20]:
df_uk_short_emp = df_uk_short[df_uk_short['Category'] == 'Employment'] 

In [22]:
df_uk_short_emp.head()

Unnamed: 0,Country Code,Indicator Name,Category
377968,GBR,Adequacy of social protection and labor progra...,Employment
377970,GBR,Adequacy of unemployment benefits and ALMP (% ...,Employment
378026,GBR,"Agriculture, forestry, and fishing, value adde...",Employment
378045,GBR,Armed forces personnel (% of total labor force),Employment
378065,GBR,Benefit incidence of social protection and lab...,Employment


In [23]:
df_uk_short_emp.to_excel('df_uk_short_emp.xlsx') 

# Tasks
- Delete "National estimates" row
- Delete unwanted rows
- Merge UK_emp with original dataset
- Del "Category" col




In [29]:
# Deleting rows that contain "national estimates" data
df_uk_short_emp = df_uk_short_emp[~(df_uk_short_emp['Indicator Name'].str.contains("national"))]

In [34]:
df_uk_short_emp.head()

Unnamed: 0,Country Code,Indicator Name,Category
377968,GBR,Adequacy of social protection and labor progra...,Employment
377970,GBR,Adequacy of unemployment benefits and ALMP (% ...,Employment
378026,GBR,"Agriculture, forestry, and fishing, value adde...",Employment
378045,GBR,Armed forces personnel (% of total labor force),Employment
378065,GBR,Benefit incidence of social protection and lab...,Employment


In [39]:
rows_to_delete = [
    377968,
    377970,
    378065,
    378067,
    378164,
    378183,
    378184,
    378185,
    379127,
    379128,
    378026,
    378398,
    378586,
    378647,
    379239
]

In [43]:
df_uk_short_emp.drop(rows_to_delete).reset_index(drop=True)

KeyError: '[377968, 377970, 378065, 378067, 378164, 378183, 378184, 378185, 379127, 379128, 378026, 378398, 378586, 378647, 379239] not found in axis'

In [53]:
df_uk_short_emp.reset_index(inplace=True)

In [55]:
df_uk_short_emp.drop(columns=['level_0','index'],inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_uk_short_emp.drop(columns=['level_0','index'],inplace=True)


In [56]:
df_uk_short_emp.head()

Unnamed: 0,Country Code,Indicator Name,Category
0,GBR,Armed forces personnel (% of total labor force),Employment
1,GBR,"Employers, female (% of female employment) (mo...",Employment
2,GBR,"Employers, male (% of male employment) (modele...",Employment
3,GBR,"Employers, total (% of total employment) (mode...",Employment
4,GBR,Employment in agriculture (% of total employme...,Employment


In [60]:
df_uk_short_emp.to_excel('df_uk_short_emp_v2.xlsx')

In [61]:
df_uk_short_emp.drop(columns=['Category'],inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_uk_short_emp.drop(columns=['Category'],inplace=True)


In [62]:
merged_df = df_uk_short_emp.merge(df, on=['Indicator Name', 'Country Code'], how='left')

In [63]:
merged_df.head()

Unnamed: 0,Country Code,Indicator Name,Country Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,Category
0,GBR,Armed forces personnel (% of total labor force),United Kingdom,MS.MIL.TOTL.TF.ZS,,,,,,,...,0.453232,0.442697,0.433652,0.431347,0.429445,0.441771,,,,Employment
1,GBR,"Employers, female (% of female employment) (mo...",United Kingdom,SL.EMP.MPYR.FE.ZS,,,,,,,...,1.343361,1.396491,1.319773,1.24383,1.271591,1.296677,1.311026,1.285092,,Employment
2,GBR,"Employers, male (% of male employment) (modele...",United Kingdom,SL.EMP.MPYR.MA.ZS,,,,,,,...,3.419577,3.342514,3.054329,2.782002,3.059169,3.002847,2.969704,2.972063,,Employment
3,GBR,"Employers, total (% of total employment) (mode...",United Kingdom,SL.EMP.MPYR.ZS,,,,,,,...,2.44931,2.432246,2.239167,2.05893,2.213687,2.187893,2.173875,2.166912,,Employment
4,GBR,Employment in agriculture (% of total employme...,United Kingdom,SL.AGR.EMPL.ZS,,,,,,,...,1.136193,1.121066,1.163076,1.070067,1.044325,1.034035,1.032581,1.004949,,Employment


In [65]:
merged_df.drop(columns=['Category'],inplace=True)

In [66]:
merged_df.to_excel('df_uk_short_emp_v3.xlsx')

# Data cleaning


In [79]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 37 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country Code    28 non-null     object 
 1   Indicator Name  28 non-null     object 
 2   Country Name    28 non-null     object 
 3   Indicator Code  28 non-null     object 
 4   1991            28 non-null     float64
 5   1992            28 non-null     float64
 6   1993            28 non-null     float64
 7   1994            28 non-null     float64
 8   1995            28 non-null     float64
 9   1996            28 non-null     float64
 10  1997            28 non-null     float64
 11  1998            28 non-null     float64
 12  1999            28 non-null     float64
 13  2000            28 non-null     float64
 14  2001            28 non-null     float64
 15  2002            28 non-null     float64
 16  2003            28 non-null     float64
 17  2004            28 non-null     float

In [72]:
merged_df.dropna(axis=1,how='all',inplace=True)

In [78]:
merged_df.drop(columns=list(map(str,range(1983,1991))), inplace=True)

In [80]:
merged_df.isnull().sum()

Country Code       0
Indicator Name     0
Country Name       0
Indicator Code     0
1991               0
1992               0
1993               0
1994               0
1995               0
1996               0
1997               0
1998               0
1999               0
2000               0
2001               0
2002               0
2003               0
2004               0
2005               0
2006               0
2007               0
2008               0
2009               0
2010               0
2011               0
2012               0
2013               0
2014               0
2015               0
2016               0
2017               0
2018               0
2019               0
2020               0
2021               1
2022               1
2023              19
dtype: int64

In [83]:
merged_df.drop(columns=['Country Name','Country Code'], inplace=True)

In [85]:
merged_df.to_excel('clean/clean_uk_emp.xlsx', index=False)

In [69]:
def create_dataset(cd):
    
    data = df.loc[df['Country Code'] == cd ]
    
    
    data['Category'] = data['Indicator Name'].apply(final_categorization)
    
    data = data.loc[data['Category'] == "Employment" ]
    
    # Deleting data from 1983 - 1991
    data.drop(columns=list(map(str,range(1960,1991))), inplace=True)
    data.drop(columns=['Country Name', "Indicator Code",'Category',"Country Code"], inplace=True)
    
    # Deleting rows that contain "national estimates" data
    data = data[~(data['Indicator Name'].str.contains("national"))]
    
    # Selecting rows we want to analyze
    frmt_df = pd.read_excel('./datasets/df_uk_short_emp_v3.xlsx')
    data = data[data['Indicator Name'].isin(frmt_df['Indicator Name'])]
    
    
    
    return data
    
 

In [57]:
def transpose_df(df, cd):
    
    df.set_index('Indicator Name', inplace=True)
    df = df.transpose()
    
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'Year'}, inplace=True)
    
    # Convert 'Year' to datetime
    df['Year'] = pd.to_datetime(df['Year'], format='%Y')

    # Set 'Year' as the index
    df.set_index('Year', inplace=True)
    
    df['Country Code'] = cd
    
    return df
    

In [4]:
df = pd.read_excel('../clean/clean_uk_emp_v2.xlsx')

In [10]:
df.drop(32, inplace=True)

In [11]:
df.to_excel('../clean/clean_uk_emp_v2.xlsx')

# Creating USA and Canada datasets

In [81]:
df_us = create_dataset('USA')
df_ca = create_dataset('CAN')
df_uk = create_dataset('GBR')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Category'] = data['Indicator Name'].apply(final_categorization)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Category'] = data['Indicator Name'].apply(final_categorization)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Category'] = data['Indicator Name'].apply(final_categorizat

In [82]:
t_us = transpose_df(df_us, 'USA')
t_ca = transpose_df(df_ca, 'CAN')
t_uk = transpose_df(df_uk, 'GBR')


In [83]:
t_us.head()

Indicator Name,Armed forces personnel (% of total labor force),"Employers, female (% of female employment) (modeled ILO estimate)","Employers, male (% of male employment) (modeled ILO estimate)","Employers, total (% of total employment) (modeled ILO estimate)",Employment in agriculture (% of total employment) (modeled ILO estimate),"Employment in agriculture, female (% of female employment) (modeled ILO estimate)","Employment in agriculture, male (% of male employment) (modeled ILO estimate)",Employment in industry (% of total employment) (modeled ILO estimate),"Employment in industry, female (% of female employment) (modeled ILO estimate)","Employment in industry, male (% of male employment) (modeled ILO estimate)",Employment in services (% of total employment) (modeled ILO estimate),"Employment in services, female (% of female employment) (modeled ILO estimate)","Employment in services, male (% of male employment) (modeled ILO estimate)","Labor force, female (% of total labor force)","Labor force, total","Part time employment, female (% of total female employment)","Part time employment, male (% of total male employment)","Part time employment, total (% of total employment)",Ratio of female to male labor force participation rate (%) (modeled ILO estimate),"Self-employed, female (% of female employment) (modeled ILO estimate)","Self-employed, male (% of male employment) (modeled ILO estimate)","Self-employed, total (% of total employment) (modeled ILO estimate)","Unemployment, female (% of female labor force) (modeled ILO estimate)","Unemployment, male (% of male labor force) (modeled ILO estimate)","Unemployment, total (% of total labor force) (modeled ILO estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)",Country Code
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1991-01-01,1.652661,1.122814,4.521475,3.001132,2.776975,1.278615,3.989772,24.501773,13.657668,33.279174,72.721252,85.063718,62.731051,44.522951,128277990.0,,,,75.189205,5.28412,12.040567,9.018167,6.359,7.154,6.8,4.161306,7.519093,6.017035,USA
1992-01-01,1.472535,1.080785,4.356493,2.885542,2.740269,1.244002,3.959788,23.927351,13.30297,32.586654,73.33237,85.453021,63.453558,44.664533,130387356.0,,,,75.731187,5.094077,11.633147,8.69679,7.002,7.902,7.5,4.013294,7.276653,5.811248,USA
1993-01-01,1.38096,1.104125,4.430748,2.934371,2.590243,1.162072,3.757888,23.468508,12.699867,32.272759,73.941241,86.138063,63.969344,44.823422,131792391.0,,,,76.309203,5.122882,11.838412,8.817641,6.57,7.168,6.9,4.018755,7.407663,5.883269,USA
1994-01-01,1.282848,1.140871,4.483261,2.966807,2.770192,1.511834,3.815269,23.401592,12.637248,32.341465,73.828215,85.850909,63.843265,45.338385,134076659.0,40.14,22.8,30.78,78.001061,5.043922,11.918923,8.79971,6.052,6.174,6.119,3.903052,7.43566,5.832904,USA
1995-01-01,1.202862,1.120699,4.375383,2.896982,2.754203,1.535567,3.768474,23.441152,12.489053,32.55659,73.804654,85.975381,63.674943,45.434033,135975697.0,39.71,22.16,30.25,78.402472,4.900913,11.600386,8.557231,5.671,5.633,5.65,3.780213,7.225002,5.660248,USA


In [84]:
t_ca.head()

Indicator Name,Armed forces personnel (% of total labor force),"Employers, female (% of female employment) (modeled ILO estimate)","Employers, male (% of male employment) (modeled ILO estimate)","Employers, total (% of total employment) (modeled ILO estimate)",Employment in agriculture (% of total employment) (modeled ILO estimate),"Employment in agriculture, female (% of female employment) (modeled ILO estimate)","Employment in agriculture, male (% of male employment) (modeled ILO estimate)",Employment in industry (% of total employment) (modeled ILO estimate),"Employment in industry, female (% of female employment) (modeled ILO estimate)","Employment in industry, male (% of male employment) (modeled ILO estimate)",Employment in services (% of total employment) (modeled ILO estimate),"Employment in services, female (% of female employment) (modeled ILO estimate)","Employment in services, male (% of male employment) (modeled ILO estimate)","Labor force, female (% of total labor force)","Labor force, total","Part time employment, female (% of total female employment)","Part time employment, male (% of total male employment)","Part time employment, total (% of total employment)",Ratio of female to male labor force participation rate (%) (modeled ILO estimate),"Self-employed, female (% of female employment) (modeled ILO estimate)","Self-employed, male (% of male employment) (modeled ILO estimate)","Self-employed, total (% of total employment) (modeled ILO estimate)","Unemployment, female (% of female labor force) (modeled ILO estimate)","Unemployment, male (% of male labor force) (modeled ILO estimate)","Unemployment, total (% of total labor force) (modeled ILO estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)",Country Code
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1991-01-01,0.580938,2.99204,9.321282,6.476809,3.491257,2.389925,4.390229,23.877093,11.847307,33.696513,72.631651,85.762768,61.913258,44.630309,14803644.0,48.48,27.5,36.95,77.865302,10.056981,18.571287,14.744808,9.689,10.822,10.316,7.064924,9.250019,8.267999,CAN
1992-01-01,0.554721,3.028929,9.223835,6.417203,3.45133,2.295266,4.408936,23.181633,11.671614,32.715761,73.367029,86.033103,62.875317,44.798519,14782203.0,51.95,31.18,40.59,78.310675,10.585332,18.91384,15.14058,10.192,12.013,11.197,7.556403,9.690005,8.723376,CAN
1993-01-01,0.511053,2.980141,9.318215,6.447357,3.482529,2.375179,4.399417,22.510037,10.932633,32.096128,74.007434,86.692188,63.504456,44.940789,14871263.0,50.83,29.32,39.07,78.652298,11.198054,19.465377,15.720656,10.677,11.949,11.377,8.217912,10.147161,9.273307,CAN
1994-01-01,0.499551,2.952469,8.773015,6.137988,3.348042,2.300642,4.214425,22.669263,10.911164,32.395422,73.982696,86.788178,63.39014,44.970393,15013489.0,49.09,27.16,37.09,78.687224,11.435727,18.922758,15.533294,9.795,10.885,10.395,8.483258,10.149742,9.395306,CAN
1995-01-01,0.500671,3.043241,8.811028,6.193924,3.153491,2.123671,4.008907,22.927997,11.286149,32.598256,73.918519,86.590179,63.392837,45.185973,15119721.0,50.22,28.34,38.28,79.327288,11.621448,19.029284,15.668017,9.115,9.798,9.489,8.578207,10.218256,9.474093,CAN


In [85]:
t_uk.head()

Indicator Name,Armed forces personnel (% of total labor force),"Employers, female (% of female employment) (modeled ILO estimate)","Employers, male (% of male employment) (modeled ILO estimate)","Employers, total (% of total employment) (modeled ILO estimate)",Employment in agriculture (% of total employment) (modeled ILO estimate),"Employment in agriculture, female (% of female employment) (modeled ILO estimate)","Employment in agriculture, male (% of male employment) (modeled ILO estimate)",Employment in industry (% of total employment) (modeled ILO estimate),"Employment in industry, female (% of female employment) (modeled ILO estimate)","Employment in industry, male (% of male employment) (modeled ILO estimate)",Employment in services (% of total employment) (modeled ILO estimate),"Employment in services, female (% of female employment) (modeled ILO estimate)","Employment in services, male (% of male employment) (modeled ILO estimate)","Labor force, female (% of total labor force)","Labor force, total","Part time employment, female (% of total female employment)","Part time employment, male (% of total male employment)","Part time employment, total (% of total employment)",Ratio of female to male labor force participation rate (%) (modeled ILO estimate),"Self-employed, female (% of female employment) (modeled ILO estimate)","Self-employed, male (% of male employment) (modeled ILO estimate)","Self-employed, total (% of total employment) (modeled ILO estimate)","Unemployment, female (% of female labor force) (modeled ILO estimate)","Unemployment, male (% of male labor force) (modeled ILO estimate)","Unemployment, total (% of total labor force) (modeled ILO estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)",Country Code
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1991-01-01,1.034938,2.071416,5.451175,3.964613,2.165177,1.166189,2.949595,30.425698,16.451258,41.398603,67.409125,82.382545,55.651802,43.440749,29083861.0,58.89,23.82,39.15,70.83542,8.692886,18.146675,13.988496,7.403,9.431,8.55,6.62147,12.6955,10.023879,GBR
1992-01-01,1.011012,1.821551,4.9977,3.571861,2.111121,1.132115,2.908641,29.930716,16.166012,41.143743,67.958159,82.701873,55.947623,43.754765,28980864.0,60.39,26.5,41.56,71.736961,7.990265,17.597703,13.284724,7.428,11.596,9.772,6.168706,12.600003,9.712864,GBR
1993-01-01,0.941825,1.832719,4.78889,3.445334,2.047451,1.081204,2.852484,29.392396,15.82816,40.693485,68.560149,83.09062,56.454031,44.172166,28773930.0,59.61,25.24,40.69,72.965467,7.967716,17.462428,13.147159,7.758,12.393,10.346,6.134997,12.673538,9.701825,GBR
1994-01-01,0.893465,1.786122,4.660436,3.355197,2.078798,1.156117,2.846337,27.780677,14.329842,38.969846,70.140528,84.514041,58.183831,44.307299,28764405.0,59.83,25.13,40.7,73.385803,8.009235,18.023474,13.475951,7.397,11.438,9.648,6.223113,13.363038,10.120754,GBR
1995-01-01,0.809746,1.702226,4.759194,3.375982,2.053645,1.231358,2.733193,27.425058,14.108477,38.430046,70.521309,84.660156,58.836775,44.389175,28774443.0,59.7,24.45,40.2,73.658612,7.883406,18.172128,13.516708,6.929,10.103,8.694,6.181181,13.412942,10.140726,GBR


# Combining the dataframes


In [92]:
result_df = pd.concat([t_us, t_ca, t_uk])

In [93]:
result_df.head()

Indicator Name,Armed forces personnel (% of total labor force),"Employers, female (% of female employment) (modeled ILO estimate)","Employers, male (% of male employment) (modeled ILO estimate)","Employers, total (% of total employment) (modeled ILO estimate)",Employment in agriculture (% of total employment) (modeled ILO estimate),"Employment in agriculture, female (% of female employment) (modeled ILO estimate)","Employment in agriculture, male (% of male employment) (modeled ILO estimate)",Employment in industry (% of total employment) (modeled ILO estimate),"Employment in industry, female (% of female employment) (modeled ILO estimate)","Employment in industry, male (% of male employment) (modeled ILO estimate)",Employment in services (% of total employment) (modeled ILO estimate),"Employment in services, female (% of female employment) (modeled ILO estimate)","Employment in services, male (% of male employment) (modeled ILO estimate)","Labor force, female (% of total labor force)","Labor force, total","Part time employment, female (% of total female employment)","Part time employment, male (% of total male employment)","Part time employment, total (% of total employment)",Ratio of female to male labor force participation rate (%) (modeled ILO estimate),"Self-employed, female (% of female employment) (modeled ILO estimate)","Self-employed, male (% of male employment) (modeled ILO estimate)","Self-employed, total (% of total employment) (modeled ILO estimate)","Unemployment, female (% of female labor force) (modeled ILO estimate)","Unemployment, male (% of male labor force) (modeled ILO estimate)","Unemployment, total (% of total labor force) (modeled ILO estimate)","Vulnerable employment, female (% of female employment) (modeled ILO estimate)","Vulnerable employment, male (% of male employment) (modeled ILO estimate)","Vulnerable employment, total (% of total employment) (modeled ILO estimate)",Country Code
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
1991-01-01,1.652661,1.122814,4.521475,3.001132,2.776975,1.278615,3.989772,24.501773,13.657668,33.279174,72.721252,85.063718,62.731051,44.522951,128277990.0,,,,75.189205,5.28412,12.040567,9.018167,6.359,7.154,6.8,4.161306,7.519093,6.017035,USA
1992-01-01,1.472535,1.080785,4.356493,2.885542,2.740269,1.244002,3.959788,23.927351,13.30297,32.586654,73.33237,85.453021,63.453558,44.664533,130387356.0,,,,75.731187,5.094077,11.633147,8.69679,7.002,7.902,7.5,4.013294,7.276653,5.811248,USA
1993-01-01,1.38096,1.104125,4.430748,2.934371,2.590243,1.162072,3.757888,23.468508,12.699867,32.272759,73.941241,86.138063,63.969344,44.823422,131792391.0,,,,76.309203,5.122882,11.838412,8.817641,6.57,7.168,6.9,4.018755,7.407663,5.883269,USA
1994-01-01,1.282848,1.140871,4.483261,2.966807,2.770192,1.511834,3.815269,23.401592,12.637248,32.341465,73.828215,85.850909,63.843265,45.338385,134076659.0,40.14,22.8,30.78,78.001061,5.043922,11.918923,8.79971,6.052,6.174,6.119,3.903052,7.43566,5.832904,USA
1995-01-01,1.202862,1.120699,4.375383,2.896982,2.754203,1.535567,3.768474,23.441152,12.489053,32.55659,73.804654,85.975381,63.674943,45.434033,135975697.0,39.71,22.16,30.25,78.402472,4.900913,11.600386,8.557231,5.671,5.633,5.65,3.780213,7.225002,5.660248,USA


In [94]:
result_df.to_csv("./clean/countries_emp.csv")