# Data Cleaning, preprocessing and merging

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import math
%matplotlib inline

## asylum_seekers.csv

In [2]:
as_data = pd.read_csv("data/asylum_seekers.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
cols = ['Tota pending start-year', 'of which UNHCR-assisted(start-year)', 'Applied during year', 
        'decisions_recognized', 'decisions_other', 'Rejected', 'Otherwise closed', 'Total decisions', 
        'Total pending end-year', 'of which UNHCR-assisted(end-year)']

as_data[cols] = as_data[cols].apply(pd.to_numeric, errors='coerce', axis=1)

In [4]:
as_data = as_data[as_data.Year != 2000] # removing all the rows with year below 2001
print(as_data['Year'].value_counts(dropna=False))

2015    11225
2016    10461
2014     9908
2013     9259
2012     8644
2011     8299
2010     7905
2009     7159
2008     7042
2007     6924
2005     6721
2006     6656
2004     6601
2003     6359
2002     5862
2001     5542
Name: Year, dtype: int64


In [5]:
as_data = as_data.fillna( value = 0)

In [6]:
as_data.isnull().sum()

Year                                       0
Country / territory of asylum/residence    0
Origin                                     0
RSD procedure type / level                 0
Tota pending start-year                    0
of which UNHCR-assisted(start-year)        0
Applied during year                        0
decisions_recognized                       0
decisions_other                            0
Rejected                                   0
Otherwise closed                           0
Total decisions                            0
Total pending end-year                     0
of which UNHCR-assisted(end-year)          0
dtype: int64

In [7]:
as_data.replace(to_replace ="*", value ="0")

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,Tota pending start-year,of which UNHCR-assisted(start-year),Applied during year,decisions_recognized,decisions_other,Rejected,Otherwise closed,Total decisions,Total pending end-year,of which UNHCR-assisted(end-year)
5153,2001,South Africa,Afghanistan,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
5154,2001,South Africa,Afghanistan,G / FI,8.0,0.0,0.0,5.0,0.0,2.0,0.0,7.0,1.0,0.0
5155,2001,Uzbekistan,Afghanistan,U / FI,1235.0,1235.0,2090.0,1573.0,0.0,247.0,189.0,2009.0,1316.0,1316.0
5156,2001,United States of America,Afghanistan,G / EO,186.0,0.0,225.0,129.0,0.0,27.0,91.0,247.0,164.0,0.0
5157,2001,United States of America,Afghanistan,G / IN,152.0,0.0,274.0,212.0,0.0,43.0,19.0,274.0,166.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129715,2016,United States of America,Zimbabwe,G / IN,232.0,0.0,229.0,16.0,0.0,0.0,10.0,28.0,435.0,0.0
129716,2016,United States of America,Zimbabwe,G / EO,142.0,0.0,12.0,0.0,0.0,0.0,23.0,31.0,138.0,0.0
129717,2016,South Africa,Zimbabwe,G / AR,94.0,9.0,0.0,0.0,0.0,0.0,94.0,94.0,0.0,0.0
129718,2016,South Africa,Zimbabwe,G / FI,41238.0,4124.0,7964.0,73.0,0.0,7869.0,0.0,7942.0,41260.0,0.0


In [8]:
as_data = as_data.reset_index(drop=True)

In [9]:
as_data.to_csv("cleaned_data/cleaned_asylum_seekers.csv", index=False)

## demographic.csv

In [10]:
demo = pd.read_csv('data/cleaned_demographics.csv')

In [11]:
# Sum up the columns 
demo['5-17f'] = demo.iloc[:, 5:8].sum(axis=1)
demo['5-17m'] = demo.iloc[:, -9:-6].sum(axis=1)

In [12]:
# drop the columns that are not needed
demo = demo.drop(['5-11f', '12-17f', '5-11m', '12-17m'], axis=1)

In [13]:
demo.to_csv('cleaned_data/cleaned_aggregated_columns_demographics.csv', index=False)

## persons_of_concern.csv

In [14]:
person_of_concern_data = "./data/persons_of_concern.csv"
poc_data = pd.read_csv(person_of_concern_data)

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
cols = ['Refugees (incl. refugee-like situations)', 'Asylum-seekers (pending cases)', 
        'Returned refugees', 'Stateless persons', 'Others of concern', 'Total Population']

poc_data[cols] = poc_data[cols].apply(pd.to_numeric, errors='coerce', axis=1)

In [16]:
data_post_year_2001 = poc_data[poc_data['Year'] > 2000]

In [17]:
data_post_year_2001 = data_post_year_2001.fillna(0)

In [18]:
data_post_year_2001.replace(to_replace ="*", value ="0")

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,Refugees (incl. refugee-like situations),Asylum-seekers (pending cases),Returned refugees,Internally displaced persons (IDPs),Returned IDPs,Stateless persons,Others of concern,Total Population
26904,2001,Afghanistan,Afghanistan,0.0,0.0,0.0,1200000.0,0.0,0.0,0.0,1200000.0
26905,2001,Afghanistan,Iran (Islamic Rep. of),3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
26906,2001,Afghanistan,Iraq,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
26907,2001,Angola,Angola,0.0,0.0,0.0,202000.0,0.0,0.0,0.0,202000.0
26908,2001,Angola,Burundi,18.0,3.0,0.0,0.0,0.0,0.0,0.0,21.0
...,...,...,...,...,...,...,...,...,...,...,...
117316,2016,Zimbabwe,Somalia,24.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0
117317,2016,Zimbabwe,Syrian Arab Rep.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
117318,2016,Zimbabwe,Uganda,7.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
117319,2016,Zimbabwe,South Africa,0.0,0.0,0.0,0.0,0.0,0.0,7.0,8.0


In [19]:
df = data_post_year_2001.reset_index(drop=True)

In [20]:
data_post_year_2001.to_csv(r'cleaned_data/cleaned_people_of_concern.csv', index=False)

## resettlement.csv

In [21]:
resettlement_data = "./data/resettlement.csv"
r_data = pd.read_csv(resettlement_data)

In [22]:
r_data = r_data[r_data['Year'] >= 2001]

In [23]:
r_data = r_data.replace(to_replace ="*", value ="0")

In [24]:
r_data.to_csv('cleaned_data/cleaned_resettlement.csv', index=False)

## Adding columns to csv files

In [4]:
file1 = pd.read_csv("cleaned_data/cleaned_people_of_concern.csv")
file2 = pd.read_csv("cleaned_data/cleaned_asylum_seekers.csv")
file3 = pd.read_csv("cleaned_data/cleaned_resettlement.csv")

In [5]:
file2['Successful'] = file2['decisions_recognized']+file2['decisions_other']
file2['Unsuccessful'] = file2['Rejected']+file2['Otherwise closed']

file2 = file2.rename(columns={"Otherwise closed": "Otherwise_closed"})
file2['Otherwise_closed'] = file2.apply(lambda x: 1.0 if x.Otherwise_closed <= 0.0 else x.Otherwise_closed, axis=1)

## Merge data files - GDP per capital, Unemployment Rate, HDI, Distance from origin to target

In [6]:
file_gdp = pd.read_csv("cleaned_data/cleaned_GDP_Per_Capital_2001-2016.csv")
file_countries = pd.read_csv("cleaned_data/countries.csv")
file_hdi = pd.read_csv("cleaned_data/Human Development Index (HDI).csv")
file_unemployment_rate = pd.read_csv("cleaned_data/cleaned_unemployment_rate.csv")

### Dropna and drop columns

In [7]:
file_gdp = file_gdp.dropna(axis=0)
file_countries = file_countries.drop(['Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6'], axis = 1)

### Rename columns

In [8]:
file_gdp_target = file_gdp.rename({'Country Name': 'Country / territory of asylum/residence'}, axis=1)
file_gdp_origin = file_gdp.rename({'Country Name': 'Origin'}, axis=1)
file_countries_target = file_countries.rename({'country': 'Country / territory of asylum/residence'}, axis=1)
file_countries_origin = file_countries.rename({'country': 'Origin'}, axis=1)
file_hdi = file_hdi.rename({'Country': 'Country / territory of asylum/residence'}, axis=1)
file_unemployment_rate = file_unemployment_rate.rename({'Country Name': 'Country / territory of asylum/residence'}, axis=1)

### Merge GDP

In [9]:
file2 = pd.merge(file2, file_gdp_target, on = ['Year', 'Country / territory of asylum/residence'])
file2 = file2.rename({'GDP per capita': 'Target_country_GDP_per_capita'}, axis=1)

In [10]:
file2 = pd.merge(file2, file_gdp_origin, on = ['Year', 'Origin'])
file2 = file2.rename({'GDP per capita': 'Origin_country_GDP_per_capita'}, axis=1)

### Add new columns GDP difference by finding the difference between target and origin GDP

In [11]:
file2['GDP_difference'] = file2.apply(lambda x: x.Target_country_GDP_per_capita - x.Origin_country_GDP_per_capita, axis=1)

### Add new columns Lat, Long 

In [12]:
file2 = pd.merge(file2, file_countries_target, on = 'Country / territory of asylum/residence')
file2 = file2.rename({'latitude': 'Target_latitude', 'longitude': 'Target_longitude'}, axis=1)

In [13]:
file2 = pd.merge(file2, file_countries_origin, on = 'Origin')
file2 = file2.rename({'latitude': 'Origin_latitude', 'longitude': 'Origin_longitude'}, axis=1)

### Calculate Distance between points based on lat & lon

In [14]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6372800  # Earth radius in meters
    
    phi1, phi2 = math.radians(lat1), math.radians(lat2) 
    dphi       = math.radians(lat2 - lat1)
    dlambda    = math.radians(lon2 - lon1)
    
    a = math.sin(dphi/2)**2 + \
        math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2
    
    return (2*R*math.atan2(math.sqrt(a), math.sqrt(1 - a)))/1000

# print(haversine(-54.429579, -36.587909, -14.270972, -170.132217))

In [15]:
file2['origin_to_target_dist'] = file2.apply(lambda x: haversine(x['Target_latitude'],x['Target_longitude'],
                                                       x['Origin_latitude'], x['Origin_longitude']), axis=1)

In [16]:
file2.head()

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,Tota pending start-year,of which UNHCR-assisted(start-year),Applied during year,decisions_recognized,decisions_other,Rejected,...,Successful,Unsuccessful,Target_country_GDP_per_capita,Origin_country_GDP_per_capita,GDP_difference,Target_latitude,Target_longitude,Origin_latitude,Origin_longitude,origin_to_target_dist
0,2001,South Africa,Angola,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2666.480846,527.333529,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329
1,2001,South Africa,Angola,G / FI,371.0,108.0,1015.0,860.0,0.0,36.0,...,860.0,104.0,2666.480846,527.333529,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329
2,2002,South Africa,Angola,G / FI,422.0,422.0,5682.0,542.0,0.0,14.0,...,542.0,14.0,2502.265926,872.494492,1629.771434,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329
3,2003,South Africa,Angola,G / FI,5548.0,1302.0,894.0,491.0,0.0,26.0,...,491.0,26.0,3751.258432,982.960899,2768.297533,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329
4,2004,South Africa,Angola,G / FI,5925.0,1302.0,397.0,34.0,0.0,14.0,...,34.0,14.0,4833.633129,1255.564045,3578.069084,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329


### Merge HDI

In [17]:
file2 = pd.merge(file2, file_hdi, on = ['Year', 'Country / territory of asylum/residence'])

### Merge Unemployment rate

In [18]:
file2 = pd.merge(file2, file_unemployment_rate, on = ['Year', 'Country / territory of asylum/residence'])

### Group by Origin and Target

In [19]:
# file2_combine = file2.drop(['Year'], axis=1)

# file2_combine = file2_combine.groupby(['Country / territory of asylum/residence', 'Origin', 'RSD procedure type / level'])

In [20]:
# file2_combine = file2_combine.sum()

# # resetting index 
# file2_combine.reset_index(inplace = True) 

In [21]:
# file2_combine[file2_combine['Country / territory of asylum/residence']=='Afghanistan']

### Calculating the acceptance rate

In [22]:
file2['acceptance_rate'] = file2.apply(
    lambda a: 0.0 if(a.decisions_recognized == 0.0 and a.decisions_other == 0.0 
                     and a.Rejected == 0.0 and a.Otherwise_closed <= 0.0)
            else 
            (a.decisions_recognized + a.decisions_other)/(a.decisions_recognized + a.decisions_other 
            + a.Rejected + a.Otherwise_closed), axis=1)

In [23]:
file2

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,Tota pending start-year,of which UNHCR-assisted(start-year),Applied during year,decisions_recognized,decisions_other,Rejected,...,Origin_country_GDP_per_capita,GDP_difference,Target_latitude,Target_longitude,Origin_latitude,Origin_longitude,origin_to_target_dist,HDI,Unemployment rate,acceptance_rate
0,2001,South Africa,Angola,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,527.333529,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329,0.61,30.896,0.000000
1,2001,South Africa,Angola,G / FI,371.0,108.0,1015.0,860.0,0.0,36.0,...,527.333529,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329,0.61,30.896,0.892116
2,2001,South Africa,Albania,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,1281.659393,1384.821453,-54.429579,-36.587909,41.153332,20.168331,11919.880369,0.61,30.896,0.000000
3,2001,South Africa,Burundi,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,134.363448,2532.117399,-54.429579,-36.587909,12.238333,-1.561593,8114.662998,0.61,30.896,0.000000
4,2001,South Africa,Burundi,G / FI,1165.0,1049.0,190.0,808.0,0.0,234.0,...,134.363448,2532.117399,-54.429579,-36.587909,12.238333,-1.561593,8114.662998,0.61,30.896,0.770257
5,2001,South Africa,Benin,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,378.736054,2287.744792,-54.429579,-36.587909,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000
6,2001,South Africa,Benin,G / FI,82.0,12.0,1.0,0.0,0.0,75.0,...,378.736054,2287.744792,-54.429579,-36.587909,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000
7,2001,South Africa,Burkina Faso,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,235.491232,2430.989614,-54.429579,-36.587909,42.733883,25.485830,12301.788683,0.61,30.896,0.000000
8,2001,South Africa,Bangladesh,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,415.034427,2251.446419,-54.429579,-36.587909,25.930414,50.637772,12156.012618,0.61,30.896,0.000000
9,2001,South Africa,Bangladesh,G / FI,418.0,17.0,18.0,0.0,0.0,268.0,...,415.034427,2251.446419,-54.429579,-36.587909,25.930414,50.637772,12156.012618,0.61,30.896,0.000000


### Encoding columns of accepted or rejected based on acceptance rate

In [24]:
file2['accepted/rejected'] = file2.apply(lambda x: 0 if x.acceptance_rate <= 0.25 else 
                                                 1 if x.acceptance_rate <= 0.5 else 
                                                2 if x.acceptance_rate <= 0.75 else 3, axis=1)

In [25]:
file2

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,Tota pending start-year,of which UNHCR-assisted(start-year),Applied during year,decisions_recognized,decisions_other,Rejected,...,GDP_difference,Target_latitude,Target_longitude,Origin_latitude,Origin_longitude,origin_to_target_dist,HDI,Unemployment rate,acceptance_rate,accepted/rejected
0,2001,South Africa,Angola,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329,0.61,30.896,0.000000,0
1,2001,South Africa,Angola,G / FI,371.0,108.0,1015.0,860.0,0.0,36.0,...,2139.147318,-54.429579,-36.587909,-14.270972,-170.132217,11214.770329,0.61,30.896,0.892116,3
2,2001,South Africa,Albania,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,1384.821453,-54.429579,-36.587909,41.153332,20.168331,11919.880369,0.61,30.896,0.000000,0
3,2001,South Africa,Burundi,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,2532.117399,-54.429579,-36.587909,12.238333,-1.561593,8114.662998,0.61,30.896,0.000000,0
4,2001,South Africa,Burundi,G / FI,1165.0,1049.0,190.0,808.0,0.0,234.0,...,2532.117399,-54.429579,-36.587909,12.238333,-1.561593,8114.662998,0.61,30.896,0.770257,3
5,2001,South Africa,Benin,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,2287.744792,-54.429579,-36.587909,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000,0
6,2001,South Africa,Benin,G / FI,82.0,12.0,1.0,0.0,0.0,75.0,...,2287.744792,-54.429579,-36.587909,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000,0
7,2001,South Africa,Burkina Faso,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,2430.989614,-54.429579,-36.587909,42.733883,25.485830,12301.788683,0.61,30.896,0.000000,0
8,2001,South Africa,Bangladesh,G / AR,0.0,0.0,0.0,0.0,0.0,0.0,...,2251.446419,-54.429579,-36.587909,25.930414,50.637772,12156.012618,0.61,30.896,0.000000,0
9,2001,South Africa,Bangladesh,G / FI,418.0,17.0,18.0,0.0,0.0,268.0,...,2251.446419,-54.429579,-36.587909,25.930414,50.637772,12156.012618,0.61,30.896,0.000000,0


In [26]:
#List unique values in the df['RSD procedure type / level'] column
file2["RSD procedure type / level"].unique()

array(['G / AR', 'G / FI', 'G / BL', 'G / EO', 'G / IN', 'G / JR',
       'U / FI', 'G / FA', 'U / FA', 'U / AR', 'G / RA', 'G / TA',
       'G / ar', 'J / FI', 'J / AR', 'G / SP', 'G / fi', 'G / NA',
       'G / CA', 'G / TP', 'J / FA', 'U / RA', 'J / RA', 'U / JR',
       'U / NA', '0'], dtype=object)

#### Encoding RSD procedure type / level

In [27]:
file2['Encoded procedure type'] = file2['RSD procedure type / level'].astype('category').cat.codes

#### Encoding Country / territory of asylum/residence

In [28]:
file2['Country / territory of asylum/residence'].unique()

array(['South Africa', 'United States of America', 'Ukraine', 'Sweden',
       'Slovenia', 'Slovakia', 'Russian Federation', 'Romania',
       'Portugal', 'Poland', 'Norway', 'Netherlands', 'Mozambique',
       'Rep. of Moldova', 'Kenya', 'Italy', 'Ireland', 'Hungary',
       'Greece', 'United Kingdom', 'France', 'Finland', 'Spain',
       'Denmark', 'Germany', 'Czech Rep.', 'Chile', 'Switzerland',
       'Canada', 'Brazil', 'Bosnia and Herzegovina', 'Belgium', 'Austria',
       'Australia', 'Argentina', 'Zimbabwe', 'Zambia', 'Senegal',
       'Rwanda', 'Panama', 'Pakistan', 'Mauritania', 'Mexico', 'Ghana',
       'Gabon', 'Congo', 'Dem. Rep. of the Congo', 'Central African Rep.',
       'Botswana', 'Benin', 'New Zealand', 'Luxembourg', 'Iceland',
       'Turkey', 'Malaysia', 'Cambodia', 'Egypt', 'China', 'Uganda',
       'Morocco', 'Algeria', 'Cameroon', 'Burkina Faso',
       'Serbia and Kosovo (S/RES/1244 (1999))', 'Rep. of Korea', 'Japan',
       'India', 'Indonesia', 'Croatia', 'C

In [29]:
file2['Encoded Target Country'] = file2['Country / territory of asylum/residence'].astype('category').cat.codes

#### Encoding Origin

In [30]:
file2['Origin'].unique()

array(['Angola', 'Albania', 'Burundi', 'Benin', 'Burkina Faso',
       'Bangladesh', 'Bulgaria', 'Bosnia and Herzegovina', 'China',
       'Cameroon', 'Dem. Rep. of the Congo', 'Congo', 'Cuba', 'Algeria',
       'Egypt', 'Eritrea', 'Ethiopia', 'Gabon', 'Georgia', 'Ghana',
       'Gambia', 'Croatia', 'Hungary', 'India', 'Kenya', 'Liberia',
       'Sri Lanka', 'China, Macao SAR', 'Morocco', 'Mali', 'Mozambique',
       'Nigeria', 'Pakistan', 'Rwanda', 'Sudan', 'Senegal',
       'Sierra Leone', 'Serbia and Kosovo (S/RES/1244 (1999))',
       'Syrian Arab Rep.', 'United Rep. of Tanzania', 'Uganda', 'Ukraine',
       'Zambia', 'Zimbabwe', 'Afghanistan', 'Kyrgyzstan', 'Chad',
       'Malawi', 'Iraq', 'Central African Rep.', 'Guinea', 'Haiti',
       'Russian Federation', 'Togo', 'Turkey', 'Fiji', 'Jordan', 'Kuwait',
       'Myanmar', 'Niger', 'Nepal', 'Botswana', 'Namibia', 'Djibouti',
       'Switzerland', 'Swaziland', 'Lesotho', 'Poland', 'Tunisia',
       'Thailand', 'Libya', 'Armenia', '

In [31]:
file2['Encoded Origin'] = file2['Origin'].astype('category').cat.codes

### Drop Attributes

In [32]:
file2 = file2.drop(['of which UNHCR-assisted(start-year)', 'of which UNHCR-assisted(end-year)', 
                                    'Applied during year', 'Tota pending start-year', 'Total pending end-year'], axis = 1)

In [33]:
file2

Unnamed: 0,Year,Country / territory of asylum/residence,Origin,RSD procedure type / level,decisions_recognized,decisions_other,Rejected,Otherwise_closed,Total decisions,Successful,...,Origin_latitude,Origin_longitude,origin_to_target_dist,HDI,Unemployment rate,acceptance_rate,accepted/rejected,Encoded procedure type,Encoded Target Country,Encoded Origin
0,2001,South Africa,Angola,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,-14.270972,-170.132217,11214.770329,0.61,30.896,0.000000,0,1,135,3
1,2001,South Africa,Angola,G / FI,860.0,0.0,36.0,68.0,964.0,860.0,...,-14.270972,-170.132217,11214.770329,0.61,30.896,0.892116,3,6,135,3
2,2001,South Africa,Albania,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,41.153332,20.168331,11919.880369,0.61,30.896,0.000000,0,1,135,1
3,2001,South Africa,Burundi,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,12.238333,-1.561593,8114.662998,0.61,30.896,0.000000,0,1,135,25
4,2001,South Africa,Burundi,G / FI,808.0,0.0,234.0,7.0,1049.0,808.0,...,12.238333,-1.561593,8114.662998,0.61,30.896,0.770257,3,6,135,25
5,2001,South Africa,Benin,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000,0,1,135,18
6,2001,South Africa,Benin,G / FI,0.0,0.0,75.0,5.0,80.0,0.0,...,17.189877,-88.497650,9356.444643,0.61,30.896,0.000000,0,6,135,18
7,2001,South Africa,Burkina Faso,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,42.733883,25.485830,12301.788683,0.61,30.896,0.000000,0,1,135,24
8,2001,South Africa,Bangladesh,G / AR,0.0,0.0,0.0,1.0,0.0,0.0,...,25.930414,50.637772,12156.012618,0.61,30.896,0.000000,0,1,135,13
9,2001,South Africa,Bangladesh,G / FI,0.0,0.0,268.0,30.0,298.0,0.0,...,25.930414,50.637772,12156.012618,0.61,30.896,0.000000,0,6,135,13


### Exporting the cleaned and preprocessed file

In [34]:
file2.to_csv('cleaned_data/cleaned_asylum_seekers_added.csv', index=False)