In [505]:
#loading the necessary libraries
import pandas as pd
import numpy as np
from standardize_country import name_to_alpha_2, alpha_2_to_name, alpha_3_to_alpha_2, alpha_2_to_alpha_3

In [506]:
#loading the BRI data
bri_data = pd.read_csv("Data/BRI_Dataset.csv")


In [507]:
#selecting only projects that are recommended for aggregates
bri_data = bri_data[bri_data["Recommended For Aggregates"] == "Yes"]

In [508]:
#selecting the columns that are needed
bri_data = bri_data[['Recipient', 'Commitment Year', 'Implementation Start Year', "Intent", 
                     "Flow Type", "Concessional", "Sector Name", "Flow Class", "Funding Agencies",
                       "Funding Agencies Type", "Amount (Constant USD2017)", "Interest Rate",
                       "Collateralized/Securitized"]]



In [509]:
bri_data

Unnamed: 0,Recipient,Commitment Year,Implementation Start Year,Intent,Flow Type,Concessional,Sector Name,Flow Class,Funding Agencies,Funding Agencies Type,Amount (Constant USD2017),Interest Rate,Collateralized/Securitized
0,Afghanistan,2017,2017.0,Development,Grant,Yes,EMERGENCY RESPONSE,ODA-like,Unspecified Chinese Government Institution,Government Agency,2.015411e+06,,
1,Afghanistan,2017,,Development,Grant,Yes,EDUCATION,ODA-like,Unspecified Chinese Government Institution,Government Agency,,,
2,Afghanistan,2017,2017.0,Development,Grant,Yes,EMERGENCY RESPONSE,ODA-like,Unspecified Chinese Government Institution,Government Agency,1.000000e+06,,
3,Afghanistan,2017,2017.0,Development,Grant,Yes,EDUCATION,ODA-like,China Ministry of Commerce,Government Agency,2.663728e+06,,
4,Afghanistan,2017,,Representational,Grant,Yes,EDUCATION,OOF-like,Chinese Embassy,Government Agency,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10844,Zambia,2000,2000.0,Development,Grant,Yes,GOVERNMENT AND CIVIL SOCIETY,ODA-like,Unspecified Chinese Government Institution,Government Agency,2.690830e+05,,
10845,Zimbabwe,2000,2000.0,Development,Grant,Yes,GOVERNMENT AND CIVIL SOCIETY,ODA-like,Unspecified Chinese Government Institution,Government Agency,1.345415e+06,,
10846,Zimbabwe,2000,,Mixed,Loan,Yes,"INDUSTRY, MINING, CONSTRUCTION",OOF-like,Export-Import Bank of China,State-owned Policy Bank,1.296980e+07,3.0,
10847,Zimbabwe,2000,2000.0,Development,Free-standing technical assistance,Yes,HEALTH,ODA-like,Unspecified Chinese Government Institution,Government Agency,,,


In [510]:
#removing rows without implementation start year
bri_data = bri_data[bri_data["Implementation Start Year"].notnull()]

In [511]:
#converting the implementation start year to integer
bri_data = bri_data.astype({"Implementation Start Year": int})

In [512]:
#dropping commitment year
bri_data.drop("Commitment Year", axis = 1, inplace = True)

In [513]:
#renaming the columns
bri_data.rename(columns = {"Recipient": "Country_Code", "Implementation Start Year" : "Year"}, inplace = True)

In [514]:
#converting country names to ISO-2
bri_data.replace(name_to_alpha_2, inplace = True)

In [515]:
#removing non countries
bri_data = bri_data[bri_data["Country_Code"].isin(alpha_2_to_name.keys())]

In [516]:
#setting country and year as indices


In [517]:
#filling na values in collateralized/securitized with "No"
bri_data["Collateralized/Securitized"].fillna("No", inplace = True)

In [518]:
#seperating into categorical and quantitative data
bri_data_categorical = bri_data[["Country_Code", "Year", "Intent", "Flow Type", "Concessional", "Sector Name", "Flow Class", "Funding Agencies", "Funding Agencies Type", "Collateralized/Securitized"]]
bri_data_quantitative = bri_data[["Country_Code", "Year","Amount (Constant USD2017)", "Interest Rate"]]

In [519]:
#converting entires in Funding Agencies and Funding Agencies Type to lists
bri_data_categorical["Funding Agencies"] = bri_data_categorical["Funding Agencies"].str.split("|")
bri_data_categorical["Funding Agencies Type"] = bri_data_categorical["Funding Agencies Type"].str.split("|")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bri_data_categorical["Funding Agencies"] = bri_data_categorical["Funding Agencies"].str.split("|")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bri_data_categorical["Funding Agencies Type"] = bri_data_categorical["Funding Agencies Type"].str.split("|")


In [520]:
#getting rid of the duplicates in each list
bri_data_categorical["Funding Agencies"] = bri_data_categorical["Funding Agencies"].apply(lambda x: list(set(x)))
bri_data_categorical["Funding Agencies Type"] = bri_data_categorical["Funding Agencies Type"].apply(lambda x: list(set(x)))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bri_data_categorical["Funding Agencies"] = bri_data_categorical["Funding Agencies"].apply(lambda x: list(set(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bri_data_categorical["Funding Agencies Type"] = bri_data_categorical["Funding Agencies Type"].apply(lambda x: list(set(x)))


In [521]:
#exploding out the lists
bri_data_categorical = bri_data_categorical.explode("Funding Agencies")
bri_data_categorical = bri_data_categorical.explode("Funding Agencies Type")

In [522]:
#setting values to nan for duplicant entries for non selected columns
duplicates = bri_data_categorical.index.duplicated(keep = "first")

for column in bri_data_categorical.columns:
    if column not in["Country_Code", "Year", "Funding Agencies", "Funding Agencies Type"]:
        bri_data_categorical.loc[duplicates, column] = np.nan

In [523]:
bri_data_categorical

Unnamed: 0,Country_Code,Year,Intent,Flow Type,Concessional,Sector Name,Flow Class,Funding Agencies,Funding Agencies Type,Collateralized/Securitized
0,AF,2017,Development,Grant,Yes,EMERGENCY RESPONSE,ODA-like,Unspecified Chinese Government Institution,Government Agency,No
2,AF,2017,Development,Grant,Yes,EMERGENCY RESPONSE,ODA-like,Unspecified Chinese Government Institution,Government Agency,No
3,AF,2017,Development,Grant,Yes,EDUCATION,ODA-like,China Ministry of Commerce,Government Agency,No
6,AF,2017,Development,Grant,Yes,EMERGENCY RESPONSE,ODA-like,Unspecified Chinese Government Institution,Government Agency,No
9,AF,2017,Development,Grant,Yes,EDUCATION,ODA-like,Unspecified Chinese Government Institution,Government Agency,No
...,...,...,...,...,...,...,...,...,...,...
10838,VN,2000,Development,Grant,Yes,"INDUSTRY, MINING, CONSTRUCTION",ODA-like,China Ministry of Commerce,Government Agency,No
10839,VN,2000,Development,Loan,Yes,"INDUSTRY, MINING, CONSTRUCTION",ODA-like,China Ministry of Commerce,Government Agency,No
10844,ZM,2000,Development,Grant,Yes,GOVERNMENT AND CIVIL SOCIETY,ODA-like,Unspecified Chinese Government Institution,Government Agency,No
10845,ZW,2000,Development,Grant,Yes,GOVERNMENT AND CIVIL SOCIETY,ODA-like,Unspecified Chinese Government Institution,Government Agency,No


In [524]:
#encoding the categorical data
bri_data_categorical = pd.get_dummies(bri_data_categorical, prefix_sep=": ", columns=
                                      ["Intent", "Flow Type", "Concessional", 
                                       "Sector Name", "Flow Class", "Funding Agencies",
                                         "Funding Agencies Type", "Collateralized/Securitized"])


In [525]:
#converting bools into integers
convert = [col for col in bri_data_categorical.columns if col not in ["Country_Code", "Year"]]
bri_data_categorical[convert] = bri_data_categorical[convert].astype(int)

In [526]:
#grouping the data by country and year
bri_data_categorical = bri_data_categorical.groupby(["Country_Code", "Year"]).sum()

In [527]:
bri_data_categorical

Unnamed: 0_level_0,Unnamed: 1_level_0,Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,Flow Type: Debt forgiveness,Flow Type: Debt rescheduling,Flow Type: Export Buyer's Credit,Flow Type: Free-standing technical assistance,Flow Type: Grant,Flow Type: Loan,...,Funding Agencies: Zhejiang Provincial Government,Funding Agencies: Zhejiang Provincial Health Department,Funding Agencies Type: Government Agency,Funding Agencies Type: State-owned Bank,Funding Agencies Type: State-owned Commercial Bank,Funding Agencies Type: State-owned Company,Funding Agencies Type: State-owned Fund,Funding Agencies Type: State-owned Policy Bank,Collateralized/Securitized: No,Collateralized/Securitized: Yes
Country_Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AF,2000,0,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,1,0
AF,2001,0,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,1,0
AF,2004,0,3,0,0,1,0,0,0,2,0,...,0,0,3,0,0,0,0,0,3,0
AF,2005,0,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,1,0
AF,2006,0,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZW,2015,0,7,0,0,0,0,0,1,6,0,...,0,0,7,0,0,0,0,0,7,0
ZW,2016,0,3,0,1,0,0,0,0,3,0,...,0,0,3,0,0,1,0,0,4,0
ZW,2017,0,3,0,0,0,0,0,1,2,0,...,0,0,3,0,0,0,0,0,3,0
ZW,2018,0,3,1,0,0,0,1,0,3,0,...,0,0,3,0,0,0,0,1,4,0


In [528]:
#grouping the quantitative data by country and year and summing the values
bri_data_quantitative_sum = bri_data_quantitative.groupby(["Country_Code", "Year"]).sum()

In [529]:
#renaming the columns to include sum in the title
bri_data_quantitative_sum.columns = ["Sum: " + col for col in bri_data_quantitative_sum.columns]

In [530]:
#grouping teh quantitative data by country and year and taking the mean of the values
bri_data_quantitative_mean = bri_data_quantitative.groupby(["Country_Code", "Year"]).mean()

In [531]:
#renaming the columns to include mean in the title
bri_data_quantitative_mean.columns = ["Mean: " + col for col in bri_data_quantitative_mean.columns]

In [532]:
#combining the two quantitative dataframes
bri_data_quantitative = pd.concat([bri_data_quantitative_sum, bri_data_quantitative_mean], axis = 1)

In [533]:
#dropping sum of interest rate
bri_data_quantitative.drop("Sum: Interest Rate", axis = 1, inplace = True)

In [534]:
#combining the categorical and quantitative data
bri_data = pd.concat([bri_data_categorical, bri_data_quantitative], axis = 1)

In [535]:
bri_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,Flow Type: Debt forgiveness,Flow Type: Debt rescheduling,Flow Type: Export Buyer's Credit,Flow Type: Free-standing technical assistance,Flow Type: Grant,Flow Type: Loan,...,Funding Agencies Type: State-owned Bank,Funding Agencies Type: State-owned Commercial Bank,Funding Agencies Type: State-owned Company,Funding Agencies Type: State-owned Fund,Funding Agencies Type: State-owned Policy Bank,Collateralized/Securitized: No,Collateralized/Securitized: Yes,Sum: Amount (Constant USD2017),Mean: Amount (Constant USD2017),Mean: Interest Rate
Country_Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AF,2000,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,1.113801e+06,1.113801e+06,
AF,2001,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,2.636852e+05,2.636852e+05,
AF,2004,0,3,0,0,1,0,0,0,2,0,...,0,0,0,0,0,3,0,6.046799e+07,2.015600e+07,
AF,2005,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0.000000e+00,,
AF,2006,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,4.708864e+07,4.708864e+07,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZW,2015,0,7,0,0,0,0,0,1,6,0,...,0,0,0,0,0,7,0,1.148534e+07,3.828447e+06,
ZW,2016,0,3,0,1,0,0,0,0,3,0,...,0,0,1,0,0,4,0,3.878235e+07,1.292745e+07,
ZW,2017,0,3,0,0,0,0,0,1,2,0,...,0,0,0,0,0,3,0,1.500000e+06,7.500000e+05,
ZW,2018,0,3,1,0,0,0,1,0,3,0,...,0,0,0,0,1,4,0,1.040085e+09,3.466950e+08,2.0


In [536]:
#reseting index
bri_data.reset_index(inplace = True)

  bri_data.reset_index(inplace = True)
  bri_data.reset_index(inplace = True)


In [537]:
#converting alpha-2 to alpha-3
bri_data["Country_Code"] = bri_data["Country_Code"].replace(alpha_2_to_alpha_3)

In [538]:
#loading in the previous dataset
loan_debt_data = pd.read_csv("loan_debt_data.csv")

In [539]:
loan_debt_data

Unnamed: 0,Country_ID,Country_Code,Country,Year,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Grant,Loan,Vague,Commercial,...,e_democ,e_p_polity,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
0,1,ARE,United Arab Emirates,2005,,4.481497,0.0,0.0,0.0,0.0,...,0.0,-8.0,44675.582,502.176,88.963993,,0.0,0.0,0.0,0.0
1,2,AFG,Afghanistan,2005,206.356007,,1.0,0.0,0.0,0.0,...,-66.0,-66.0,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
2,3,ATG,Antigua and Barbuda,2005,92.614383,,1.0,1.0,0.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0
3,4,ALB,Albania,2005,58.199553,58.203359,1.0,0.0,0.0,0.0,...,9.0,9.0,2140.925,321.135,6.666745,,0.0,0.0,0.0,0.0
4,5,ARM,Armenia,2005,20.467167,,0.0,0.0,0.0,0.0,...,5.0,5.0,2085.719,315.380,6.613352,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3259,188,WSM,Samoa,2021,46.300071,,1.0,0.0,0.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0
3260,189,YEM,Yemen,2021,85.083023,85.083023,0.0,0.0,0.0,0.0,...,,,,,,16.0,0.0,0.0,0.0,0.0
3261,190,ZAF,South Africa,2021,68.978427,,3.0,1.0,0.0,1.0,...,,,,,,44.0,0.0,0.0,0.0,0.0
3262,191,ZMB,Zambia,2021,110.789739,,4.0,1.0,0.0,0.0,...,,,,,,33.0,0.0,0.0,0.0,0.0


In [540]:
loan_debt_data.drop(["Country_ID", "Country"], axis = 1, inplace = True)

In [541]:
loan_debt_data  

Unnamed: 0,Country_Code,Year,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Grant,Loan,Vague,Commercial,Development,Mixed,...,e_democ,e_p_polity,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
0,ARE,2005,,4.481497,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-8.0,44675.582,502.176,88.963993,,0.0,0.0,0.0,0.0
1,AFG,2005,206.356007,,1.0,0.0,0.0,0.0,1.0,0.0,...,-66.0,-66.0,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
2,ATG,2005,92.614383,,1.0,1.0,0.0,0.0,2.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0
3,ALB,2005,58.199553,58.203359,1.0,0.0,0.0,0.0,1.0,0.0,...,9.0,9.0,2140.925,321.135,6.666745,,0.0,0.0,0.0,0.0
4,ARM,2005,20.467167,,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,5.0,2085.719,315.380,6.613352,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3259,WSM,2021,46.300071,,1.0,0.0,0.0,0.0,1.0,0.0,...,,,,,,,0.0,0.0,0.0,0.0
3260,YEM,2021,85.083023,85.083023,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,16.0,0.0,0.0,0.0,0.0
3261,ZAF,2021,68.978427,,3.0,1.0,0.0,1.0,3.0,0.0,...,,,,,,44.0,0.0,0.0,0.0,0.0
3262,ZMB,2021,110.789739,,4.0,1.0,0.0,0.0,5.0,0.0,...,,,,,,33.0,0.0,0.0,0.0,0.0


In [542]:
#dropping old loan information
drop_columns  = loan_debt_data.columns[4:131]
loan_debt_data.drop(drop_columns, axis = 1, inplace = True)


In [543]:
#setting indices to both dfs to country code and year
bri_data.set_index(["Country_Code", "Year"], inplace = True)
loan_debt_data.set_index(["Country_Code", "Year"], inplace = True)

In [544]:
#sorting the dataframes
loan_debt_data.sort_values(by = ["Country_Code", "Year"], inplace = True)

In [545]:
#combining the two dataframes
bri_data = pd.concat([bri_data, loan_debt_data], axis = 1)

In [546]:
#moving debt columns to begining
debt_columns = ["Central Govt Debt (Percentage of GDP)", "General Govt Debt (Percentage of GDP)"]
remaining_columns = [col for col in bri_data.columns if col not in debt_columns]
loan_debt_data = bri_data[debt_columns + remaining_columns]

In [547]:
loan_debt_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,Flow Type: Debt forgiveness,Flow Type: Debt rescheduling,Flow Type: Export Buyer's Credit,Flow Type: Free-standing technical assistance,...,Mean: Amount (Constant USD2017),Mean: Interest Rate,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
Country_Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AFG,2000,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.113801e+06,,,,,,,,,
AFG,2001,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.636852e+05,,,,,,,,,
AFG,2004,,,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.015600e+07,,,,,,,,,
AFG,2005,206.356007,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
AFG,2006,22.984644,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.708864e+07,,3182.829,2820.303,1.128542,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZAF,2021,68.978427,,,,,,,,,,...,,,,,,44.0,0.0,0.0,0.0,0.0
ZMB,2020,140.210618,,,,,,,,,,...,,,,,,33.0,5.0,1.0,2.0,8.0
ZMB,2021,110.789739,,,,,,,,,,...,,,,,,33.0,0.0,0.0,0.0,0.0
ZWE,2020,84.447716,,,,,,,,,,...,,,,,,24.0,35.0,17.0,0.0,0.0


In [548]:
bri_data = loan_debt_data

In [550]:
bri_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,Flow Type: Debt forgiveness,Flow Type: Debt rescheduling,Flow Type: Export Buyer's Credit,Flow Type: Free-standing technical assistance,...,Mean: Amount (Constant USD2017),Mean: Interest Rate,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
Country_Code,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AFG,2000,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.113801e+06,,,,,,,,,
AFG,2001,,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.636852e+05,,,,,,,,,
AFG,2004,,,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.015600e+07,,,,,,,,,
AFG,2005,206.356007,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
AFG,2006,22.984644,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.708864e+07,,3182.829,2820.303,1.128542,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZAF,2021,68.978427,,,,,,,,,,...,,,,,,44.0,0.0,0.0,0.0,0.0
ZMB,2020,140.210618,,,,,,,,,,...,,,,,,33.0,5.0,1.0,2.0,8.0
ZMB,2021,110.789739,,,,,,,,,,...,,,,,,33.0,0.0,0.0,0.0,0.0
ZWE,2020,84.447716,,,,,,,,,,...,,,,,,24.0,35.0,17.0,0.0,0.0


In [551]:
bri_data.reset_index(inplace = True)

  bri_data.reset_index(inplace = True)
  bri_data.reset_index(inplace = True)


In [552]:
bri_data

Unnamed: 0,Country_Code,Year,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,Flow Type: Debt forgiveness,Flow Type: Debt rescheduling,...,Mean: Amount (Constant USD2017),Mean: Interest Rate,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
0,AFG,2000,,,0.0,1.0,0.0,0.0,0.0,0.0,...,1.113801e+06,,,,,,,,,
1,AFG,2001,,,0.0,1.0,0.0,0.0,0.0,0.0,...,2.636852e+05,,,,,,,,,
2,AFG,2004,,,0.0,3.0,0.0,0.0,1.0,0.0,...,2.015600e+07,,,,,,,,,
3,AFG,2005,206.356007,,0.0,1.0,0.0,0.0,0.0,0.0,...,,,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
4,AFG,2006,22.984644,,0.0,1.0,0.0,0.0,0.0,0.0,...,4.708864e+07,,3182.829,2820.303,1.128542,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3538,ZAF,2021,68.978427,,,,,,,,...,,,,,,44.0,0.0,0.0,0.0,0.0
3539,ZMB,2020,140.210618,,,,,,,,...,,,,,,33.0,5.0,1.0,2.0,8.0
3540,ZMB,2021,110.789739,,,,,,,,...,,,,,,33.0,0.0,0.0,0.0,0.0
3541,ZWE,2020,84.447716,,,,,,,,...,,,,,,24.0,35.0,17.0,0.0,0.0


In [562]:
#creating country_id column
id_dict = {}

codes = bri_data["Country_Code"].unique()

for i in range(len(codes)):
    id_dict[codes[i]] = i

ids = bri_data["Country_Code"].map(id_dict)
   
bri_data.insert(0, "Country_ID", ids)



  bri_data.insert(0, "Country_ID", ids)


In [564]:
#creating country name column

bri_data.insert(1, "Country", bri_data["Country_Code"].replace(alpha_3_to_alpha_2).replace(alpha_2_to_name))

  bri_data.insert(1, "Country", bri_data["Country_Code"].replace(alpha_3_to_alpha_2).replace(alpha_2_to_name))


In [565]:
bri_data

Unnamed: 0,Country_ID,Country,Country_Code,Year,Central Govt Debt (Percentage of GDP),General Govt Debt (Percentage of GDP),Intent: Commercial,Intent: Development,Intent: Mixed,Intent: Representational,...,Mean: Amount (Constant USD2017),Mean: Interest Rate,e_gdp,e_pop,gdp_per_cap,CPI,Peaceful Count,Intervention Count,Excessive Count,Total Count
0,0,Afghanistan,AFG,2000,,,0.0,1.0,0.0,0.0,...,1.113801e+06,,,,,,,,,
1,0,Afghanistan,AFG,2001,,,0.0,1.0,0.0,0.0,...,2.636852e+05,,,,,,,,,
2,0,Afghanistan,AFG,2004,,,0.0,3.0,0.0,0.0,...,2.015600e+07,,,,,,,,,
3,0,Afghanistan,AFG,2005,206.356007,,0.0,1.0,0.0,0.0,...,,,2912.684,2746.122,1.060654,,0.0,0.0,0.0,0.0
4,0,Afghanistan,AFG,2006,22.984644,,0.0,1.0,0.0,0.0,...,4.708864e+07,,3182.829,2820.303,1.128542,,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3538,133,South Africa,ZAF,2021,68.978427,,,,,,...,,,,,,44.0,0.0,0.0,0.0,0.0
3539,134,Zambia,ZMB,2020,140.210618,,,,,,...,,,,,,33.0,5.0,1.0,2.0,8.0
3540,134,Zambia,ZMB,2021,110.789739,,,,,,...,,,,,,33.0,0.0,0.0,0.0,0.0
3541,135,Zimbabwe,ZWE,2020,84.447716,,,,,,...,,,,,,24.0,35.0,17.0,0.0,0.0


In [566]:
#saving the data
bri_data.to_csv("bri_data.csv", index = False)