## Loading the data 

We start by importing the relevant modules and loading the data from the dataset `SambnisImp.csv` into a Pandas dataframe `df`. This dataset contains all the relevant variables for our analysis.

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import more_itertools as mit
import csv

In [3]:
#Load the data into a dataframe
df = pd.read_csv('SambnisImp.csv', header=0, sep=',', error_bad_lines=False)

#display the head of the dataframe
df.head(10)

Unnamed: 0.1,Unnamed: 0,atwards,X,id,cid,cowcode,year,warstds,ptime,yrint,...,decade1,decade2,decade3,decade4,independ,tip,anocracy,proxregc,sxpnew.2,sxpsq.2
0,1,0,1,1.0,1,700,1945,0,12,0,...,0,0,0,0,1,17.0,0,0.1432991,0.094095,0.094095
1,2,0,2,1.0,1,700,1946,0,24,1,...,0,0,0,0,1,18.0,0,1.0,0.094547,0.094547
2,3,0,3,1.0,1,700,1947,0,36,2,...,0,0,0,0,1,19.0,0,1.0,0.095567,0.095567
3,4,0,4,1.0,1,700,1948,0,48,3,...,0,0,0,0,1,20.0,0,1.0,0.101303,0.101303
4,5,0,5,1.0,1,700,1949,0,60,4,...,0,0,0,0,1,21.0,0,1.0,0.092107,0.092107
5,6,0,6,1.0,1,700,1950,0,72,5,...,0,0,0,0,1,22.0,0,1.0,0.090396,0.090396
6,7,0,7,1.0,1,700,1951,0,84,6,...,0,0,0,0,1,23.0,0,7.890000000000001e-31,0.094163,0.094163
7,8,0,8,1.0,1,700,1952,0,96,7,...,0,0,0,0,1,24.0,0,1.9700000000000002e-31,0.09254,0.09254
8,9,0,9,1.0,1,700,1953,0,108,8,...,0,0,0,0,1,25.0,0,4.93e-32,0.090118,0.090118
9,10,0,10,1.0,1,700,1954,0,120,9,...,0,0,0,0,1,26.0,0,1.23e-32,0.088193,0.088193


In [4]:
#Select the years with a civil war
df_withwar = df[df["atwards"]==1]

At this point, we want to create a dataframe which only has certain features. This dataframe ```DF``` only considers countries that had a civil war, and we consider all civil wars experienced by each country. For each civil war, we will include two entries: one before the war and one after. We compute the average GDP growth rate in the five years immediately before the war and immediately after. 

In [5]:
new_data_list = []
onewarcounting = 0
#We do a for loop of all countries that experienced a civil war
for i in df_withwar["cid"].drop_duplicates():
    #find the list of years at which the country was at war
    years = list( df_withwar[df_withwar["cid"] == i]["year"]   )
    #find the different civil wars that occurred within each country
    wars_years = [list(group) for group in mit.consecutive_groups(years)]
    #compute the total number of wars for each country
    num_wars = len(wars_years)
    
    #obtain geographical data from every country
    geo_data = list(np.array(df_withwar[df_withwar["cid"]==i][["geo1","geo2","geo8","geo34","geo57","geo69"]])[0]) 
    country_info = [num_wars] + geo_data
    country_info_cols = ["num_wars", "geo1","geo2","geo8","geo34","geo57","geo69"]
    
    #this is a list of all years for which data are recorded for a country. It is necessary for later steps.
    list_all_country_years =  list(df[(df["cid"]==i)]["year"].drop_duplicates())
    
    #We do a for loop for all the wars that a country experienced
    for n in range(num_wars):
        year1 = wars_years[n][0]
        yearF = wars_years[n][-1]
        lenwar = len(wars_years[n])
        
        
        #Do not consider wars that occurred less than 5 years between each other
        if n > 0:
            if wars_years[n][0] - wars_years[n-1][-1] <5: 
                break
        if num_wars > 1 and n < num_wars-1:
            if wars_years[n+1][0] - wars_years[n][-1] <5:
                break
  
        if (year1 == list_all_country_years[0]) or (yearF == list_all_country_years[-1]):
            break
        
        
        years_before = list(range(year1-5, year1))
        if (set(years_before) <= set (list_all_country_years)) == False:
            years_before = list(range(list_all_country_years[0], year1))
        
        sumgrowth = 0
        sumnmgrowth = 0
        for year_b in years_before:
            sumgrowth += (df[(df["cid"]==i) & (df["year"] == year_b)]["gdpgrowth"]).iloc[0]
            sumnmgrowth += (df[(df["cid"]==i) & (df["year"] == year_b)]["nmdgdp"]).iloc[0]
        avggrowth_before = sumgrowth/5
        nei_gdp_b = sumnmgrowth/5
        
        before_war = [0,0, avggrowth_before, nei_gdp_b]
        
        years_after = list(range(yearF+1, yearF+6,))
        if (set(years_after) <= set (list_all_country_years)) == False:
            years_after = list(range(yearF+1, list_all_country_years[-1]+1))
        
        sumgrowth = 0
        sumnmgrowth = 0
        for year_a in years_after:
            sumgrowth += (df[(df["cid"]==i) & (df["year"] == year_a)]["gdpgrowth"]).iloc[0]
            sumnmgrowth += (df[(df["cid"]==i) & (df["year"] == year_b)]["nmdgdp"]).iloc[0]
        avggrowth_after = sumgrowth/5
        neig_gdp_a = sumnmgrowth/5
        
        after_war= [1,lenwar, avggrowth_after,neig_gdp_a]

        
        war_cols = ["cid", "year1", "post_war", "war_length", "avg_growth","avg_neigh_gdp"]
        to_append = ([i, year1])
        new_data_list.append(to_append + before_war + country_info)
        new_data_list.append(to_append + after_war + country_info)
        
DF = pd.DataFrame(new_data_list, columns =  war_cols + country_info_cols)
DF

Unnamed: 0,cid,year1,post_war,war_length,avg_growth,avg_neigh_gdp,num_wars,geo1,geo2,geo8,geo34,geo57,geo69
0,5,1955,0,0,-0.003514,0.602715,1,0.0,0.0,0.0,0.0,0.0,1.0
1,5,1955,1,1,-0.003136,0.622822,1,0.0,0.0,0.0,0.0,0.0,1.0
2,12,1973,0,0,0.009120,-0.018270,1,0.0,0.0,0.0,0.0,1.0,0.0
3,12,1973,1,22,0.031957,-0.555341,1,0.0,0.0,0.0,0.0,1.0,0.0
4,19,1952,0,0,0.037212,0.535453,1,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,169,1998,1,2,0.004695,0.248099,2,0.0,1.0,0.0,0.0,0.0,0.0
112,172,1972,0,0,0.072376,0.324902,2,0.0,0.0,1.0,0.0,0.0,0.0
113,172,1972,1,8,0.020115,0.401953,2,0.0,0.0,1.0,0.0,0.0,0.0
114,172,1984,0,0,0.031335,0.442547,2,0.0,0.0,1.0,0.0,0.0,0.0


## Econometric Analysis

In [7]:
#Import the relevant modules
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import math
import statsmodels.api as sm 

### Part I: Effect of the civil war on growth

At this point, we want to see the impact that the civil war had on the GDP growth of countries that experienced a civil war. In order to do so, we carry a regression of the gdp growth based on whether or not it has already experienced a civil war, as well as geographical factor, the length of the war, the number of civil wars that it experienced and gdp of neighboring countries, which could all be confounding factors.
The estimators that we use for this linear regression are GLS estimators, which account for correlation of the features (which are quite possibly correlated between each other)

In [9]:
#defining the dependent and independent variables
country_info_cols = ["num_wars","geo2","geo8","geo34","geo57","geo69"]

regressand = DF["avg_growth"]
regressor = sm.add_constant(DF[["post_war","war_length", "avg_neigh_gdp"]+country_info_cols])

# fitting the logistic regression model
reg = sm.GLS(regressand, regressor).fit() 

#showing the summary of the regression
print(reg.summary()) 


                            GLS Regression Results                            
Dep. Variable:             avg_growth   R-squared:                       0.099
Model:                            GLS   Adj. R-squared:                  0.023
Method:                 Least Squares   F-statistic:                     1.296
Date:                Thu, 17 Dec 2020   Prob (F-statistic):              0.248
Time:                        23:12:04   Log-Likelihood:                 229.93
No. Observations:                 116   AIC:                            -439.9
Df Residuals:                     106   BIC:                            -412.3
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0149      0.027      0.544

As a result of this regression, we can coefficient associated to whether the civil war has occurred or not is 0.0162. The p-value associated to this coefficient is 0.047, meaning that there is a 4.7% chance that these data occurred randomly (i.e. the effect of the variable `post_war` is zero). This is below the critical value of $\alpha = 0.05$, and therefore we accept the result, and we consider the effect of the civil war as statistically significative. This means that the gdp growth after a civil war tends to be higher than before the civil war.
This result can be explained by the fact that during the civil war, a great destruction and reduction in GDP is generally experienced by country. Hence, the post-war usually sees a reconstruction effort which implies a high GDP growth rate, as the country has to reach back its original level of income. 

## Panel Data Methods to understand 

In [10]:
from linearmodels.panel import PanelOLS, RandomEffects, compare


In order to understand the potential significance of other variables in our models on gdp growth, we consider panel data models to see whether they do have an effect on growth.
We begin with a linear model with Fixed Effects estimation.

In [14]:
DF_panel = DF.set_index(["cid","post_war"])

country_info_cols = ["num_wars","geo2","geo8","geo34","geo57","geo69"]
regressand = DF_panel["avg_growth"]
regressor = sm.add_constant(DF_panel[["war_length", "avg_neigh_gdp"]+country_info_cols])


In [15]:

FE_Model = PanelOLS(regressand, regressor)
FE_res = FE_Model.fit()
print(FE_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:             avg_growth   R-squared:                        0.0648
Estimator:                   PanelOLS   R-squared (Between):              0.1326
No. Observations:                 116   R-squared (Within):              -0.0267
Date:                Thu, Dec 17 2020   R-squared (Overall):              0.0648
Time:                        23:31:50   Log-likelihood                    227.76
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      0.9273
Entities:                          49   P-value                           0.4971
Avg Obs:                       2.3673   Distribution:                   F(8,107)
Min Obs:                       2.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             0.9273
                            

From these results, we can observe that non of the variables is statistically significant. Hence, in the previous part, the growth in GDP is mainly accountable to the

We now carry another regression similar to the previous one, but this time using Random Effect estimators. Random Effects estimator is more efficient 

In [16]:
RE_Model = RandomEffects(regressand, regressor)
RE_res = RE_Model.fit()
print(RE_res)

                        RandomEffects Estimation Summary                        
Dep. Variable:             avg_growth   R-squared:                        0.0443
Estimator:              RandomEffects   R-squared (Between):              0.1215
No. Observations:                 116   R-squared (Within):              -0.0141
Date:                Thu, Dec 17 2020   R-squared (Overall):              0.0634
Time:                        23:31:54   Log-likelihood                    235.45
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      0.6197
Entities:                          49   P-value                           0.7596
Avg Obs:                       2.3673   Distribution:                   F(8,107)
Min Obs:                       2.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             0.7003
                            

The results that we obtained with Random Effects are similar to those with Fixed Effects. Hence, we can say that the relevant variables do not have an effect on growth, regardless of wheter civil war has occurred or not.