In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import datetime


from tqdm import tqdm
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split, learning_curve, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
from sklearn.metrics import confusion_matrix as cf_matrix

In [13]:
# df = pd.read_json('full_reg_df.json')

In [14]:
# df.to_csv('full_reg_df_georg.csv')

In [2]:
corp_list = ['aut', 'czc', 'den', 'deu', 'eng', 'esp', 'nld', 'nzl', 'swe']
topic_list = ['soc', 'rel', 'eth', 'urb', 'pos', 'for']

In [16]:
reg_df = pd.DataFrame()

for corp in tqdm(corp_list):
    temp_df = pd.read_json(corp + '_reg_df.json')
    temp_df['corp'] = corp
    temp_df['year_tot_dist'] = None
    temp_df['year_left_dist'] = None
    
    temp_party_df = pd.read_json(corp + '_party_df.json')
    temp_party_df['year'] = pd.DatetimeIndex(temp_party_df['date']).year
    temp_party_df = temp_party_df.loc[temp_party_df['speaker'].notna()]
    
    for year in temp_party_df['year'].unique():
        temp_df.loc[temp_df['Year'] == year, 'year_tot_dist'] = len(temp_party_df.loc[temp_party_df['year'] == year])
        temp_df.loc[temp_df['Year'] == year, 'year_nonleft_dist'] = len(temp_party_df.loc[(temp_party_df['year'] == year) & (temp_party_df['left'] == 1)])
        temp_df.loc[temp_df['Year'] == year, 'year_left_dist'] = len(temp_party_df.loc[(temp_party_df['year'] == year) & (temp_party_df['left'] == 1)])
    
    for topic in topic_list:
        temp_df = temp_df.rename(columns = {topic + ' Polarisation': topic + '_polarisation'})
        temp_df = temp_df.rename(columns = {topic + ' Total Distribution': topic + '_tot_dist'})
        temp_df = temp_df.rename(columns = {topic + ' Left Distribution': topic + '_left_dist'})         
    
    reg_df = reg_df.append(temp_df)
    
reg_df.reset_index().to_json('full_reg_df.json')

  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
  reg_df = reg_df.append(temp_df)
100%|█████████████████████████████████████████████| 9/9 [06:03<00:00, 40.35s/it]


In [3]:
df = pd.read_json('full_reg_df.json')

In [8]:
df

Unnamed: 0,index,Year,Inequality,soc_polarisation,rel_polarisation,eth_polarisation,urb_polarisation,pos_polarisation,for_polarisation,soc_tot_dist,...,pos_tot_dist,pos_left_dist,for_tot_dist,for_left_dist,corp,year_tot_dist,year_left_dist,year_nonleft_dist,p0p10_abs,p0p10
0,16,1996,0.3392,0.780303,0.655172,0.545455,0.766667,0.608696,0.680000,1227,...,135,52,124,58,aut,3838,1505,1505,,0.37
1,17,1997,0.3436,0.745763,0.600000,0.555556,0.689655,0.578947,0.714286,945,...,98,42,153,63,aut,4287,1624,1624,,0.37
2,18,1998,0.3488,0.763636,0.727273,0.750000,0.791667,0.705882,0.538462,827,...,80,34,171,60,aut,3952,1564,1564,,0.37
3,19,1999,0.3356,0.750000,0.586207,0.375000,0.592593,0.533333,0.421053,587,...,65,28,92,41,aut,3006,1172,1172,,0.39
4,20,2000,0.3491,0.900826,0.805556,0.900000,0.900000,0.923077,0.821429,1057,...,147,62,150,84,aut,4661,2270,2270,,0.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,34,2014,0.3026,0.844720,0.702128,0.625000,0.700000,0.808989,0.750000,1089,...,682,356,133,51,swe,12498,5712,5712,,0.42
231,35,2015,0.3109,0.834586,0.687500,0.500000,0.714286,0.884615,0.625000,1229,...,556,331,163,86,swe,13756,7253,7253,,0.41
232,36,2016,0.2933,0.851852,0.822222,0.790698,0.875000,0.835616,0.766667,1090,...,525,331,181,90,swe,13432,7256,7256,,0.42
233,37,2017,0.3027,0.833333,0.755102,0.738095,0.666667,0.768116,0.720000,1201,...,454,259,149,82,swe,12516,6760,6760,,0.42


In [9]:
for topic in topic_list:
    df[topic + '_tot_dist_prop'] = df[topic + '_tot_dist'] / df['year_tot_dist'] * 100
    df[topic + '_left_dist_prop'] = df[topic + '_left_dist'] / df['year_left_dist'] * 100
    df[topic + '_nonleft_dist_prop'] = (df[topic + '_tot_dist'] - df[topic + '_left_dist']) / (df['year_tot_dist'] - df['year_left_dist']) * 100
    
    df[topic + '_tot_dist_prop'] = df[topic + '_tot_dist_prop'].astype(float)
    df[topic + '_left_dist_prop'] = df[topic + '_left_dist_prop'].astype(float)
    df[topic + '_nonleft_dist_prop'] = df[topic + '_nonleft_dist_prop'].astype(float)

In [10]:
for topic in topic_list:
    df[topic + '_polarisation'] = df[topic + '_polarisation'] * 100

In [11]:
df['Inequality'] = df['Inequality'] * 100

In [12]:
wid_df = pd.read_csv(r"/maps/hdir/gsw508/Thesis//Data/WID_Data_28042023-122557.csv", sep = ';', header = 1)

p0p10_abs_df = pd.DataFrame()

for corp in corp_list:
    if corp == 'aut': 
        temp_df = wid_df[['Year','aptinc_z_AT\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nAustria']]
        temp_df = temp_df.rename(columns = {'aptinc_z_AT\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nAustria': 'p0p10_abs'})

    elif corp == 'czc': 
        temp_df = wid_df[['Year','aptinc_z_CZ\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nCzech Republic']]
        temp_df = temp_df.rename(columns = {'aptinc_z_CZ\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nCzech Republic': 'p0p10_abs'})

    elif corp == 'den': 
        temp_df = wid_df[['Year','aptinc_z_DK\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nDenmark']]
        temp_df = temp_df.rename(columns = {'aptinc_z_DK\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nDenmark': 'p0p10_abs'})

    elif corp == 'deu': 
        temp_df = wid_df[['Year','aptinc_z_DE\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nGermany']]
        temp_df = temp_df.rename(columns = {'aptinc_z_DE\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nGermany': 'p0p10_abs'})

    elif corp == 'eng': 
        temp_df = wid_df[['Year','aptinc_z_GB\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nUnited Kingdom']]
        temp_df = temp_df.rename(columns = {'aptinc_z_GB\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nUnited Kingdom': 'p0p10_abs'})

    elif corp == 'esp': 
        temp_df = wid_df[['Year','aptinc_z_ES\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nSpain']]
        temp_df = temp_df.rename(columns = {'aptinc_z_ES\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nSpain': 'p0p10_abs'})

    elif corp == 'ndl': 
        temp_df = wid_df[['Year','aptinc_z_NL\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nNetherlands']]
        temp_df = temp_df.rename(columns = {'aptinc_z_NL\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nNetherlands': 'p0p10_abs'})

    elif corp == 'nzl':
        temp_df = wid_df[['Year','aptinc_z_NZ\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nNew Zealand']]
        temp_df = temp_df.rename(columns = {'aptinc_z_NZ\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nNew Zealand': 'p0p10_abs'})

    elif corp == 'swe': 
        temp_df = wid_df[['Year','aptinc_z_SE\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nSweden']]
        temp_df = temp_df.rename(columns = {'aptinc_z_SE\nPre-tax national income \nP0-10 | average income or wealth | adults | equal split | Euro € | ppp | constant (2021)\nSweden': 'p0p10_abs'})
    
    temp_df['corp'] = corp
    p0p10_abs_df = p0p10_abs_df.append(temp_df)
    

  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)
  p0p10_abs_df = p0p10_abs_df.append(temp_df)


In [13]:
wid_40_df = pd.read_csv(r"/maps/hdir/gsw508/Thesis//Data/WID_Data_01052023-123555.csv", sep = ';', header = 1)

mid40_df = pd.DataFrame()

for corp in corp_list:
    if corp == 'aut': 
        temp_df = wid_40_df[['Year','sptinc_z_AT\nPre-tax national income \nMiddle 40% | share | adults | equal split\nAustria']]
        temp_df = temp_df.rename(columns = {'sptinc_z_AT\nPre-tax national income \nMiddle 40% | share | adults | equal split\nAustria': 'mid40'})

    elif corp == 'czc': 
        temp_df = wid_40_df[['Year','sptinc_z_CZ\nPre-tax national income \nMiddle 40% | share | adults | equal split\nCzech Republic']]
        temp_df = temp_df.rename(columns = {'sptinc_z_CZ\nPre-tax national income \nMiddle 40% | share | adults | equal split\nCzech Republic': 'mid40'})

    elif corp == 'den': 
        temp_df = wid_40_df[['Year','sptinc_z_DK\nPre-tax national income \nMiddle 40% | share | adults | equal split\nDenmark']]
        temp_df = temp_df.rename(columns = {'sptinc_z_DK\nPre-tax national income \nMiddle 40% | share | adults | equal split\nDenmark': 'mid40'})

    elif corp == 'deu': 
        temp_df = wid_40_df[['Year','sptinc_z_DE\nPre-tax national income \nMiddle 40% | share | adults | equal split\nGermany']]
        temp_df = temp_df.rename(columns = {'sptinc_z_DE\nPre-tax national income \nMiddle 40% | share | adults | equal split\nGermany': 'mid40'})

    elif corp == 'eng': 
        temp_df = wid_40_df[['Year','sptinc_z_GB\nPre-tax national income \nMiddle 40% | share | adults | equal split\nUnited Kingdom']]
        temp_df = temp_df.rename(columns = {'sptinc_z_GB\nPre-tax national income \nMiddle 40% | share | adults | equal split\nUnited Kingdom': 'mid40'})

    elif corp == 'esp': 
        temp_df = wid_40_df[['Year','sptinc_z_ES\nPre-tax national income \nMiddle 40% | share | adults | equal split\nSpain']]
        temp_df = temp_df.rename(columns = {'sptinc_z_ES\nPre-tax national income \nMiddle 40% | share | adults | equal split\nSpain': 'mid40'})

    elif corp == 'ndl': 
        temp_df = wid_40_df[['Year','sptinc_z_NL\nPre-tax national income \nMiddle 40% | share | adults | equal split\nNetherlands']]
        temp_df = temp_df.rename(columns = {'sptinc_z_NL\nPre-tax national income \nMiddle 40% | share | adults | equal split\nNetherlands': 'mid40'})

    elif corp == 'nzl':
        temp_df = wid_40_df[['Year','sptinc_z_NZ\nPre-tax national income \nMiddle 40% | share | adults | equal split\nNew Zealand']]
        temp_df = temp_df.rename(columns = {'sptinc_z_NZ\nPre-tax national income \nMiddle 40% | share | adults | equal split\nNew Zealand': 'mid40'})

    elif corp == 'swe': 
        temp_df = wid_40_df[['Year','sptinc_z_SE\nPre-tax national income \nMiddle 40% | share | adults | equal split\nSweden']]
        temp_df = temp_df.rename(columns = {'sptinc_z_SE\nPre-tax national income \nMiddle 40% | share | adults | equal split\nSweden': 'mid40'})
    
    temp_df['corp'] = corp
#     temp_df['bottom_10_income_growth'] = (temp_df['bottom_50_income'] - temp_df['bottom_10_income'].shift(1))/temp_df['bottom_10_income'].shift(1)*100
    mid40_df = mid40_df.append(temp_df)


  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)
  mid40_df = mid40_df.append(temp_df)


In [14]:
wid_10_df = pd.read_csv(r"/maps/hdir/gsw508/Thesis//Data/WID_Data_01052023-130130.csv", sep = ';', header = 1)

p0p10_df = pd.DataFrame()

for corp in corp_list:
    if corp == 'aut': 
        temp_df = wid_10_df[['Year','sptinc_992_j_AT\nPre-tax national income \nP0-10 | share | adults | equal split\nAustria']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_AT\nPre-tax national income \nP0-10 | share | adults | equal split\nAustria': 'p0p10'})

    elif corp == 'czc': 
        temp_df = wid_10_df[['Year','sptinc_992_j_CZ\nPre-tax national income \nP0-10 | share | adults | equal split\nCzech Republic']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_CZ\nPre-tax national income \nP0-10 | share | adults | equal split\nCzech Republic': 'p0p10'})

    elif corp == 'den': 
        temp_df = wid_10_df[['Year','sptinc_992_j_DK\nPre-tax national income \nP0-10 | share | adults | equal split\nDenmark']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_DK\nPre-tax national income \nP0-10 | share | adults | equal split\nDenmark': 'p0p10'})

    elif corp == 'deu': 
        temp_df = wid_10_df[['Year','sptinc_992_j_DE\nPre-tax national income \nP0-10 | share | adults | equal split\nGermany']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_DE\nPre-tax national income \nP0-10 | share | adults | equal split\nGermany': 'p0p10'})

    elif corp == 'eng': 
        temp_df = wid_10_df[['Year','sptinc_992_j_GB\nPre-tax national income \nP0-10 | share | adults | equal split\nUnited Kingdom']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_GB\nPre-tax national income \nP0-10 | share | adults | equal split\nUnited Kingdom': 'p0p10'})

    elif corp == 'esp': 
        temp_df = wid_10_df[['Year','sptinc_992_j_ES\nPre-tax national income \nP0-10 | share | adults | equal split\nSpain']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_ES\nPre-tax national income \nP0-10 | share | adults | equal split\nSpain': 'p0p10'})

    elif corp == 'ndl': 
        temp_df = wid_10_df[['Year','sptinc_992_j_NL\nPre-tax national income \nP0-10 | share | adults | equal split\nNetherlands']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_NL\nPre-tax national income \nP0-10 | share | adults | equal split\nNetherlands': 'p0p10'})

    elif corp == 'nzl':
        temp_df = wid_10_df[['Year','sptinc_992_j_NZ\nPre-tax national income \nP0-10 | share | adults | equal split\nNew Zealand']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_NZ\nPre-tax national income \nP0-10 | share | adults | equal split\nNew Zealand': 'p0p10'})

    elif corp == 'swe': 
        temp_df = wid_10_df[['Year','sptinc_992_j_SE\nPre-tax national income \nP0-10 | share | adults | equal split\nSweden']]
        temp_df = temp_df.rename(columns = {'sptinc_992_j_SE\nPre-tax national income \nP0-10 | share | adults | equal split\nSweden': 'p0p10'})
    
    temp_df['corp'] = corp
#     temp_df['bottom_10_income_growth'] = (temp_df['bottom_50_income'] - temp_df['bottom_10_income'].shift(1))/temp_df['bottom_10_income'].shift(1)*100
    p0p10_df = p0p10_df.append(temp_df)


  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)
  p0p10_df = p0p10_df.append(temp_df)


In [15]:
df['p0p10_abs'] = None
for corp in corp_list:
    for year in df['Year'].unique():
        df.loc[(df['Year'] == year) & (df['corp'] == corp), 'p0p10_abs'] = float(p0p10_abs_df.loc[(p0p10_abs_df['Year'] == year) & (p0p10_abs_df['corp'] == corp), 'p0p10_abs'].values)

In [16]:
df['p0p10'] = None
for corp in corp_list:
    for year in df['Year'].unique():
        df.loc[(df['Year'] == year) & (df['corp'] == corp), 'p0p10'] = float(p0p10_df.loc[(p0p10_df['Year'] == year) & (p0p10_df['corp'] == corp), 'p0p10'].values*100)

In [17]:
df['mid40'] = None
for corp in corp_list:
    for year in df['Year'].unique():
        df.loc[(df['Year'] == year) & (df['corp'] == corp), 'mid40'] = float(mid40_df.loc[(mid40_df['Year'] == year) & (mid40_df['corp'] == corp), 'mid40'].values[0]*100)

In [18]:
df.columns

Index(['index', 'Year', 'Inequality', 'soc_polarisation', 'rel_polarisation',
       'eth_polarisation', 'urb_polarisation', 'pos_polarisation',
       'for_polarisation', 'soc_tot_dist', 'soc_left_dist', 'rel_tot_dist',
       'rel_left_dist', 'eth_tot_dist', 'eth_left_dist', 'urb_tot_dist',
       'urb_left_dist', 'pos_tot_dist', 'pos_left_dist', 'for_tot_dist',
       'for_left_dist', 'corp', 'year_tot_dist', 'year_left_dist',
       'year_nonleft_dist', 'p0p10_abs', 'p0p10', 'soc_tot_dist_prop',
       'soc_left_dist_prop', 'soc_nonleft_dist_prop', 'rel_tot_dist_prop',
       'rel_left_dist_prop', 'rel_nonleft_dist_prop', 'eth_tot_dist_prop',
       'eth_left_dist_prop', 'eth_nonleft_dist_prop', 'urb_tot_dist_prop',
       'urb_left_dist_prop', 'urb_nonleft_dist_prop', 'pos_tot_dist_prop',
       'pos_left_dist_prop', 'pos_nonleft_dist_prop', 'for_tot_dist_prop',
       'for_left_dist_prop', 'for_nonleft_dist_prop', 'mid40'],
      dtype='object')

In [19]:
left_gov_dict = {'aut': [1996, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016],
                 'czc': [1999, 2000, 2001, 2002, 2003, 2004, 2005],
                 'den': [1997, 1998, 1999, 2000, 2001, 2012, 2013, 2014],
                 'deu': [1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
                 'eng': [1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009],
                 'esp': [2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2018],
                 'nld': [],
                 'nzl': [1987, 1988, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 ,2007, 2008, 2018, 2019],
                 'swe': [1991, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2015, 2016, 2017, 2018]}

In [20]:
df['left_gov'] = 0

for corp in corp_list:
    df.loc[(df['corp'] == corp) & (df['Year'].isin(left_gov_dict[corp])), 'left_gov'] = 1

In [124]:
ue_df = pd.read_csv(r"/maps/hdir/gsw508/Thesis//Data/DP_LIVE_10052023094822208.csv")

df['unemployment'] = 0

for corp in corp_list:
    for year in list(range(1987, 2020)):
        if corp == 'eng':
            temp_value = ue_df.loc[(ue_df['TIME'] == year) & (ue_df['LOCATION'] == 'GBR'), 'Value']
        elif corp == 'czc':
            temp_value = ue_df.loc[(ue_df['TIME'] == year) & (ue_df['LOCATION'] == 'CZE'), 'Value']
        elif corp == 'den':
            temp_value = ue_df.loc[(ue_df['TIME'] == year) & (ue_df['LOCATION'] == 'DNK'), 'Value']
        else:
            temp_value = ue_df.loc[(ue_df['TIME'] == year) & (ue_df['LOCATION'] == corp.upper()), 'Value']
        if len(temp_value != 0):
            df.loc[(df['Year'] == year) & (df['corp'] == corp), 'unemployment'] = float(temp_value)

In [133]:
df.to_json('full_reg_df.json')

In [134]:
final_df = df[['corp', 'Year', 'left_gov', 
               'p0p10_abs', 'unemployment', 'p0p10', 'mid40', 'Inequality',
               'soc_polarisation', 'rel_polarisation', 'eth_polarisation', 
               'urb_polarisation', 'pos_polarisation', 'for_polarisation', 
               'soc_tot_dist_prop', 'rel_tot_dist_prop', 'eth_tot_dist_prop',
               'urb_tot_dist_prop', 'pos_tot_dist_prop', 'for_tot_dist_prop',
               'soc_left_dist_prop', 'rel_left_dist_prop','eth_left_dist_prop', 
               'urb_left_dist_prop', 'pos_left_dist_prop', 'for_left_dist_prop',
               'soc_nonleft_dist_prop', 'rel_nonleft_dist_prop','eth_nonleft_dist_prop', 
               'urb_nonleft_dist_prop', 'pos_nonleft_dist_prop', 'for_nonleft_dist_prop',]]

In [135]:
final_df = final_df.rename(columns = {'Inequality': 'p90p10'})
final_df = final_df.rename(columns = {'Year': 'year'})

In [136]:
final_df['p0p10_abs'] = final_df['p0p10_abs'] / 1000

In [137]:
final_df.to_json('final_df.json')