In [3]:
from warnings import filterwarnings

filterwarnings('ignore')

In [4]:
import pandas as pd
import numpy as np
from more_itertools import flatten

In [5]:
pd.set_option('display.max_columns', None)

In [6]:
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [7]:
wrp_df = pd.read_csv('data/WRP_national.csv', encoding = 'utf8')

wrp_df.rename({'judgen': 'total_jewish', 'chrstprot': 'total_protestant',
        'chrstcat': 'total_catholic', 'chrstgen': 'total_christian',
        'pop': 'total_population', 'nonrelig': 'total_non_religious',
        'budgen': 'total_buddhist', 'hindgen': 'total_hindu',
        'islmgen': 'total_islam', 'islmsun': 'total_sunni',
        'islmshi': 'total_shia', 'name': 'state_name_1',
        'state': 'c_code_1', 'shntgen': 'total_shinto',
        'sikhgen': 'total_sikh'}, axis = 1, inplace = True)

wrp_df.drop(['jdcons', 'judconspct', 'judgenpct', 'judorth',
      'judorthpct', 'judothr', 'judothrpct', 'judref',
      'judrefpct', 'chrstang', 'chrstangpct',
       'chrstcatpct', 'chrstgenpct', 'chrstorth',
       'chrstorthpct', 'chrstothr', 'chrstothrpct',
       'chrstprotpct', 'confgen', 'confgenpct',
       'Version', 'zorogen', 'zorogenpct', 'anmgen',
       'anmgenpct', 'bahgen', 'bahgenpct', 'budgenpct',
       'budmah', 'budmahpct', 'budothr', 'budothrpct',
       'budthr', 'budthrpct', 'datatype', 'dualrelig',
       'hindgenpct', 'islmahm', 'islmahmpct', 'islmalw',
       'islmalwpct', 'islmgenpct', 'islmibd', 'islmibdpct',
       'islmnat', 'islmnatpct', 'islmothr', 'islmothrpct',
       'islmshipct', 'islmsunpct', 'jaingen', 'jaingenpct',
       'nonreligpct', 'othrgen', 'othrgenpct', 'recreliab',
       'reliabilevel', 'shntgenpct', 'sikhgenpct',
       'sourcecode', 'sourcereliab', 'sumrelig', 'sumreligpct',
       'syncgen', 'syncgenpct', 'taogen', 'taogenpct',
       'total', 'total_population'], axis = 1, inplace = True)

In [8]:
wrp_df.to_pickle('pickle/wrp_df.pkl')
wrp_df.to_csv('dataframe_exports/wrp_df.csv')

In [9]:
wrp_df.head()

Unnamed: 0,year,c_code_1,state_name_1,total_protestant,total_catholic,total_christian,total_jewish,total_sunni,total_shia,total_islam,total_buddhist,total_hindu,total_sikh,total_shinto,total_non_religious
0,1945,2,USA,66069671,38716742,110265118,4641182,0,0,0,1601218,0,0,0,22874544
1,1950,2,USA,73090083,42635882,122994019,6090837,0,0,0,0,0,0,0,22568130
2,1955,2,USA,79294628,46402368,134001770,5333332,0,0,0,90173,0,0,0,23303540
3,1960,2,USA,90692928,50587880,150234347,5500000,0,0,0,2012131,0,0,0,21548225
4,1965,2,USA,94165803,64761783,167515758,5600000,0,0,0,1080892,0,0,0,19852362


In [10]:
c_code_df = pd.read_csv('data/COW country codes.csv', encoding = 'utf8')

c_code_df.rename({'CCode': 'c_code', 'StateNme': 'state_name'}, axis = 1, inplace = True)

c_code_df.drop(['StateAbb'], axis = 1, inplace = True)

In [11]:
c_code_df.to_pickle('pickle/c_code_df.pkl')

In [12]:
c_code_dic = {}

for i, code in enumerate(c_code_df['c_code']):
    c_code_dic[code] = c_code_df['state_name'][i]

In [13]:
row_list = []
years = list(np.linspace(1945, 2010, 14))
for c_code in wrp_df['c_code_1'].unique():
    for year in years:
        try:
            assert len(list(flatten(wrp_df.loc[(wrp_df['year'] == year) & (wrp_df['c_code_1'] == c_code)].values))) > 0
            row_list.append(list(flatten(wrp_df.loc[(wrp_df['year'] == year) & (wrp_df['c_code_1'] == c_code)].values)))
        except:
            row_list.append([year, c_code, c_code_dic[c_code], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [14]:
wrp_df = pd.DataFrame(row_list)

wrp_df.columns = ['year', 'c_code_1', 'state_name_1', 'total_protestant',
         'total_catholic', 'total_christian', 'total_jewish',
         'total_sunni', 'total_shia', 'total_islam',
         'total_buddhist', 'total_hindu', 'total_sikh',
         'total_shinto', 'total_non_religious']

In [15]:
print('total possible 0:\t', len(wrp_df))
print('total_protestant == 0:\t', len(wrp_df[wrp_df['total_protestant'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_protestant'] == 0])/len(wrp_df))).values[0].round(3))
print('total_catholic == 0:\t', len(wrp_df[wrp_df['total_catholic'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_catholic'] == 0])/len(wrp_df))).values[0].round(3))
print('total_christian == 0:\t', len(wrp_df[wrp_df['total_christian'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_christian'] == 0])/len(wrp_df))).values[0].round(3))
print('total_jewish == 0:\t', len(wrp_df[wrp_df['total_jewish'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_jewish'] == 0])/len(wrp_df))).values[0].round(3))
print('total_sunni == 0:\t', len(wrp_df[wrp_df['total_sunni'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_sunni'] == 0])/len(wrp_df))).values[0].round(3))
print('total_shia == 0:\t', len(wrp_df[wrp_df['total_shia'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_shia'] == 0])/len(wrp_df))).values[0].round(3))
print('total_islam == 0:\t', len(wrp_df[wrp_df['total_islam'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_islam'] == 0])/len(wrp_df))).values[0].round(3))
print('total_buddhist == 0:\t', len(wrp_df[wrp_df['total_buddhist'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_buddhist'] == 0])/len(wrp_df))).values[0].round(3))
print('total_hindu == 0:\t', len(wrp_df[wrp_df['total_hindu'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_hindu'] == 0])/len(wrp_df))).values[0].round(3))
print('total_sikh == 0:\t', len(wrp_df[wrp_df['total_sikh'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_sikh'] == 0])/len(wrp_df))).values[0].round(3))
print('total_shinto == 0:\t', len(wrp_df[wrp_df['total_shinto'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_shinto'] == 0])/len(wrp_df))).values[0].round(3))
print('total_non_religious == 0:\t', len(wrp_df[wrp_df['total_non_religious'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_non_religious'] == 0])/len(wrp_df))).values[0].round(3))

total possible 0:	 2800
total_protestant == 0:	 1032 	 0.369
total_catholic == 0:	 904 	 0.323
total_christian == 0:	 838 	 0.299
total_jewish == 0:	 1683 	 0.601
total_sunni == 0:	 2226 	 0.795
total_shia == 0:	 2444 	 0.873
total_islam == 0:	 1134 	 0.405
total_buddhist == 0:	 1846 	 0.659
total_hindu == 0:	 2030 	 0.725
total_sikh == 0:	 2464 	 0.88
total_shinto == 0:	 2746 	 0.981
total_non_religious == 0:	 1158 	 0.414


In [16]:
wrp_df[wrp_df['total_non_religious'] == 0].head()

Unnamed: 0,year,c_code_1,state_name_1,total_protestant,total_catholic,total_christian,total_jewish,total_sunni,total_shia,total_islam,total_buddhist,total_hindu,total_sikh,total_shinto,total_non_religious
14,1945.0,20,CAN,4002534,5038280,10531961,176690,0,0,0,115066,0,5000,0,0
17,1960.0,20,CAN,4635491,8495062,16170219,254366,0,0,0,781211,0,0,0,0
28,1945.0,31,Bahamas,0,0,0,0,0,0,0,0,0,0,0,0
29,1950.0,31,Bahamas,0,0,0,0,0,0,0,0,0,0,0,0
30,1955.0,31,Bahamas,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
row_list = []
years = list(np.linspace(1945, 2010, 66).astype(int))
for c_code in wrp_df['c_code_1'].unique():
    for year in years:
        try:
            assert len(list(flatten(wrp_df.loc[(wrp_df['year'] == year) & (wrp_df['c_code_1'] == c_code)].values))) > 0
            row_list.append(list(flatten(wrp_df.loc[(wrp_df['year'] == year) & (wrp_df['c_code_1'] == c_code)].values)))
        except:
            slope_coef = int(str(year)[-1])
            if slope_coef > 5:
                slope_coef_adj = slope_coef - 5
            else:
                slope_coef_adj = slope_coef
            total_protestant_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_protestant'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_protestant'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_protestant'].values)
            total_catholic_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_catholic'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_catholic'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_catholic'].values)
            total_christian_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_christian'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_christian'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_christian'].values)
            total_jewish_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_jewish'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_jewish'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_jewish'].values)
            total_sunni_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sunni'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sunni'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sunni'].values)
            total_shia_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shia'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shia'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shia'].values)
            total_islam_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_islam'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_islam'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_islam'].values)
            total_buddhist_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_buddhist'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_buddhist'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_buddhist'].values)
            total_hindu_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_hindu'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_hindu'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_hindu'].values)
            total_sikh_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sikh'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sikh'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_sikh'].values)
            total_shinto_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shinto'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shinto'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_shinto'].values)
            total_non_religious_est = float((slope_coef_adj * ((wrp_df.loc[(wrp_df['year'] == year + (5 - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_non_religious'].values - wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_non_religious'].values)/5)) + wrp_df.loc[(wrp_df['year'] == (year - slope_coef_adj)) & (wrp_df['c_code_1'] == c_code)]['total_non_religious'].values)
            row_list.append([year, c_code, c_code_dic[c_code], total_protestant_est, total_catholic_est, total_christian_est, total_jewish_est, total_sunni_est, total_shia_est, total_islam_est, total_buddhist_est, total_hindu_est, total_sikh_est, total_shinto_est, total_non_religious_est])

In [18]:
wrp_df = pd.DataFrame(row_list)

wrp_df.columns = ['year', 'c_code_1', 'state_name_1', 'total_protestant',
                  'total_catholic', 'total_christian', 'total_jewish',
                  'total_sunni', 'total_shia', 'total_islam',
                  'total_buddhist', 'total_hindu', 'total_sikh',
                  'total_shinto', 'total_non_religious']

In [19]:
wrp_df.head()

Unnamed: 0,year,c_code_1,state_name_1,total_protestant,total_catholic,total_christian,total_jewish,total_sunni,total_shia,total_islam,total_buddhist,total_hindu,total_sikh,total_shinto,total_non_religious
0,1945.0,2,USA,66069671.0,38716742.0,110265118.0,4641182.0,0.0,0.0,0.0,1601218.0,0.0,0.0,0.0,22874544.0
1,1946.0,2,United States of America,67473753.4,39500570.0,112810898.2,4931113.0,0.0,0.0,0.0,1280974.4,0.0,0.0,0.0,22813261.2
2,1947.0,2,United States of America,68877835.8,40284398.0,115356678.4,5221044.0,0.0,0.0,0.0,960730.8,0.0,0.0,0.0,22751978.4
3,1948.0,2,United States of America,70281918.2,41068226.0,117902458.6,5510975.0,0.0,0.0,0.0,640487.2,0.0,0.0,0.0,22690695.6
4,1949.0,2,United States of America,71686000.6,41852054.0,120448238.8,5800906.0,0.0,0.0,0.0,320243.6,0.0,0.0,0.0,22629412.8


In [20]:
len(wrp_df)

13200

In [21]:
wrp_df.loc[wrp_df['total_protestant'] == 0, 'total_protestant'] = None
wrp_df.loc[wrp_df['total_catholic'] == 0, 'total_catholic'] = None
wrp_df.loc[wrp_df['total_christian'] == 0, 'total_christian'] = None
wrp_df.loc[wrp_df['total_jewish'] == 0, 'total_jewish'] = None
wrp_df.loc[wrp_df['total_sunni'] == 0, 'total_sunni'] = None
wrp_df.loc[wrp_df['total_shia'] == 0, 'total_shia'] = None
wrp_df.loc[wrp_df['total_islam'] == 0, 'total_islam'] = None
wrp_df.loc[wrp_df['total_buddhist'] == 0, 'total_buddhist'] = None
wrp_df.loc[wrp_df['total_hindu'] == 0, 'total_hindu'] = None
wrp_df.loc[wrp_df['total_sikh'] == 0, 'total_sikh'] = None
wrp_df.loc[wrp_df['total_shinto'] == 0, 'total_shinto'] = None
wrp_df.loc[wrp_df['total_non_religious'] == 0, 'total_non_religious'] = None

In [23]:
column_list = ['total_protestant', 'total_catholic', 'total_christian',
               'total_jewish', 'total_sunni', 'total_shia',
               'total_islam', 'total_buddhist', 'total_hindu',
               'total_sikh','total_shinto', 'total_non_religious']

for c_code in wrp_df['c_code_1'].unique():
    for column in column_list:
        data_list = list(wrp_df[wrp_df['c_code_1'] == c_code][column].values)
        if str(data_list).count('nan') == len(data_list):
            wrp_df.loc[wrp_df['c_code_1'] == c_code, column] = 0
        elif str(data_list).count('nan') <= len(data_list) - 2:
            wrp_df.loc[wrp_df['c_code_1'] == c_code, column] = wrp_df[wrp_df['c_code_1'] == c_code][column].interpolate()
        data_list = list(wrp_df[wrp_df['c_code_1'] == c_code][column].values)
        if str(data_list).count('nan') <= len(data_list) - 1:
            wrp_df.loc[wrp_df['c_code_1'] == c_code, column] = wrp_df[wrp_df['c_code_1'] == c_code][column].bfill()            
            wrp_df.loc[wrp_df['c_code_1'] == c_code, column] = wrp_df[wrp_df['c_code_1'] == c_code][column].ffill()            

In [24]:
print('total possible 0:\t', len(wrp_df))
print('total_protestant == 0:\t', len(wrp_df[wrp_df['total_protestant'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_protestant'] == 0])/len(wrp_df))).values[0].round(3))
print('total_catholic == 0:\t', len(wrp_df[wrp_df['total_catholic'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_catholic'] == 0])/len(wrp_df))).values[0].round(3))
print('total_christian == 0:\t', len(wrp_df[wrp_df['total_christian'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_christian'] == 0])/len(wrp_df))).values[0].round(3))
print('total_jewish == 0:\t', len(wrp_df[wrp_df['total_jewish'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_jewish'] == 0])/len(wrp_df))).values[0].round(3))
print('total_sunni == 0:\t', len(wrp_df[wrp_df['total_sunni'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_sunni'] == 0])/len(wrp_df))).values[0].round(3))
print('total_shia == 0:\t', len(wrp_df[wrp_df['total_shia'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_shia'] == 0])/len(wrp_df))).values[0].round(3))
print('total_islam == 0:\t', len(wrp_df[wrp_df['total_islam'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_islam'] == 0])/len(wrp_df))).values[0].round(3))
print('total_buddhist == 0:\t', len(wrp_df[wrp_df['total_buddhist'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_buddhist'] == 0])/len(wrp_df))).values[0].round(3))
print('total_hindu == 0:\t', len(wrp_df[wrp_df['total_hindu'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_hindu'] == 0])/len(wrp_df))).values[0].round(3))
print('total_sikh == 0:\t', len(wrp_df[wrp_df['total_sikh'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_sikh'] == 0])/len(wrp_df))).values[0].round(3))
print('total_shinto == 0:\t', len(wrp_df[wrp_df['total_shinto'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_shinto'] == 0])/len(wrp_df))).values[0].round(3))
print('total_non_religious == 0:\t', len(wrp_df[wrp_df['total_non_religious'] == 0]), '\t', pd.Series((len(wrp_df[wrp_df['total_non_religious'] == 0])/len(wrp_df))).values[0].round(3))

total possible 0:	 13200
total_protestant == 0:	 528 	 0.04
total_catholic == 0:	 264 	 0.02
total_christian == 0:	 132 	 0.01
total_jewish == 0:	 4752 	 0.36
total_sunni == 0:	 5742 	 0.435
total_shia == 0:	 7788 	 0.59
total_islam == 0:	 660 	 0.05
total_buddhist == 0:	 4620 	 0.35
total_hindu == 0:	 6204 	 0.47
total_sikh == 0:	 9966 	 0.755
total_shinto == 0:	 12672 	 0.96
total_non_religious == 0:	 264 	 0.02


In [25]:
religion_list = ['Christianity', 'Judiasm', 'Islam', 'Buddhism', 'Hindu', 'Sikhism', 'Shinto', 'Non-Religious']

for i, row in enumerate(wrp_df['year']):
    row_df = pd.DataFrame([wrp_df.loc[i][5], wrp_df.loc[i][6], wrp_df.loc[i][9], wrp_df.loc[i][10], wrp_df.loc[i][11], wrp_df.loc[i][12], wrp_df.loc[i][13], wrp_df.loc[i][14]])
    if row_df.sum()[0] != 0:
        max_index = row_df.loc[row_df[0] == row_df[0].max()].index[0]
        wrp_df.loc[i, 'majority_religion'] = religion_list[max_index]
    else:
        wrp_df.loc[i, 'majority_religion'] = 0

In [26]:
wrp_df['majority_religion'].value_counts()

Christianity     8431
Islam            3395
Buddhism          610
Non-Religious     434
Hindu             198
Shinto             66
Judiasm            66
Name: majority_religion, dtype: int64

In [27]:
wrp_df.drop(['total_protestant', 'total_catholic', 'total_christian',
             'total_jewish', 'total_sunni', 'total_shia',
             'total_islam', 'total_buddhist', 'total_hindu',
             'total_sikh', 'total_shinto', 'total_non_religious'],
            axis = 1, inplace = True)

In [28]:
for i, majority_religion in enumerate(wrp_df['majority_religion']):
    if majority_religion == 0:
        wrp_df.loc[i, 'majority_religion'] = str(wrp_df[(wrp_df['c_code_1'] == wrp_df.loc[i, 'c_code_1']) & (wrp_df['majority_religion'] != 0)]['majority_religion'].value_counts().nlargest(1)).split()[0]

In [29]:
wrp_df['majority_religion'].value_counts()

Christianity     8431
Islam            3395
Buddhism          610
Non-Religious     434
Hindu             198
Shinto             66
Judiasm            66
Name: majority_religion, dtype: int64

In [30]:
wrp_df.to_pickle('pickle/wrp_df_imputed.pkl')
wrp_df.to_csv('dataframe_exports/wrp_df_imputed.csv')

In [31]:
wrp_df = pd.read_pickle('pickle/wrp_df_imputed.pkl')