In [1]:
import pandas as pd
import os
import time
import numpy as np

In [2]:
os.chdir(r"C:\Users\Sidrcs\Downloads\SPSS_Processed\UKDA-5151-spss\spss\spss19")

In [19]:
df_residence = pd.read_csv(r"C:\Users\Sidrcs\Downloads\SPSS_Processed\UKDA-5151-spss\spss\spss19\bhps_w5\eindresp.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [24]:
# Loading variables of interest from 1991 BHPS dataset into dataframe
df_residence = df_residence[["eopsocc","eopsocd","eopsoce","efiyr","efetype","eage12","ejbstatt","eregion2", "eplbornc_cc"]]

In [26]:
# Dropping rows if they donot have any spatial information
df_residence = df_residence[~df_residence["eregion2"].isin(["don't know", "inapplicable", "missing or wild", "refused"])]

In [27]:
# To view columns
df_residence.head()

Unnamed: 0,eopsocc,eopsocd,eopsoce,efiyr,efetype,eage12,ejbstatt,eregion2,eplbornc_cc
0,proxy and/or phone,proxy and/or phone,proxy and/or phone,0.0,inapplicable,58.0,proxy and/or phone,London,proxy and/or phone
1,Agree,Disagree,Disagree,75532.59375,inapplicable,61.0,In paid employ,London,inapplicable
2,Agree,Agree,Disagree,514.256836,inapplicable,53.0,Self employed,London,inapplicable
3,Agree,"Not agree, disagree",Strongly disagree,0.0,inapplicable,16.0,"FT studt, school",London,australia
4,proxy and/or phone,proxy and/or phone,proxy and/or phone,0.0,inapplicable,50.0,proxy and/or phone,London,proxy and/or phone


In [28]:
# create an empty dataframe
output_df = pd.DataFrame()

# Group the dataframe by county and calculate standard deviation of age : "aage12"
output_df["std_dev_age"] = df_residence.groupby("eregion2")["eage12"].agg("std")

# Group the dataframe by county and number of people born in UK : "aplbornc_cc"
output_df["born"] = df_residence.groupby("eregion2")["eplbornc_cc"]

# Assuming inapplicable as people born in UK to all others
def native_share(x):
    pop_count = list(x["born"][1])
    native_count = pop_count.count("inapplicable")/len(pop_count)
    return native_count

# calculating fraction of natives in each county
output_df["native_share"] = output_df.apply(lambda x: native_share(x), axis = 1)

# Dropping "born" groupby column
output_df = output_df.drop(columns = ["born"], axis = 1)

# Creating afetype_dict based on PDF from 1995 BHPS data
efetype_dict = {"Nursing school etc" : 1, "College of f educ" : 2, "Other trng establmnt" : 3, "Polytechnic" : 4, \
                "University" : 5, "None of the above" : 7, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy and/or phone" : -7, "refused" : -2}


# Create a new column "afetype_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["efetype_encoded"] = df_residence["efetype"].replace(efetype_dict)

# Group the dataframe by county and "afetype_encoded"
output_df["education"] = df_residence.groupby("eregion2")["efetype_encoded"]

# function to calculate dissimilarity in education
def dissimilarity_education(x):
    # loads tuple element into job
    job = x["education"][1]
    # loads frequency of each education type in county as list
    freq = list(job.value_counts())
    # calculates total number of groups
    s = len(freq)
    dissim_index = 0
    for i in freq:
        # formula of dissimilarity index
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

def region_name(x):
    return x["education"][0]

output_df["education_variability"] = output_df.apply(lambda x: dissimilarity_education(x), axis = 1)
output_df["region_name"] = output_df.apply(lambda x: region_name(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["education"], axis = 1)

# Creating ajbstatt_dict based on PDF from 1991 BHPS data
ejbstatt_dict = {"Self employed" : 1, "In paid employ" : 2, "Unemployed" : 3, "Retired" : 4, "Maternity leave" : 5, \
                    "Family care" : 6,  "FT studt, school" : 7, " LT sick, disabld" : 8, \
                    "Govt trng scheme" : 9, "Something else" : 10, "don't know" : -1, "missing or wild" : -9, \
                    "inapplicable" : -8, "proxy" : -7, "refused" : -2}


# Create a new column "ajbstatt_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["ejbstatt_encoded"] = df_residence["ejbstatt"].replace(ejbstatt_dict)

# Group the dataframe by county and calculate the mean and standard deviation of the encoded values
output_df["job"] = df_residence.groupby("eregion2")["ejbstatt_encoded"]

# function to calculate dissimilarity in job
def dissimilarity_job(x):
    job = x["job"][1]
    freq = list(job.value_counts())
    s = len(freq)
    dissim_index = 0
    for i in freq:
        dissim_index += 0.5 * abs(i-(1/s))
    return dissim_index

# function to calculate fraction of employed people within each county
def frac_employed(x):
    job = list(x["job"][1])
    frac_emp = (job.count(2)/len(job))
    return frac_emp

output_df["job_variability"] = output_df.apply(lambda x: dissimilarity_job(x), axis = 1)
output_df ["frac_employed"] = output_df.apply(lambda x: frac_employed(x), axis = 1)

# Dropping "education" groupby column
output_df = output_df.drop(columns = ["job"], axis = 1)

# replacing all Not-A-Number cases to zero i.e., proxy values
df_residence["efiyr"] = df_residence["efiyr"].replace("proxy and/or phone",0)

# Group the dataframe by county and calculate median based on self-reported annual income: "afiyr"
output_df["median_income"] = df_residence.groupby("eregion2")["efiyr"].agg(["median"])

# convert "afiyr" column to float data type
df_residence["efiyr"] = df_residence["efiyr"].astype("float32")

# group incomes by county
output_df["income"] = df_residence.groupby("eregion2")["efiyr"]

def gini_index(x):
    data = list(x["income"][1])
    # sort the data in ascending order
    sorted_data = np.sort(data)
    # calculate the cumulative sum of the sorted data
    cumsum_data = np.cumsum(sorted_data)
    # calculate the Lorenz curve values
    lorenz_curve = cumsum_data / cumsum_data[-1]
    # calculate the area under the Lorenz curve
    area_lorenz_curve = np.trapz(lorenz_curve, dx=1/len(data))
    # calculate the Gini index
    gini_index = 1 - 2 * area_lorenz_curve
    return gini_index

output_df["gini_index"] = output_df.apply(lambda x : gini_index(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["income"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index
eregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
East Midlands,18.929772,0.946183,399.0,East Midlands,399.0,0.446809,6427.445312,0.485968
East of England,18.511457,0.953883,411.5,East of England,411.5,0.470874,7721.132812,0.497099
London,17.650852,0.938341,445.5,London,445.5,0.487668,8279.160156,0.520353
North East,18.64009,0.964623,211.5,North East,211.5,0.464623,6390.706543,0.455352
North West,18.636682,0.955357,559.5,North West,559.5,0.46875,6927.100586,0.496123
Scotland,18.132051,0.934602,420.0,Scotland,420.0,0.475624,6681.736328,0.497527
South East,18.151452,0.946148,668.0,South East,668.0,0.504114,7912.114746,0.494032
South West,19.02662,0.940828,422.0,South West,422.0,0.455621,6339.334961,0.494527
Wales,18.734239,0.947262,246.0,Wales,246.0,0.399594,6077.859863,0.458942
West Midlands,18.253225,0.96646,402.0,West Midlands,402.0,0.475776,6500.0,0.48451


In [29]:
# Define the dictionary to encode the "aopsocc" column which is Statement 1 (S1)
# S1: "Private enterprise is the best way to solve Britain’s economic problems."
eopsocc_dict = {"Strongly agree": 1,"Agree":2, "Not agree, disagree": 3 , "Disagree":4 , "Strongly disagree": 5,
                "don't know": -1 , "missing or wild": -9, "inapplicable":-8,
                "proxy and/or phone":-7, "refused":-2}

# Create a new column "aopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["eopsocc_encoded"] = df_residence["eopsocc"].replace(eopsocc_dict)

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S1"] = df_residence.groupby("eregion2")["eopsocc_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("eregion2")["eopsocc_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("eregion2")["eopsocc_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S1"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("eregion2")["eopsocc_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S1"] = output_df.apply(lambda x: duca_saving(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1
eregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
East Midlands,18.929772,0.946183,399.0,East Midlands,399.0,0.446809,6427.445312,0.485968,6.068943,3.198999,0.003276
East of England,18.511457,0.953883,411.5,East of England,411.5,0.470874,7721.132812,0.497099,5.260862,3.135922,0.001974
London,17.650852,0.938341,445.5,London,445.5,0.487668,8279.160156,0.520353,6.656564,3.339686,1.1e-05
North East,18.64009,0.964623,211.5,North East,211.5,0.464623,6390.706543,0.455352,5.040434,3.216981,6.4e-05
North West,18.636682,0.955357,559.5,North West,559.5,0.46875,6927.100586,0.496123,5.844236,3.283929,0.000887
Scotland,18.132051,0.934602,420.0,Scotland,420.0,0.475624,6681.736328,0.497527,7.773371,3.388823,0.000848
South East,18.151452,0.946148,668.0,South East,668.0,0.504114,7912.114746,0.494032,6.076755,3.204936,4.6e-05
South West,19.02662,0.940828,422.0,South West,422.0,0.455621,6339.334961,0.494527,6.860504,3.226036,0.000324
Wales,18.734239,0.947262,246.0,Wales,246.0,0.399594,6077.859863,0.458942,6.712932,3.36714,0.00064
West Midlands,18.253225,0.96646,402.0,West Midlands,402.0,0.475776,6500.0,0.48451,4.558054,3.070807,0.006533


In [30]:
# Define the dictionary to encode the "aopsocd" column which is Statement 2 (S2)
# S2: "Major public services and industries ought to be in state ownership."

eopsocd_dict = {"Strongly agree": 1,"Agree":2, "Not agree, disagree": 3 , "Disagree":4 , "Strongly disagree": 5,
                "don't know": -1 , "missing or wild": -9, "inapplicable":-8, "proxy and/or phone":-7, "refused":-2}

# Create a new column "aopsocc_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["eopsocd_encoded"] = df_residence["eopsocd"].replace(eopsocd_dict)

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S2"] = df_residence.groupby("eregion2")["eopsocd_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("eregion2")["eopsocd_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("eregion2")["eopsocd_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S2"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("eregion2")["eopsocd_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S2"] = output_df.apply(lambda x: duca_saving(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2
eregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
East Midlands,18.929772,0.946183,399.0,East Midlands,399.0,0.446809,6427.445312,0.485968,6.068943,3.198999,0.003276,5.688696,2.892365,0.0018
East of England,18.511457,0.953883,411.5,East of England,411.5,0.470874,7721.132812,0.497099,5.260862,3.135922,0.001974,5.220032,2.96966,0.001221
London,17.650852,0.938341,445.5,London,445.5,0.487668,8279.160156,0.520353,6.656564,3.339686,1.1e-05,5.990008,2.882287,0.002245
North East,18.64009,0.964623,211.5,North East,211.5,0.464623,6390.706543,0.455352,5.040434,3.216981,6.4e-05,4.642441,2.875,6.4e-05
North West,18.636682,0.955357,559.5,North West,559.5,0.46875,6927.100586,0.496123,5.844236,3.283929,0.000887,5.41472,2.801786,3.6e-05
Scotland,18.132051,0.934602,420.0,Scotland,420.0,0.475624,6681.736328,0.497527,7.773371,3.388823,0.000848,6.928948,2.825208,6.3e-05
South East,18.151452,0.946148,668.0,South East,668.0,0.504114,7912.114746,0.494032,6.076755,3.204936,4.6e-05,5.685174,2.962603,2.1e-05
South West,19.02662,0.940828,422.0,South West,422.0,0.455621,6339.334961,0.494527,6.860504,3.226036,0.000324,6.503878,2.953846,0.000297
Wales,18.734239,0.947262,246.0,Wales,246.0,0.399594,6077.859863,0.458942,6.712932,3.36714,0.00064,6.158124,2.855984,4.6e-05
West Midlands,18.253225,0.96646,402.0,West Midlands,402.0,0.475776,6500.0,0.48451,4.558054,3.070807,0.006533,4.226745,2.772671,0.000659


In [31]:
# Define the dictionary to encode the "aopsocd" column which is Statement 3 (S3)
# S3: "It is the government’s responsibility to provide a job for everyone who wants one."

eopsoce_dict = {"Strongly agree": 1,"Agree":2, "Not agree, disagree": 3 , "Disagree":4 , "Strongly disagree": 5,
                "don't know": -1 , "missing or wild": -9, "inapplicable":-8, "proxy and/or phone":-7, "refused":-2}

# Create a new column "aopsocd_encoded" in the dataframe by replacing the values with encoded values from the dictionary
df_residence["eopsoce_encoded"] = df_residence["eopsoce"].replace(eopsoce_dict)

# Political polarization measurement by Lindqvist and Ostling (2010)
output_df["Lindqvist_Ostling_S3"] = df_residence.groupby("eregion2")["eopsoce_encoded"].agg(["var"])

# Create new columns based on absolute sum of encoded values of opinions and counts
output_df["abs_sum"] = df_residence.groupby("eregion2")["eopsoce_encoded"].apply(lambda x: x.abs().sum())
output_df["counts"] = df_residence.groupby("eregion2")["eopsoce_encoded"].apply(lambda x: x.count())

# Political polarization measurement by Abramowitz and Saunders (2008)
output_df["Abramowitz_Saunders_S3"] = output_df.apply(lambda x: x["abs_sum"]/x["counts"], axis = 1)

output_df = output_df.drop(columns = ["abs_sum", "counts"], axis = 1)

# Group the dataframe by county and aopsocc_encoded values
output_df["dkp"] = df_residence.groupby("eregion2")["eopsoce_encoded"]

# Political polarization measurement by Duca and Saving (2016)
def duca_saving(x):
    county_tuple = x["dkp"]
    freq = list(county_tuple[1].value_counts())
    val = list(county_tuple[1].unique())
    norm_freq = [i/sum(freq) for i in freq]
    outer_sum = 0
    for i in range(len(norm_freq)):
        nkit = norm_freq[i]
        inner_sum = 0
        for j in range(len(norm_freq)):
            npit = norm_freq[j]
            dkp = abs(val[i] - val[j])
            inner_sum += npit*dkp
    outer_sum += (nkit**2)*inner_sum
    
    return outer_sum

output_df["Duca_Saving_S3"] = output_df.apply(lambda x: duca_saving(x), axis = 1)

# Drop "income" groupby column
output_df = output_df.drop(columns = ["dkp"], axis = 1)

output_df

Unnamed: 0_level_0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
eregion2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
East Midlands,18.929772,0.946183,399.0,East Midlands,399.0,0.446809,6427.445312,0.485968,6.068943,3.198999,0.003276,5.688696,2.892365,0.0018,5.60375,2.986233,0.00021
East of England,18.511457,0.953883,411.5,East of England,411.5,0.470874,7721.132812,0.497099,5.260862,3.135922,0.001974,5.220032,2.96966,0.001221,5.111845,3.072816,0.000129
London,17.650852,0.938341,445.5,London,445.5,0.487668,8279.160156,0.520353,6.656564,3.339686,1.1e-05,5.990008,2.882287,0.002245,6.093646,3.086323,0.000412
North East,18.64009,0.964623,211.5,North East,211.5,0.464623,6390.706543,0.455352,5.040434,3.216981,6.4e-05,4.642441,2.875,6.4e-05,4.252905,2.771226,6.2e-05
North West,18.636682,0.955357,559.5,North West,559.5,0.46875,6927.100586,0.496123,5.844236,3.283929,0.000887,5.41472,2.801786,3.6e-05,4.888318,2.836607,0.000392
Scotland,18.132051,0.934602,420.0,Scotland,420.0,0.475624,6681.736328,0.497527,7.773371,3.388823,0.000848,6.928948,2.825208,6.3e-05,6.442399,2.858502,0.000861
South East,18.151452,0.946148,668.0,South East,668.0,0.504114,7912.114746,0.494032,6.076755,3.204936,4.6e-05,5.685174,2.962603,2.1e-05,5.608987,3.103216,4e-06
South West,19.02662,0.940828,422.0,South West,422.0,0.455621,6339.334961,0.494527,6.860504,3.226036,0.000324,6.503878,2.953846,0.000297,6.449138,3.080473,0.000301
Wales,18.734239,0.947262,246.0,Wales,246.0,0.399594,6077.859863,0.458942,6.712932,3.36714,0.00064,6.158124,2.855984,4.6e-05,5.42053,2.720081,0.000574
West Midlands,18.253225,0.96646,402.0,West Midlands,402.0,0.475776,6500.0,0.48451,4.558054,3.070807,0.006533,4.226745,2.772671,0.000659,4.119119,2.921739,1.8e-05


In [32]:
output_df.reset_index(drop=True, inplace=True)
output_df

Unnamed: 0,std_dev_age,native_share,education_variability,region_name,job_variability,frac_employed,median_income,gini_index,Lindqvist_Ostling_S1,Abramowitz_Saunders_S1,Duca_Saving_S1,Lindqvist_Ostling_S2,Abramowitz_Saunders_S2,Duca_Saving_S2,Lindqvist_Ostling_S3,Abramowitz_Saunders_S3,Duca_Saving_S3
0,18.929772,0.946183,399.0,East Midlands,399.0,0.446809,6427.445312,0.485968,6.068943,3.198999,0.003276,5.688696,2.892365,0.0018,5.60375,2.986233,0.00021
1,18.511457,0.953883,411.5,East of England,411.5,0.470874,7721.132812,0.497099,5.260862,3.135922,0.001974,5.220032,2.96966,0.001221,5.111845,3.072816,0.000129
2,17.650852,0.938341,445.5,London,445.5,0.487668,8279.160156,0.520353,6.656564,3.339686,1.1e-05,5.990008,2.882287,0.002245,6.093646,3.086323,0.000412
3,18.64009,0.964623,211.5,North East,211.5,0.464623,6390.706543,0.455352,5.040434,3.216981,6.4e-05,4.642441,2.875,6.4e-05,4.252905,2.771226,6.2e-05
4,18.636682,0.955357,559.5,North West,559.5,0.46875,6927.100586,0.496123,5.844236,3.283929,0.000887,5.41472,2.801786,3.6e-05,4.888318,2.836607,0.000392
5,18.132051,0.934602,420.0,Scotland,420.0,0.475624,6681.736328,0.497527,7.773371,3.388823,0.000848,6.928948,2.825208,6.3e-05,6.442399,2.858502,0.000861
6,18.151452,0.946148,668.0,South East,668.0,0.504114,7912.114746,0.494032,6.076755,3.204936,4.6e-05,5.685174,2.962603,2.1e-05,5.608987,3.103216,4e-06
7,19.02662,0.940828,422.0,South West,422.0,0.455621,6339.334961,0.494527,6.860504,3.226036,0.000324,6.503878,2.953846,0.000297,6.449138,3.080473,0.000301
8,18.734239,0.947262,246.0,Wales,246.0,0.399594,6077.859863,0.458942,6.712932,3.36714,0.00064,6.158124,2.855984,4.6e-05,5.42053,2.720081,0.000574
9,18.253225,0.96646,402.0,West Midlands,402.0,0.475776,6500.0,0.48451,4.558054,3.070807,0.006533,4.226745,2.772671,0.000659,4.119119,2.921739,1.8e-05


In [18]:
output_df.to_csv(r"C:\Users\Sidrcs\Documents\Github\Geog_575_Final_Project\data\polarization1995_data.csv")