In [72]:
import pandas as pd
import os
import numpy as np
import sklearn
import dataFuncs
import matplotlib.pyplot as plt
import json 

# Directory layout: every data folder lives under <working directory>/data.
_projroot = os.path.abspath(os.curdir)
_datadir = os.path.join(_projroot, 'data')

# Sub-folders of the data directory, built in one pass.
_preprocesseddir, _rawdir, _geodir = (
    os.path.join(_datadir, subdir)
    for subdir in ('preprocesseddata', 'rawdata', 'geodata')
)

In [73]:
# Load the preprocessed survey and derive the feature matrix.
from sklearn.model_selection import train_test_split

WCC_Survey = (
    pd.read_csv(os.path.join(_preprocesseddir, 'WCC_mobile.csv'))
    # NOTE(review): incomplete rows are removed while 'Unnamed: 0' and 'Q26j'
    # are still present, so a gap in either of those columns also discards
    # the row - confirm this is intended.
    .dropna()
    # Drop the saved index column and the unused question column.
    .drop(columns=['Unnamed: 0', 'Q26j'])
)

# Same frame without the 'Target' column.
WCC_Survey_Features = WCC_Survey.drop(columns='Target')

In [74]:
# Read the reference dictionary that was saved next to the preprocessed CSV.
reference_path = os.path.join(_preprocesseddir, 'WCC_mobile.json')
with open(reference_path) as handle:
    ref = json.load(handle)

# Flip the 'OutputArea' mapping so it can be looked up by its former values.
ref['OutputArea'] = dict(
    (stored_value, stored_key)
    for stored_key, stored_value in ref['OutputArea'].items()
)

In [75]:
# Decode the OutputArea column: replace every encoded value with the entry it
# maps to in the inverted ref['OutputArea'] lookup.
oa_lookup = ref['OutputArea']
oa_decoded_values = set(oa_lookup.values())


def _decode_output_area(value):
    """Return the ref['OutputArea'] entry for ``value``.

    Values that are already decoded are returned unchanged, so running this
    cell a second time is a no-op instead of a KeyError. Any other unknown
    value still raises KeyError, exactly like a direct dictionary lookup.
    """
    if value in oa_lookup:
        return oa_lookup[value]
    if value in oa_decoded_values:
        return value
    raise KeyError(value)


OA = [_decode_output_area(value) for value in WCC_Survey['OutputArea']]

WCC_Survey['OutputArea'] = OA
print(WCC_Survey['OutputArea'])

# Separate digitally excluded (Target == 1) and non-excluded (Target == 0)
WCC_Survey_1 = WCC_Survey[WCC_Survey['Target'] == 1]
WCC_Survey_0 = WCC_Survey[WCC_Survey['Target'] == 0]

print(WCC_Survey_1['OutputArea'])

0       E00024058
1       E00024058
2       E00024058
3       E00024058
4       E00024058
          ...    
1033          NaN
1034    E00023699
1035    E00023956
1036    E00023956
1037          NaN
Name: OutputArea, Length: 1004, dtype: object
4       E00024058
5       E00024058
42      E00023672
59      E00175265
61      E00175272
          ...    
995     E00024134
1001    E00175259
1002    E00175259
1005    E00175259
1020    E00024137
Name: OutputArea, Length: 139, dtype: object


In [55]:
#Load output area data and create dictionary for postcode, only for ofcom data

#OA = pd.read_csv(os.path.join(_geodir,'postcode_OA.csv'), index_col = 'pcd')
#OA = OA.drop('Unnamed: 0', axis = 1)
#OA = OA.to_dict()
#print(OA)

In [56]:
#Create new dataset with OAs
#Output_Area = []

#for item in WCC_Survey['Postcode_clean']:
 #   Output_Area.append(OA[item])

#WCC_Survey['Output Area'] = Output_Area
#print(WCC_Survey['Output Area'])


In [87]:
# Count survey respondents per Output Area, separately for the digitally
# excluded (WCC_Survey_1) and non-excluded (WCC_Survey_0) groups.
# No figure is created here: this cell only builds tables, and an unused
# plt.figure() would just leave an empty canvas in the output.
from collections import Counter

exclusion = Counter(WCC_Survey_1["OutputArea"])
no_exclusion = Counter(WCC_Survey_0["OutputArea"])

# One row per Output Area holding that group's respondent count
ex = pd.DataFrame(list(exclusion.items()), columns=['OA', "Excluded"])
no = pd.DataFrame(list(no_exclusion.items()), columns=['OA', "Not Excluded"])

# Printed separately so the two tables do not run into each other
print(no.head())
print(ex)


          OA  Not Excluded
0  E00024058             6
1  E00023634             6
2  E00023631             6
3  E00023629             1
4  E00023628             2            OA  Excluded
0   E00024058         2
1   E00023672         1
2   E00175265         1
3   E00175272         1
4   E00023459         1
..        ...       ...
76  E00024077         1
77  E00023991         1
78  E00024134         1
79  E00175259         3
80  E00024137         1

[81 rows x 2 columns]


<Figure size 1296x1296 with 0 Axes>

In [88]:
# Combine both count tables; an Output Area missing from one side gets 0.
tot = ex.merge(no, how='outer', on='OA').fillna(0)

# Share of respondents in each Output Area that are excluded, as a percentage.
excluded_share = tot['Excluded'] / (tot['Excluded'] + tot['Not Excluded'])
tot['% excluded'] = excluded_share * 100
print(tot)

            OA  Excluded  Not Excluded  % excluded
0    E00024058       2.0           6.0   25.000000
1    E00023672       1.0           7.0   12.500000
2    E00175265       1.0           3.0   25.000000
3    E00175272       1.0           3.0   25.000000
4    E00023459       1.0           5.0   16.666667
..         ...       ...           ...         ...
195  E00023782       0.0           8.0    0.000000
196  E00023825       0.0           6.0    0.000000
197  E00023734       0.0           8.0    0.000000
198  E00023676       0.0           5.0    0.000000
199  E00023675       0.0           2.0    0.000000

[200 rows x 4 columns]


In [None]:
# Choropleth of the percentage of digitally excluded respondents per Output
# Area, drawn from the `tot` table on top of the Westminster boundaries.

with open(os.path.join(_geodir, 'Westminster.geojson')) as f:
    OAs = json.load(f)

#With Plotly
import plotly.express as px
from geojson_rewind import rewind

#Make the rings clockwise (to make it compatible with plotly)
OAs_corrected = rewind(OAs, rfc7946=False)

# Work out which feature property holds the Output Area code by matching the
# property values against the codes in `tot`, instead of hard-coding a name.
# Assumes the file is a GeoJSON FeatureCollection - TODO confirm.
oa_codes = set(tot['OA'].astype(str))
oa_features = OAs_corrected['features']
oa_property = next(
    (
        key
        for key in oa_features[0]['properties']
        if any(str(feature['properties'].get(key)) in oa_codes
               for feature in oa_features)
    ),
    None,
)
if oa_property is None:
    raise KeyError("No feature property in Westminster.geojson matches the OA codes in `tot`")

fig = px.choropleth(tot, geojson=OAs_corrected, locations='OA',
                    featureidkey="properties." + oa_property, color='% excluded',
                    color_continuous_scale="PurPor",
                    labels={'% excluded': '% digitally excluded'},
                    title='Digitally excluded survey respondents by Output Area (%)',
                    scope="europe")

# Zoom to the plotted areas and hide the base map
fig.update_geos(fitbounds="locations", visible=False)