In [1]:
import pandas as pd
import numpy as np

In [2]:
## import custom functions

from pathlib import Path
import sys

# Make the project's custom modules importable: take the directory two levels
# above the current working directory and register it on the module search
# path (at position 1, i.e. directly after the first entry).
working_dir = Path.cwd()
funcs_path = working_dir.parents[1]
sys.path.insert(1, str(funcs_path))

from funcs import paths, preprocessing

In [3]:
# Resolve the location of the Census 2011 postcode / small-area workbook
# through the project's path helper.
census_workbook = 'Census 2011 postcode-small area.xlsx'
path_to_data = paths.get_path_to_data(census_workbook)

path_to_data

'C:\\Users\\RowanM\\Documents\\dublin_energy_masterplan\\Residential\\data\\inputs\\raw\\Census 2011 postcode-small area.xlsx'

In [4]:
# Read the "FCC DLRCC" sheet of the workbook. The first spreadsheet row is
# skipped, so the row after it supplies the column labels (D14, D16, ...).
# NOTE(review): the D14 column comes back as floats (e.g. 267044003.0),
# presumably because it contains empty cells - if the Small Area codes are
# identifiers rather than numbers, consider reading them as strings.
FCC_and_DLR = pd.read_excel(path_to_data, sheet_name="FCC DLRCC", skiprows=1)

FCC_and_DLR.head()

Unnamed: 0,D14,D16,D18,D13,D15,Co.Dublin,Co.Dublin.1
0,267044003.0,267010004,267126003,267130001,267028004/02,267081008,267102003
1,267042001.0,267010005,267126002,267130003,267028004/01,267159002,267102004
2,267044001.0,267010007,267126004,267130002,267028094,267081007,267102002
3,267044004.0,267010002,267038001,267130004,267028079,267159001,267102001
4,267044002.0,267010001,267126005/267126010,267004014,267028117,267092003,267158001


Convert to the correct (long) format by:
1. Pulling each column out into a temporary DataFrame
2. Creating a Postcode column of equal length and indexing the Small Areas (SAs) by Postcode
3. Appending each temporary DataFrame to a global DataFrame


In [5]:
## Use function preprocessing.match_to_full_Postcode_name
# to convert the sheet's column labels (D14, Co.Dublin, ...) into full
# Postcode names (Dublin 14, Co. Dublin, ...)

# Reshape the wide sheet (one column of Small Areas per Postcode) into a long
# table with a single "Small Area" column indexed by "Postcodes".
# The per-column frames are collected in a list and concatenated once at the
# end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# calling it in a loop re-copies the accumulated frame on every iteration.
small_areas_by_postcode = []

for col in FCC_and_DLR.columns:

    # Drop missing cells first (presumably padding where a column is shorter
    # than the longest one), then label every remaining row with the full
    # Postcode name derived from the column header.
    temp_df = (
        FCC_and_DLR[col]
        .dropna()
        .to_frame(name="Small Area")
        .assign(Postcodes=preprocessing.match_to_full_Postcode_name(col))
        .set_index("Postcodes")
    )

    small_areas_by_postcode.append(temp_df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# sheet has no columns (this matches what the append loop used to produce).
postcodes_to_SAs_2011 = (
    pd.concat(small_areas_by_postcode)
    if small_areas_by_postcode
    else pd.DataFrame()
)

print("Head of postcodes_to_SAs_2011 with FCC_and_DLR")
display(postcodes_to_SAs_2011.head())

print("Tail of postcodes_to_SAs_2011 with FCC_and_DLR")
display(postcodes_to_SAs_2011.tail())

Head of postcodes_to_SAs_2011 with FCC_and_DLR


Unnamed: 0_level_0,Small Area
Postcodes,Unnamed: 1_level_1
Dublin 14,267044000.0
Dublin 14,267042000.0
Dublin 14,267044000.0
Dublin 14,267044000.0
Dublin 14,267044000.0


Tail of postcodes_to_SAs_2011 with FCC_and_DLR


Unnamed: 0_level_0,Small Area
Postcodes,Unnamed: 1_level_1
Co. Dublin,267118010
Co. Dublin,267065028
Co. Dublin,267065029
Co. Dublin,267065030
Co. Dublin,267065027/267065031


In [6]:
# Sanity check: list the distinct Postcode labels present in the index
postcodes_to_SAs_2011.index.unique()

Index(['Dublin 14', 'Dublin 16', 'Dublin 18', 'Dublin 13', 'Dublin 15',
       'Co. Dublin'],
      dtype='object', name='Postcodes')

D14 	D16 	D18 	D13 	D15 	Co.Dublin 	Co.Dublin.1

In [7]:
# Persist both variables with IPython's %store magic, e.g. so that they can be
# restored in another session or notebook with `%store -r`
%store postcodes_to_SAs_2011
%store path_to_data

Stored 'postcodes_to_SAs_2011' (DataFrame)
Stored 'path_to_data' (str)
