# Imports

In [None]:
# Only required when running in Google Colab with the data stored on Google Drive;
# skip this cell in any other environment.
from google.colab import drive
# Mount Drive under /content/drive so that the data folder configured below can point into it.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import pyproj
gd = pyproj.Geod(ellps='WGS84')
import matplotlib.pyplot as plt
%matplotlib inline
pd.set_option('display.max_columns', None)

# Load data

In [None]:
# Folder that holds every input Excel file read below. Keep the trailing slash:
# file paths are built by plain string concatenation (DATA_FOLDER + 'file.xlsx').
# TODO: replace this placeholder with your own data folder path.
DATA_FOLDER = '/content/drive/MyDrive/Project_Name/data/'

In [None]:
# Load Polygon Neighbor tables - used to check which buildings neighbor each other
# (the 0m table is used further below to find buildings that are attached to one another).
# Tables are created in GIS by first running the buffer tool. In the case study, 3m or 10m of lateral space
# between buildings is required by laws enacted at different times, and thus these distances are used to
# estimate building WWR at sides, depending on a building's era of construction.
# Polygon Neighbor tables are thus created using 10m and 3m buffers to capture the lateral distances,
# and a 0m buffer in the case of attached buildings.

osm_neighbors_0m = pd.read_excel(DATA_FOLDER+'osm_polygon_neighbors_0m_buffer.xlsx')
osm_neighbors_3m = pd.read_excel(DATA_FOLDER+'osm_polygon_neighbors_3m_buffer.xlsx')
osm_neighbors_10m = pd.read_excel(DATA_FOLDER+'osm_polygon_neighbors_10m_buffer.xlsx')

In [None]:
# File with all buildings in the OSM database, split into line segments at the polygon vertices,
# with the latitude/longitude of each segment's central point already calculated.
# It is used further below to look up the segments (and their center points) of neighboring buildings.

ctrpts_df = pd.read_excel(DATA_FOLDER+'osm_all_building_splitlines_with_center_pts.xlsx')

In [None]:
# Inference file, post-NMS and with stratified detections, grouped by image_id.
# It carries the detected window/wall areas and WWR values per 'osm_id_final', which are
# merged onto splitlines_df further below.

grouped_stratified_df = pd.read_excel(DATA_FOLDER+'stratified_detections_by_image_id.xlsx')
grouped_stratified_df

Unnamed: 0,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,osm_id_final,WWR_lower,WWR_upper,WWR_total
0,3522.591255,11166.895194,23519.587158,59928.245544,0.0,23519.587158,14689.486448,83447.832702,1000108843_2,0.149773,0.186338,0.176032
1,3283.618088,68498.577063,29723.348465,210968.317392,0.0,29723.348465,71782.195150,240691.665857,1000108861_4,0.110473,0.324687,0.298233
2,6975.604874,54223.898540,31268.442640,304217.424117,0.0,31268.442640,61199.503414,335485.866757,1000122222_2,0.223088,0.178241,0.182421
3,9076.671941,80940.329612,36729.837287,271931.126526,0.0,36729.837287,90017.001554,308660.963813,1000122223_2,0.247120,0.297650,0.291637
4,4700.530478,15142.729676,55046.066986,145900.828750,0.0,55046.066986,19843.260154,200946.895735,1000122224_2,0.085393,0.103788,0.098749
...,...,...,...,...,...,...,...,...,...,...,...,...
1626,5724.402110,30112.720251,51920.080245,322254.297991,0.0,51920.080245,35837.122360,374174.378237,999963320_4,0.110254,0.093444,0.095777
1627,26094.520291,133253.201628,106783.547923,593059.077646,0.0,106783.547923,159347.721919,699842.625569,999963321_4,0.244368,0.224688,0.227691
1628,25411.339678,93822.539668,50423.156802,418818.330616,0.0,50423.156802,119233.879345,469241.487418,999963322_2,0.503962,0.224017,0.254099
1629,16794.114811,65524.186258,34596.729472,284772.153231,0.0,34596.729472,82318.301069,319368.882703,999963323_3,0.485425,0.230093,0.257753


In [None]:
# Load frontlines_df, prepared after script 01 and manually reviewed in GIS.
# All of these lines are already classified as street-facing fronts.
# Add a column ['in_prep_ds'] and set it to True for every row, marking these lines as part of the
# prepared dataset of fronts; after the left merge onto splitlines_df further below, lines that are
# missing from this file can then be told apart.
frontlines_df = pd.read_excel(DATA_FOLDER+'frontlines_for_extraction.xlsx')
frontlines_df['in_prep_ds'] = True
frontlines_df

Unnamed: 0,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,st_facing_manual,in_prep_ds
0,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,1,True
1,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,1,True
2,11,83471234,83471234_3,7.703627,45.097317,7.703663,45.097824,7.703645,45.097571,56.373605,2.911479,1,True
3,15,83471237,83471237_3,7.707488,45.095631,7.707522,45.096194,7.707505,45.095913,62.635197,2.412039,1,True
4,17,83471238,83471238_1,7.705814,45.096960,7.705768,45.096427,7.705791,45.096694,59.357366,183.508419,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2388,9757,1214584770,1214584770_2,7.697674,45.105997,7.697440,45.106057,7.697557,45.106027,19.549598,290.143764,1,True
2389,9762,1214584771,1214584771_3,7.698083,45.106000,7.698037,45.105940,7.698060,45.105970,7.622774,208.206956,1,True
2390,9763,1214584771,1214584771_4,7.698037,45.105940,7.697752,45.106041,7.697895,45.105990,25.095010,296.561084,1,True
2391,9768,1214584772,1214584772_1,7.698279,45.106267,7.698200,45.106145,7.698239,45.106206,14.912131,204.734596,1,True


In [None]:
# splitlines_df - the file of all buildings in the case study, with line segments split at vertices
# (one row per line segment, identified by 'osm_id_final').
# The columns 'osm_polygon_length' and 'osm_polygon_area' are the geodesic length and area of the OSM polygon
# associated with an osm_id.
# 'Line_Bearing_Normal' is the perpendicular direction from a building line segment to the OUTSIDE of the building,
# and is used to determine glazing orientation in subsequent scripts in this package.
# The following columns are based on previous research: 'CIT_AR', 'Era', 'Main_Class', 'Final_Class', 'GFA'

splitlines_df = pd.read_excel(DATA_FOLDER+'splitlines_for_classification_algorithm.xlsx')
splitlines_df

Unnamed: 0,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA
0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599
1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599
2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599
3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599
4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9771,9772,1233637785,1233637785_1,7.655736,45.134323,7.655596,45.134310,7.655666,45.134316,11.122694,262.512272,172.512272,37.803752,86.525441,1031143,8,SF,R8SF,86.525441
9772,9773,1233637785,1233637785_2,7.655596,45.134310,7.655583,45.134379,7.655589,45.134344,7.779111,352.529588,262.529588,37.803752,86.525441,1031143,8,SF,R8SF,86.525441
9773,9774,1233637785,1233637785_3,7.655583,45.134379,7.655723,45.134392,7.655653,45.134386,11.122699,82.511550,352.511550,37.803752,86.525441,1031143,8,SF,R8SF,86.525441
9774,9775,1233637785,1233637785_4,7.655723,45.134392,7.655736,45.134323,7.655730,45.134357,7.779249,172.529722,82.529722,37.803752,86.525441,1031143,8,SF,R8SF,86.525441


In [None]:
# Merge splitlines_df with frontlines_df['in_prep_ds'] to add whether lines were classified as street-facing fronts
# at GIS prep stage, as well as merge with grouped_stratified_df to add in the detected WWRs.
# Both merges are left joins on 'osm_id_final', so every row of splitlines_df is kept.
# NOTE(review): this cell overwrites splitlines_df, so it is presumably not safe to run twice
# without first re-running the cell that loads splitlines_df -- confirm.

splitlines_df = pd.merge(splitlines_df, frontlines_df[['osm_id_final', 'in_prep_ds']], on='osm_id_final', how='left')
# Lines that are absent from frontlines_df come out of the left merge with a missing 'in_prep_ds'; set those to False.
splitlines_df['in_prep_ds'] = splitlines_df.apply(lambda row: row['in_prep_ds'] if pd.notna(row['in_prep_ds']) else False, axis=1)
splitlines_df = pd.merge(splitlines_df, grouped_stratified_df, on='osm_id_final', how='left')
splitlines_df

Unnamed: 0,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total
0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,
1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,
2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,True,,,,,,,,,,,
3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,
4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,True,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9771,9772,1233637785,1233637785_1,7.655736,45.134323,7.655596,45.134310,7.655666,45.134316,11.122694,262.512272,172.512272,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,
9772,9773,1233637785,1233637785_2,7.655596,45.134310,7.655583,45.134379,7.655589,45.134344,7.779111,352.529588,262.529588,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,
9773,9774,1233637785,1233637785_3,7.655583,45.134379,7.655723,45.134392,7.655653,45.134386,11.122699,82.511550,352.511550,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,
9774,9775,1233637785,1233637785_4,7.655723,45.134392,7.655736,45.134323,7.655730,45.134357,7.779249,172.529722,82.529722,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,


# Define functions to classify lines as front, rear, side

In [None]:
# Our series of functions used to classify each line in splitlines_df as a front, side, or rear.


def f_is_front(df):
    """
    First-pass front classification for the line segments of a single building.

    A segment is flagged as a front when a WWR value is available for it (column 'WWR_total'
    is not missing) or when it is marked as part of the prepared front dataset ('in_prep_ds').

    Arguments:
    df                    --  pd.DataFrame; one building only, one row per line segment. Must contain
                              the columns 'WWR_total' and 'in_prep_ds'.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with the added Boolean
                              columns 'have_image' and 'is_front'.
    """
    # A segment "has an image" whenever a total WWR is present for it.
    df['have_image'] = df['WWR_total'].notna()

    # Front = has an image OR is listed in the prepared dataset of fronts.
    front_mask = (df['have_image'] == True) | (df['in_prep_ds'] == True)
    df['is_front'] = front_mask
    return df


def f_is_attached_side(df):
    """
    Flag the non-front segments of a building that are attached to a neighboring building.

    Neighbors are looked up in osm_neighbors_0m (0 meter buffer, i.e. the buildings touch in GIS).
    For every non-front segment, the smallest distance between its center point and the center
    point of any neighbor segment (taken from ctrpts_df) is computed. The segments with the smallest
    such distances are flagged, up to as many segments as there are attached neighbor buildings.

    Relies on the notebook-level objects osm_neighbors_0m, ctrpts_df and gd (pyproj Geod).

    Arguments:
    df                    --  pd.DataFrame; one building only, one row per line segment. Must already
                              contain the column 'is_front'.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with the added Boolean
                              column 'is_attached_side'.
    """
    building_id = df.loc[df.index[0], 'osm_id']

    # Every segment that is not already a front is a candidate side.
    non_front = df['is_front'] == False
    candidates = df.loc[non_front].reset_index()
    candidate_ids = df.loc[non_front, 'osm_id_final'].tolist()

    # Buildings touching this one, and the line segments (with center points) of each of them.
    neighbor_ids = osm_neighbors_0m.loc[osm_neighbors_0m['src_osm_id'] == building_id, 'nbr_osm_id'].tolist()
    neighbor_frames = [ctrpts_df.loc[ctrpts_df['osm_id'] == neighbor_id] for neighbor_id in neighbor_ids]

    # No attached neighbors: nothing can be an attached side.
    if not neighbor_frames:
        df['is_attached_side'] = False
        return df

    neighbor_segments = pd.concat(neighbor_frames, ignore_index=True)

    # For each candidate, store the distance to every neighbor segment and remember the smallest one.
    nearest_distances = []
    for pos in range(len(candidate_ids)):
        ctr_lon = candidates['Center_Lon'].iloc[pos]
        ctr_lat = candidates['Center_Lat'].iloc[pos]
        dist_col = 'dist_' + str(pos)
        # gd.inv returns (forward azimuth, back azimuth, distance); element [2] is the distance.
        neighbor_segments[dist_col] = neighbor_segments.apply(
            lambda nbr: gd.inv(ctr_lon, ctr_lat, nbr['Ctr_Lon'], nbr['Ctr_Lat'])[2], axis=1)
        nearest_distances.append(neighbor_segments[dist_col].min())

    # Positions of the closest candidates, limited to the number of attached neighbor buildings.
    closest_positions = np.argsort(nearest_distances)[:len(neighbor_ids)]

    attached_by_id = {segment_id: (pos in closest_positions)
                      for pos, segment_id in enumerate(candidate_ids)}

    df['is_attached_side'] = False
    for segment_id, attached in attached_by_id.items():
        df.loc[df['osm_id_final'] == segment_id, 'is_attached_side'] = attached
    return df


def f_is_connecting_mid_segment(df):
    """
    Flag a non-front segment that directly bridges two previously established fronts.

    Only evaluated when the building has exactly two segments with 'is_front' == True. A non-front
    segment is flagged when it starts at the end point of one front and ends at the start point of
    the other front. Coordinates are compared for exact equality.

    Arguments:
    df                    --  pd.DataFrame; one building only, one row per line segment. Must contain
                              'is_front' and the segment coordinates 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat'.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with the added Boolean
                              column 'is_connecting_mid_segment'.
    """
    result_col = 'is_connecting_mid_segment'
    front_flags = df['is_front']

    # With anything other than exactly two fronts there is no "in between" segment to look for.
    if front_flags.sum() != 2:
        df[result_col] = False
        return df

    # End points of the two fronts, in dataframe order.
    endpoints = df[front_flags][['Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat']].values
    first_start, first_end = (endpoints[0][0], endpoints[0][1]), (endpoints[0][2], endpoints[0][3])
    second_start, second_end = (endpoints[1][0], endpoints[1][1]), (endpoints[1][2], endpoints[1][3])

    def _bridges_fronts(segment):
        # Fronts themselves are never connecting mid-segments.
        if segment['is_front']:
            return False
        seg_start = (segment['Start_Lon'], segment['Start_Lat'])
        seg_end = (segment['End_Lon'], segment['End_Lat'])
        first_to_second = seg_start == first_end and seg_end == second_start
        second_to_first = seg_start == second_end and seg_end == first_start
        return first_to_second or second_to_first

    df[result_col] = df.apply(_bridges_fronts, axis=1)
    return df


def f_is_angled_front(df, angle_thresh = 60):
    """
    Flag non-front segments that continue an established front at a shallow angle.

    The case study contains many bent fronts, whereas the angle between a front and a side is
    typically close to 90 degrees. A non-front segment is flagged when both conditions hold for
    at least one front segment:
      (a) it is chained to the front, i.e. it starts at the front's end point or ends at the
          front's start point (exact coordinate equality), and
      (b) the absolute difference in 'LINE_BEARING' is below angle_thresh, or above
          360 - angle_thresh (wrap-around at north).

    Arguments:
    df                    --  pd.DataFrame; one building only, one row per line segment. Must contain
                              'is_front', 'osm_id_final', 'LINE_BEARING' and the segment coordinates.
    angle_thresh          --  float; bearing difference in degrees below which a chained segment is treated
                              as an angled front rather than a side. The default is 60 degrees.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with the added Boolean
                              column 'is_angled_front'.
    """
    fronts = df.loc[df['is_front'] == True]
    others = df.loc[df['is_front'] == False]

    # Collect the ids of every non-front segment that qualifies against at least one front.
    angled_ids = set()
    for pos in range(len(fronts)):
        front = fronts.iloc[pos]
        front_start = (front['Start_Lon'], front['Start_Lat'])
        front_end = (front['End_Lon'], front['End_Lat'])
        front_bearing = front['LINE_BEARING']

        # (a) chained to this front: starts where the front ends, or ends where the front starts
        chained = others.apply(
            lambda seg: (seg['Start_Lon'], seg['Start_Lat']) == front_end
            or (seg['End_Lon'], seg['End_Lat']) == front_start,
            axis=1)

        # (b) bearing difference within the threshold, allowing for the 0/360 wrap-around
        bearing_gap = (others['LINE_BEARING'] - front_bearing).abs()
        shallow = (bearing_gap > (360 - angle_thresh)) | (bearing_gap < angle_thresh)

        qualifying = others.loc[(chained == True) & (shallow == True), 'osm_id_final']
        angled_ids.update(qualifying.tolist())

    df['is_angled_front'] = df['osm_id_final'].isin(angled_ids)
    return df


def f_is_front_final(df):
    """
    Final classification of front line segments, to include mid-segments and additional angled fronts.

    A segment is a final front when any of 'is_front', 'is_connecting_mid_segment' or
    'is_angled_front' is True.

    Arguments:
    df                    --  pd.DataFrame; input dataframe with one building only, where each row in dataframe corresponds to a line segment.
                              Must contain the columns 'is_front', 'is_connecting_mid_segment' and 'is_angled_front'.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with added column 'is_front_final' with Boolean values.
    """
    # Each comparison is parenthesized on its own: '|' binds tighter than '==', so an
    # unparenthesized "a | b | c == True" would be evaluated as "(a | b | c) == True".
    any_front = (
        (df['is_front'] == True)
        | (df['is_connecting_mid_segment'] == True)
        | (df['is_angled_front'] == True)
    )
    df['is_front_final'] = False
    df.loc[any_front, 'is_front_final'] = True
    return df


def f_is_side(row, df_fr):
    """
    Decide whether a single line segment is a side.

    A segment is a side when it was already established as an attached side, or when it shares
    an end point with at least one final front without coinciding with that front (same start
    and same end). Final fronts are never sides. Coordinates are compared for exact equality.

    Intended to be applied row-wise, e.g. df.apply(f_is_side, axis=1, df_fr=...).

    Arguments:
    row                    --  pd.Series; one line segment (one row of the building dataframe) with
                               'is_front_final', 'is_attached_side' and the segment coordinates.
    df_fr                  --  pd.DataFrame; the segments of the same building classified as final fronts.

    Returns:
    Boolean                --  True if the segment is classified as a side, otherwise False.
    """
    # Fronts are never sides; attached segments are always sides.
    if row['is_front_final']:
        return False
    if row['is_attached_side']:
        return True

    front_start_lon, front_start_lat = df_fr['Start_Lon'], df_fr['Start_Lat']
    front_end_lon, front_end_lat = df_fr['End_Lon'], df_fr['End_Lat']

    # Per-front masks: which end point of the front coincides with which end point of this segment.
    front_start_at_seg_start = (front_start_lon == row['Start_Lon']) & (front_start_lat == row['Start_Lat'])
    front_end_at_seg_start = (front_end_lon == row['Start_Lon']) & (front_end_lat == row['Start_Lat'])
    front_start_at_seg_end = (front_start_lon == row['End_Lon']) & (front_start_lat == row['End_Lat'])
    front_end_at_seg_end = (front_end_lon == row['End_Lon']) & (front_end_lat == row['End_Lat'])

    shares_endpoint = (front_start_at_seg_start.any() or front_end_at_seg_start.any()
                       or front_start_at_seg_end.any() or front_end_at_seg_end.any())
    # Same start AND same end as a front: the segment coincides with that front, so it is not a side.
    coincides_with_front = (front_start_at_seg_start & front_end_at_seg_end).any()
    return shares_endpoint and not coincides_with_front


def f_is_rear(df):
    """
    Classify lines as rears if they are neither final fronts nor sides.

    Arguments:
    df                    --  pd.DataFrame; input dataframe with one building only, where each row in dataframe corresponds to a line segment.
                              Must contain the columns 'is_front_final' and 'is_side'.

    Returns:
    df                    --  pd.DataFrame; the same dataframe (modified in place) with added column 'is_rear' with Boolean values.
    """
    # Vectorized mask instead of a row-wise apply: a segment is a rear exactly when both
    # 'is_front_final' and 'is_side' are False. This also yields a Boolean column for an empty frame.
    df['is_rear'] = (df['is_front_final'] == False) & (df['is_side'] == False)
    return df


def f_no_front(df):
    """
    Reset the side and rear flags of a building that has no front at all.

    Without any final front the building cannot be properly classified, so 'is_side' and
    'is_rear' are set to False for every segment. Buildings with at least one final front
    are returned unchanged.

    Arguments:
    df                    --  pd.DataFrame; one building only, one row per line segment, with the
                              column 'is_front_final' already present.

    Returns:
    df                    --  pd.DataFrame; the same dataframe, with 'is_side' and 'is_rear' set to
                              False everywhere when no segment is a final front.
    """
    # At least one final front: leave the classification as it is.
    if df['is_front_final'].sum() != 0:
        return df

    for flag_col in ('is_side', 'is_rear'):
        df[flag_col] = False
    return df


# Wrapper function, runs all above functions
def classify_lines(osm_id):
    """
    Run the full front / side / rear classification for one building.

    Pulls the building's line segments out of the notebook-level splitlines_df and applies the
    classification functions defined above, in order.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.

    Returns:
    segments              --  pd.DataFrame; all line segments of the given osm_id with the classification columns added.
    unclassified          --  pd.DataFrame; the segments of the same osm_id that ended up as neither front, side nor rear.
    """
    # All line segments of the requested building
    segments = splitlines_df.loc[splitlines_df['osm_id'] == osm_id].reset_index()

    # 1. initial fronts (segment has an image or is in the prepared dataset)
    # 2. attached sides (segment touches a neighboring building)
    # 3. connecting mid-segments (segment lying between exactly two fronts)
    for step in (f_is_front, f_is_attached_side, f_is_connecting_mid_segment):
        segments = step(segments)

    # 4. angled fronts: segments chained to a front whose bearing differs by less than the threshold
    segments = f_is_angled_front(segments, angle_thresh = 60)

    # 5. combine the front flags before sides and rears are derived from them
    segments = f_is_front_final(segments)

    # 6. sides: attached sides, or segments sharing an end point with a final front
    final_fronts = segments.loc[(segments['is_front_final'] == True)]
    segments['is_side'] = segments.apply(f_is_side, axis=1, df_fr=final_fronts)

    # 7. everything left over is a rear; 8. a building without any front is left unclassified
    segments = f_no_front(f_is_rear(segments))

    # Segments that are neither front, side nor rear
    nothing_assigned = (
        (segments['is_front_final'] == False)
        & (segments['is_side'] == False)
        & (segments['is_rear'] == False)
    )
    unclassified = segments.loc[nothing_assigned]

    return segments, unclassified

# Create temporary dataframes to debug

In [None]:
def create_temp_dfs(osm_id):
    """
    Debugging helper: pull the line segments of a single building out of splitlines_df so the
    individual classification functions can be applied and inspected one at a time.

    Arguments:
    osm_id                --  int; unique building identifier whose segments should be extracted.

    Returns:
    df_split              --  pd.DataFrame; rows of splitlines_df whose 'osm_id' equals the input, one row per line segment.
                              The index is reset; the previous row labels are kept in a new 'index' column.
    """
    belongs_to_building = splitlines_df['osm_id'] == osm_id
    return splitlines_df.loc[belongs_to_building].reset_index()

In [None]:
# Run the classify_lines wrapper, which applies all classification steps, on a single building (osm_id).
# df_class holds every segment of the building with its classification flags;
# df_unclass holds only the segments that ended up neither front, side, nor rear.
chk_osm_id = 1184969746
df_class, df_unclass = classify_lines(chk_osm_id)
df_class

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True,True,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False,True,False,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False


In [None]:
# Alternatively, debug one function at a time: extract the unclassified segments of one building
# into a temporary dataframe, then apply the classification functions individually in the
# following cells (same order as inside classify_lines), displaying df_split after each step.
chk_osm_id = 1184969746
df_split = create_temp_dfs(chk_osm_id)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,


In [None]:
# Debug step: initial front flag, based on whether we have images for the segment or the segment is in prep_ds
# (adds the 'have_image' and 'is_front' columns).
df_split = f_is_front(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False


In [None]:
# Debug step: find attached neighbors; a segment with an attached neighbor is a side
# (adds the 'is_attached_side' column).
df_split = f_is_attached_side(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True


In [None]:
# Debug step: connecting mid segments, i.e. only if there are two fronts, find a segment in between them,
# which must also be a front (adds the 'is_connecting_mid_segment' column).
df_split = f_is_connecting_mid_segment(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False


In [None]:
# Debug step: angled fronts, i.e. segments connected to a front or connecting mid segment whose bearing
# differs from it by less than angle_thresh degrees (adds the 'is_angled_front' column).
df_split = f_is_angled_front(df_split, angle_thresh=60)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False


In [None]:
# Debug step: final front flag, determined from the results of the previous functions
# before sides and rears are classified (adds the 'is_front_final' column).
df_split = f_is_front_final(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True,True
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False,True
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False


In [None]:
# Debug step: sides. df_front (the final front segments) is passed to f_is_side, which is applied row by row;
# per the wrapper's notes, a side is either an attached side or a line attached at one end only to a front.
df_front = df_split.loc[(df_split['is_front_final'] == True)]
df_split['is_side'] = df_split.apply(f_is_side, axis=1, df_fr=df_front)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True,True,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False,True,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True


In [None]:
# Debug step: remaining segments are classified as rear (adds the 'is_rear' column).
df_split = f_is_rear(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True,True,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False,True,False,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False


In [None]:
# Debug step: if no segment has 'is_front_final' == True, 'is_side' and 'is_rear' are set to False
# for all segments, i.e. the building is left unclassified.
df_split = f_no_front(df_split)
df_split

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear
0,9214,9215,1184969746,1184969746_1,7.69697,45.092513,7.696894,45.092514,7.696932,45.092514,6.033278,271.906893,181.906893,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
1,9215,9216,1184969746,1184969746_2,7.696894,45.092514,7.696822,45.092495,7.696858,45.092505,6.041909,249.02948,159.02948,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
2,9216,9217,1184969746,1184969746_3,7.696822,45.092495,7.696737,45.092589,7.696779,45.092542,12.390444,327.271497,237.271497,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False
3,9217,9218,1184969746,1184969746_4,7.696737,45.092589,7.696854,45.092639,7.696795,45.092614,10.82396,58.544622,328.544622,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,False,False,True,True,False,False
4,9218,9219,1184969746,1184969746_5,7.696854,45.092639,7.696968,45.092643,7.696911,45.092641,8.997275,87.655881,357.655881,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,True,1705.913113,15055.494773,14597.805112,60269.980037,4089.694082,10508.11103,16761.407886,70778.091066,0.162343,0.249801,0.236816,True,True,False,False,False,True,False,False
5,9219,9220,1184969746,1184969746_6,7.696968,45.092643,7.69697,45.092513,7.696969,45.092578,14.459367,179.378325,89.378325,58.746234,216.188698,187579,3,AB,R3AB,1297.132188,False,,,,,,,,,,,,False,False,True,False,False,False,True,False


# Classify lines into front, rear, side

In [None]:
# Run classify_lines on every building (unique osm_id) in splitlines_df and gather the outputs.

counter = 0             # number of buildings processed so far
all_results = []        # classified segment tables, one per building
unclassified_lst = []   # non-empty tables of segments that are neither front, side, nor rear
not_result_lst = []     # osm_ids whose result was unusable (not a 2-tuple, or an empty segment table)
for counter, osm_id in enumerate(splitlines_df['osm_id'].unique(), start=1):
    result = classify_lines(osm_id)
    if not (isinstance(result, tuple) and len(result) == 2 and not result[0].empty):
        # Unexpected return shape or no segments for this building: remember the id and move on
        not_result_lst.append(osm_id)
    else:
        df_split, df_unclassified = result
        all_results.append(df_split)
        if not df_unclassified.empty:
            unclassified_lst.append(df_unclassified)

    # Progress report every 100 buildings
    if counter % 100 == 0:
        print(counter)

# Total number of buildings processed
print(counter)
# Stack the per-building tables into a single dataframe with a fresh 0..n-1 index
classified_df = pd.concat(all_results, ignore_index=True)
classified_df

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1818


Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear
0,0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,False,True
1,1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False
2,2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,True,,,,,,,,,,,,False,True,False,False,False,True,False,False
3,3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False
4,4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,True,,,,,,,,,,,,False,True,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9771,9770,9771,1214584772,1214584772_4,7.698073,45.106321,7.698279,45.106267,7.698176,45.106294,17.299046,110.479065,20.479065,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,10062.794488,20567.603542,57665.776902,128124.708204,0.0,57665.776902,30630.39803,185790.485107,0.174502,0.160528,0.164865,True,True,False,False,False,True,False,False
9772,9771,9772,1233637785,1233637785_1,7.655736,45.134323,7.655596,45.134310,7.655666,45.134316,11.122694,262.512272,172.512272,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False
9773,9772,9773,1233637785,1233637785_2,7.655596,45.134310,7.655583,45.134379,7.655589,45.134344,7.779111,352.529588,262.529588,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False
9774,9773,9774,1233637785,1233637785_3,7.655583,45.134379,7.655723,45.134392,7.655653,45.134386,11.122699,82.511550,352.511550,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False


In [None]:
# Collapse the boolean flags into a single 'status' label per segment.
# Precedence: 'is_front_final' wins over 'is_side', which wins over 'is_rear';
# segments with none of the three flags set are labelled 'n/a'.
# NOTE: 'n/a' is one of the strings pandas readers treat as missing by default, so the label
# becomes NaN if this table is written to file and read back with default settings.
status_if_not_side = np.where(classified_df['is_rear'], 'rear', 'n/a')
status_if_not_front = np.where(classified_df['is_side'], 'side', status_if_not_side)
classified_df['status'] = np.where(classified_df['is_front_final'], 'front', status_if_not_front)
classified_df

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status
0,0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
1,1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side
2,2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front
3,3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side
4,4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9771,9770,9771,1214584772,1214584772_4,7.698073,45.106321,7.698279,45.106267,7.698176,45.106294,17.299046,110.479065,20.479065,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,10062.794488,20567.603542,57665.776902,128124.708204,0.0,57665.776902,30630.39803,185790.485107,0.174502,0.160528,0.164865,True,True,False,False,False,True,False,False,front
9772,9771,9772,1233637785,1233637785_1,7.655736,45.134323,7.655596,45.134310,7.655666,45.134316,11.122694,262.512272,172.512272,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,
9773,9772,9773,1233637785,1233637785_2,7.655596,45.134310,7.655583,45.134379,7.655589,45.134344,7.779111,352.529588,262.529588,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,
9774,9773,9774,1233637785,1233637785_3,7.655583,45.134379,7.655723,45.134392,7.655653,45.134386,11.122699,82.511550,352.511550,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,


In [None]:
# Segments that were labelled by the algorithm itself: rows already identified during the
# GIS stage ('in_prep_ds' == True) and rows left without a label ('n/a') are excluded.
line_counts = classified_df.loc[
    classified_df['in_prep_ds'].eq(False) & classified_df['status'].ne('n/a')
]
line_counts

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status
0,0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
1,1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side
3,3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side
5,5,6,83471233,83471233_2,7.705801,45.097166,7.705623,45.097172,7.705712,45.097169,14.012680,272.873595,182.873595,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side
6,6,7,83471233,83471233_3,7.705623,45.097172,7.705660,45.097679,7.705642,45.097426,56.373535,2.911490,272.911490,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9765,9764,9765,1214584771,1214584771_6,7.697779,45.106069,7.697836,45.106053,7.697808,45.106061,4.820707,111.283905,21.283905,78.600012,258.815488,123074,4,AB,R4AB,1811.708414,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
9766,9765,9766,1214584771,1214584771_7,7.697836,45.106053,7.697888,45.106137,7.697862,45.106095,10.196017,23.641854,293.641854,78.600012,258.815488,123074,4,AB,R4AB,1811.708414,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
9767,9766,9767,1214584771,1214584771_8,7.697888,45.106137,7.698016,45.106107,7.697952,45.106122,10.585736,108.225190,18.225190,78.600012,258.815488,123074,4,AB,R4AB,1811.708414,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear
9769,9768,9769,1214584772,1214584772_2,7.698200,45.106145,7.698022,45.106201,7.698111,45.106173,15.290303,294.134786,204.134786,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side


In [None]:
# Side count: sum the 'is_side' flags per building (osm_id) to get the number of side segments,
# then tabulate how many buildings have each number of sides.
groupby_side = classified_df.groupby('osm_id')['is_side'].agg('sum')
groupby_side.value_counts()

Unnamed: 0_level_0,count
is_side,Unnamed: 1_level_1
2,1430
0,239
3,80
1,35
4,26
5,5
6,2
9,1


# Define function to check neighbor distances per urban planning laws

In [None]:
def check_neighbors(osm_id):
    """
    Function to count a building's neighbors within the buffer distance that applies to its era of
    construction and, when exactly one neighbor is found, to identify which side segment faces it.

    The Polygon Neighbor table is selected from the building's 'Era' value in classified_df:
      Era >= 6        -->  osm_neighbors_10m (10m minimum between buildings by law)
      4 <= Era < 6    -->  osm_neighbors_3m  (3m minimum between buildings by law)
      otherwise       -->  osm_neighbors_0m  (no law applies; only touching buildings count)

    The caller (assign_wwr) interprets the result as follows:
      If no. neighbors = 0, then WWR on all sides = 5%.
      If no. neighbors >= 2, then there is zero glazing on all sides.
      If no. neighbors = 1, then the side returned here has zero glazing; the other side(s) have WWR = 5%.

    The side facing the neighbor is only resolved when the building has exactly two segments flagged
    'is_side' and the neighbor has at least one segment in ctrpts_df. It is the side whose center point
    has the smallest geodesic distance (gd.inv) to any segment center of the neighbor; on a tie the
    second side is returned.

    Buffer distances and eras of construction are particular to the case study but can be modified to suit other geographical contexts.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.

    Returns:
    num_neighbors         --  int; number of rows for this osm_id in the selected Polygon Neighbor table.
    osm_id_final_return   --  'osm_id_final' value of the side segment closest to the single neighbor, or
                              False when that side is not resolved (see conditions above).
    """
    # Get era from classified_df (raises IndexError if osm_id is not present in classified_df)
    era = classified_df.loc[classified_df['osm_id'] == osm_id, 'Era'].values[0]

    # Select the neighbor table for the era of construction
    if era >= 6:
        neighbor_table = osm_neighbors_10m
    elif era >= 4:
        neighbor_table = osm_neighbors_3m
    else:
        neighbor_table = osm_neighbors_0m

    # Filter the table once; the neighbor count is the number of matching rows
    neighbors = neighbor_table.loc[neighbor_table['src_osm_id'] == osm_id, 'nbr_osm_id'].tolist()
    num_neighbors = len(neighbors)

    # Only a single neighbor requires working out which side it is on
    if num_neighbors != 1:
        return num_neighbors, False

    # Side segments of the subject building; exactly two are required
    side_df = classified_df.loc[(classified_df['osm_id'] == osm_id) & (classified_df['is_side'] == True)]
    if len(side_df) != 2:
        return num_neighbors, False

    # Segment center points of the neighbor; without them no distance can be measured,
    # so report the side as unresolved instead of failing or picking a side arbitrarily
    nbr_df = ctrpts_df.loc[ctrpts_df['osm_id'] == neighbors[0]]
    if nbr_df.empty:
        return num_neighbors, False

    def _min_dist_to_neighbor(ctr_lon, ctr_lat):
        # Smallest geodesic distance (third value returned by gd.inv) from one side's center point
        # to any of the neighbor's segment centers
        return nbr_df.apply(lambda row: gd.inv(ctr_lon, ctr_lat, row['Ctr_Lon'], row['Ctr_Lat'])[2], axis=1).min()

    min_dist_0 = _min_dist_to_neighbor(side_df['Center_Lon'].values[0], side_df['Center_Lat'].values[0])
    min_dist_1 = _min_dist_to_neighbor(side_df['Center_Lon'].values[1], side_df['Center_Lat'].values[1])

    # The closer side is the one facing the neighbor (ties go to the second side)
    src_side = 0 if min_dist_0 < min_dist_1 else 1

    return num_neighbors, side_df['osm_id_final'].iloc[src_side]

# Assign WWR based on status: front, rear, side

In [None]:
def assign_wwr(osm_id):
    """
    Function to assign a WWR ('WWR_final') to each segment of one osm_id based on its front/rear/side status.

    Fronts:  use the segment's own 'WWR_total' if present; otherwise the LINE_LENGTH-weighted average
             'WWR_total' of the building's fronts that do have a value.
    Rears:   use the LINE_LENGTH-weighted average 'WWR_upper' of the fronts; if no front has a 'WWR_upper'
             value, use the weighted average 'WWR_total' instead.
    Sides:   call check_neighbors once for the building:
               0 neighbors   --> 0.05 for every side
               >= 2 neighbors --> 0.0 for every side
               1 neighbor    --> 0.0 for the side whose 'osm_id_final' is returned by check_neighbors,
                                 0.05 for the other side(s)
    Segments with any other status keep WWR_final = NaN.

    Note that a weighted average over zero fronts evaluates to 0, so fronts/rears of a building without
    any detected WWR receive 0.0 rather than NaN.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.

    Returns:
    temp_df               --  pd.DataFrame; copy of the classified_df rows for one osm_id with a 'WWR_final' column
                              (the column is always present, even if no segment has a status).
    """
    temp_df = classified_df.loc[classified_df['osm_id'] == osm_id].copy()
    # Create the output column up front so that every returned frame has the same columns
    temp_df['WWR_final'] = np.nan

    front_mask = temp_df['status'] == 'front'
    rear_mask = temp_df['status'] == 'rear'
    side_mask = temp_df['status'] == 'side'

    def _weighted_front_wwr(wwr_col):
        # LINE_LENGTH-weighted average of wwr_col over fronts where it is not NaN.
        # Returns (average, True if no front had a value); the average of zero rows is 0.
        detected = temp_df.loc[front_mask & temp_df[wwr_col].notna()]
        line_length_percent = detected['LINE_LENGTH'] / detected['LINE_LENGTH'].sum()
        return (detected[wwr_col] * line_length_percent).sum(), detected.empty

    upper_wwr_weighted, no_upper_detected = _weighted_front_wwr('WWR_upper')
    total_wwr_weighted, _ = _weighted_front_wwr('WWR_total')

    # Fronts: own detection if available, else the weighted average WWR_total
    temp_df.loc[front_mask, 'WWR_final'] = temp_df.loc[front_mask, 'WWR_total'].fillna(total_wwr_weighted)

    # Rears: weighted average WWR_upper (unless this doesn't exist, in which case weighted average WWR_total)
    temp_df.loc[rear_mask, 'WWR_final'] = total_wwr_weighted if no_upper_detected else upper_wwr_weighted

    # Sides: the neighbor check depends only on the building, so run it once and only if needed
    if side_mask.any():
        num_neighbors, osm_id_final_return = check_neighbors(osm_id)
        if num_neighbors == 0:
            temp_df.loc[side_mask, 'WWR_final'] = 0.05
        elif num_neighbors >= 2:
            temp_df.loc[side_mask, 'WWR_final'] = 0.0
        elif num_neighbors == 1:
            # Only the side identified by check_neighbors is attached; if it returned False,
            # no 'osm_id_final' matches and every side receives 0.05
            attached_mask = side_mask & (temp_df['osm_id_final'] == osm_id_final_return)
            temp_df.loc[side_mask, 'WWR_final'] = 0.05
            temp_df.loc[attached_mask, 'WWR_final'] = 0.0

    return temp_df

In [None]:
# Call assign_wwr once for every unique osm_id in classified_df, printing a progress
# count after each 100 buildings and the final count at the end, then stack the
# per-building results into wwr_df.

counter = 0
temporary_df_lst = []

for counter, osm_id in enumerate(classified_df['osm_id'].unique(), start=1):
  temporary_df_lst.append(assign_wwr(osm_id))
  if counter % 100 == 0:
    print(counter)

print(counter)
wwr_df = pd.concat(temporary_df_lst, ignore_index=True)
wwr_df

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1818


Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status,WWR_final
0,0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.000000
1,1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000
2,2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front,0.000000
3,3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000
4,4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9771,9770,9771,1214584772,1214584772_4,7.698073,45.106321,7.698279,45.106267,7.698176,45.106294,17.299046,110.479065,20.479065,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,10062.794488,20567.603542,57665.776902,128124.708204,0.0,57665.776902,30630.39803,185790.485107,0.174502,0.160528,0.164865,True,True,False,False,False,True,False,False,front,0.164865
9772,9771,9772,1233637785,1233637785_1,7.655736,45.134323,7.655596,45.134310,7.655666,45.134316,11.122694,262.512272,172.512272,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,,
9773,9772,9773,1233637785,1233637785_2,7.655596,45.134310,7.655583,45.134379,7.655589,45.134344,7.779111,352.529588,262.529588,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,,
9774,9773,9774,1233637785,1233637785_3,7.655583,45.134379,7.655723,45.134392,7.655653,45.134386,11.122699,82.511550,352.511550,37.803752,86.525441,1031143,8,SF,R8SF,86.525441,False,,,,,,,,,,,,False,False,False,False,False,False,False,False,,


In [None]:
# Keep only the segments that received a WWR, i.e. drop rows whose WWR_final is NaN

wwr_df = wwr_df.dropna(subset=['WWR_final'])
wwr_df

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status,WWR_final
0,0,1,83471230,83471230_1,7.706395,45.096729,7.706361,45.096166,7.706378,45.096448,62.646302,182.411524,92.411524,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.000000
1,1,2,83471230,83471230_2,7.706361,45.096166,7.706208,45.096171,7.706285,45.096168,12.109261,272.427701,182.427701,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000
2,2,3,83471230,83471230_3,7.706208,45.096171,7.706241,45.096734,7.706225,45.096452,62.635242,2.411950,272.411950,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front,0.000000
3,3,4,83471230,83471230_4,7.706241,45.096734,7.706395,45.096729,7.706318,45.096732,12.108680,92.375219,2.375219,149.499485,758.515086,186093,4,AB,R4AB,5309.605599,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000
4,4,5,83471233,83471233_1,7.705838,45.097673,7.705801,45.097166,7.705819,45.097419,56.362096,182.904191,92.904191,140.753656,789.657246,185454,4,AB,R4AB,5527.600720,True,,,,,,,,,,,,False,True,False,False,False,True,False,False,front,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9767,9766,9767,1214584771,1214584771_8,7.697888,45.106137,7.698016,45.106107,7.697952,45.106122,10.585736,108.225190,18.225190,78.600012,258.815488,123074,4,AB,R4AB,1811.708414,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.056696
9768,9767,9768,1214584772,1214584772_1,7.698279,45.106267,7.698200,45.106145,7.698239,45.106206,14.912131,204.734596,114.734596,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,0.000000,0.000000,89068.562937,0.000000,0.0,89068.562937,0.00000,89068.562937,0.000000,,0.000000,True,True,False,False,False,True,False,False,front,0.000000
9769,9768,9769,1214584772,1214584772_2,7.698200,45.106145,7.698022,45.106201,7.698111,45.106173,15.290303,294.134786,204.134786,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000
9770,9769,9770,1214584772,1214584772_3,7.698022,45.106201,7.698073,45.106321,7.698048,45.106261,13.924715,16.666161,286.666161,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000


In [None]:
def have_min_one_detection(osm_id):
    """
    Function to report whether any segment of a building in wwr_df is flagged 'have_image',
    i.e. whether at least one facade of the building was detected for WWR.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.

    Returns:
    return_dict           --  dictionary; {'osm_id': osm_id, 'have_min_one_detection': True/False}.
    """
    # Number of this building's segments with have_image set
    n_with_image = wwr_df.loc[wwr_df['osm_id'] == osm_id, 'have_image'].sum()
    detected = True if n_with_image >= 1 else False
    return {'osm_id':osm_id, 'have_min_one_detection':detected}

In [None]:
# Evaluate have_min_one_detection for every building remaining in wwr_df and collect the result dictionaries
have_img_dict_lst = [have_min_one_detection(osm_id) for osm_id in wwr_df['osm_id'].unique()]

print(len(have_img_dict_lst))

1597


In [None]:
# Reduce wwr_df to only results if a building has a min of one detection
have_img_df = pd.DataFrame(have_img_dict_lst)
# Drop the flag if it is already present (cell executed before); otherwise the merge would create
# suffixed duplicates ('have_min_one_detection_x' / '_y') and the filter below would raise a KeyError.
wwr_df = (
    wwr_df
    .drop(columns='have_min_one_detection', errors='ignore')
    .merge(have_img_df, on='osm_id', how='left')
)
wwr_df = wwr_df.loc[wwr_df['have_min_one_detection']==True]
wwr_df

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status,WWR_final,have_min_one_detection
20,20,21,83471240,83471240_1,7.708351,45.095895,7.708318,45.095332,7.708334,45.095614,62.646436,182.411618,92.411618,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,True,16930.345056,84877.205472,90693.376505,419706.168332,0.0,90693.376505,101807.550527,510399.544837,0.186677,0.202230,0.199466,True,True,False,False,False,True,False,False,front,0.199466,True
21,21,22,83471240,83471240_2,7.708318,45.095332,7.708164,45.095337,7.708241,45.095335,12.109366,272.427715,182.427715,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True
22,22,23,83471240,83471240_3,7.708164,45.095337,7.708198,45.095900,7.708181,45.095619,62.646619,2.418707,272.418707,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.202230,True
23,23,24,83471240,83471240_4,7.708198,45.095900,7.708351,45.095895,7.708274,45.095898,12.101470,92.428606,2.428606,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True
24,24,25,83471241,83471241_1,7.709266,45.096186,7.708926,45.095812,7.709096,45.095999,49.416307,212.704064,122.704064,122.521160,585.214298,184893,4,AB,R4AB,4096.500085,True,2320.703730,20356.036889,31650.655384,131043.144541,0.0,31650.655384,22676.740619,162693.799925,0.073322,0.155338,0.139383,True,True,False,False,False,True,False,False,front,0.139383,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8527,9766,9767,1214584771,1214584771_8,7.697888,45.106137,7.698016,45.106107,7.697952,45.106122,10.585736,108.225190,18.225190,78.600012,258.815488,123074,4,AB,R4AB,1811.708414,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.056696,True
8528,9767,9768,1214584772,1214584772_1,7.698279,45.106267,7.698200,45.106145,7.698239,45.106206,14.912131,204.734596,114.734596,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,0.000000,0.000000,89068.562937,0.000000,0.0,89068.562937,0.000000,89068.562937,0.000000,,0.000000,True,True,False,False,False,True,False,False,front,0.000000,True
8529,9768,9769,1214584772,1214584772_2,7.698200,45.106145,7.698022,45.106201,7.698111,45.106173,15.290303,294.134786,204.134786,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True
8530,9769,9770,1214584772,1214584772_3,7.698022,45.106201,7.698073,45.106321,7.698048,45.106261,13.924715,16.666161,286.666161,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True


In [None]:
def check_thresh(osm_id, front_thresh=0.3):
    """
    Function to run squareness test on a subject building, to eliminate long rectangular ('slab-style') buildings that have short street-facing segments which
    are observed in the case study to have a low WWR (or even zero glazing). Thus these short segments' WWR may not be representative of the overall WWR, and the
    entire building is subsequently excluded from the case study.

    A building passes when at least one segment with status 'front' has an image ('have_image' is True) and a
    LINE_LENGTH of at least (1 - front_thresh) times the longest segment of the building in wwr_df.
    A building with no front segments (or not present in wwr_df) does not pass.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.
    front_thresh          --  float; threshold to use in the squareness test. The default of 0.3 means that if a front segment is within (1.0 - 0.3) = 0.7 of the
                              longest line, then this segment will pass the squareness test, i.e. 'within_thresh' = True.
                              If not, the line does not pass the test, i.e. 'within_thresh' = False.

    Returns:
    return_dict           --  dictionary; output dictionary with osm_id and whether the building passes the squareness test (i.e. within_thresh=True/False).
    """
    tmp_df = wwr_df.loc[wwr_df['osm_id'] == osm_id]
    # Minimum qualifying length, derived from the longest segment of the building (any status)
    max_len = tmp_df['LINE_LENGTH'].max()
    max_minus_thresh = max_len * (1 - front_thresh)
    # Only lines classified as fronts can satisfy the test
    fronts = tmp_df.loc[tmp_df['status'] == 'front']
    # Element-wise test; on an empty selection this is simply an empty mask, so .any() is False
    within_thresh = (fronts['LINE_LENGTH'] >= max_minus_thresh) & (fronts['have_image'] == True)
    return_dict = {'osm_id':osm_id, 'within_thresh':bool(within_thresh.any())}

    return return_dict

In [None]:
# Run for loop on all osm_id unique values within wwr_df, call check_thresh to determine if building meets squareness threshold
# create thresh_df and then merge with wwr_df by 'osm_id'

thresh_dict_lst = []
for osm_id in wwr_df['osm_id'].unique():
  thresh_dict = check_thresh(osm_id)
  thresh_dict_lst.append(thresh_dict)

# Reduce wwr_df to include only buildings which pass the squareness test
thresh_df = pd.DataFrame(thresh_dict_lst)
thresh_df = thresh_df.loc[thresh_df['within_thresh']==True]
# Drop the flag if it is already present (cell executed before); otherwise the merge would create
# suffixed duplicates ('within_thresh_x' / '_y') and the filter below would raise a KeyError.
wwr_df = wwr_df.drop(columns='within_thresh', errors='ignore')
wwr_df = pd.merge(wwr_df, thresh_df, on='osm_id', how='left')
# Buildings that failed the test have no match in thresh_df (NaN after the left merge) and are removed here
wwr_df = wwr_df.loc[wwr_df['within_thresh']==True]
wwr_df

Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status,WWR_final,have_min_one_detection,within_thresh
0,20,21,83471240,83471240_1,7.708351,45.095895,7.708318,45.095332,7.708334,45.095614,62.646436,182.411618,92.411618,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,True,16930.345056,84877.205472,90693.376505,419706.168332,0.0,90693.376505,101807.550527,510399.544837,0.186677,0.202230,0.199466,True,True,False,False,False,True,False,False,front,0.199466,True,True
1,21,22,83471240,83471240_2,7.708318,45.095332,7.708164,45.095337,7.708241,45.095335,12.109366,272.427715,182.427715,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True
2,22,23,83471240,83471240_3,7.708164,45.095337,7.708198,45.095900,7.708181,45.095619,62.646619,2.418707,272.418707,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.202230,True,True
3,23,24,83471240,83471240_4,7.708198,45.095900,7.708351,45.095895,7.708274,45.095898,12.101470,92.428606,2.428606,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True
4,24,25,83471241,83471241_1,7.709266,45.096186,7.708926,45.095812,7.709096,45.095999,49.416307,212.704064,122.704064,122.521160,585.214298,184893,4,AB,R4AB,4096.500085,True,2320.703730,20356.036889,31650.655384,131043.144541,0.0,31650.655384,22676.740619,162693.799925,0.073322,0.155338,0.139383,True,True,False,False,False,True,False,False,front,0.139383,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6754,9758,9759,1214584770,1214584770_4,7.697510,45.106176,7.697719,45.106113,7.697615,45.106144,17.931609,113.090262,23.090262,65.066177,257.733721,123075,5,AB,R5AB,1288.668604,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.190556,True,True
6763,9767,9768,1214584772,1214584772_1,7.698279,45.106267,7.698200,45.106145,7.698239,45.106206,14.912131,204.734596,114.734596,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,0.000000,0.000000,89068.562937,0.000000,0.0,89068.562937,0.000000,89068.562937,0.000000,,0.000000,True,True,False,False,False,True,False,False,front,0.000000,True,True
6764,9768,9769,1214584772,1214584772_2,7.698200,45.106145,7.698022,45.106201,7.698111,45.106173,15.290303,294.134786,204.134786,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True
6765,9769,9770,1214584772,1214584772_3,7.698022,45.106201,7.698073,45.106321,7.698048,45.106261,13.924715,16.666161,286.666161,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True


In [None]:
# Check how many buildings remain in wwr_df after the squareness filter
n_passing = wwr_df['osm_id'].nunique()
print('Number of buildings passing squareness test: ', n_passing)

Number of buildings passing squareness test:  1064


In [None]:
# Export wwr_df, as it will be used in script 07 in this repository, which converts WWR to a
# per-orientation basis, for use in UBEM.
# NOTE: 'fpath' below is a placeholder without a value; assign the output path (a string ending
# in .xlsx) before running this cell, otherwise the cell is not valid Python.
# The file is written without the DataFrame index (index=False).

fpath = # ADD FILE PATH HERE, e.g. './Project_Name/classified_splitlines_with_WWRs.xlsx'
wwr_df.to_excel(fpath, index=False)

# Calculate building-scale WWR

In [None]:
def calc_bldg_wwr(osm_id):
    """
    Function to calculate the per-segment contributions to the WWR of an entire building.

    WWR is the weighted average based on line length, except for attached sides, where attached walls are not included in the WWR calculation:
    segments with 'is_attached_side' == True get a weight of 0 and their length is left out of the total length used
    to normalise the weights. Summing 'weighted_WWR' over the returned rows gives the building-scale WWR.

    Arguments:
    osm_id                --  int; unique building identifier, typically fed into function using a for loop for a large list of buildings to be classified.

    Returns:
    tmp_df                --  pd.DataFrame; copy of the wwr_df rows for the given osm_id with two added columns:
                              'line_weight' (share of the non-attached length) and 'weighted_WWR' (WWR_final * line_weight).
                              If the osm_id is not in wwr_df, an empty frame with these columns is returned.
    """
    tmp_df = wwr_df.loc[wwr_df['osm_id'] == osm_id].copy()
    # Segments that count towards the building WWR (everything not flagged as an attached side)
    not_attached = tmp_df['is_attached_side'] != True
    # Total length of the counted segments; attached segments contribute 0
    total_line_len = tmp_df['LINE_LENGTH'].where(not_attached, 0).sum()
    # Length share per segment, forced to 0 for attached segments
    tmp_df['line_weight'] = (tmp_df['LINE_LENGTH'] / total_line_len).where(not_attached, 0)
    tmp_df['weighted_WWR'] = tmp_df['WWR_final'] * tmp_df['line_weight']
    return tmp_df

In [None]:
# Apply calc_bldg_wwr to every unique osm_id in wwr_df, printing a progress count after
# each 100 buildings and the final count at the end, then stack the results into bldg_wwr_df.

df_lst = []
counter = 0
for counter, osm_id in enumerate(wwr_df['osm_id'].unique(), start=1):
  tmp_df = calc_bldg_wwr(osm_id)
  df_lst.append(tmp_df)
  if counter % 100 == 0:
    print(counter)

print(counter)
bldg_wwr_df = pd.concat(df_lst, ignore_index=True)
bldg_wwr_df

100
200
300
400
500
600
700
800
900
1000
1064


Unnamed: 0,index,OBJECTID,osm_id,osm_id_final,Start_Lon,Start_Lat,End_Lon,End_Lat,Center_Lon,Center_Lat,LINE_LENGTH,LINE_BEARING,Line_Bearing_Normal,osm_polygon_length,osm_polygon_area,CIT_AR,Era,Main_Class,Final_Class,GFA,in_prep_ds,lower_window_area,upper_window_area,lower_wall_area,upper_wall_area,garage_area,lower_wall_area_NET,total_window_area,total_wall_area,WWR_lower,WWR_upper,WWR_total,have_image,is_front,is_attached_side,is_connecting_mid_segment,is_angled_front,is_front_final,is_side,is_rear,status,WWR_final,have_min_one_detection,within_thresh,line_weight,weighted_WWR
0,20,21,83471240,83471240_1,7.708351,45.095895,7.708318,45.095332,7.708334,45.095614,62.646436,182.411618,92.411618,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,True,16930.345056,84877.205472,90693.376505,419706.168332,0.0,90693.376505,101807.550527,510399.544837,0.186677,0.202230,0.199466,True,True,False,False,False,True,False,False,front,0.199466,True,True,0.419029,0.083582
1,21,22,83471240,83471240_2,7.708318,45.095332,7.708164,45.095337,7.708241,45.095335,12.109366,272.427715,182.427715,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True,0.080997,0.004050
2,22,23,83471240,83471240_3,7.708164,45.095337,7.708198,45.095900,7.708181,45.095619,62.646619,2.418707,272.418707,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.202230,True,True,0.419030,0.084740
3,23,24,83471240,83471240_4,7.708198,45.095900,7.708351,45.095895,7.708274,45.095898,12.101470,92.428606,2.428606,149.503891,758.362410,184907,4,AB,R4AB,5308.536873,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True,0.080944,0.004047
4,24,25,83471241,83471241_1,7.709266,45.096186,7.708926,45.095812,7.709096,45.095999,49.416307,212.704064,122.704064,122.521160,585.214298,184893,4,AB,R4AB,4096.500085,True,2320.703730,20356.036889,31650.655384,131043.144541,0.0,31650.655384,22676.740619,162693.799925,0.073322,0.155338,0.139383,True,True,False,False,False,True,False,False,front,0.139383,True,True,0.403329,0.056217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5630,9758,9759,1214584770,1214584770_4,7.697510,45.106176,7.697719,45.106113,7.697615,45.106144,17.931609,113.090262,23.090262,65.066177,257.733721,123075,5,AB,R5AB,1288.668604,False,,,,,,,,,,,,False,False,False,False,False,False,False,True,rear,0.190556,True,True,0.275590,0.052515
5631,9767,9768,1214584772,1214584772_1,7.698279,45.106267,7.698200,45.106145,7.698239,45.106206,14.912131,204.734596,114.734596,61.426195,234.187350,122005,4,AB,R4AB,936.749400,True,0.000000,0.000000,89068.562937,0.000000,0.0,89068.562937,0.000000,89068.562937,0.000000,,0.000000,True,True,False,False,False,True,False,False,front,0.000000,True,True,0.242765,0.000000
5632,9768,9769,1214584772,1214584772_2,7.698200,45.106145,7.698022,45.106201,7.698111,45.106173,15.290303,294.134786,204.134786,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True,0.248922,0.012446
5633,9769,9770,1214584772,1214584772_3,7.698022,45.106201,7.698073,45.106321,7.698048,45.106261,13.924715,16.666161,286.666161,61.426195,234.187350,122005,4,AB,R4AB,936.749400,False,,,,,,,,,,,,False,False,False,False,False,False,True,False,side,0.050000,True,True,0.226690,0.011335


In [None]:
# Collapse the per-segment rows to one row per building (osm_id): building attributes use the
# 'first' aggregation, and the per-segment 'weighted_WWR' values are summed and renamed to 'bldg_WWR'.

bldg_attr_cols = ['CIT_AR', 'Era', 'Main_Class', 'Final_Class', 'GFA']
agg_spec = {col: 'first' for col in bldg_attr_cols}
agg_spec['weighted_WWR'] = 'sum'

bldg_wwr_df_grouped = (
    bldg_wwr_df
    .groupby('osm_id')
    .agg(agg_spec)
    .reset_index()
    .rename(columns={'weighted_WWR':'bldg_WWR'})
)
bldg_wwr_df_grouped

Unnamed: 0,osm_id,CIT_AR,Era,Main_Class,Final_Class,GFA,bldg_WWR
0,2682663,138752,2,AB,R2AB,9613.067991,0.167962
1,83471240,184907,4,AB,R4AB,5308.536873,0.176420
2,83471241,184893,4,AB,R4AB,4096.500085,0.076385
3,83471251,186091,5,AB,R5AB,5527.615138,0.106271
4,83471253,257482,4,AB,R4AB,5717.830371,0.126678
...,...,...,...,...,...,...,...
1059,1214584763,121209,4,AB,R4AB,1061.306572,0.159713
1060,1214584767,267524,2,AB,R2AB,2278.244159,0.126871
1061,1214584769,123070,6,AB,R6AB,3962.465058,0.126446
1062,1214584770,123075,5,AB,R5AB,1288.668604,0.119902
