## Preliminaries

## Testing function: test_sequence()

In [1]:
# Function to streamline testing
def test_sequence(data_path="issued_building_permits_filter_dwelling_purposes.csv"):
    """Smoke-test the permits pipeline: instantiate, clean, and summarise.

    Parameters
    ----------
    data_path : str, optional
        CSV file passed to ``PermitsProcessor``. Defaults to the dwelling
        permits extract used throughout this notebook.

    Returns
    -------
    None
        Progress messages and the cleaned frame's ``info()`` summary are
        printed; nothing is returned.
    """

    # Instantiate dwelling permits
    dwelling_permits = PermitsProcessor(data_path)
    print('Dwelling permits instantiated.\n')

    # Clean data
    dwelling_permits, dwelling_permits_df = dwelling_permits.clean_data()
    print('Data has been cleaned.\n')

    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly; wrapping it in display() also rendered a stray "None".
    dwelling_permits_df.info()

In [2]:
# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings

# Important functionality for this lesson
from scipy import stats
import statsmodels.api as sm
import os

In [3]:
# Set directory
# The data folder can be overridden with the PERMITS_DATA_DIR environment
# variable so the notebook is not tied to one machine; the original location
# remains the default when the variable is not set.
DATA_DIR = os.environ.get(
    "PERMITS_DATA_DIR",
    "C:/Users/emshe/Desktop/BRAINSTATION/CAPSTONE/capstonegitrepo/DATA/PERMITS",
)
os.chdir(DATA_DIR)

## Define base data processor class

In [4]:
class BaseDataProcessor:
    """Base class for processors that load a CSV file and clean it.

    Attributes
    ----------
    data_path : str
        Path of the CSV file handed to the constructor.
    original_data : pandas.DataFrame
        The frame as read from ``data_path``; kept as the untouched reference.
    data : pandas.DataFrame
        Working frame that cleaning steps operate on.
    cleaned : bool
        Flag recording whether cleaning has been applied to ``data``.
    """

    # Initialize with just a filepath
    def __init__(self, data_path):
        self.data_path = data_path
        # Parse the file once; the working frame is an independent (deep) copy,
        # so edits to ``data`` never leak into ``original_data``.
        self.original_data = pd.read_csv(self.data_path)
        self.data = self.original_data.copy()
        self.cleaned = False

    def reset_data(self):
        """Reload the working frame from ``data_path`` and clear the cleaned flag.

        Returns
        -------
        tuple
            ``(self, self.data)``.
        """
        self.data = pd.read_csv(self.data_path)
        self.cleaned = False
        return self, self.data

    def is_clean(self):
        """Print the current value of ``cleaned`` and return ``self``."""
        print(f'Data is clean: {self.cleaned}')
        return self

    def clean_data(self):
        """Placeholder hook; does nothing in the base class."""
        pass

    def preprocess_data(self):
        """Placeholder hook; does nothing in the base class."""
        pass

    def load_and_process_data(self):
        """Placeholder hook; does nothing in the base class."""
        pass

    def save_processed_data(self):
        """Placeholder hook; does nothing in the base class."""
        pass

In [5]:
class PermitsProcessor(BaseDataProcessor):
    """Processor for the building-permits CSV.

    Cleaning converts the date columns to datetimes, drops the IssueYear and
    YearMonth columns when they agree with IssueDate, and removes rows whose
    ProjectValue is zero.
    """

    def clean_data(self):
        """Run all cleaning steps once and return ``(self, self.data)``.

        The steps are skipped when ``self.cleaned`` is already set, so calling
        this repeatedly does not re-process the frame.
        """
        if not self.cleaned:
            self.data = self.update_data_types()
            self.data = self.drop_unnecessary_cols()
            self.data = self.remove_ProjectValue_zeros()
            self.cleaned = True
        return self, self.data

    def update_data_types(self):
        """Convert the date columns of ``self.data`` to datetime and return the frame.

        Columns that are no longer present (for example YearMonth after it has
        been dropped) are skipped instead of raising ``KeyError``.
        """
        date_formats = {
            'IssueDate': '%Y-%m-%d',
            'PermitNumberCreatedDate': '%Y-%m-%d',
            'YearMonth': '%Y-%m',
        }
        for date_col, date_format in date_formats.items():
            if date_col in self.data.columns:
                # Change date columns to datetime format
                self.data[date_col] = pd.to_datetime(self.data[date_col], format=date_format)
        return self.data

    def drop_unnecessary_cols(self):
        """Drop IssueYear / YearMonth when they are consistent with IssueDate.

        Uses the ``.dt`` accessor, so IssueDate and YearMonth must already be
        datetimes (``update_data_types`` does this). Nothing is dropped when
        ``self.cleaned`` is already set. The working frame is always returned,
        so assigning the result back to ``self.data`` can never store ``None``.
        """
        if not self.cleaned:
            issue_date = self.data['IssueDate']

            # Drop IssueYear if there are no conflicts with IssueDate
            if 'IssueYear' in self.data.columns:
                year_conflicts = issue_date.dt.year != self.data['IssueYear']
                if year_conflicts.sum() == 0:
                    self.data = self.data.drop(columns=['IssueYear'])  # Redundant column

            # Drop YearMonth from data set if there are no conflicts
            if 'YearMonth' in self.data.columns:
                year_month = self.data['YearMonth']
                YYConflicts = issue_date.dt.year != year_month.dt.year
                MMConflicts = issue_date.dt.month != year_month.dt.month
                conflict_sum = YYConflicts.sum() + MMConflicts.sum()
                if conflict_sum == 0:
                    self.data = self.data.drop(columns=['YearMonth'])
        return self.data

    def remove_ProjectValue_zeros(self):
        """Keep only rows whose ProjectValue is not 0.0 and return the frame."""
        self.data = self.data[self.data['ProjectValue'] != 0.0]
        return self.data

In [6]:
# Run test sequence: instantiation, cleaning, etc.
test_sequence()

Dwelling permits instantiated.

Data has been cleaned.

<class 'pandas.core.frame.DataFrame'>
Index: 25824 entries, 0 to 32743
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   PermitNumber               25824 non-null  object        
 1   PermitNumberCreatedDate    25824 non-null  datetime64[ns]
 2   IssueDate                  25824 non-null  datetime64[ns]
 3   PermitElapsedDays          25824 non-null  int64         
 4   ProjectValue               25824 non-null  float64       
 5   TypeOfWork                 25824 non-null  object        
 6   Address                    25686 non-null  object        
 7   ProjectDescription         25824 non-null  object        
 8   PermitCategory             16822 non-null  object        
 9   Applicant                  25824 non-null  object        
 10  ApplicantAddress           25745 non-null  object        
 11  PropertyUse     

None

In [7]:
# Initialize dwelling permits instance
dwelling_permits = PermitsProcessor("issued_building_permits_filter_dwelling_purposes.csv")

# Reset data using .reset_data() method
dwelling_permits.reset_data()

# Check if data is clean
print(f'Data is clean: {dwelling_permits.cleaned}')

Data is clean: False


In [8]:
# Store the raw, unmodified DataFrame held in the .original_data attribute as dwelling_permits_og_df
dwelling_permits_og_df = dwelling_permits.original_data
dwelling_permits_og_df.columns

Index(['PermitNumber', 'PermitNumberCreatedDate', 'IssueDate',
       'PermitElapsedDays', 'ProjectValue', 'TypeOfWork', 'Address',
       'ProjectDescription', 'PermitCategory', 'Applicant', 'ApplicantAddress',
       'PropertyUse', 'SpecificUseCategory', 'BuildingContractor',
       'BuildingContractorAddress', 'IssueYear', 'GeoLocalArea', 'Geom',
       'YearMonth', 'geo_point_2d'],
      dtype='object')

In [9]:
# Uncomment to run .clean_data() method
# .clean_data() returns a (processor, DataFrame) tuple, so unpack both values
# dwelling_permits, dwelling_permits_df = dwelling_permits.clean_data()
# dwelling_permits_df.info()

In [10]:
# Clean data
dwelling_permits, dwelling_permits_df = dwelling_permits.clean_data()

In [11]:
# Check if data is clean
dwelling_permits.is_clean()

Data is clean: True


<__main__.PermitsProcessor at 0x2af0a92e150>

## Step 1: update data types

In [12]:
# Update date columns of dwelling_permits_df to datetime format with .update_data_types method
# dwelling_permits_df = dwelling_permits.update_data_types()
# dwelling_permits_df.info()

In [13]:
# Update data types 
# dwelling_permits_df = dwelling_permits.update_data_types()
# dwelling_permits_df.info()

## Step 2: Drop unnecessary columns (IssueYear, YearMonth)

In [14]:
# Check if IssueYear is present as column
issue_year_present = ('IssueYear' in dwelling_permits_df.columns)
print(f'Issue Year in Columns:  {issue_year_present}' )

Issue Year in Columns:  False


In [15]:
# Reset data and check if cleaned (should be False)
dwelling_permits.reset_data()
dwelling_permits_df = dwelling_permits.data
print(f'Data is clean: {dwelling_permits.cleaned}')

Data is clean: False


In [16]:
# Check column info
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in display() also rendered a stray "None".
dwelling_permits_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32744 entries, 0 to 32743
Data columns (total 20 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PermitNumber               32744 non-null  object 
 1   PermitNumberCreatedDate    32744 non-null  object 
 2   IssueDate                  32744 non-null  object 
 3   PermitElapsedDays          32744 non-null  int64  
 4   ProjectValue               32744 non-null  float64
 5   TypeOfWork                 32744 non-null  object 
 6   Address                    32581 non-null  object 
 7   ProjectDescription         32744 non-null  object 
 8   PermitCategory             16931 non-null  object 
 9   Applicant                  32744 non-null  object 
 10  ApplicantAddress           32653 non-null  object 
 11  PropertyUse                32744 non-null  object 
 12  SpecificUseCategory        32743 non-null  object 
 13  BuildingContractor         22636 non-null  obj

None

In [17]:
# Drop Issue Year from data set if there are no conflicts
# YearConflicts = (dwelling_permits_df['IssueDate'].dt.year != dwelling_permits_df['IssueYear'])
# conflict_sum = YearConflicts.sum()
# if conflict_sum == 0:
#     print('There are no conflicts between issue date and issue year. Issue year will be dropped.')
#     dwelling_permits_df.drop(['IssueYear'], axis = 1,inplace = True)
#     print(f'''Column IssueYear Present: {'IssueYear' in dwelling_permits_df.columns}''')
#     # display(dwelling_permits_df.head())
# else: Print(f'''There is a conflict between issue data and issue year. Issue year will not be dropped and should be investigated.''')
                

In [18]:
# Drop YearMonth from data set if there are no conflicts
# YYConflicts = (dwelling_permits_df['IssueDate'].dt.year != dwelling_permits_df['YearMonth'].dt.year)
# MMConflicts = (dwelling_permits_df['IssueDate'].dt.month != dwelling_permits_df['YearMonth'].dt.month)
# conflict_sum = YYConflicts.sum() + MMConflicts.sum()
# if conflict_sum == 0:
#     print('There are no conflicts between issue date and issue month/year. Issue monthyear will be dropped.')
#     dwelling_permits_df.drop(['YearMonth'], axis = 1,inplace = True)
#     print(f'''Column IssueYear Present: {'YearMonth' in dwelling_permits_df.columns}''')
#     # display(dwelling_permits_df.head())
# else: Print(f'''There is a conflict between issue data and issue month/year. Issue year will not be dropped and should be investigated.''')
                

## Step 3: Deal with projects with zero project value

In [19]:
# restrict to permits with 0.0 dollar value
zero_value_df = dwelling_permits_df[dwelling_permits_df['ProjectValue'] == 0.0]
print(f'The zero_value_df is length {len(zero_value_df)}.')
# Cap the sample size so a frame with fewer than 10 rows does not raise, and
# fix the seed so the rows shown are the same on every run.
display(zero_value_df.sample(n=min(10, len(zero_value_df)), random_state=42))

The zero_value_df is length 6920.


Unnamed: 0,PermitNumber,PermitNumberCreatedDate,IssueDate,PermitElapsedDays,ProjectValue,TypeOfWork,Address,ProjectDescription,PermitCategory,Applicant,ApplicantAddress,PropertyUse,SpecificUseCategory,BuildingContractor,BuildingContractorAddress,IssueYear,GeoLocalArea,Geom,YearMonth,geo_point_2d
11405,BP-2017-02182,2017-04-26,2017-06-16,51,0.0,Salvage and Abatement,"250 E 53RD AVENUE, Vancouver, BC V5X 1H9",Low Density Housing - Salvage and Abatement - ...,,SHARON TATEISHI,"7600 Decourcy Crescent \nRICHMOND, BC V7C4E9",Dwelling Uses,Single Detached House,East West Excavating Ltd,"4918 VICTORIA DRIVE \nVancouver, BC V5P 3T6",2017,Sunset,"{""coordinates"":[-123.1006047,49.2216766],""type...",2017-06,"49.2216766, -123.1006047"
29042,BP-2024-01879,2024-05-24,2024-05-31,7,0.0,Salvage and Abatement,"1410 W 38TH AVENUE, Vancouver, BC V6M 1R5",Low Density Housing - Salvage and Abatement - ...,,VictorEric Design Group Ltd.,"#-220-3410 LOUGHEED HIGHWAY\nVANCOUVER, BC V5...",Dwelling Uses,Single Detached House,WEST DEMOLITION SERVICE LTD,,2024,Shaughnessy,"{""coordinates"":[-123.1373622,49.2368011],""type...",2024-05,"49.2368011, -123.1373622"
27339,BP-2025-01874,2025-04-22,2025-04-24,2,0.0,Salvage and Abatement,"1242 KELOWNA STREET, Vancouver, BC V5K 4E2",Low Density Housing - Salvage and Abatement - ...,,Satnam Lalli DBA: Lalli Homes,"6868 Killarney St\nVancouver, BC V5S 2Y3",Dwelling Uses,Single Detached House,Lalli Homes Ltd,"6868 KILLARNEY ST \nVancouver, BC V5S 2Y3",2025,Hastings-Sunrise,"{""coordinates"":[-123.0423562,49.2739802],""type...",2025-04,"49.2739802, -123.0423562"
11986,BP-2022-03832,2022-08-03,2022-12-22,141,0.0,Salvage and Abatement,"2959 ROSEMONT DRIVE, Vancouver, BC V5S 2C7",Low Density Housing - Salvage and Abatement - ...,,NAMTEZ SOHAL DBA: Athoula Management Ltd.,"1358 E 60TH AV\nVANCOUVER, BC V5X 2A9",Dwelling Uses,Single Detached House,AKAL DEMOLITION & EXCAVATING LTD,,2022,Killarney,"{""coordinates"":[-123.0457071,49.2170602],""type...",2022-12,"49.2170602, -123.0457071"
11629,BP-2016-02907,2016-10-07,2017-02-14,130,0.0,Salvage and Abatement,"4453 W 14TH AVENUE, Vancouver, BC V6R 2Y2",Low Density Housing - Salvage and Abatement - ...,,Mike Chu DBA: Westpoint Design & Development Ltd.,"2268 West 34th Avenue \nVancouver, BC V6M1G6",Dwelling Uses,Single Detached House,All Star Excavating & Demolition Ltd,"11764 Kerr Bay\nDelta, BC V4C 1K1",2017,West Point Grey,"{""coordinates"":[-123.2079107,49.2604298],""type...",2017-02,"49.2604298, -123.2079107"
8103,BP-2017-03475,2017-06-30,2017-07-21,21,0.0,Salvage and Abatement,"4985 MOSS STREET, Vancouver, BC V5R 3T5",Enquiry Centre - Salvage and Abatement - Inter...,,Joginder Khaira,"7591 117th St\nDelta, BC V4C 6A6",Dwelling Uses,Single Detached House,,,2017,Renfrew-Collingwood,"{""coordinates"":[-123.0458062,49.2396646],""type...",2017-07,"49.2396646, -123.0458062"
31768,BP-2020-02916,2020-09-21,2021-02-10,142,0.0,New Building,"2472 DUNDAS STREET, Vancouver, BC V5K 1P6",High Density Housing / Commercial - New Buildi...,,Natalia Rybakov,"27620 Railcar Cr\nAbbotsford, BC V4X0B8",Dwelling Uses,Infill Single Detached House,,,2021,Hastings-Sunrise,"{""coordinates"":[-123.0550296,49.2847424],""type...",2021-02,"49.2847424, -123.0550296"
23112,BP-2022-03174,2022-06-17,2022-07-27,40,0.0,Salvage and Abatement,"737 W 68TH AVENUE, Vancouver, BC V6P 2T8",Low Density Housing - Salvage and Abatement - ...,,QI LI DBA: LQ Design GROUP Ltd,"2171 WEST 15TH AVENUE\nVANCOUVER, BC V6K 2Y4",Dwelling Uses,Single Detached House,Van-City Excavating Ltd,,2022,Marpole,"{""coordinates"":[-123.1222418,49.2105099],""type...",2022-07,"49.2105099, -123.1222418"
27789,BP-2018-01905,2018-04-10,2018-06-12,63,0.0,Salvage and Abatement,"2838 NANAIMO STREET, Vancouver, BC V5N 5G1",Enquiry Centre - Salvage and Abatement - Inter...,,Ngoc-Hoang-Tuan Nguyen DBA: Nanaimo 2838 Prope...,"592 W 28th Avenue\nVancouver, BC V5Z 2H1",Dwelling Uses,Single Detached House,All Star Excavating & Demolition Ltd,"11764 Kerr Bay\nDelta, BC V4C 1K1",2018,Renfrew-Collingwood,"{""coordinates"":[-123.0561696,49.2587745],""type...",2018-06,"49.2587745, -123.0561696"
2281,BP-2017-00170,2017-01-11,2017-04-07,86,0.0,Salvage and Abatement,"296 E 63RD AVENUE, Vancouver, BC V5X 2J7",Low Density Housing - Salvage and Abatement - ...,,RAJEEV KAPUR DBA: RAJEEV KAPUR,"662 W71ST AVENUE\nVANCOUVER, BC V6P3A1",Dwelling Uses,Single Detached House,Canadian Excavating Ltd,"6898 130 St\nSurrey, BC V3W 4J5",2017,Sunset,"{""coordinates"":[-123.100387,49.2132932],""type""...",2017-04,"49.2132932, -123.100387"


In [20]:
# drop all rows with value = zero
dwelling_permits_df = dwelling_permits_df[dwelling_permits_df['ProjectValue'] != 0.0]

## Step 4: Deal with columns with null values one by one

In [21]:
# Find columns with nulls

# Find total rows for percentage calculation
total_rows = len(dwelling_permits_df)

# Count nulls for every column in a single pass
null_counts = dwelling_permits_df.isna().sum()

# List of dictionaries to hold null info (only columns that contain nulls)
null_info = [
    {
        'Column': col,
        'Null Count': null_count,
        'Percent Null': round((null_count / total_rows) * 100, 2)
    }
    for col, null_count in null_counts.items()
    if null_count > 0
]

# Create and display DataFrame sorted by % null.
# The column names are passed explicitly so the frame still has a
# 'Percent Null' column when no column contains nulls; without them
# sort_values raised KeyError on the empty frame.
null_summary_df = pd.DataFrame(
    null_info, columns=['Column', 'Null Count', 'Percent Null']
).sort_values(by='Percent Null', ascending=False)
display(null_summary_df)

Unnamed: 0,Column,Null Count,Percent Null
5,BuildingContractorAddress,13083,50.66
1,PermitCategory,9002,34.86
4,BuildingContractor,7489,29.0
6,GeoLocalArea,315,1.22
7,Geom,315,1.22
8,geo_point_2d,315,1.22
0,Address,138,0.53
2,ApplicantAddress,79,0.31
3,SpecificUseCategory,1,0.0


In [22]:
# List all columns
display(dwelling_permits_df.columns)

Index(['PermitNumber', 'PermitNumberCreatedDate', 'IssueDate',
       'PermitElapsedDays', 'ProjectValue', 'TypeOfWork', 'Address',
       'ProjectDescription', 'PermitCategory', 'Applicant', 'ApplicantAddress',
       'PropertyUse', 'SpecificUseCategory', 'BuildingContractor',
       'BuildingContractorAddress', 'IssueYear', 'GeoLocalArea', 'Geom',
       'YearMonth', 'geo_point_2d'],
      dtype='object')

In [23]:
# Define function to analyze null values for particular columns
def examine_col_nulls(df, col, output=True):
    """Summarise the null values of one column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. All statistics are computed from this argument.
    col : str
        Name of the column whose nulls are examined.
    output : bool, optional
        When true, print/display the column's value counts, its null count and
        percentage, a sample of the rows where ``col`` is null, and value
        counts of a fixed set of context columns within those rows.

    Returns
    -------
    tuple
        ``(null_count, null_perc, col_null_df)``: the number of nulls in
        ``col``, that number as a percentage of ``len(df)`` (0.0 for an empty
        frame), and the rows of ``df`` where ``col`` is null.
    """
    total_rows = len(df)
    null_count = df[col].isna().sum()
    # Guard the empty frame so the percentage is not a division by zero
    null_perc = 100 * null_count / total_rows if total_rows else 0.0
    col_null_df = df[df[col].isnull()]
    if output:
        print(f'\n{col} has the following value counts.\n')
        display(df[col].value_counts())
        print(f'\n{col} has {null_count} null values.\n')
        print(f'\n{col} nulls make up {null_perc} % of the dataset.\n')
        print(f'\nWe present an example of the subdataset where {col} is null\n')
        # sample() raises if asked for more rows than exist, so cap at the row count
        display(col_null_df.sample(min(5, len(col_null_df))))
        context_cols = ['PermitNumberCreatedDate',
                        'ProjectValue', 'TypeOfWork', 'Address', 'Applicant', 'PropertyUse',
                        'SpecificUseCategory', 'BuildingContractor', 'BuildingContractorAddress',
                        'GeoLocalArea']
        for feature in context_cols:
            # Skip context columns the supplied frame does not have
            if feature not in col_null_df.columns:
                continue
            print(f'\nIn the col_null_df, the feature {feature} has the following value counts.\n')
            display(col_null_df[feature].value_counts())
    return null_count, null_perc, col_null_df

### Step 4.1: Imputing 'unknown' into categorical columns

In [24]:
# Examine PermitCategory nulls
examine_col_nulls(dwelling_permits_df,"PermitCategory")


PermitCategory has the following value counts.



PermitCategory
Renovation - Residential - Lower Complexity              7995
New Build - Low Density Housing                          6680
New Build - Standalone Laneway                           2037
Renovation - Commercial/ Mixed Use - Lower Complexity     110
Name: count, dtype: int64


PermitCategory has 9002 null values.


PermitCategory nulls make up has 34.8590458488228 % of the dataset.


We present an example of the subdataset where PermitCategory is null



Unnamed: 0,PermitNumber,PermitNumberCreatedDate,IssueDate,PermitElapsedDays,ProjectValue,TypeOfWork,Address,ProjectDescription,PermitCategory,Applicant,ApplicantAddress,PropertyUse,SpecificUseCategory,BuildingContractor,BuildingContractorAddress,IssueYear,GeoLocalArea,Geom,YearMonth,geo_point_2d
19960,BP-2019-04279,2019-09-24,2020-09-19,361,300000.0,New Building,"1346 E 11TH AVENUE, Vancouver, BC V5N 1Y5",High Density Housing / Commercial - New Buildi...,,Carman Kwan DBA: Architectural Collective Inc.,"677 East 27th Avenue\nVancouver, BC V5V 2K7",Dwelling Uses,Infill Single Detached House,,,2020,Kensington-Cedar Cottage,"{""coordinates"":[-123.0765023,49.2602642],""type...",2020-09,"49.2602642, -123.0765023"
30636,BP-2018-06476,2018-12-13,2019-08-21,251,15000.0,Demolition / Deconstruction,"1519 W 37TH AVENUE, Vancouver, BC V6M 1M5",Low Density Housing - Demolition / Deconstruct...,,QI LI DBA: LQ Design GROUP Ltd,"2171 WEST 15TH AVENUE\nVANCOUVER, BC V6K 2Y4",Dwelling Uses,Single Detached House,East West Excavating Ltd,"968 E 53RD AV \nVancouver, BC V5X 1J6",2019,Shaughnessy,"{""coordinates"":[-123.1402184,49.2383541],""type...",2019-08,"49.2383541, -123.1402184"
7229,DB-2016-01293,2016-07-21,2017-02-22,216,15000.0,Demolition / Deconstruction,"1628 E 61ST AVENUE, Vancouver, BC V5P 2J6",Low Density Housing - Demolition / Deconstruct...,,Tamanna Design Group Ltd. DBA: Tamanna Design ...,"809 - 6622 Pearson Way\nRichmond, BC V7C 0E4",Dwelling Uses,Single Detached House,Hans Demolition and Excavating Ltd.,"12498 55 Ave\nSurrey, BC V3X 3V5",2017,Victoria-Fraserview,"{""coordinates"":[-123.0735738,49.2143522],""type...",2017-02,"49.2143522, -123.0735738"
17320,BP-2018-01181,2018-03-05,2018-07-24,141,15000.0,Demolition / Deconstruction,"2093 E 5TH AVENUE, Vancouver, BC V5N 1M3",Low Density Housing - Demolition / Deconstruct...,,Khang Nguyen DBA: Architrix Design Studio,"289 Alexander Street\n# 216\nVancouver, BC V6...",Dwelling Uses,Duplex,Canadian Excavating Ltd,"6898 130 St\nSurrey, BC V3W 4J5",2018,Grandview-Woodland,"{""coordinates"":[-123.062157,49.2661799],""type""...",2018-07,"49.2661799, -123.062157"
5487,BP-2019-00240,2019-01-18,2019-06-26,159,15000.0,Demolition / Deconstruction,"1319 W 46TH AVENUE, Vancouver, BC V6M 2K4",Low Density Housing - Demolition / Deconstruct...,,Ken Tsang,"N210 - 5811 Cooney Rd\nRichmond, BC V6X 3M1",Dwelling Uses,Single Detached House,GRG Demolition & Excavating Ltd,,2019,Oakridge,"{""coordinates"":[-123.1355141,49.2298161],""type...",2019-06,"49.2298161, -123.1355141"



In the col_null_df, the feature PermitNumberCreatedDate has the following value counts.



PermitNumberCreatedDate
2021-05-27    22
2022-05-05    22
2016-12-22    21
2021-04-13    19
2024-05-31    16
              ..
2024-07-04     1
2023-04-04     1
2024-03-14     1
2020-09-22     1
2019-01-03     1
Name: count, Length: 2182, dtype: int64


In the col_null_df, the feature ProjectValue has the following value counts.



ProjectValue
15000.0      4719
40000.0       253
20000.0       186
30000.0       123
50000.0       116
             ... 
420050.0        1
122400.0        1
1515000.0       1
362000.0        1
121050.0        1
Name: count, Length: 1342, dtype: int64


In the col_null_df, the feature TypeOfWork has the following value counts.



TypeOfWork
Demolition / Deconstruction             5727
Addition / Alteration                   2278
New Building                             934
Salvage and Abatement                     30
Outdoor Uses (No Buildings Proposed)      22
Temporary Building / Structure            11
Name: count, dtype: int64


In the col_null_df, the feature Address has the following value counts.



Address
8460 ASH STREET, Vancouver, BC V6P 3M2           8
1501 HARO STREET, Vancouver, BC V6G 1G4          4
2901 E HASTINGS STREET, Vancouver, BC V5K 5J1    4
3620 W 20TH AVENUE, Vancouver, BC V6S 1E8        3
124 DUNLEVY AVENUE, Vancouver, BC V6A 3T6        3
                                                ..
2083 W 48TH AVENUE, Vancouver, BC V6M 2P4        1
2209 E 2ND AVENUE, Vancouver, BC V5N 1G1         1
1604 SALSBURY DRIVE, Vancouver, BC V5L 4B8       1
3934 LILLOOET STREET, Vancouver, BC V5R 2E8      1
6272 BUTLER STREET, Vancouver, BC V5S 3K4        1
Name: count, Length: 8597, dtype: int64


In the col_null_df, the feature Applicant has the following value counts.



Applicant
QI LI DBA: LQ Design GROUP Ltd                    212
Vincent Wan DBA: D.V. Design Ltd.                 198
Carman Kwan DBA: Architectural Collective Inc.    174
Michael Lu DBA: DWG Design Work Group Ltd.        101
Khang Nguyen DBA: Architrix Design Studio          98
                                                 ... 
Winga LAM                                           1
HUEN KEE LIEW                                       1
RAJWINDER MANN                                      1
Matt Stogryn DBA: iFortune Homes Inc.               1
Garinder Deo                                        1
Name: count, Length: 2875, dtype: int64


In the col_null_df, the feature PropertyUse has the following value counts.



PropertyUse
Dwelling Uses                                                                        8647
Dwelling Uses, Retail Uses                                                             87
Dwelling Uses, Parking Uses                                                            53
Dwelling Uses, Parking Uses, Retail Uses                                               38
Dwelling Uses, Office Uses                                                             21
Dwelling Uses, Office Uses, Retail Uses                                                19
Dwelling Uses, Retail Uses, Service Uses                                               18
Dwelling Uses, Parking Uses, Retail Uses, Service Uses                                 17
Dwelling Uses, Service Uses                                                            17
Dwelling Uses, Institutional Uses                                                      11
Dwelling Uses, Institutional Uses, Parking Uses                                         


In the col_null_df, the feature SpecificUseCategory has the following value counts.



SpecificUseCategory
Single Detached House                                                                                        4922
Single Detached House w/Sec Suite                                                                            1459
Multiple Dwelling                                                                                            1081
Duplex                                                                                                        413
Multiple Conversion Dwelling                                                                                  191
                                                                                                             ... 
Printing or Publishing, Dwelling Unit                                                                           1
Dwelling Unit w/ Other Use, Restaurant - Class 1                                                                1
Miscellaneous Products Mfg-Class A, General Office, Restaurant - Cla


In the col_null_df, the feature BuildingContractor has the following value counts.



BuildingContractor
Canadian Excavating Ltd              412
Bhullar Excavating and Demolition    260
East West Excavating Ltd             236
JVT EXCAVATING & DEMOLITION LTD      231
Kingsman Excavating Ltd.             177
                                    ... 
A-1 Window Mfg Ltd                     1
Thi M T Tran                           1
PROFORM CONSTRUCTION GROUP LTD         1
Cornat Construction Ltd                1
JPC Services Inc                       1
Name: count, Length: 1022, dtype: int64


In the col_null_df, the feature BuildingContractorAddress has the following value counts.



BuildingContractorAddress
6898 130 St\nSurrey, BC  V3W 4J5                            412
968 E 53RD AV  \nVancouver, BC  V5X 1J6                     204
Unit 1104, 7360 137 ST\n15559 59 Ave\nSurrey, BC  V3S4N8    177
12498 55 Ave\nSurrey, BC  V3X 3V5                           172
5649 ASH ST  \nVancouver, BC  V5Z 3G8                       144
                                                           ... 
2115 W 34TH AV  \nVancouver, BC  V6M 1G3                      1
4830 INVERNESS ST  \nVancouver, BC  V5V 4X6                   1
6498 DUMFRIES ST  \nVancouver, BC  V5P 3B4                    1
4418 JAMES ST  \nVancouver, BC  V5V 3J1                       1
12306 McTavish Rd  \nPitt Meadows, BC  V3Y 1Z1                1
Name: count, Length: 686, dtype: int64


In the col_null_df, the feature GeoLocalArea has the following value counts.



GeoLocalArea
Kensington-Cedar Cottage    966
Renfrew-Collingwood         749
Hastings-Sunrise            730
Riley Park                  634
Dunbar-Southlands           614
Sunset                      537
Kitsilano                   533
Grandview-Woodland          448
Victoria-Fraserview         418
Marpole                     405
West Point Grey             331
Killarney                   307
Kerrisdale                  301
Mount Pleasant              297
Oakridge                    275
Arbutus Ridge               270
Shaughnessy                 255
South Cambie                240
West End                    220
Downtown                    165
Fairview                    135
Strathcona                  106
Name: count, dtype: int64

(9002,
 34.8590458488228,
         PermitNumber PermitNumberCreatedDate   IssueDate  PermitElapsedDays  \
 2      BP-2022-02723              2022-05-25  2022-11-14                173   
 4      DB-2017-02311              2017-05-02  2017-08-23                113   
 10     BP-2017-05079              2017-09-27  2018-08-03                310   
 12     DB-2019-02085              2019-05-13  2019-06-24                 42   
 15     BP-2017-00509              2017-01-30  2017-06-12                133   
 ...              ...                     ...         ...                ...   
 32713  BP-2022-03253              2022-06-22  2022-06-28                  6   
 32730  BP-2022-04148              2022-08-25  2023-06-22                301   
 32736  BP-2022-05542              2022-12-16  2023-08-22                249   
 32740  DB-2019-02157              2019-05-16  2020-06-22                403   
 32742  BP-2022-01383              2022-03-16  2022-10-14                212   
 
        Proj

In [25]:
# Value counts for the TypeOfWork column
dwelling_permits_df['TypeOfWork'].value_counts()

TypeOfWork
Addition / Alteration                   10383
New Building                             9651
Demolition / Deconstruction              5727
Salvage and Abatement                      30
Outdoor Uses (No Buildings Proposed)       22
Temporary Building / Structure             11
Name: count, dtype: int64

### MISCELLANEOUS EDA

In [26]:
# Display dwelling permits info
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly; wrapping it in display() also rendered a stray "None".
dwelling_permits_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25824 entries, 0 to 32743
Data columns (total 20 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   PermitNumber               25824 non-null  object 
 1   PermitNumberCreatedDate    25824 non-null  object 
 2   IssueDate                  25824 non-null  object 
 3   PermitElapsedDays          25824 non-null  int64  
 4   ProjectValue               25824 non-null  float64
 5   TypeOfWork                 25824 non-null  object 
 6   Address                    25686 non-null  object 
 7   ProjectDescription         25824 non-null  object 
 8   PermitCategory             16822 non-null  object 
 9   Applicant                  25824 non-null  object 
 10  ApplicantAddress           25745 non-null  object 
 11  PropertyUse                25824 non-null  object 
 12  SpecificUseCategory        25823 non-null  object 
 13  BuildingContractor         18335 non-null  object 


None