# Seattle Accessory Dwelling Units
Source: [City of Seattle Open Data Portal](https://data.seattle.gov/)

In [52]:
# Load Packages
from pprint import pprint # For tidy printing
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path # For storing filepaths as a Path Object

# Load Packages (API Pulls)
import requests
from io import StringIO
import time # To record elapsed time

In [59]:
# Custom Functions
def missing_stats(series):

    """Calculate the Number and Percentage of Missing Values within a series. Use with .agg() to apply to multiple data frame columns."""

    total_missing = series.isnull().sum()
    percentage_missing = str((total_missing / len(series) * 100).round(2)) + '%'
    return f"Total Missing: {total_missing}, Percentage Missing: {percentage_missing}%"


## Load in ADU Data

In [54]:
data_folder = Path.cwd() / 'data' 

df_aadu = pd.read_csv(data_folder /'raw'/'Detached_Accessory_Dwelling_Units_(DADUs).csv')
df_dadu = pd.read_csv(data_folder /'raw'/'Attached_Accessory_Dwelling_Units_(AADUs).csv')

## Explore ADU Data

In [55]:
print(df_aadu.shape)
print(df_dadu.shape)

# Check if AADU and DADU Data have all the same varaibles
df_aadu.columns == df_dadu.columns

# Since all columns are similar -- will only examine one data set (for now)
df_aadu.head()
df_aadu.info()

(2580, 60)
(3522, 60)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2580 entries, 0 to 2579
Data columns (total 60 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   OBJECTID                         2580 non-null   int64  
 1   Project Address                  2580 non-null   object 
 2   Development Site Square Feet     2574 non-null   float64
 3   Permit Number                    2580 non-null   int64  
 4   Master User Permit Number        92 non-null     float64
 5   Permit Stage                     2580 non-null   object 
 6   Type of Permit                   2580 non-null   object 
 7   New Units Permitted              2580 non-null   int64  
 8   Demolished Units Permitted       2577 non-null   float64
 9   Net Units Permitted              2577 non-null   float64
 10  Sleeping Rooms Permitted         2580 non-null   int64  
 11  Permit Value                     2556 non-null   float64
 12

## Append and Clean Data

In [60]:
# Create Source Tag Variables
df_aadu['Source'] = 'AADU'
df_dadu['Source'] = 'DADU'

# Concatenate the 2 Pandas Data Frames Together
df_adu = pd.concat([df_aadu, df_dadu], 
                   ignore_index = True, join='inner')

# Subset to Variables of Interest
vars = ['OBJECTID', 
        'Type of Dwelling Unit', 'Development Site Square Feet', 'Permit Value', 'Description of Work',
        'New Units Permitted', 'Demolished Units Permitted','Net Units Permitted', 'Sleeping Rooms Permitted',  
        'Application Date', 'Issued Date', 'Final Date', 'Most Recent Inspection Date', 'Most Recent Inspection Type',
        'Project Address', 'Neighborhood', 'Council District', 'GEOID10', 'GEOID20', 'Census Block 2020', 'Census Block Group 2020',
        'Longitude', 'Latitude', 'Source']

dat_adu = df_adu[vars].drop_duplicates()

# Assessing Missingness
dat_adu.agg(missing_stats).apply(print)

Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 9, Percentage Missing: 0.15%%
Total Missing: 92, Percentage Missing: 1.51%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 3, Percentage Missing: 0.05%%
Total Missing: 3, Percentage Missing: 0.05%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 5, Percentage Missing: 0.08%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 1356, Percentage Missing: 22.22%%
Total Missing: 795, Percentage Missing: 13.03%%
Total Missing: 793, Percentage Missing: 13.0%%
Total Missing: 0, Percentage Missing: 0.0%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%
Total Missing: 10, Percentage Missing: 0.16%%


OBJECTID                        None
Type of Dwelling Unit           None
Development Site Square Feet    None
Permit Value                    None
Description of Work             None
New Units Permitted             None
Demolished Units Permitted      None
Net Units Permitted             None
Sleeping Rooms Permitted        None
Application Date                None
Issued Date                     None
Final Date                      None
Most Recent Inspection Date     None
Most Recent Inspection Type     None
Project Address                 None
Neighborhood                    None
Council District                None
GEOID10                         None
GEOID20                         None
Census Block 2020               None
Census Block Group 2020         None
Longitude                       None
Latitude                        None
Source                          None
dtype: object