# Permits
Analysis of RGW permits

In [9]:
import json

import geopandas as gpd
import pandas as pd
from PIL import Image
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import requests
import sqlite3

In [4]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [5]:
px.set_mapbox_access_token(open("../.mapbox_token").read())

## Plot settings

In [71]:
HIGHLIGHT_COLOR = "#df2935"
COLORS = ["#007C77", "#1a1b41", "#f48668", "#f2bb05", "#E2A0FF", "#8acb88"]
CITY_OUTLINE = "light grey"

In [77]:
pio.templates["created_by"] = go.layout.Template(
    layout=go.Layout(
        #font=dict(family="Open Sans"), # Can update to our own stylings when we have it
        title_x=0.05,
        yaxis=dict(tickformat=",.0f"),
        colorway = COLORS,
        hovermode='x unified',
        width=600,
        height=600,
    ),
    layout_annotations=[
        dict(
            name="created by",
            text="Created by Electrify RVA",
            opacity=0.9,
            font=dict(color="black", size=12),
            xref="paper",
            yref="paper",
            x=-0.05,
            y=-0.15,
            showarrow=False,
        )
    ]
)

pio.templates.default = "ggplot2+created_by"

In [74]:
# Add city outline
richmond_city_outline = "https://github.com/generalpiston/geojson-us-city-boundaries/blob/master/cities/va/richmond.json"

result = requests.get(richmond_city_outline)
RICHMOND_GEOMETRY = json.loads(result.json()['payload']['blob']['rawLines'][0])['features'][0]['geometry']

def add_city_outline(fig):
    fig.update_layout(
        mapbox={
            "layers": [
                {
                    "source": RICHMOND_GEOMETRY,
                    "below": "traces",
                    "type": "line",
                    "color": CITY_OUTLINE,
                    "line": {"width": 1.5},
                }
            ],
        },
    )
    return(fig)
    

# Load data
The `permit.db` database is presumed to be in the `data` directory. This is currently ignored by git.

In [11]:
con = sqlite3.connect("data/permit.db")
cur = con.cursor()

Show all tables

In [12]:
cur.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()

[('permit',)]

In [14]:
# Read into pandas Dataframe
permits_all = pd.read_sql_query("SELECT * FROM permit", con, parse_dates=["issued_date", "application_date", "expiration_date", "completion_date", "final_date"])
permits_all.head(10)

Unnamed: 0,case_id,status,issued_date,application_date,expiration_date,completion_date,final_date,request_date,source_city,source_postal_code,source_full_address,geocoded_city,geocoded_postal_code,geocoded_latitude,geocoded_longitude,reason,destination
0,34fb46eb-46a0-4670-880a-94230f5484b8,Issued,2023-11-08 16:28:54+00:00,2023-11-08 16:28:18+00:00,2026-11-09 16:28:54+00:00,NaT,NaT,,Henrico,23231,2841 Williamsburg Road Henrico VA 23231,Henrico County,23231,37.517365,-77.346959,MainExtension,Commercial
1,4100b90e-a4c9-499a-a29e-8b4e5d4db1b2,Issued,2023-10-13 13:41:18+00:00,2023-10-13 13:40:32+00:00,2026-10-12 13:41:18+00:00,NaT,NaT,,Richmond,23231,4106 Williamsburg Road Richmond VA 23231,Henrico County,23231,37.518417,-77.357685,MainExtension,Commercial
2,518e7339-4884-4a8b-aefd-c1f760e41edd,Issued,2023-09-25 19:33:21+00:00,2023-09-25 19:31:57+00:00,2026-09-24 19:33:21+00:00,NaT,NaT,,Richmond,23229,8899 River Road Richmond VA 23229,Henrico County,23229,37.568148,-77.587671,MainExtension,Residential
3,1fca4d8c-9342-4476-b1ba-ddcd9b6036a4,Issued,2023-12-21 20:30:44+00:00,2023-12-21 20:30:01+00:00,2026-12-21 20:30:44+00:00,NaT,NaT,,Richmond,23233,12241 W Broad St C Richmond VA 23233,Henrico County,23233,37.659711,-77.6376,Addition,Commercial
4,114afe4f-0bc8-4d9d-b013-51515b85626a,Expired,2017-05-25 04:00:00+00:00,NaT,2019-07-08 04:00:00+00:00,NaT,2017-07-10 04:00:00+00:00,,RICHMOND,23235,4004 MEADOWDALE Blvd RICHMOND VA 23235,Chesterfield County,23234,37.438203,-77.472917,Upgrade,Commercial
5,f5c93b52-c8ed-4cba-ba35-63322ce19ba4,Issued,2023-12-19 20:57:03+00:00,2023-12-19 20:56:21+00:00,2026-12-18 20:57:03+00:00,NaT,NaT,,Henrico,23228,1632 E Parham Road Suite A Henrico VA 23228,Henrico County,23228,37.639644,-77.473023,New,Commercial
6,c036fe6d-154f-4ba2-88d9-ab4406d00d56,Issued,2023-08-07 15:45:19+00:00,2023-08-07 15:39:25+00:00,2026-08-06 15:45:19+00:00,NaT,NaT,,Henrico,23233,13150 Old Three Chopt Road Henrico VA 23233,Henrico County,23233,37.650524,-77.611294,MainExtension,Commercial
7,1c7997f9-3616-4d4a-90ba-bf37552a2937,Issued,2023-12-04 05:00:00+00:00,2023-12-04 16:58:10+00:00,2024-06-03 17:12:05+00:00,NaT,NaT,,Richmond,23221,304 St Davids Lane Richmond VA 23221,Richmond,23221,37.550525,-77.496726,HeavyRemodel,Residential
8,528d28e3-736c-458f-b0d3-56143885099b,Issued,2023-11-09 05:00:00+00:00,2023-11-09 15:57:49+00:00,2024-05-07 04:00:00+00:00,NaT,NaT,,Richmond,23224,6255 Old Warwick Road Richmond VA 23224,Richmond,23224,37.489992,-77.486822,Addition,Commercial
9,371d4869-c0a9-4817-a92b-7a6db4871505,Finaled,2023-10-03 04:00:00+00:00,2023-10-03 18:26:49+00:00,2024-05-14 15:15:08+00:00,NaT,2024-01-02 17:40:11+00:00,,Richmond,23220,1912 W Cary St Richmond VA 23220,Richmond,23220,37.548023,-77.465566,HeavyRemodel,Commercial


In [15]:
permits_all.value_counts("status")

status
Finaled            56856
Issued             24420
Expired            15524
Cancelled            166
Voided               150
Killed               104
Ready for Meter       33
On Hold               26
Withdrawn             19
Under Review           3
Name: count, dtype: int64

There are a lot of irrelevant permits. We'll keep only those that are Finaled, Issued, or Ready for Meter.

In [18]:
permits = permits_all[permits_all["status"].isin(["Finaled", "Issued", "Ready for Meter"])].copy()

Prepare data for analysis

In [20]:
permits["issued_date"] = permits["issued_date"].fillna(permits["application_date"])
permits = permits.sort_values("issued_date")
permits["issued_day"] = permits["issued_date"].round("D")
permits["issued_month_date"] = permits["issued_date"].dt.to_period('M').dt.to_timestamp()
permits["issued_year_date"] = permits["issued_date"].dt.to_period('Y').dt.to_timestamp()
permits["issued_year"] = permits["issued_date"].dt.year.astype(int)
permits["issued_year_str"] = permits["issued_year"].astype(str)
permits["Year Issued"] = permits["issued_year_str"]

# There was a huge surge in permits in the surrounding counties starting in 2008
permits["pre-2008"] = permits["issued_year"].apply(lambda x: "1993-2007" if x < 2008 else "2008-Present")

permits["Location"] = permits["geocoded_city"].apply(lambda r: "Richmond" if r == "Richmond" else "Outside the city")
permits['latitude'] = permits['geocoded_latitude']
permits['longitude'] = permits['geocoded_longitude']

permits.head()

  permits["issued_month_date"] = permits["issued_date"].dt.to_period('M').dt.to_timestamp()
  permits["issued_year_date"] = permits["issued_date"].dt.to_period('Y').dt.to_timestamp()


Unnamed: 0,case_id,status,issued_date,application_date,expiration_date,completion_date,final_date,request_date,source_city,source_postal_code,source_full_address,geocoded_city,geocoded_postal_code,geocoded_latitude,geocoded_longitude,reason,destination,issued_day,issued_month_date,issued_year_date,issued_year,issued_year_str,Year Issued,pre-2008,Location,latitude,longitude
23312,149AD162-6876-4CC9-B45D-DA45EACF339C,Finaled,1993-04-02 05:00:00+00:00,1993-04-02 05:00:00+00:00,NaT,NaT,1993-04-23 04:00:00+00:00,,,23222.0,2303 2ND Ave 23222,Richmond,23222.0,37.563289,-77.423072,Addition,Residential,1993-04-02 00:00:00+00:00,1993-04-01,1993-01-01,1993,1993,1993,1993-2007,Richmond,37.563289,-77.423072
31664,85265007-E229-4EC5-85BD-A6A1F570804F,Finaled,1993-04-02 05:00:00+00:00,1993-04-02 05:00:00+00:00,NaT,NaT,1994-09-06 04:00:00+00:00,,,23220.0,900 HERMITAGE Road 23220,Richmond,23220.0,37.55994,-77.460848,New,Commercial,1993-04-02 00:00:00+00:00,1993-04-01,1993-01-01,1993,1993,1993,1993-2007,Richmond,37.55994,-77.460848
97300,F71634BA-3341-4893-B13A-BE9C656E4778,Finaled,1993-04-05 04:00:00+00:00,1993-04-05 04:00:00+00:00,NaT,NaT,1993-06-04 04:00:00+00:00,,,,,,,,,New,Residential,1993-04-05 00:00:00+00:00,1993-04-01,1993-01-01,1993,1993,1993,1993-2007,Outside the city,,
97299,A0B8890D-B541-4D09-82D1-500DF9DD7514,Finaled,1993-04-05 04:00:00+00:00,1993-04-05 04:00:00+00:00,NaT,NaT,1993-09-07 04:00:00+00:00,,,,,,,,,New,Residential,1993-04-05 00:00:00+00:00,1993-04-01,1993-01-01,1993,1993,1993,1993-2007,Outside the city,,
61819,F06110CE-0F25-4B9E-9BA4-1255C078AC98,Issued,1993-04-05 04:00:00+00:00,1993-04-05 04:00:00+00:00,NaT,NaT,NaT,,,23227.0,3911 BROOK Road 23227,Richmond,23227.0,37.589523,-77.45157,Upgrade,Residential,1993-04-05 00:00:00+00:00,1993-04-01,1993-01-01,1993,1993,1993,1993-2007,Richmond,37.589523,-77.45157


Subset to Residential

In [23]:
residential = permits[permits['destination'] == "Residential"].copy()
residential['latitude3'] = residential['latitude'].round(3)
residential['longitude3'] = residential['longitude'].round(3)
residential['latitude2'] = residential['latitude'].round(2)
residential['longitude2'] = residential['longitude'].round(2)

In [24]:
# Summary of number of permits with lat/long rounded to 3 decimal places
residential_latlong3 = residential.groupby(['pre-2008', 'Location', 'latitude3', 'longitude3']).size().to_frame('Num Permits').reset_index()
residential_latlong3['dummy_size'] = 1

# Rounded to two decimal places
residential_latlong2 = residential.groupby(['pre-2008', 'Location', 'latitude2', 'longitude2']).size().to_frame('Num Permits').reset_index()
residential_latlong2['dummy_size'] = 1

In [25]:
residential_new = residential[residential['reason'] == 'New']

# Plots

In [84]:
fig = px.density_mapbox(
    residential_new,
    title="New Permits: Before & After 2008",
    lat="geocoded_latitude", lon="geocoded_longitude",
    radius=2,
    animation_frame="pre-2008",
    #color="destination",
    #opacity=0.1,
    #size="car_hours",
    color_continuous_scale=px.colors.sequential.Electric, 
    #size_max=15, 
    zoom=9,
    width=600,
    height=600,
)
add_city_outline(fig)
fig.show()
fig.write_html("../docs/_includes/charts/permits_residential_new_prepost2008.html", include_plotlyjs="cdn")

In [89]:
fig = px.scatter_mapbox(
    residential_new, 
    title="Permits: New",
    lat="geocoded_latitude", lon="geocoded_longitude",
    animation_frame="Year Issued",
    opacity=0.3,
    center={"lat":37.556518, "lon":-77.481927},
    color='Location',
    color_discrete_map={'Outside the city': HIGHLIGHT_COLOR},
    #size="car_hours",
    #color_continuous_scale=px.colors.sequential.Reds, 
    #size_max=15, 
    zoom=9,
    width=700,
    height=600,
)
add_city_outline(fig)
fig.show()
fig.write_html("../docs/_includes/charts/permits_residential_new_timeline.html", include_plotlyjs="cdn")

In [88]:
fig = px.density_mapbox(
    residential_new[(residential_new["Location"] == "Outside the city")],
    title="Permits Outside the City: Before and After 2008",
    lat="geocoded_latitude", lon="geocoded_longitude",
    radius=3,
    animation_frame="pre-2008",
    center={"lat":37.556518, "lon":-77.481927},
    color_continuous_scale=px.colors.sequential.Electric, 
    zoom=9,
    width=600,
    height=600,
)
add_city_outline(fig)

fig.show()
fig.write_html("../docs/_includes/charts/permits_residential_new_outside_prepost2008.html", include_plotlyjs="cdn")

## Center of gravity

In [93]:
center_of_gravity = residential_new.groupby(by=['Year Issued'])[['latitude', 'longitude']].mean().reset_index()

fig = px.scatter_mapbox(
    center_of_gravity,
    title="Center of gravity",
    lat="latitude", lon="longitude",
    animation_frame="Year Issued",
    opacity=1,
    center={"lat":37.556518, "lon":-77.481927},
    #color='Location',
    #color_discrete_map={'Outside the city': HIGHLIGHT_COLOR},
    #size="car_hours",
    #color_continuous_scale=px.colors.sequential.Reds, 
    #size_max=15, 
    zoom=9,
    width=700,
    height=600,
)
fig.show()

## Make animated gifs

In [None]:
def make_gif(image_names:list[str], dir:str="images", duration=1000) -> str:
    """Take list of image file names and create a gif, the name of which is returned"""
    image_name_gif = f"{dir}/{image_names[0][:-4]}.gif"
    images = [Image.open(f"{dir}/{image_name}") for image_name in image_names]
    
    
    images[0].save(image_name_gif, save_all=True, append_images=images[1:], duration=duration, loop=0)
    print(f"Saved gif to: {image_name_gif}")
    
    return image_name_gif


def run_plot_iteration_and_gif(
    df : pd.DataFrame,
    title_prefix : str,
    plot_fn : Callable, 
    plot_args : dict, 
    image_name : str, 
    image_dir : str = "images",
    iteration_column : str = "Year Issued",
    duration=1000,
    show_colorscale=True,
) -> str:
    
    iteration_values = df[iteration_column].unique()

    iteration_names = [f"{image_name}_{iteration_column}-{iteration_value}.png" for iteration_value in iteration_values]
    print(iteration_names)
    
    for iteration_value, iteration_name in zip(iteration_values, iteration_names):
        print(f"Iteration value: {iteration_value} | iteration name: {iteration_name}")
        df_iteration = df[df[iteration_column] == iteration_value]
        fig = plot_fn(df_iteration, title=f"{title_prefix}{iteration_value}", **plot_args)
        
        if not show_colorscale:
            fig.update_layout(coloraxis_showscale=False)
        
        fig.write_image(f"{image_dir}/{iteration_name}")
    
    gif_name = make_gif(iteration_names, image_dir, duration)
    
    return gif_name


def run_plot_iteration_sticky_and_gif(
    df : pd.DataFrame,
    title_prefix : str,
    plot_fn : Callable, 
    plot_args : dict, 
    image_name : str, 
    image_dir : str = "images",
    iteration_column : str = "Year Issued",
    duration=1000,
    show_legend=True,
    max_lag=2,
) -> str:
    
    iteration_values = df[iteration_column].unique()

    iteration_names = [f"{image_name}_{iteration_column}-{iteration_value}.png" for iteration_value in iteration_values]
    print(iteration_names)
    
    for iteration_value, iteration_name in zip(iteration_values, iteration_names):
        print(f"Iteration value: {iteration_value} | iteration name: {iteration_name}")
        
        df_iteration = df[df[iteration_column] <= iteration_value].copy()
        max_year = df_iteration["issued_year"].max()
        df_iteration["lag"] = np.minimum(max_year - df_iteration["issued_year"], max_lag).astype(str)
        fig = plot_fn(df_iteration, title=f"{title_prefix}{iteration_value}", **plot_args)
        
        fig.update_layout(showlegend=show_legend)
        
        fig.write_image(f"{image_dir}/{iteration_name}")
    
    gif_name = make_gif(iteration_names, image_dir, duration)
    
    return gif_name