## Jacksonville, Florida - Property Database

The data used in this notebook comes from the January 2021 uncertified tax roll for Duval County. The file is accessible on the City of Jacksonville website at: https://www.coj.net/departments/property-appraiser/information-offerings

In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
# Convert text file to Pandas dataframe.

df = pd.read_csv('D:\\raw_data\\UncertifiedRE_01_04_21.txt', 
                 sep="|", 
                 names=list(range(31)), 
                 encoding="ANSI")
df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,1,0000010005R,1,2S,23E,3401,N,C/O LARSON & MCGOWIN LLC,PO BOX 161139,MOBILE,...,32420.0,32420.0,0.0,521850.0,32420.0,32420.0,32420.0,GS,3198369.0,73.42
1,2,0000010005R,1,01-2S-23E 73.42,,,,,,,...,,,,,,,,,,
2,2,0000010005R,2,N 3/4 LYING W OF RR,,,,,,,...,,,,,,,,,,
3,3,0000010005R,1,RAYONIER FORREST RESOURCES L P,,,,,,,...,,,,,,,,,,
4,4,0000010005R,0,N,US 301,HWY,,JACKSONVILLE,32234,0,...,,,,,,,,,,


In [3]:
print(df.shape)

(10596669, 31)


This dataframe contains data from 17 tables and needs to be broken down into its component tables with the schemas provided by the city.

In [4]:
# Column names for each table.

parcel_vals = ['RE', 'section', 'township', 'range', 'tile', 'confidential', 'mailing_address_1',
              'mailing_address_2', 'city', 'state', 'zipcode', 'property_use', 'subdivision_num', 
              'subdivision_name', 'plat_book', 'plat_page', 'neighborhood', 'perc_capped', 'value_method',
              'cap_base_yr', 'market_value', 'assessed_value', 'building_value', 'just_value',
              'school_taxable', 'county_taxable', 'sjrwmd_taxable', 'tax_district', 'lot_sf', 'acres']
legal_vals = ['RE', 'line', 'legal_descr']
owner_vals = ['RE', 'line', 'owner']
site_vals = ['RE', 'street_num', 'direction', 'street_name', 'street_type', 'unit', 'city', 'zipcode',
            'building_num']
building_vals = ['RE', 'building', 'type_code', 'type_descr', 'style', 'class', 'quality', 
                 'actual_yr_built', 'effec_yr_built', 'perc_complete', 'value', 'heated_sf']
structure_vals = ['RE', 'building', 'element_category', 'element_type', 'element_type_descr',
                 'perc_element']
utility_vals = ['RE', 'building', 'structure_code', 'structure_descr', 'units']
subarea_vals = ['RE',  'building', 'substructure_code', 'substructure_descr', 'actual_area', 'effec_area',
                'heated_sf']
condo_vals = ['RE', 'building', 'condo_num', 'condo_type', 'value', 'view_code', 'perc_adjustment', 
             'bedrooms', 'bathrooms', 'area', 'floor_adjustment']
amenity_vals = ['RE', 'building', 'amenity_code', 'unit', 'amenity_type', 'amenity_value']
feature_vals = ['RE', 'building', 'ft_category', 'ft_descr', 'grade', 'length', 'width', 'units', 'ppu',
               'actual_yr_built', 'effec_yr_built', 'deprec_value']
common_vals = ['RE', 'use_code', 'use_descr', 'zoning', 'unit_type', 'front_feet', 'depth', 'units',
               'ppu', 'land_val']
agland_vals = ['RE', 'use_code', 'use_descr', 'zoning', 'unit_type', 'units', 'ppu']
marketag_vals = ['RE', 'use_code', 'use_descr', 'zoning', 'unit_type', 'units', 'ppu']
exemption_vals = ['RE', 'line', 'exempt_holder', 'exempt_code', 'exempt_descr', 'perc_exempt', 
                  'exempt_amount', 'override']
character_vals = ['RE', 'category', 'char_type', 'char_descr']
sale_vals = ['RE', 'sale_id', 'seller', 'or_book', 'or_page', 'qualification', 'improved',
            'instrument_type', 'instrument_descr', 'deed_type', 'deed_description', 'sale_date', 
            'record_date', 'price']


In [5]:
def table_split(i, lst):
    """Breaks single dataframe into dataframes of each table with named columns.

    Parameters
    ----------
    i : int
        The number of the table id, provided by City of Jacksonville's schema
    lst : list
        List of column names

    Returns
    -------
    DataFrame
        Pandas dataframe of one of 17 tables included in original text file
    """
    # Slice df by table number
    df2 = df[df[0] == i].dropna(how='all',axis='columns')
    # Drop table number column
    df2 = df2.drop(columns=[0])
    # Rename columns using list of column names
    columns = dict(zip(list(range(1,len(lst)+1)),lst))
    df2.rename(columns=columns, inplace=True)
    # Return dataframe
    return df2

For the purpose of analyzing Jacksonville's housing market, I do not need all 17 tables. To save memory and computing power, I will omit the tables I do not need. To make each dataframe, I will run the table_split function I wrote above.

In [6]:
parcel = table_split(1, parcel_vals)
legal = table_split(2, legal_vals)
owner = table_split(3, owner_vals)
site = table_split(4, site_vals)
building = table_split(5, building_vals)
structure = table_split(6, structure_vals)
utility = table_split(7, utility_vals)
subarea = table_split(8, subarea_vals)
condo = table_split(9, condo_vals)
amenity = table_split(10, amenity_vals)
feature = table_split(11, feature_vals)
common = table_split(12, common_vals)
agland = table_split(13, agland_vals)
marketag = table_split(14, marketag_vals)
exemption = table_split(15, exemption_vals)
character = table_split(16, character_vals)
#sale = table_split(17, sale_vals)

I also need to import a table of the qualification codes used by appraisers in describing transactions (qualified arms-length, sales of multiple parcels, etc.).

In [7]:
qual_codes = pd.read_csv('D:\\raw_data\\Qualification.csv')

Property sales are stored in a separate city txt file with fixed column widths.

In [8]:
widths = [5, 12, 5, 50, 8, 8, 2, 1, 2, 1, 25, 25, 11]
df = pd.read_fwf("D:\\raw_data\\Sales_01_04_2021.txt", widths=widths)
df = df[df['00001']=='00003']
df.drop(columns=['00001'],inplace=True)
df.rename(columns=dict(zip(df.columns,['RE', 'sale_id', 'seller', 'or_bk', 'or_pg','q_id',
                  'improved', 'instrument', 'sales_source', 'sale_date', 'record_date',
                  'price'])),inplace=True)

In [9]:
df['date_lngth'] = [len(str(x)) for x in df['sale_date']]
for i in df.loc[df['date_lngth']!=10,'sale_date'].index:
    try: df['sale_date'][i] = df['sale_date'][i][:11].strip()
    except: continue
df['sale_date'] = pd.to_datetime(df['sale_date'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try: df['sale_date'][i] = df['sale_date'][i][:11].strip()


In [10]:
df['date_lngth'] = [len(str(x)) for x in df['record_date']]
for i in df.loc[df['date_lngth']!=10,'record_date'].index:
    try: df['record_date'][i] = df['record_date'][i][:11].strip()
    except: continue
df['record_date'] = pd.to_datetime(df['record_date'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  try: df['record_date'][i] = df['record_date'][i][:11].strip()


In [11]:
df.drop(columns=['date_lngth'],inplace=True)
df['q_id'] = df['q_id'].replace('1I',None).fillna('0').astype('int64')

Lastly, I will create a SQLite database and populate it with the dataframes created in this notebook.

In [12]:
with sqlite3.connect('D:\\Data\\DuvalProperties.sqlite') as conn:
    cur = conn.cursor()

In [13]:
    # I only need to run this block the first time I create my SQLite file
    
    cur.executescript('''
    DROP TABLE IF EXISTS Parcel;
    DROP TABLE IF EXISTS Legal;
    DROP TABLE IF EXISTS Owner;
    DROP TABLE IF EXISTS Site;
    DROP TABLE IF EXISTS Building;
    DROP TABLE IF EXISTS Structure;
    DROP TABLE IF EXISTS Utility;
    DROP TABLE IF EXISTS Subarea;
    DROP TABLE IF EXISTS Condo;
    DROP TABLE IF EXISTS Amenity;
    DROP TABLE IF EXISTS Feature;
    DROP TABLE IF EXISTS Common;
    DROP TABLE IF EXISTS Agland;
    DROP TABLE IF EXISTS Marketag;
    DROP TABLE IF EXISTS Exemption;
    DROP TABLE IF EXISTS Character;
    DROP TABLE IF EXISTS Sale;
    DROP TABLE IF EXISTS Qualification;


    CREATE TABLE Parcel (
        RE TEXT, section TEXT, township TEXT, range TEXT, tile INTEGER, confidential TEXT, 
        mailing_address_1 TEXT, mailing_address_2 TEXT, city TEXT, state TEXT, zipcode TEXT,
        property_use INTEGER, subdivision_num FLOAT, subdivision_name TEXT,
        plat_book TEXT, plat_page TEXT, neighborhood TEXT, perc_capped FLOAT, value_method INTEGER, 
        cap_base_year INTEGER, market_value INTEGER, assessed_value INTEGER, building_value INTEGER, 
        just_value INTEGER, school_taxable INTEGER, county_taxable INTEGER, sjrwmd_taxable INTEGER, 
        tax_district TEXT, lot_sf INTEGER, acres FLOAT
        );

    CREATE TABLE Legal (
        RE TEXT, line INTEGER, legal_descr TEXT
        );    

    CREATE TABLE Owner (
        RE TEXT, line INTEGER, owner TEXT
        );  

    CREATE TABLE Site (
        RE TEXT, street_num TEXT, direction TEXT, street_name TEXT, street_type TEXT, unit TEXT, city TEXT,
        zipcode TEXT, building_num INTEGER
        );    

    CREATE TABLE Building (
        RE TEXT, building INTEGER, type_code TEXT, type_descr TEXT, style INTEGER, class INTEGER,
        quality INTEGER, actual_yr_built INTEGER, effec_yr_built INTEGER, perc_complete FLOAT,
        value INTEGER, heated_sf INTEGER
        );

    CREATE TABLE Structure (
        RE TEXT, building INTEGER, element_category TEXT, element_type INTEGER, element_type_descr TEXT, 
        perc_element FLOAT
    );

    CREATE TABLE Utility (
        RE TEXT, building INTEGER, structure_code TEXT, structure_descr TEXT, units FLOAT
        );

    CREATE TABLE Subarea (
        RE TEXT, substructure_code TEXT, substructure_descr TEXT, actual_area INTEGER, effec_area INTEGER,
        heated_sf INTEGER
        );

    CREATE TABLE Condo (
        RE TEXT, building INTEGER, condo_num INTEGER, condo_type TEXT, value INTEGER, view_code TEXT,
        perc_adjustment FLOAT, bedrooms FLOAT, bathrooms FLOAT, area INTEGER, floor_adjustment FLOAT
        );

    CREATE TABLE Amenity (
        RE TEXT, building INTEGER, amenity_code INTEGER, unit FLOAT, amenity_type TEXT, amenity_value INTEGER
        );

    CREATE TABLE Feature (
        RE TEXT, building INTEGER, ft_category TEXT, ft_descr TEXT, grade FLOAT, length INTEGER,
        width INTEGER, units FLOAT, ppu FLOAT, actual_yr_built INTEGER, effec_yr_built INTEGER,
        deprec_value INTEGER
        );    

    CREATE TABLE Common (
        RE TEXT, use_code TEXT, use_descr TEXT, zoning TEXT, unit_type TEXT, front_feet INTEGER,
        depth INTEGER, units FLOAT, ppu FLOAT, land_val INTEGER
        );

    CREATE TABLE Agland (
        RE TEXT, use_code TEXT, use_descr TEXT, zoning TEXT, unit_type TEXT, units FLOAT, ppu FLOAT
        );  

    CREATE TABLE Marketag (
        RE TEXT, use_code TEXT, use_descr TEXT, zoning TEXT, unit_type TEXT, units FLOAT, ppu FLOAT
        );

    CREATE TABLE Exemption (
        RE TEXT, line INTEGER, exempt_holder TEXT, exempt_code TEXT, exempt_descr TEXT, perc_exempt FLOAT,  
        exempt_amount INTEGER, override INTEGER
        );

    CREATE TABLE Character (
        RE TEXT, category TEXT, char_type TEXT, char_descr TEXT
        );
        
    CREATE TABLE Sale (
        RE TEXT, sale_id INTEGER, seller TEXT, or_bk TEXT, or_pg TEXT, q_id INTEGER, improved TEXT,
        instrument TEXT, sales_source TEXT, sale_date TEXT, record_date TEXT, price INTEGER
        );   
        
    CREATE TABLE Qualification (
        q_id INTEGER, status TEXT, reason TEXT
        );    
    ''')

<sqlite3.Cursor at 0x2c6e6687ce0>

In [14]:
    parcel.to_sql('Parcel', conn, if_exists='replace', index=False)
    legal.to_sql('Legal', conn, if_exists='replace', index=False)
    owner.to_sql('Owner', conn, if_exists='replace', index=False)
    site.to_sql('Site', conn, if_exists='replace', index=False)
    building.to_sql('Building', conn, if_exists='replace', index=False)
    structure.to_sql('Structure', conn, if_exists='replace', index=False)
    utility.to_sql('Utility', conn, if_exists='replace', index=False)
    subarea.to_sql('Subarea', conn, if_exists='replace', index=False)
    condo.to_sql('Condo', conn, if_exists='replace', index=False)
    amenity.to_sql('Amenity', conn, if_exists='replace', index=False)
    feature.to_sql('Feature', conn, if_exists='replace', index=False)
    common.to_sql('Common', conn, if_exists='replace', index=False)
    agland.to_sql('Agland', conn, if_exists='replace', index=False)
    marketag.to_sql('Marketag', conn, if_exists='replace', index=False)
    exemption.to_sql('Exemption', conn, if_exists='replace', index=False)
    character.to_sql('Character', conn, if_exists='replace', index=False)
    df.to_sql('Sale', conn, if_exists='replace', index=False)
    qual_codes.to_sql('Qualification', conn, if_exists='replace', index=False)