# TCAD Exploratory Analysis

Prerequisites: This notebook relies on the raw TCAD already being parsed and stored as parquet files.

In [1]:
%load_ext autoreload
%autoreload 2

First, the preprocessed parquet files are loaded in from the specified directory using a custom package (tcad).
The required files include prop.parquet, imp_info.parquet, imp_det.parquet, and imp_atr.parquet.

In [2]:
import tcad.selector as tcs

tables = tcs.Selector('data/processed/TCAD')

The dataframes can be accessed directly as properties of the Selector object.

In [3]:
display(tables.prop_df.head(2))
display(tables.imp_info_df.head(2))
display(tables.imp_det_df.head(2))
display(tables.imp_info_df.head(2))

Unnamed: 0,prop_id,prop_type_cd,prop_val_yr,geo_id,partial_owner,udi_group,situs_street_prefx,situs_street,situs_street_suffix,situs_city,...,dataset_id,deed_num,exemption_percentage,sic_code,omitted_imprv_hstd_val,omitted_imprv_non_hstd_val,pp_late_interstate_allocation_val,appraised_val_reflecting_productivity_loss,assessed_val_reflecting_productivity_loss,late_correction_val
0,100008,R,2023,100030105,F,0,S,LAMAR,BLVD,,...,0,2014035621TR,0,,0,0,0,0,0,0
1,100012,R,2023,100030109,F,0,S,LAMAR,BLVD,,...,0,2011172140TR,0,,0,0,0,0,0,0


Unnamed: 0,prop_id,prop_val_yr,imprv_id,imprv_type_cd,imprv_type_desc,imprv_state_cd,imprv_homesite,imprv_val,imprv_homesite_pct,omitted,omitted_imprv_val
0,100008,2023,5225541,0,Detail Only,F1,N,12278,100.0,N,0
1,100008,2023,5225542,32,RESTAURANT,F1,N,176710,100.0,N,0


Unnamed: 0,prop_id,prop_val_yr,imprv_id,imprv_det_id,Imprv_det_type_cd,Imprv_det_type_desc,Imprv_det_class_cd,yr_built,depreciation_yr,imprv_det_area,imprv_det_val
0,100008,2023,5225541,41038597,551,PAVED AREA,AA,1984,1984,17100,0
1,100008,2023,5225542,41038598,1ST,1st Floor,C,2013,2013,2986,161525


Unnamed: 0,prop_id,prop_val_yr,imprv_id,imprv_type_cd,imprv_type_desc,imprv_state_cd,imprv_homesite,imprv_val,imprv_homesite_pct,omitted,omitted_imprv_val
0,100008,2023,5225541,0,Detail Only,F1,N,12278,100.0,N,0
1,100008,2023,5225542,32,RESTAURANT,F1,N,176710,100.0,N,0


You can also search for specific zip codes or building types provided as a string or list of strings. This will return a new selector object, which you use the above functions to access the dataframes. This also makes it possible to chain function calls to an extent

In [4]:
tables2 = tables.query(bldg_types='1 FAM DWELLING',zip_codes=['78733','78741'])

The get_single_family_building_summary combines data from the four dataframes in the Selector into a single dataframe and returns it. Internally, it will also limit results to single family buildings as data for other types is structured differently and not handled correctly at this time.


In [5]:
tables.get_single_family_building_summary()

Unnamed: 0,prop_id,imprv_id,prop_val_yr_x,situs_num,situs_street_prefx,situs_street,situs_street_suffix,situs_unit,situs_city,situs_zip,...,imprv_non_hstd_val,market_value,ten_percent_cap,assessed_val,imprv_homesite,imprv_homesite_pct,en_exempt,pc_exempt,so_exempt,eco_exempt
0,100027,5225554,2023,1008,S,8,ST,,,78704,...,0,1005168,244300,760868,Y,100.0,F,F,F,F
1,100037,5225569,2023,1107,W,MONROE,ST,,,78704,...,0,2164466,550786,1613680,Y,100.0,F,F,F,F
2,100038,5225571,2023,1105,W,MONROE,ST,,,78704,...,0,2242846,0,2242846,Y,100.0,F,F,F,F
3,100039,5225572,2023,1103,W,MONROE,ST,,AUSTIN,78704,...,0,1619220,46220,1573000,Y,100.0,F,F,F,F
4,100040,5225573,2023,1011,W,MONROE,ST,,,78704,...,0,1346000,260714,1085286,Y,100.0,F,F,F,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296777,975099,5784718,2023,801,W,JOHANNA,ST,,,78704,...,134857,1051063,0,1051063,Y,100.0,F,F,F,F
296778,975133,5786764,2023,1610,,HOUSTON,ST,,,,...,0,1911868,0,1911868,Y,100.0,F,F,F,F
296779,975190,5784797,2023,5408,,JEFF DAVIS,AVE,,,78756,...,0,2566535,0,2566535,Y,100.0,F,F,F,F
296780,975415,5785429,2023,8401,,HUB,CV,,,78759,...,0,1266721,0,1266721,Y,100.0,F,F,F,F


An example of chaining:

In [6]:
tables.query(bldg_types='1 FAM DWELLING',zip_codes=['78733','78741']).get_single_family_building_summary()

Unnamed: 0,prop_id,imprv_id,prop_val_yr_x,situs_num,situs_street_prefx,situs_street,situs_street_suffix,situs_unit,situs_city,situs_zip,...,imprv_non_hstd_val,market_value,ten_percent_cap,assessed_val,imprv_homesite,imprv_homesite_pct,en_exempt,pc_exempt,so_exempt,eco_exempt
0,108470,5232774,2023,3107,,POINT O WOODS,,,AUSTIN,78733,...,0,1907098,597771,1309327,Y,100.0,F,F,F,F
1,108472,5232776,2023,3103,,POINT O WOODS,,,,78733,...,0,3404582,1247031,2157551,Y,100.0,F,F,F,F
2,113905,5237403,2023,3001,,CREEKS EDGE,PKWY,,AUSTIN,78733,...,0,1869601,720989,1148612,Y,100.0,F,F,F,F
3,113907,5237404,2023,3005,,CREEKS EDGE,PKWY,,AUSTIN,78733,...,0,1467199,508996,958203,Y,100.0,F,F,F,F
4,113908,5237405,2023,3007,,CREEKS EDGE,PKWY,,AUSTIN,78733,...,0,1534618,519997,1014621,Y,100.0,F,F,F,F
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6426,971361,5783031,2023,6206,,FELIX,AVE,B,,78741,...,189236,227236,0,227236,N,100.0,F,F,F,F
6427,971372,5783040,2023,6215,,HOGAN,AVE,A,,78741,...,0,519224,0,519224,Y,100.0,F,F,F,F
6428,971373,5783044,2023,6215,,HOGAN,AVE,B,,78741,...,0,278006,0,278006,Y,100.0,F,F,F,F
6429,974334,5784149,2023,6215,,CLUB,TER,,,78741,...,0,585314,0,585314,Y,100.0,F,F,F,F


In [7]:
tables.prop_df

Unnamed: 0,prop_id,prop_type_cd,prop_val_yr,geo_id,partial_owner,udi_group,situs_street_prefx,situs_street,situs_street_suffix,situs_city,...,dataset_id,deed_num,exemption_percentage,sic_code,omitted_imprv_hstd_val,omitted_imprv_non_hstd_val,pp_late_interstate_allocation_val,appraised_val_reflecting_productivity_loss,assessed_val_reflecting_productivity_loss,late_correction_val
0,100008,R,2023,0100030105,F,0,S,LAMAR,BLVD,,...,0,2014035621TR,0,,0,0,0,0,0,0
1,100012,R,2023,0100030109,F,0,S,LAMAR,BLVD,,...,0,2011172140TR,0,,0,0,0,0,0,0
2,100015,R,2023,0100030112,F,0,S,LAMAR,BLVD,,...,0,2001096152TR,0,,0,0,0,0,0,0
3,100018,R,2023,0100030115,F,0,S,LAMAR,BLVD,,...,0,2014043470TR,0,,0,0,0,0,0,0
4,100020,R,2023,0100030201,F,0,S,LAMAR,BLVD,,...,0,2007035338TR,0,,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
477141,977395,P,2023,,F,0,,KEVIN,LN,AUSTIN,...,0,,0,,0,0,0,0,0,0
477142,977396,P,2023,,F,0,,VARIOUS LOCATIONS,,,...,0,,0,,0,0,0,0,0,0
477143,977397,MH,2023,0259410216,F,0,,KILLINGSWORTH,LN,PFLUGERVILLE,...,0,,0,,0,0,0,0,0,0
477144,977401,P,2023,,F,0,,PANTHER,LN,PFLUGERVILLE,...,0,,0,,0,0,0,0,0,0


View the available building types for the current selector using this property.

In [8]:
tables.bldg_types

['01 SFR COMM',
 '02 DUPLEX COMM',
 '03 TRIPLEX COMM',
 '04 FOURPLEX COMM',
 '1 FAM DWELLING',
 '2 FAM DWELLING',
 'ADDITIONAL LIVING QUARTER',
 'ADU COMM',
 'ALT LIVING CTR',
 'APARTMENT 100+',
 'APARTMENT 26-49',
 'APARTMENT 5-25',
 'APARTMENT 50-100',
 'ASSISTED LIVING/MEMORY',
 'Accessory Dwelling Unit (',
 'BANK - OFFICE',
 'BANK BRANCH OFF',
 'BANK DRIVE THRU',
 'BED/BKFAST(SFR)',
 'BOWLING CENTER',
 'CAR WASH (FULL SERVICE)',
 'CLASSROOM',
 'CLUBHOUSE',
 'COMM SHOP CTR',
 'COMMERCIAL SPACE CONDOS',
 'COMPUTER DATA CENTER',
 'CONDO (STACKED)',
 'CONTINUING CARE',
 'CONVENIENCE STOR',
 'COUNTRY CLUB',
 'DAY CARE CENTER',
 'DEALERSHIP',
 'DEPT STR >25,000',
 'DISC STR >25,000',
 'DORMITORY',
 'DORMITORY HIRISE',
 'DRUGSTORE',
 'Detail Only',
 'FAST FOOD REST',
 'FOURPLEX',
 'FRAT/SORORITY',
 'GARAGE APARTMENT',
 'GARAGE APT COMM',
 'GROCERY STORE',
 'HEALTH CLUB',
 'HIRISE CONDO/APT',
 'HOSPITAL',
 'HOTEL-FULL SERVC',
 'HOTEL-LMTD SERVC',
 'INDEPENDENT LIVING',
 'INDL 20K+ 25-49%',

In [9]:
tables.imp_det_df[tables.imp_det_df['prop_id']==108470]

Unnamed: 0,prop_id,prop_val_yr,imprv_id,imprv_det_id,Imprv_det_type_cd,Imprv_det_type_desc,Imprv_det_class_cd,yr_built,depreciation_yr,imprv_det_area,imprv_det_val
60121,108470,2023,5232774,41099107,1ST,1st Floor,R3,1997,1997,3337,345905
60122,108470,2023,5232774,41099108,2ND,2nd Floor,R3,1997,1997,958,89375
60123,108470,2023,5232774,41099109,011,PORCH OPEN 1ST F,R3,1997,1997,91,3969
60124,108470,2023,5232774,41099110,011,PORCH OPEN 1ST F,R3,1997,1997,344,12698
60125,108470,2023,5232774,41099111,041,GARAGE ATT 1ST F,R3,1997,1997,924,26076
60126,108470,2023,5232774,41099112,095,HVAC RESIDENTIAL,R3,1997,1997,4295,8367
60127,108470,2023,5232774,41099113,251,BATHROOM,R3,1997,1997,4,0
60128,108470,2023,5232774,41099114,447,SPA CONCRETE,R3,1997,1997,1,4870
60129,108470,2023,5232774,41099115,522,FIREPLACE,R3,1997,1997,4,24055
60130,108470,2023,5232774,41099116,604,POOL RES CONC,R3,1997,1997,1,24351


In [10]:
tables.imp_atr_df[(tables.imp_atr_df['imprv_attr_desc']=='Roof Covering')]['prop_id'].duplicated()

29         False
40         False
54         False
66         False
73         False
           ...  
3361911    False
3361916     True
3362186    False
3362196    False
3362205    False
Name: prop_id, Length: 426272, dtype: bool

In [11]:
len(tables.imp_atr_df[tables.imp_atr_df['imprv_attr_desc']=='Foundation'])

427735

In [12]:
tables.imp_atr_df[tables.imp_atr_df['imprv_attr_desc']=='Foundation']['imprv_attr_cd'].unique()

['SLAB', 'PIER AND B', 'BLOCKS', 'FLOATING S', 'OTHER', 'WOOD POST']
Categories (170, object): ['07', '08', '09', '1', ..., 'UQ', 'W', 'WOOD POST', 'WOOD SHING']

In [13]:
tables.imp_atr_df[tables.imp_atr_df['imprv_attr_desc']=='Roof Covering']['imprv_attr_cd'].unique()

['METAL', 'COMPOSITIO', 'BUILT-UP', 'OTHER', 'TILE', 'WOOD SHING']
Categories (170, object): ['07', '08', '09', '1', ..., 'UQ', 'W', 'WOOD POST', 'WOOD SHING']

In [14]:
tables.imp_atr_df[(tables.imp_atr_df['imprv_attr_desc']=='Roof Covering')&(tables.imp_atr_df['prop_id']==974335)]

Unnamed: 0,prop_id,prop_val_yr,imprv_id,imprv_det_id,imprv_attr_id,imprv_attr_desc,imprv_attr_cd
3360516,974335,2023,5784144,45369179,39867665,Roof Covering,BUILT-UP
3360517,974335,2023,5784144,45369179,39867666,Roof Covering,COMPOSITIO
3360518,974335,2023,5784144,45369179,39867667,Roof Covering,METAL
3360519,974335,2023,5784144,45369179,39867668,Roof Covering,OTHER
3360520,974335,2023,5784144,45369179,39867669,Roof Covering,TILE
3360521,974335,2023,5784144,45369179,39867670,Roof Covering,WOOD SHING
3360569,974335,2023,5784144,45369182,39867718,Roof Covering,BUILT-UP
3360570,974335,2023,5784144,45369182,39867719,Roof Covering,COMPOSITIO
3360571,974335,2023,5784144,45369182,39867720,Roof Covering,METAL
3360572,974335,2023,5784144,45369182,39867721,Roof Covering,OTHER


In [15]:
tables.query(bldg_types='1 FAM DWELLING',zip_codes=['78733','78741']).get_single_family_building_summary().to_parquet('data/processed/tcad_78733_78741_single_fam.parquet')
#prop_df = tables.process_prop_df()

Interesting finds in tcad database.
2100 TRIPSHAW LN 64C, TX 78741 - Some times single family homes can have 3 stories - townhouse area

1504 Miami Dr, 78733 - no hvac

