# Recommendation System
## Import Dependencies and Data

In [None]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
class census_api:
  """Small client for the U.S. Census Bureau data API (2021 ACS 5-year dataset).

  Requests a fixed set of ACS variables at ZIP Code Tabulation Area (ZCTA)
  level and returns them as a pandas DataFrame with readable column names.

  Attributes:
    HOST, year, dataset: components of the endpoint URL.
    base_url: the three components joined with '/'.
    var_map: Census variable code -> readable column name.
    var_list: 'NAME' followed by every code in var_map, in request order.
  """

  def __init__(self):
    # Define base URL
    self.HOST = 'https://api.census.gov/data'
    self.year = '2021'
    self.dataset = 'acs/acs5'
    self.base_url = '/'.join([self.HOST, self.year, self.dataset])
    # Define mapping of variables and descriptions.
    # NOTE: 'transporation_carpooled' and 'transporation_public_transit' are
    # misspelled, but the labels are kept as-is because they become column
    # names that other cells and saved outputs already rely on.
    self.var_map = {
      'B01001_001E': 'population',
      'B01002_001E': 'median_age',
      'B19113_001E': 'median_family_income',
      'B19301_001E': 'per_capita_income',
      'B07013_001E': 'total_households',
      'B07013_002E': 'owner_occupied_households',
      'B07013_003E': 'renter_occupied_households',
      'B08101_001E': 'transportation_total',
      'B08101_009E': 'transportation_drove',
      'B08101_017E': 'transporation_carpooled',
      'B08101_025E': 'transporation_public_transit',
      'B08101_033E': 'transportation_walked',
      'B08101_041E': 'transportation_other',
      'B08101_049E': 'transportation_wfh',
      'B25001_001E': 'total_units',
      'B25002_002E': 'occupied_units',
      'B25002_003E': 'vacant_units',
      'B25003_002E': 'owner_occupied_units',
      'B25003_003E': 'renter_occupied_units',
      'B17001_001E': 'total_income_poverty',
      'B17001_002E': 'total_income_below_poverty',
      'B19083_001E': 'gini_index',
    }
    # Define list of variables. Derived from var_map (dicts preserve insertion
    # order) so the requested codes and their labels cannot drift apart.
    self.var_list = ['NAME', *self.var_map]

  # Converts the list of variable names to a single string
  def get_vars(self):
    """Return the requested variable codes as one comma-separated string."""
    return ','.join(self.var_list)

  def _build_url(self, zip_list):
    """Build the request URL for the given zip codes.

    Args:
      zip_list: iterable of zip codes (str or int). A single string is treated
        as one code rather than being split into characters.

    Returns:
      The full query URL as a string.

    Raises:
      ValueError: if no zip code is supplied.
    """
    if isinstance(zip_list, str):
      zip_list = [zip_list]
    cleaned = []
    for raw in zip_list:
      code = str(raw).strip()
      # Integer zips lose leading zeros (02134 -> 2134); restore them.
      # Non-numeric values such as the '*' wildcard pass through untouched.
      if code.isdigit():
        code = code.zfill(5)
      cleaned.append(code)
    # Drop duplicates while keeping first-seen order: one entry per listing row
    # would otherwise put hundreds of repeated codes into the URL.
    unique_codes = list(dict.fromkeys(cleaned))
    if not unique_codes:
      raise ValueError('zip_list must contain at least one zip code')
    zip_string = ','.join(unique_codes)
    var_str = self.get_vars()
    return f'{self.base_url}?get={var_str}&for=zip%20code%20tabulation%20area:{zip_string}'

  # Takes in a list of zip codes
  # Returns a pandas dataframe of census data with human-readable column names
  def get_data(self, zip_list, timeout=30):
    """Fetch the ACS variables for the given zip codes.

    Args:
      zip_list: iterable of zip codes (str or int); duplicates are ignored.
      timeout: seconds to wait for the API before giving up.

    Returns:
      DataFrame built from the JSON response: the first row is used as the
      header and variable codes are renamed via var_map. Values are left
      exactly as the API returned them (no type conversion is done here).

    Raises:
      ValueError: if zip_list is empty.
      requests.HTTPError: if the API answers with an error status.
    """
    url = self._build_url(zip_list)
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Surface API errors directly instead of as an opaque JSON decode failure.
    response.raise_for_status()
    data_json = response.json()
    data_df = pd.DataFrame(data_json[1:], columns=data_json[0])
    return data_df.rename(columns=self.var_map)

In [None]:
# Load the cleaned Redfin listings CSV (the path looks like a Colab Google Drive mount).
listings = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Regis Practicum II/cleaned_redfin_listings.csv')
# Integer zips match the numeric 'zip' key of the census frame when the two are merged later.
listings['zip'] = listings['zip'].astype('int')
# One entry per distinct zip code, in first-seen order. Many listings share a zip,
# so sending one value per listing row would bloat the Census API URL with repeats.
zips = listings['zip'].drop_duplicates().astype('str').to_list()

In [None]:
# Build the Census client and fetch the ACS variables for the listing zip codes
# (get_data performs an HTTP request to the Census API).
api = census_api()
census = api.get_data(zips)

In [None]:
# Summarize the listings frame: row count, column count, dtypes and memory use.
listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671 entries, 0 to 670
Columns: 409 entries, Unnamed: 0 to address
dtypes: float64(405), int64(2), object(2)
memory usage: 2.1+ MB


In [None]:
# Preview the first rows returned by the Census API.
census.head()

Unnamed: 0,NAME,population,median_age,median_family_income,per_capita_income,total_households,owner_occupied_households,renter_occupied_households,transportation_total,transportation_drove,...,transportation_wfh,total_units,occupied_units,vacant_units,owner_occupied_units,renter_occupied_units,total_income_poverty,total_income_below_poverty,gini_index,zip code tabulation area
0,ZCTA5 72701,46959,24.6,68046,27381,40242,18703,21539,20282,13813,...,2466,19203,17206,1997,7352,9854,40435,11012,0.5492,72701
1,ZCTA5 72703,34274,32.5,80807,37954,33501,15495,18006,17733,13368,...,1208,16770,15642,1128,6188,9454,33865,6158,0.531,72703
2,ZCTA5 72704,27740,32.0,97791,43172,27347,17131,10216,16851,13089,...,2315,12901,11967,934,7141,4826,27689,2207,0.422,72704
3,ZCTA5 72712,37794,35.2,111707,48157,36580,23120,13460,18422,14619,...,2084,15400,13964,1436,7951,6013,37001,2750,0.4871,72712
4,ZCTA5 72713,22269,30.4,117346,46943,21986,12752,9234,11775,8479,...,1962,8717,8231,486,4375,3856,22218,1314,0.3834,72713


In [None]:
# Check dtypes: the API values come back as strings (object dtype), which is
# why a later cell converts the frame with pd.to_numeric.
census.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 24 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   NAME                          14 non-null     object
 1   population                    14 non-null     object
 2   median_age                    14 non-null     object
 3   median_family_income          14 non-null     object
 4   per_capita_income             14 non-null     object
 5   total_households              14 non-null     object
 6   owner_occupied_households     14 non-null     object
 7   renter_occupied_households    14 non-null     object
 8   transportation_total          14 non-null     object
 9   transportation_drove          14 non-null     object
 10  transporation_carpooled       14 non-null     object
 11  transporation_public_transit  14 non-null     object
 12  transportation_walked         14 non-null     object
 13  transportation_other  

In [None]:
# Prepare the census frame for joining, then attach it to every listing.
# Written as a non-mutating chain so the cell can be re-run safely: the in-place
# version raised KeyError on a second run because 'NAME' had already been dropped.
census = (
    census
    .rename(columns={'zip code tabulation area': 'zip'})
    .drop(columns='NAME', errors='ignore')
    .apply(pd.to_numeric)
)
# Inner join: listings whose zip has no census row are left out of all_data.
# validate='many_to_one' raises if the census frame ever holds a zip twice,
# instead of silently duplicating listing rows.
all_data = pd.merge(listings, census, on='zip', validate='many_to_one')

In [None]:
# Show every column when displaying frames (this setting stays active for later cells too).
pd.set_option('display.max_columns', None)
# Stop here if the merged frame contains any missing value.
assert not all_data.isna().to_numpy().any()
all_data.head()

Unnamed: 0.1,Unnamed: 0,zip,price,beds,baths,square feet,lot size,year built,days on market,$/square feet,hoa/month,latitude,longitude,id,sale type_For-Sale-by-Owner Listing,sale type_MLS Listing,sale type_New Construction Home,sale type_New Construction Plan,property type_Condo/Co-op,property type_Multi-Family (2-4 Unit),property type_Multi-Family (5+ Unit),property type_Single Family Residential,property type_Townhouse,city_Bella Vista,city_Bentonville,city_Centerton,city_Farmington,city_Fayetteville,city_Garfield,city_Gravette,city_Pea Ridge,city_Rogers,city_Springdale,location_-,location_--,location_.,location_02-19-29-RURAL,location_03-19-29 Rural,location_03-20-30-RURAL,location_08-20-29-RURAL,location_09-19-30-ROGERS,location_12-19-30 Rogers,location_12-19-30-ROGERS,location_14-19-32 Rural,location_15-20-30 Rural,location_17-15-30,location_17-16-30-Fayetteville Outlots,location_18-19-30-Rogers,location_18-20-31-RURAL,location_22-19-30-ROGERS,location_23-17-28,location_23-19-30,location_24-20-32-RURAL,location_27-16-30 Fayetteville Outlots,location_27-19-29-RURAL,location_27-21-30-RURAL,location_28-19-29-RURAL,location_29-20-29 RURAL,location_33-19-30-Rogers,location_36-17-31,location_AJ SUB-BENTONVILLE,location_ANNIKA ESTATES PH II-CENTERTON,location_Academy Add Rogers,location_Aiken Point Sub Rurban,location_Allied Sub Trac 2,location_Allied Sub Tract 2,location_Altons Brush Creek Hls Rogers,location_Amended Happy Home Add Bentonville,location_Amis Road Add Rogers,location_Anderson Farm,location_Angel Falls,location_Angel Falls North Bentonville,location_Annika Estates,location_Annika Estates Ph 1 Centerton,location_Annika Estates Ph Ii Centerton,location_Atalanta Point Sub Rogers,location_Aurora,location_Aurora Ph 1,location_Aurora Sub Ph 2 Bentonville,location_Aurora Sub Ph I Bentonville,location_Autumn Hills,location_Autumn Hills Bentonville,location_B F Sikes Add,location_Baldwin,location_Banks,location_Banks Add Bentonville,location_Banks 
Addition,location_Beaver Shores Unit 1 Rurban,location_Beaver Shores Unit 2 Rurban,location_Beaver Shores Unit 4 Rurban,location_Beaver Shores Unit 6 Rurban,location_Beavorama Park Sub,location_Bellafont Gardens Hpr,location_Bellawood Add,location_Belle Heights Bentonville,location_Bentonville Orig Bentonville,location_Bird Haven Terrace,location_Blackburn Add Rogers,location_Braithwaite Park Add Bentonville,location_Bridgeport Sub Ph Vii,location_Brighton Cottages,location_Brighton Cottages Sub Bentonville,location_Brighton Heights Sub Bentonville,location_Brookbury Woods,location_Brooklands @ Mountain Ranch,location_Brookside Estates,location_Brookside Estates Sub Centerton,location_Brophy Add I Ph I,location_Browns Port Sub Rurban,location_Burks Addition,location_Burl Dodd Add,location_Cahill Add Bentonville,location_Cambridge Park Ph 3 Rogers,location_Camden Way Sub Rogers,location_Cardinal Creek Ph 2,location_Cedarwood Add Ph Ii,location_Champions Estates Rogers,location_Champions Estates-Rogers,location_Chapel Hill Ph 1 Bentonville,location_Chardonnay Sub,location_Chardonnay Sub Bentonville,location_Chestnut Farms Sub,location_City/Rogers,location_Clabber Creek Sub Ph I,location_Clarks 2nd Add Bentonville,location_Clarks Add Bentonville,location_Clearwood Crossings Sub,location_Clover Creek Ii,location_Clower Sub Rogers,location_Cobble Creek Sub Rogers,location_College Place Sub PH9-Bentonville,location_College Place Sub Ph 2 Bentonville,location_Commons At Walnut Crossing,location_Copper Creek Sub,location_Copper Mine Acres Rurban,location_Copper Oaks Centerton,location_Cornerstone Ridge,location_Cornerstone Ridge Sub Ph 6 Bentonville,location_Cottonwood Place,location_Country Club Estates Rogers,location_County Court,location_Covington Trace Ph 1 Rurban,location_Coyote Trail,location_Creekside,location_Creekside Meadows,location_Creekside Sub Ph 1 Centerton,location_Creekstone Sub Ph I Bentonville,location_Crescent Lake,location_Crestwood Acres 
Add,location_Crossover Heights Ph I,location_Crystal Springs,location_Crystal Springs Phase II,location_Curtis Heights Bentonville,location_Deer Haven,location_Demings 2nd Add,location_Demings Add Bentonville,location_Dickson,location_Dickson Add Bentonville,"location_District, The Bentonville",location_Dixieland Crossing,location_Double Tree Estates,location_Dream Valley 2 Rurban,location_Driggs Sub Rurban,location_Driggs Subdivision,location_Duckworths Add Rogers,location_Dunn & Davis Add Bentonville,location_Dutchmans 8th Add Rurban,location_Dutton Woods,location_East Side 3rd Add Bentonville,location_Eastgate Add,location_Eastwood Sub,location_Edgehill Sub,location_Elkins Outlots,location_Esculapia Estates,location_Estates At Dogwood Canyon,location_Evans Farm,location_FEATHERSTON VILLAGE PH 1-CENTERTON,location_Fairview Heights Add Bentonville,location_Farmington Heights,location_Fayetteville,location_Fayetteville Outlots,location_Fountain Park Rplt Park Sp Bentonville,location_GLEN ARBOR SUB-BENTONVILLE,location_Garrett Road Sub Rogers,location_Gilchrist Meadows,location_Gilmores ADD Bentonville,location_Gilmores Add Bentonville,location_Glen Arbor,location_Glendale Add,location_Goose Creek Village,location_Goshen Outlots,location_Grammercy Park Sub Ph 1 Bentonville,location_Greenacres Sub Rurban,location_Greenridge Add Blk 3 Bentonville,location_Greenstone Estates Rurban,location_Guinn,location_Harters Fairview Add,location_Hawkins House At Shiloh,location_Haxton Estates Rogers,location_Healing Spgs Rurban,location_Hearthstone,location_Hendrix Sub Bentonville,location_Heritage Bay,location_Hexham Sub Bvv,location_Hidden Lake Estates Fay,location_Hill Avenue Townhomes,location_Hillcrest Add Rev Rogers,location_Hilldale,location_Hilldale Sub Centerton,location_Huffman Add Bentonville,location_Hughmount Village,location_Hyland Park Phase IV,location_Hyland Park Rogers,location_Idle Wheel Mhp Rogers,location_Indian Head Park Rurban,"location_Iveys Ph III, The 
Rogers",location_J D Neill,location_J Wade Sikes Park,location_Jackson Heights,location_Jackson Place,location_Justin Add,location_Kensington Ph 1,location_Kerelaw Castle Bentonville,location_Kiphart Sub 2Nd Rogers,location_Lakeview Add Rogers,location_Lakeview Heights,location_Lakewood Estates Rurban,location_Larimore & Garner,location_Larue Acres,location_Lee Valley Sub Ph IV,location_Lefors Add Bentonville,location_Legacy Bldg Hpr,location_Legacy Pointe,location_Legacy Pointe Ph Ii & Iii,location_Lenham Heights,location_Lewisbaldwin Rev,location_Liberty Bell North Rogers,location_Lierly Lane Sub,location_Lincoln & Rice Add Bentonville,location_Mack Grimes 2Nd,location_Magnolia Estates Bentonville,location_Magnolia Park,location_Maidstone Sub Bentonville,location_Maplewood Addition,location_Masonic Add,location_McClain Ridge At Woods Creek Bentonville,location_Mcgaugheys Orchard Add,location_Meadowlands,location_Mission Estates,location_Monte Ne Shores,location_Morningside Estates Ph 4 Centerton,location_Nelson Valley Estates,location_North Heights Add,location_OSAGE HILLS SUB PH 1-BENTONVILLE,location_OZ Village,location_Oak Tree Centerton,location_Oak View ADD,location_Oak Woods Condos Hpr,location_Oakbrooke Sub Ph 1,location_Oakhills Sub Rurban,location_Oakhurst Add Bentonville,location_Oakland Hills Add,location_Osage Hills,location_Osage Hills Sub Ph 1 Bentonville,location_Oz Village,location_Ozark Hills,location_Ozark Hills Rurban,location_Paradise Valley Sub Tr B,location_Park Meadows,location_Parkers Valley View Acres,location_Pine St,location_Pine Street,location_Piney Point Add Rurban,location_Pinnacle,location_Pinnacle Golf & Country Club,location_Pinnacle Station Condos Horiz Prop Reg-R,"location_Pinnacle Sub Ph 1, The Rogers",location_Pinnacle Sub Ph 4 Rogers,location_Pinnacle Village,location_Pinnacle Vlg,"location_Pinnacle, Champions Golf & Cc Rogers",location_Pleasant Acres Sub Rogers,"location_Point At Falls Hollow, The Rurban",location_Point 
Pleasant,location_Preston Park,location_Preston Park Phase 2,location_Providence Village,location_Providence Village Ph Ii Bentonville,location_Providence Village Ph3,location_Putmans 3rd Add Rurban,location_Quailridge,location_Quailridge Ph I Centerton,location_Quailridge Ph Ii Centerton,location_Quailridge Ph-1,location_Railroad Add Bentonville,location_Razorback Add Bentonville,location_Red Oak Hills Rurban,location_Ridgecrest Meadows Ph 4 Rogers,location_Ridgewood,location_River Oaks Estates,location_Rivercliff,location_Rivercliff Village Rurban,location_Riverside Estates,location_Riverwalk,location_Riverwalk Farm,location_Riverwalk Farm Estates Ph Iv Bentonville,location_Riverwalk Ph II,location_Riverwalk Ph IV,location_Riverwalk Sub Ph 4,location_Riverwalk Sub Ph I,location_Roberts & Musteen Add Rogers,location_Robin Haven Add Bentonville,location_Robinson Mountain Estates,location_Robinwood Estates 01 Ph 02,location_Rockhaven,location_Rockwood Add,location_Rodgers Place,location_Rosewood Add Rogers,location_Runnymede Subdivision,location_Rupple Meadows,location_Rural,location_SKY VALLEY ESTATES-ROGERS,location_SUBDIVISION OPTIONS,location_Salem Heights Sub Ph I,location_Sanford Green,location_Seminole Park,location_Shadow Valley,location_Shadow Valley PUD Ph Vii Rogers,location_Shadow Valley Ph VII,location_Silver Meadows,location_Silverthorne Subdivision,location_Simsberry Place Ph 2 Bentonville,location_Sloanbrooke,location_Snyder Station Ph Ix Highfill,location_South Hampton Add,location_Southern Hills 2nd Rogers,location_Southern Woods,location_Southside Sub Bentonville,location_Southwinds,location_Spinnaker Rdg At Woods Crk Ph 2,location_Spring Hollow Estates,location_St James Park Sub,location_St. 
James Park,location_Stadium Centre Cottages,location_Stone Hollow Residential Sub,location_Stone Meadow Add Bentonville,location_Stone Meadow Add-Bentonville,location_Stonebridge Meadows Ph I,location_Stonebridge Meadows Ph Iv,location_Stonebridge Meadows Ph V,location_Stonebridge Meadows ph V Sub,location_Stonebrook,location_Stonebrook S/D,location_Stoney Creek Place Sub-Rogers,location_Strawberry Hill,location_Summit Heights Rogers,location_Summit Place,location_Sunset Add,location_Sunset Bay,location_T J HOLLANDS SUB-BENTONVILLE,location_The Element,location_The Elements,location_The Estates of Lakeway,location_The Hollows at Beaver Lake,location_The Knolls,location_The Pines at Orchard Park,location_The Woodlands,location_Three Oaks Townhomes,location_Timber Ridge Estates,location_Towne West,location_Townsends 2nd Add Rurban,location_Townsends Add Rurban,location_Township Heights,location_Treetops Sub,location_Trenton Heights,location_Valley West Sub Rogers,location_Village On Shiloh Hpr,location_Vineyard Sub,location_W A BURK'S ADD-BENTONVILLE,location_W A Burks,location_W A Burks Add Bentonville,location_WILLOWBROOK FARMS SUB PH II-BENTONVILLE,location_WOODRIDGE S/D PH III & IV,location_Wallace Add Rogers,location_Warren Glen Rogers,location_Watson Sub Rogers,location_Wedington Woods,location_West Haven Sub,location_Westside Add Bentonville,location_White Oak Trails Sub Ph 1 Bentonville,location_Wildwood Sub Ph 2 Bentonville,location_Wildwood Sub Ph 6 Bentonville,location_Willow Bend,location_Willowbrook Farms Sub Ph Ii Bentonville,location_Willowbrook Farms SubdivPH II-BENTONVILL,location_Wilson-Dunn,location_Wilsonadams Add,location_Windwood Sub Ph 4 Bentonville,location_Wire Ridge Rogers,location_Woodlands Crossing Ph I Bentonville,location_Woodridge,location_Woodridge Sub Ph III & IV,location_Woods Creek,location_Woodward Hills,location_Woodward Hills Sub Ph 1 & Ph 2 Highfill,location_Woodward Hills Sub Ph 4 
Highfill,status_Active,url,address,population,median_age,median_family_income,per_capita_income,total_households,owner_occupied_households,renter_occupied_households,transportation_total,transportation_drove,transporation_carpooled,transporation_public_transit,transportation_walked,transportation_other,transportation_wfh,total_units,occupied_units,vacant_units,owner_occupied_units,renter_occupied_units,total_income_poverty,total_income_below_poverty,gini_index
0,0,72704,1050000.0,4.0,3.5,3935.0,33928.0,2012.0,1.0,267.0,13.0,36.10313,-94.245292,7271.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,https://www.redfin.com/AR/Fayetteville/5917-W-...,5917 W Kennedy Dr,27740,32.0,97791,43172,27347,17131,10216,16851,13089,1124,94,116,113,2315,12901,11967,934,7141,4826,27689,2207,0.422
1,1,72704,345000.0,3.0,2.0,1622.0,9456.0,2010.0,1.0,213.0,0.0,36.095251,-94.209051,861.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,https://www.redfin.com/AR/Fayetteville/2360-N-...,2360 N Caney Dr,27740,32.0,97791,43172,27347,17131,10216,16851,13089,1124,94,116,113,2315,12901,11967,934,7141,4826,27689,2207,0.422
2,3,72704,249000.0,2.0,2.5,1368.0,3406.0,2008.0,1.0,182.0,0.0,36.052967,-94.205851,5192.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,https://www.redfin.com/AR/Fayetteville/3173-W-...,3173 Old Farmington Rd,27740,32.0,97791,43172,27347,17131,10216,16851,13089,1124,94,116,113,2315,12901,11967,934,7141,4826,27689,2207,0.422
3,12,72704,279500.0,2.0,2.5,1138.0,7405.0,2023.0,5.0,246.0,0.0,36.053785,-94.203082,6950.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,https://www.redfin.com/AR/Wheeler/3093-W-Salid...,3093 W Salida Ln,27740,32.0,97791,43172,27347,17131,10216,16851,13089,1124,94,116,113,2315,12901,11967,934,7141,4826,27689,2207,0.422
4,13,72704,293500.0,2.0,2.5,1195.0,4356.0,2022.0,5.0,246.0,0.0,36.055171,-94.204172,2434.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,https://www.redfin.com/AR/Wheeler/3056-W-Salid...,3056 W Salida Ln,27740,32.0,97791,43172,27347,17131,10216,16851,13089,1124,94,116,113,2315,12901,11967,934,7141,4826,27689,2207,0.422


## Feature Engineering and Selection
* Calculate % owner occupied housing
* Calculate % income below poverty line
* Calculate % vacant units
* Rename the sale type, property type, and city indicator columns to short names
* Drop city indicator columns other than Bentonville, Fayetteville, Rogers, and Springdale

In [None]:
# Short names for the sale-type, property-type and retained city indicator columns.
new_col_names = {
    'sale type_For-Sale-by-Owner Listing': 'fsbo',
    'sale type_MLS Listing': 'mls',
    'sale type_New Construction Home': 'new_construction',
    'sale type_New Construction Plan': 'new_plan',
    'property type_Condo/Co-op': 'condo',
    'property type_Multi-Family (2-4 Unit)': 'multi_family_2-4',
    'property type_Multi-Family (5+ Unit)': 'multi_family_5+',
    'property type_Single Family Residential': 'single family',
    'property type_Townhouse': 'townhouse',
    'city_Bentonville': 'bentonville',
    'city_Fayetteville': 'fayetteville',
    'city_Rogers': 'rogers',
    'city_Springdale': 'springdale',
}

# Build a new frame (all_data itself is left untouched): append the three
# ratio features, then apply the short column names.
df = (
    all_data
    .assign(
        poverty_rate=all_data['total_income_below_poverty'] / all_data['total_income_poverty'],
        homeownership_rate=all_data['owner_occupied_units'] / all_data['occupied_units'],
        vacancy_rate=all_data['vacant_units'] / all_data['total_units'],
    )
    .rename(columns=new_col_names)
)

# drop columns that won't be used to make recommendations
# The prefix filters run after the rename, so the renamed city columns are kept.
# The 'transp' prefix also matches the misspelled 'transporation_*' columns.
location_cols = df.columns[df.columns.str.startswith('location')].tolist()
city_cols = df.columns[df.columns.str.startswith('city')].tolist()
transportation_cols = df.columns[df.columns.str.startswith('transp')].tolist()
misc_cols = [
    'Unnamed: 0', 'zip', '$/square feet', 'latitude', 'longitude', 'id',
    'status_Active', 'address', 'total_units', 'occupied_units', 'vacant_units',
    'owner_occupied_units', 'renter_occupied_units', 'total_income_poverty',
    'total_income_below_poverty', 'total_households', 'owner_occupied_households',
    'renter_occupied_households', 'gini_index',
]
drop_cols = [*location_cols, *city_cols, *transportation_cols, *misc_cols]

df = df.drop(columns=drop_cols)
# Shapes before and after the reduction.
print(all_data.shape)
print(df.shape)

(668, 431)
(668, 29)


In [None]:
# Preview the reduced feature set.
df.head()

Unnamed: 0,price,beds,baths,square feet,lot size,year built,days on market,hoa/month,fsbo,mls,new_construction,new_plan,condo,multi_family_2-4,multi_family_5+,single family,townhouse,bentonville,fayetteville,rogers,springdale,url,population,median_age,median_family_income,per_capita_income,poverty_rate,homeownership_rate,vacancy_rate
0,1050000.0,4.0,3.5,3935.0,33928.0,2012.0,1.0,13.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,https://www.redfin.com/AR/Fayetteville/5917-W-...,27740,32.0,97791,43172,0.079707,0.596724,0.072397
1,345000.0,3.0,2.0,1622.0,9456.0,2010.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,https://www.redfin.com/AR/Fayetteville/2360-N-...,27740,32.0,97791,43172,0.079707,0.596724,0.072397
2,249000.0,2.0,2.5,1368.0,3406.0,2008.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,https://www.redfin.com/AR/Fayetteville/3173-W-...,27740,32.0,97791,43172,0.079707,0.596724,0.072397
3,279500.0,2.0,2.5,1138.0,7405.0,2023.0,5.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,https://www.redfin.com/AR/Wheeler/3093-W-Salid...,27740,32.0,97791,43172,0.079707,0.596724,0.072397
4,293500.0,2.0,2.5,1195.0,4356.0,2022.0,5.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,https://www.redfin.com/AR/Wheeler/3056-W-Salid...,27740,32.0,97791,43172,0.079707,0.596724,0.072397


In [None]:
# Save the engineered feature set as CSV.
# NOTE(review): the row index is written as an unnamed first column, which shows
# up as 'Unnamed: 0' when the file is read back — confirm downstream readers expect it.
path = '/content/drive/MyDrive/Colab Notebooks/Regis Practicum II/all_data.csv'
df.to_csv(path)