# User Input
Before building the recommendation system, I need to decide which features to ask the user for.

**Personal Characteristics**
* age
* individual income
* household income
* do you want high or low density housing?
* how wealthy a neighborhood do you want to live in?

**Property Characteristics**
* price
* beds
* baths
* square footage
* property type
* property age (newer vs. older)
* existing vs new build

## Import Dependencies & Load Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Read the combined listings file (first CSV column is the saved index), then
# re-index the rows by listing URL.
df = pd.read_csv(
  '/content/drive/MyDrive/Colab Notebooks/Regis Practicum II/all_data.csv',
  index_col=0,
).set_index('url')

# Define User Class

In [3]:
class user:
  """Preferences and personal characteristics supplied by one home buyer.

  The numeric attributes are packed by ``get_array`` into the vector that is
  compared against listings; ``property_types``, ``cities`` and ``new_build``
  are kept as-is for filtering.
  """

  def __init__(self, price, beds, baths, square_feet, lot_size, property_age, age, household_income, individual_income, property_types, cities, new_build, reference_year=2023):
    """Store the buyer's preferences.

    Args:
      price: Desired listing price.
      beds: Desired number of bedrooms.
      baths: Desired number of bathrooms.
      square_feet: Desired interior square footage.
      lot_size: Desired lot size.
      property_age: Desired age of the property in years; converted to a
        build year relative to ``reference_year``.
      age: The buyer's age.
      household_income: The buyer's household income.
      individual_income: The buyer's individual income.
      property_types: Names of the acceptable property types.
      cities: Names of the acceptable cities.
      new_build: Whether the buyer is open to new builds.
      reference_year: Year that ``property_age`` is measured back from.
        Defaults to 2023, the value that was previously hard-coded.
    """
    self.price = price
    self.beds = beds
    self.baths = baths
    self.square_feet = square_feet
    self.lot_size = lot_size
    # Stored as a build year rather than an age.
    self.year_built = reference_year - property_age
    self.age = age
    self.household_income = household_income
    self.individual_income = individual_income
    self.property_types = property_types
    self.cities = cities
    self.new_build = new_build

  def get_array(self):
    """Return the numeric preferences as a 1-D NumPy array.

    Order: price, beds, baths, square_feet, lot_size, year_built, age,
    household_income, individual_income.
    """
    # NOTE(review): consumers that compare this vector position-by-position
    # against listing columns depend on this exact order -- confirm it matches
    # the column order of the listings data.
    return np.array([
      self.price,
      self.beds,
      self.baths,
      self.square_feet,
      self.lot_size,
      self.year_built,
      self.age,
      self.household_income,
      self.individual_income,
    ])

# Define Recommendation Engine

In [4]:
def _any_flag_set(frame, columns):
  """Return a positional boolean array: True where any of ``columns`` equals 1."""
  return frame[list(columns)].eq(1).any(axis=1).to_numpy()


def filter_listings(data, property_types, cities, new_build):
  """Return the listings matching the requested property types and cities.

  Args:
    data: Listings DataFrame holding one indicator column (value 1 when set)
      per property type and per city, plus ``new_construction`` and
      ``new_plan`` indicator columns.
    property_types: Names of the property-type indicator columns to accept.
      A listing is kept when at least one of them equals 1.
    cities: Names of the city indicator columns to accept. A listing is kept
      when at least one of them equals 1.
    new_build: When exactly ``False``, listings flagged ``new_construction``
      or ``new_plan`` are removed.

  Returns:
    A new DataFrame (``data`` is not modified) with the matching rows in their
    original order. Each input row appears at most once, and the result is
    empty when ``property_types`` or ``cities`` is empty.

  Raises:
    KeyError: If a requested column is missing from ``data``.
  """
  # Positional boolean masks are used instead of concatenating one query
  # result per column: a row matching several requested flags is kept once,
  # and rows that share an index label are handled independently.
  keep = _any_flag_set(data, property_types) & _any_flag_set(data, cities)

  # If new_build is false, remove rows for new plans and new construction
  if new_build is False:
    keep &= data['new_construction'].ne(1).to_numpy()
    keep &= data['new_plan'].ne(1).to_numpy()

  # Copy so callers can modify the result in place without touching `data`.
  return data.loc[keep].copy()

def weight_recommendations(data, recommendations):
  """Scale each recommendation score by three rates looked up in ``data``.

  Every score is multiplied, in this order, by ``1 - poverty_rate``,
  ``homeownership_rate`` and ``1 - vacancy_rate`` of the row in ``data`` that
  carries the same index label.

  Args:
    data: DataFrame containing ``poverty_rate``, ``homeownership_rate`` and
      ``vacancy_rate`` columns, indexed by the labels used in
      ``recommendations``.
    recommendations: Series of scores.

  Returns:
    Series of weighted scores.
  """
  # Fetch the three rate columns for the recommended listings in one lookup
  listing_ids = recommendations.index
  rates = data.loc[listing_ids, ['poverty_rate', 'homeownership_rate', 'vacancy_rate']]
  multipliers = (
    1 - rates['poverty_rate'],
    rates['homeownership_rate'],
    1 - rates['vacancy_rate'],
  )
  # Apply the multipliers one after another
  weighted = recommendations
  for multiplier in multipliers:
    weighted = weighted * multiplier
  return weighted

def recommend(data, user, num_recs):
  """Return the ``num_recs`` best-scoring listings for ``user``.

  Listings are filtered with ``filter_listings``, scored by cosine similarity
  against ``user.get_array()``, re-weighted with ``weight_recommendations``
  and sorted from highest to lowest score.

  Args:
    data: Listings DataFrame indexed by URL.
    user: Object exposing ``property_types``, ``cities``, ``new_build`` and
      ``get_array()``.
    num_recs: Maximum number of listings to return.

  Returns:
    DataFrame holding the property-information columns of the selected
    listings, best match first. It is empty when no listing passes the filter.
  """
  # Filter data according to property types, cities, and if the user is open to new construction
  filtered_data = filter_listings(data, user.property_types, user.cities, user.new_build)
  # Drop categorical columns and columns not used to make recommendations
  drop_cols = [
    'fsbo', 'mls', 'new_construction', 'new_plan', 'condo', 'multi_family_2-4',
    'multi_family_5+', 'single family', 'townhouse', 'bentonville',
    'fayetteville', 'rogers', 'springdale', 'days on market', 'hoa/month',
    'population', 'poverty_rate', 'homeownership_rate', 'vacancy_rate',
  ]
  features = filtered_data.drop(columns=drop_cols)

  # Columns with property information returned to the caller
  property_cols = ['price', 'beds', 'baths', 'square feet', 'lot size', 'year built', 'days on market']

  # cosine_similarity rejects an input with zero rows, so return early.
  if features.empty:
    return data[property_cols].head(0)

  # Get np.array of user info and calculate cosine similarity with listings matrix
  # NOTE(review): the comparison is positional, so the remaining columns are
  # assumed to be in the same order as user.get_array() -- confirm.
  user_arr = user.get_array().reshape(1, -1)
  scores = cosine_similarity(features, user_arr).ravel()
  # Convert array back to pd.Series to maintain index of URLs
  recommendations = pd.Series(scores, index=features.index)
  # Weight, then sort recommendations
  weighted_recs = weight_recommendations(data, recommendations)
  sorted_recs = weighted_recs.sort_values(ascending=False)
  # Look the rows up in `data` (the argument), not in the notebook-level `df`
  return data.loc[sorted_recs.index, property_cols].head(num_recs)

In [5]:
# Example buyer used to exercise the recommendation engine
user_a = user(
  price=375000,
  beds=3,
  baths=2,
  square_feet=2000,
  lot_size=5000,
  property_age=10,
  age=28,
  household_income=200000,
  individual_income=48000,
  property_types=['single family', 'multi_family_2-4'],
  cities=['rogers', 'fayetteville'],
  new_build=False,
)

In [6]:
# Show the top 7 recommendations for user_a, drawn from the full listings table
recommend(df, user_a, 7)

Unnamed: 0_level_0,price,beds,baths,square feet,lot size,year built,days on market
url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
https://www.redfin.com/AR/Garfield/12351-Fire-Run-Rd-72732/home/114982400,599000.0,3.0,2.0,1740.0,217800.0,2009.4,14.0
https://www.redfin.com/AR/Rogers/1306-W-Pecan-St-72758/home/115847725,249000.0,2.0,2.0,1836.0,13503.0,1970.0,11.0
https://www.redfin.com/AR/Rogers/1603-S-17th-St-72758/home/115197381,280000.0,4.0,2.5,1470.0,10018.0,1994.0,14.0
https://www.redfin.com/AR/Rogers/1410-W-Pine-St-72758/home/114619858,295000.0,3.0,2.0,1690.0,11761.0,1971.0,9.0
https://www.redfin.com/AR/Fayetteville/3173-W-Old-Farmington-Rd-72704/home/126344008,249000.0,2.0,2.5,1368.0,3406.0,2008.0,1.0
https://www.redfin.com/AR/Fayetteville/4496-W-Pecan-St-72704/home/126723434,259900.0,3.0,2.0,1211.0,4055.0,2009.0,12.0
https://www.redfin.com/AR/Rogers/2506-S-D-St-72758/home/176836204,315000.0,4.0,2.0,1482.0,6098.0,2021.0,5.0
