In [None]:
import re
from datetime import datetime
from time import sleep

import matplotlib.pyplot as plt
import pandas as pd
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

import config

In [None]:
# Chrome launch options: pass the flag that disables notifications
chrome_options = Options()
for flag in ("--disable-notifications",):
    chrome_options.add_argument(flag)

In [None]:
# Start Chrome and open the Facebook home page.
# NOTE: set path_to_chromedriver in config.py to your own chromedriver path.
# The driver path is handed over through a Service object: current Selenium 4
# releases no longer accept it as the first positional argument of
# webdriver.Chrome (requires Selenium >= 4).
chromedriver_service = Service(executable_path=config.path_to_chromedriver)
driver = webdriver.Chrome(service=chromedriver_service, options=chrome_options)
driver.get("https://www.facebook.com")

In [None]:
# Log in with the credentials stored in config.py
# (set my_username and my_password there to your own values).
# Each until() call waits up to 5 seconds for its input to become clickable.
login_wait = WebDriverWait(driver, 5)
username = login_wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='email']")))
password = login_wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='pass']")))

username.clear()
username.send_keys(config.my_username)
password.clear()
password.send_keys(config.my_password)

# click() returns None, so the submit element is stored first and clicked on
# its own line; `button` then refers to the element rather than to None.
button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']")))
button.click()
sleep(8)  # fixed pause after submitting; presumably lets the post-login page load

In [None]:
# Navigate to the Facebook group page, then pause for a fixed 8 seconds
group_url = 'https://www.facebook.com/groups/110354088989367'
driver.get(group_url)
sleep(8)

In [None]:
# Scroll to the bottom of the page repeatedly, pausing after each scroll
# (presumably so that more posts get loaded before the page is parsed).
scroll_rounds = 30
pause_seconds = 5
for _ in range(scroll_rounds):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(pause_seconds)

In [None]:
# Parse the rendered page and collect every <div> whose class attribute is
# exactly the string below (presumably the per-post containers).
page_html = driver.page_source
soup = BeautifulSoup(page_html, "html.parser")
post_container_class = 'du4w35lb k4urcfbm l9j0dhe7 sjgh65i0'
all_posts = soup.find_all('div', class_=post_container_class)

In [None]:
# Number of post containers found on the scrolled page.
# NOTE(review): the figure in the trailing comment was recorded for 50 scroll
# rounds, while the scroll loop in this notebook uses range(0,30).
len(all_posts) # reference run: 282 posts when scrolled 50 times - range(0, 50)

In [None]:
def get_bedrooms(my_str):
    """Return the bedroom phrase mentioned in a post, or 1 if there is none.

    The text is searched (case-insensitively) for a whitespace-free token
    followed by a space and one of "bed", "beds", "bedroom" or "bedrooms".

    Args:
        my_str: Text of the post.

    Returns:
        The matched phrase as a string (e.g. ``'2 Beds'`` or ``'3 bedrooms'``);
        the token before the space is whatever precedes the word, which is not
        necessarily a number. When nothing matches, the integer ``1`` is
        returned as the default bedroom count.
    """
    # A single pattern covers both spellings; the optional "room" lets the
    # match extend over the full word "bedroom(s)" instead of stopping at "bed".
    match = re.search(r'([^ \r\n]+) bed(?:room)?s?', my_str, re.IGNORECASE)
    if match:
        return match.group(0)
    return 1  # one bedroom by default if not specified

In [None]:
# Manual check of get_bedrooms on a sample listing title
msg = '2 Beds 1 Bath - Apartment203 Albert St, Waterloo, ON N2L 3T4, Canada'
bedroom_phrase = get_bedrooms(msg)
print('bedroom:', bedroom_phrase)

In [None]:
def is_looking(my_str):
    """Classify a post as a housing request, a housing offer, or unknown.

    The check is a case-insensitive keyword search; 'looking' takes
    precedence over the offer keywords.

    Args:
        my_str: Text of the post.

    Returns:
        True if the text contains 'looking', False if it contains
        'available' or 'offer', and None if it contains none of these.
    """
    my_str = my_str.lower()
    if 'looking' in my_str:
        return True
    # Each keyword needs its own membership test: a bare non-empty string
    # literal in an `or` chain is always truthy.
    if 'available' in my_str or 'offer' in my_str:
        return False
    return None

In [None]:
# Parallel accumulators: one entry per parsed post in each list
price_list, bedrooms_list, isLookingFor_list = [], [], []

In [None]:
# Parse every scraped post into a price, a bedroom count and a "looking for"
# flag, appending one entry per post to the three result lists.

# Prices at or above this value are divided by the bedroom count
# (presumably because such a price is for the whole unit -- TODO confirm).
WHOLE_UNIT_PRICE_THRESHOLD = 1600

for post in all_posts:
    # Price: digits of the first whitespace-separated token of the price
    # label. A post without the price element, or with an empty or
    # digit-free label, gets price 0 instead of raising.
    price_span = post.find('span', attrs={'class': 'sqxagodl'})
    price_div = price_span.find('div') if price_span else None
    price_tokens = price_div.getText().split() if price_div else []
    price_digits = ''.join(ch for ch in price_tokens[0] if ch.isdigit()) if price_tokens else ''
    price = int(price_digits) if price_digits else 0

    # Message text; empty string when the post has no message element.
    message_div = post.find('div', attrs={'data-ad-comet-preview': 'message'})
    message = str(message_div.getText()) if message_div else ''

    # Bedroom count: digits of the first token returned by get_bedrooms,
    # falling back to 1 when that token contains no digit.
    bedroom_text = get_bedrooms(message)
    bedroom_digits = ''.join(ch for ch in str(bedroom_text).split()[0] if ch.isdigit())
    bedroom = int(bedroom_digits) if bedroom_digits else 1

    # The bedroom > 0 guard avoids a ZeroDivisionError for texts like "0 bed".
    if price >= WHOLE_UNIT_PRICE_THRESHOLD and bedroom > 0:
        price //= bedroom

    price_list.append(price)
    bedrooms_list.append(bedroom)
    isLookingFor_list.append(is_looking(message))

In [None]:
# Spot-check the first ten entries of each result list
preview_count = 10
price_list[:preview_count], bedrooms_list[:preview_count], isLookingFor_list[:preview_count]

In [None]:
# Lengths of the price and bedroom lists, shown side by side
tuple(map(len, (price_list, bedrooms_list)))

In [None]:
# Column name -> list of values, used to build the DataFrame
column_names = ['price', 'bedroom', 'isLookingFor']
column_values = [price_list, bedrooms_list, isLookingFor_list]
data = dict(zip(column_names, column_values))

In [None]:
# Check whether each of the first five bedroom counts is an int
first_five = bedrooms_list[:5]
list(map(lambda count: isinstance(count, int), first_five))

In [None]:
# Build the table, write it to an Excel file (without the index column),
# and preview the first five rows.
output_path = 'housing_list.xlsx'
df = pd.DataFrame(data=data)
df.to_excel(output_path, index=False)
df.head(5)

In [None]:
# Remove rows with a price below 300 or above 5000, then rows with more
# than 7 bedrooms. Both drops modify df in place.
price_outliers = df[(df.price < 300) | (df.price > 5000)].index
df.drop(index=price_outliers, inplace=True)
bedroom_outliers = df[df.bedroom > 7].index
df.drop(index=bedroom_outliers, inplace=True)

In [None]:
# Summary statistics per bedroom count, restricted to rows where isLookingFor is True
looking_posts = df.groupby('isLookingFor').get_group(True)
looking_posts.groupby('bedroom').describe()

In [None]:
# All rows whose bedroom count is 2
two_bedroom_posts = df.groupby('bedroom').get_group(2)
two_bedroom_posts

In [None]:
# Summary statistics grouped by bedroom count
bedroom_groups = df.groupby('bedroom')
bedroom_groups.describe()

In [None]:
# Summary statistics grouped by bedroom count
# (same expression as the previous cell)
df.groupby(by='bedroom').describe()

In [None]:
# Group the table by bedroom count for the aggregations and charts below
by_bedroom = df.groupby(by='bedroom')

In [None]:
# Containers for the chart data: pie labels and values, and a mapping from
# bedroom count to a price figure.
bedroom_labels, bedroom_values = [], []
price_by_bedroom = dict()

In [None]:
# Display the groupby object's repr
by_bedroom

In [None]:
# Fill the chart containers with one entry per bedroom count: the number of
# rows in the group and the group's mean price.
# Both aggregates are computed once, outside the loop, and the mean is taken
# over the 'price' column only so that other columns are never averaged.
group_sizes = by_bedroom.size()
mean_prices = by_bedroom['price'].mean()

for idx in group_sizes.index:
    bedroom_labels.append(idx)
    bedroom_values.append(group_sizes.loc[idx])
    price_by_bedroom[idx] = mean_prices.loc[idx]

In [None]:
# Display the bedroom count -> mean price mapping
price_by_bedroom

In [None]:
# Ring-style pie chart of the number of posts per bedroom count
colors = ['#ffadad', '#ffd6a5', '#fdffb6', '#caffbf']
# plt.pie needs exactly one explode offset per wedge, so the list is sized
# from the data instead of assuming four bedroom groups.
explode = [0.03] * len(bedroom_values)
wedgeprops = {'width': 0.6, 'edgecolor':'w', 'linewidth':2}
plt.pie(bedroom_values, labels=bedroom_labels, colors=colors, explode=explode, wedgeprops=wedgeprops)
plt.title("Number of Bedrooms")
plt.show()

In [None]:
# Bar chart of the price recorded for each bedroom count, y-axis fixed to 0-4000
bedroom_counts = list(price_by_bedroom.keys())
prices = list(price_by_bedroom.values())

fig, ax = plt.subplots()
ax.bar(bedroom_counts, prices)
ax.set_ylim(0, 4000)
ax.set_title("Price by Number of bedrooms")
ax.set_xlabel('Number of bedrooms')
ax.set_ylabel('Price')
plt.show()

#### Other things to consider:
- number of bedrooms
- number of washrooms
- number of roommates
- location (how far from UW?)
