In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import regex as re
import urllib
from requests_html import HTMLSession
import math

In [4]:
def get_source(url):
    try:
        session = HTMLSession()
        response = session.get(url)
        return response

    except requests.exceptions.RequestException as e:
        print(e)
        
def scrape_google(query):

    query = urllib.parse.quote_plus(query)
    response = get_source("https://www.google.co.uk/search?q=" + query)

    links = list(response.html.absolute_links)
    google_domains = ('https://www.google.', 
                      'https://google.', 
                      'https://webcache.googleusercontent.', 
                      'http://webcache.googleusercontent.', 
                      'https://policies.google.',
                      'https://support.google.',
                      'https://maps.google.')

    for url in links[:]:
        if url.startswith(google_domains):
            links.remove(url)
    links = [x for x in links if 'redfin' in x]
    return links
### Calculates Mortgage

def calc_mortgage(principal, interest, years):
        '''
        given mortgage loan principal, interest(%) and years to pay
        calculate and return monthly payment amount
        '''
        # monthly rate from annual percentage rate
        interest_rate = interest/(100 * 12)
        # total number of payments
        payment_num = years * 12
        # calculate monthly payment
        payment = principal * \
            (interest_rate/(1-math.pow((1+interest_rate), (-payment_num))))
        return payment

### Calculates Down Payment

def downpayment(price, percent):
        downpayment = (price * (percent/100))
        return downpayment

### Calculates Cap Rate

def capRate(income, price):
        return income / price

### Calculates Cash On Cash

def cashOnCash(profit, downpayment):
        return profit / downpayment

### Calculates Monthly Payment

def monthlypayment(homeInsurance, propertyTaxes, hoa, mortgage, otherExpenses, propertyManagementFee, vacancyRate):
        monthlyPayment = homeInsurance + propertyTaxes + hoa + mortgage + otherExpenses + propertyManagementFee + vacancyRate
        return monthlyPayment

### Vacancy Rate

def vacancyRate(state):
    vacancy = pd.read_csv('~\Calc\Data\RentalVacancy.csv')
    vacancy = pd.DataFrame(data = vacancy, columns=['State', 'Vacancy Rate'])
    vacancy = vacancy.loc[vacancy['State'] == state]
    vacancyRate = vacancy['Vacancy Rate']
    index = vacancyRate.index.tolist()
    vacancyRate = float(str(vacancyRate[index[0]]).replace('%', '')) 
    return vacancyRate / 100

In [5]:
request_headers = {
    'accept': 
    'text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US, en;q=0.8',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
}
with requests.Session() as session:
    url = str(scrape_google(str(input("Enter address: ")))[0])
    response = session.get(url, headers = request_headers)

soup = BeautifulSoup(response.content, 'html.parser')
### Address

street = soup.find('meta', {'name': 'twitter:text:street_address'})
street = street['content']
city = soup.find('meta', {'name': 'twitter:text:city'})['content']
state = soup.find('meta', {'name': 'twitter:text:state_code'})['content']
zipcode = soup.find('meta', {'name': 'twitter:text:zip'})['content']
latitude = str(soup.find('meta', {'name': 'geo.position'})['content']).split(';')[0]
longitude = str(soup.find('meta', {'name': 'geo.position'})['content']).split(';')[1]
address = street + ', ' + city + ', ' + state + ' ' + zipcode
neighborhood = '1'

### Beds

beds = int(soup.find('meta', {'name': 'twitter:text:beds'})['content'])

### Baths

baths = float(soup.find('meta', {'name': 'twitter:text:baths'})['content'])

### SQFT

sqft = int(str(soup.find('meta', {'name': 'twitter:text:sqft'})['content']).replace(',', ''))

### Property Type

propertyType = soup.find_all('div', {'class':'keyDetail font-weight-roman font-size-base'})
propertyType = str(propertyType).split('<div class')
propertyType = str([x for x in propertyType if 'Property' in x][0]).replace('="keyDetail font-weight-roman font-size-base"><span class="header font-color-gray-light inline-block">Property Type</span><span class="content text-right">','').replace('</span></div>, ', '')

# Financials

### Price

price = [td.findAll('div') for td in soup.findAll('div', attrs={'class': 'stat-block price-section'})][0][0]
price = price.text.replace('$','').replace(',', '')
price = float(price)

### Property Taxes

propertyTaxes = str(soup.find_all('div', attrs={'class': 'Row col-12 col-xl-6 padding-bottom-medium font-weight-roman font-size-base'})).split('<svg')
propertyTaxes = str([x for x in propertyTaxes if 'blue' in x][0])
propertyTaxes = float(re.search(r'([£$€])(\d+(?:\.\d{2})?)', propertyTaxes).groups()[1])

### Home Insurance

homeInsurance = str(soup.find_all('div', attrs={'class': 'Row col-12 col-xl-6 padding-bottom-medium font-weight-roman font-size-base'})).split('<svg')
homeInsurance = str([x for x in homeInsurance if 'yellow' in x][0])
homeInsurance = float(re.search(r'([£$€])(\d+(?:\.\d{2})?)', homeInsurance).groups()[1])

### HOA

a = str(soup.find_all('div', attrs={'class': 'Row col-12 col-xl-6 padding-bottom-medium font-weight-roman font-size-base'})).split('<svg')
b = [x for x in a if 'red' in x]
if len(b) == 0:
    hoa = 0
elif len(b) >= 1:
    b = str(b[0])
    hoa = float(re.search(r'([£$€])(\d+(?:\.\d{2})?)', b).groups()[1])

### Calculates Mortgage & DownPayment

percent = float(input("Down Payment Percent: "))
downPayment = downpayment(price, percent)

# mortgage loan principal
principal = price - downPayment
# percent annual interest
interest = float(input("Interest: "))
if interest == None:
    interest = 4
# years to pay off mortgage
years = int(input("Years: "))
# calculate monthly payment amount
mortgage = float(round(calc_mortgage(principal, interest, years), 2))
print("Down payment:", "${:,.2f}".format(downPayment))
print("Mortgage:", "${:,.2f}".format(mortgage))

# Long Term

address0 = street.replace(' ', '-') + '-' + city.replace(' ', '-') + '-' + state.lower().replace(' ',  '-')+ '-' + zipcode

if beds >= 6:
    beds1 = 6

propertyManagement = True

url = "https://www.zillow.com/rental-manager/price-my-rental/results/" + address0 + '/'
results = requests.get(url)
soup1 = BeautifulSoup(results.content, 'html.parser')

otherExpenses = float(input("Other Expenses: "))
if otherExpenses == None:
    otherExpenses = 0

### Gets Rent

traditionalincome = soup1.find('h2', {'class': 'Text-c11n-8-23-1__aiai24-0 cpSRFk'}).text.replace('$', '').replace(',', '').replace('/moEdit', '')
if(len(traditionalincome) == 0):
    traditionalincome = None
else:
    traditionalincome = float(traditionalincome)
    
if propertyManagement == True:
    propertyManagementFee = traditionalincome * .1
else:
    propertyManagementFee = 0
    
vacancyrate = traditionalincome * vacancyRate(state)

monthlyPayment = monthlypayment(homeInsurance, propertyTaxes, hoa, mortgage, otherExpenses, propertyManagementFee, vacancyrate)
print("Monthly Payment:", "${:,.2f}".format(monthlyPayment))

traditionalProfit = traditionalincome - monthlyPayment
traditionalCapRate = round((traditionalincome / price) * 100, 2)
traditionalCashOnCash = round((traditionalProfit / downPayment)* 100, 2)
print("Long Term Income:", "${:,.2f}".format(traditionalincome))

# Short Term

request_headers = {
    'accept': 
    'text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US, en;q=0.8',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
with requests.Session() as session:
    url= "https://www.mashvisor.com/cities/" + state.lower().replace(' ', '') + "/" + city.replace(' ', '-') + "-investment-property-guide"
    response = session.get(url, headers = request_headers)
soup2 = BeautifulSoup(response.content, 'html.parser')

### Calculates Closing Costs & All In

closingCosts = principal * .05
allin = downPayment + closingCosts

### Picks Price / Beds

if (beds <= 1):
    airbnbincome = pd.read_html(url) # Returns list of all tables on page
    airbnbincome = float(airbnbincome[0].iloc[0].iloc[1].replace('$ ', '').replace(',', ''))
elif (beds == 2):
    airbnbincome = pd.read_html(url) # Returns list of all tables on page
    airbnbincome = float(airbnbincome[0].iloc[0].iloc[2].replace('$ ', '').replace(',', ''))
elif (beds == 3):
    airbnbincome = pd.read_html(url) # Returns list of all tables on page
    airbnbincome = float(airbnbincome[0].iloc[0].iloc[3].replace('$ ', '').replace(',', ''))
elif (beds >= 4):
    airbnbincome = pd.read_html(url) # Returns list of all tables on page
    airbnbincome = float(airbnbincome[0].iloc[0].iloc[4].replace('$ ', '').replace(',', ''))
else:
    airbnbincome = None
        
airbnbProfit = airbnbincome - monthlyPayment
airbnbCapRate = round((airbnbincome / price) * 100, 2)
airbnbCashOnCash = round((airbnbProfit / downPayment) * 100, 2)
print("Short Term Income:", "${:,.2f}".format(airbnbincome))

### Defines Rules

fiftyPercent = traditionalincome / 2
breakEvenLT = (allin / traditionalProfit) / 12
breakEvenST = (allin / airbnbProfit) / 12
twoPercent = True

### Gets Desired Cash Flow, Cap Rate, & Cash On Cash

cashFlow = float(input("Enter desired Cash Flow:"))
capRate = float(input("Enter desired Cap Rate:"))
cashOnCash = float(input("Enter desired Cash On Cash:"))

request_headers = {
    'accept': 
    'text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US, en;q=0.8',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
with requests.Session() as session:
    url = 'https://datausa.io/profile/geo/' + city.replace(' ', '-').lower() + '-' + state.replace(' ', '-').lower() + '#housing'
    response = session.get(url, headers = request_headers)

soup1 = BeautifulSoup(response.content, 'html.parser')

propertyValue = soup1.findAll('div', {'class': 'topic property_value TextViz'})
propertyValue = str(propertyValue[0]).split('div')
propertyValue = str([x for x in propertyValue if 'stat-value' in x][0])
propertyValue = re.findall('\d+', propertyValue)
propertyValue = float(''.join(propertyValue))
propertyValue

commute = soup1.findAll('div', {'class': 'topic commute_time TextViz'})
commute = str(commute[0]).split('div')
commute = str([x for x in commute if 'stat-value' in x][0])
commute = re.findall('\d+', commute)
commute = float('.'.join(commute))

request_headers = {
    'accept': 
    'text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US, en;q=0.8',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
with requests.Session() as session:
    url = 'https://datausa.io/profile/geo/' + city.replace(' ', '-').lower() + '-' + state.replace(' ', '-').lower() + '#housing'
    response = session.get(url, headers = request_headers)

r = requests.get('https://datausa.io/api/data?drilldowns=State&measures=commute_time&year=latest')
r.content

own = soup1.findAll('div', {'class': 'topic rent_own TextViz'})
own = str(own[0]).split('div')
own = str([x for x in own if 'stat-value' in x][0])
own = re.findall('\d+', own)
own = float('.'.join(own))
own

rent = 100 - own

request_headers = {
    'accept': 
    'text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US, en;q=0.8',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    }
with requests.Session() as session:
    url = 'https://www.areavibes.com/' + city.replace(' ', '+').lower() + '-' + state.replace(' ', '-').lower() + '/livability/#ratings-jmp'
    response = session.get(url, headers = request_headers)

soup2 = BeautifulSoup(response.content, 'html.parser')

##### Each category (amenities, cost of living, crime, employment, housing, schools and user ratings) is scored individually based on data in the methodology below. A letter grade is assigned to each category in order to easily evaluate the score.

score = str(soup2.find_all('nav', {'class': 'category-menu-new'})).split('<a')
score = str([x for x in score if '/livability/' in x][0])
score = re.findall('\d+', score)
score = float('.'.join(score))
# Logic

if traditionalProfit <= 0:
    breakEvenLT = "Never"
else:
    breakEvenLT = "{:.2f} Year(s)".format(breakEvenLT)

if airbnbProfit <= 0:
    breakEvenST = "Never"
else:
    breakEvenST = "{:.2f} Year(s)".format(breakEvenST)
    
if twoPercent == True:
    twoPercent = price * .02
    
if (traditionalincome == twoPercent):
    twoPercent = True
else:
    twoPercent = False

if traditionalProfit != None:
    traditionalProfitString = "${:.2f}".format(traditionalProfit)

if airbnbProfit != None:
    airbnbProfitString = "${:.2f}".format(airbnbProfit)

if traditionalCapRate != None:
    traditionalCapRateString = "{:.2f}%".format(traditionalCapRate)

if airbnbCapRate != None:
    airbnbCapRateString = "{:.2f}%".format(airbnbCapRate)

if traditionalCashOnCash != None:
    traditionalCashOnCashString = "{:.2f}%".format(traditionalCashOnCash)

if airbnbCashOnCash != None:
    airbnbCashOnCashString = "{:.2f}%".format(airbnbCashOnCash)

traditionalincomeString = "${:,.2f}".format(traditionalincome)
airbnbincomeString = "${:,.2f}".format(airbnbincome)
fiftyPercentString = "${:,.2f}".format(fiftyPercent)


if (airbnbProfit >= cashFlow) and (airbnbCapRate >= capRate) and (airbnbCashOnCash >= cashOnCash) and (twoPercent == True):
    airbnbGoodDeal = 1
else:
    airbnbGoodDeal = False

if(traditionalProfit >= cashFlow) and (traditionalCapRate >= capRate) and (traditionalCashOnCash >= cashOnCash) and (twoPercent == True):
    traditionalGoodDeal = 1
elif(traditionalProfit >= cashFlow) or (traditionalCapRate >= capRate) or (traditionalCashOnCash >= cashOnCash):
    traditionalGoodDeal = 2
elif(traditionalProfit >= cashFlow):
    traditionalGoodDeal = 3
else:
    traditionalGoodDeal = 4

if (airbnbProfit >= cashFlow) and (airbnbCapRate >= capRate) and (airbnbCashOnCash >= cashOnCash):
    airbnbGoodDeal = True
else:
    airbnbGoodDeal = False

if (traditionalProfit >= cashFlow) and (traditionalCapRate >= capRate) and (traditionalCashOnCash >= cashOnCash):
    traditionalGoodDeal = True
else:
    traditionalGoodDeal = False

if traditionalGoodDeal == 1:
    traditionalGoodDeal = 'A'
elif traditionalGoodDeal == 2:
    traditionalGoodDeal = 'B'
elif traditionalGoodDeal == 3:
    traditionalGoodDeal = 'C'
elif traditionalGoodDeal == 4:
    traditionalGoodDeal = 'D'
else:
    traditionalGoodDeal = 'F'

### Makes Values None if Beds or Baths Not Listed

if beds == None or baths == None:
    breakEvenLT = None
    breakEvenST = None
    fiftyPercent = None
    airbnbincome = None
    traditionalincome = None
    airbnbProfit = None
    airbnbCapRate = None
    airbnbCashOnCash = None
    traditionalProfit = None
    traditionalCapRate = None
    traditionalCashOnCash = None
    traditionalGoodDeal = None
    airbnbGoodDeal = None

Enter address: 4047 Bremo Recess


IndexError: list index out of range

In [8]:
# Summary Report

print("SUMMARY for", address)
dashes = []
for element in address:
    e = dashes.append('-')
dashes = ''.join(dashes)
print('-----------', dashes)
print("Neighborhood:", neighborhood)
print("Property Type:", propertyType)
print(beds, "Beds")
print(baths, "Baths")
print(sqft, "sqft")
print("Price:", "${:,.2f}".format(price))
print("Property Taxes:", "${:,.2f}".format(propertyTaxes))
print("Home Insurance:", "${:,.2f}".format(homeInsurance))
print("HOA:", "${:,.2f}".format(hoa))
print("Other Expenses:", "${:,.2f}".format(otherExpenses))
if propertyManagement == True:
    print("Management Fee: ", "${:,.2f}".format(propertyManagementFee))
else:
    pass
print("Monthly Payment:", "${:,.2f}".format(monthlyPayment))
dashes = []
for element in str(monthlyPayment):
    e = dashes.append('-')
dashes = ''.join(dashes)
print('----------------', dashes)
print("Down payment:", "${:,.2f}".format(downPayment))
print("Down payment percent:", "{:.1f}%".format(percent))
print("Interest:", "{:.2f}%".format(interest))
print(years, "Years")
print("Mortgage:", "${:,.2f}".format(mortgage))
dashes = []
for element in str(mortgage):
    e = dashes.append('-')
dashes = ''.join(dashes)
print('---------', dashes)
print("Long Term Cash Flow:", traditionalProfitString)
print("Long Term Break Even Time:", breakEvenLT)
print("Long Term Cap Rate:", traditionalCapRateString)
print("Long Term Cash on Cash:", traditionalCashOnCashString)
print("Long Term Cap Rate:", traditionalCapRateString)
print("Long Term Cash on Cash:", traditionalCashOnCashString)
print("Short Term Cash Flow:", airbnbProfitString)
print("Short Term Break Even Time:", breakEvenST)
print("Short Term Cap Rate:", airbnbCapRateString)
print("Short Term Cash on Cash:", airbnbCashOnCashString)
print("Long Term Cash Flow:", traditionalProfitString)
print("Long Term Grade:", traditionalGoodDeal)
print("Short Term Grade:", airbnbGoodDeal)

SUMMARY for 4047 Bremo Recess, New Albany, OH 43054
----------- ---------------------------------------
Neighborhood: 1
Property Type: Single Family Residential
4 Beds
5.0 Baths
3700 sqft
Price: $768,320.00
Property Taxes: $468.00
Home Insurance: $199.00
HOA: $0.00
Other Expenses: $0.00
Management Fee:  $375.50
Monthly Payment: $3,806.64
---------------- -------
Down payment: $153,664.00
Down payment percent: 20.0%
Interest: 3.00%
30 Years
Mortgage: $2,591.41
--------- -------
Long Term Cash Flow: $-51.64
Long Term Break Even Time: Never
Long Term Cap Rate: 0.49%
Long Term Cash on Cash: -0.03%
Long Term Cap Rate: 0.49%
Long Term Cash on Cash: -0.03%
Short Term Cash Flow: $-1371.64
Short Term Break Even Time: Never
Short Term Cap Rate: 0.32%
Short Term Cash on Cash: -0.89%
Long Term Cash Flow: $-51.64
Long Term Grade: F
Short Term Grade: False
