In [121]:
import csv
import os
import re
# import requests
import time
from datetime import date
from io import BytesIO
from os import path

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
# import save_screenshot

# Maps the market slug used in the AirDNA overview URL to the display name
# written to the 'Market' column of the CSV. Display names use spaces (not the
# slug's hyphens), e.g. 'grand-prairie' -> 'Grand Prairie'.
markets = {
    'dallas': 'Dallas',
    'fort-worth': 'Fort Worth',
    'plano': 'Plano',
    'irving': 'Irving',
    'arlington': 'Arlington',
    'frisco': 'Frisco',
    'grand-prairie': 'Grand Prairie',
    'mckinney': 'McKinney',
    'denton': 'Denton',
    'richardson': 'Richardson',
}
# Directory that receives the cropped map image of each market
# (created by setUp, written by scrape_market_data).
maps_save_path = 'maps/'
# Directory that receives the full-page screenshot of each market.
screenshots_save_path = 'screenshots/'
# Location of the ChromeDriver binary handed to webdriver.Chrome.
chrome_driver_path = 'chromedriver/chromedriver.exe'
# CSV file that write_data_to_csv opens for the header row and every market row.
market_csv_file = 'airdna_dfw_market_metrics.csv'
# Column titles for the CSV; setUp writes this list as the first row.
# The order must stay in lockstep with the order in which scrape_market_data
# appends values to its row, otherwise values land under the wrong title.
# NOTE(review): the quarter columns ('Q1 19' ... 'Q2 20') are hard-coded here
# and in scrape_market_data - confirm they still match what the page shows.
header = [
    # Market name followed by the headline metrics.
    'Market',
    'Active Rentals',
    'Rental Demand Score',
    'Average Daily Rate',
    'Occupancy Rate %',
    'Monthly Revenue',
    # Rental type: count and percentage pairs.
    'Entire Home Rentals',
    'Entire Home Rentals %',
    'Private Room Rentals',
    'Private Room Rentals %',
    'Shared Room Rentals',
    'Shared Room Rentals %',
    # Rental size: count and percentage pairs.
    'Studio',
    'Studio %',
    '1 Bedroom',
    '1 Bedroom %',
    '2 Bedroom',
    '2 Bedroom %',
    '3 Bedroom',
    '3 Bedroom %',
    '4 Bedroom',
    '4 Bedroom %',
    '5+ Bedroom',
    '5+ Bedroom %',
    # Amenities: one value each.
    'Air Conditioning',
    'Heating',
    'Washer',
    'Dryer',
    'Kitchen',
    'Parking',
    'Internet',
    'Cable TV',
    'Pool',
    'Hot Tub',
    # Rental growth per quarter, with a computed growth column after the last
    # quarter of each year's series.
    'Q1 19',
    'Q2 19',
    'Q3 19',
    'Q4 19',
    '2019 Growth %',
    'Q1 20',
    'Q2 20',
    '2020 Growth %',
    # Rental activity: count and percentage pairs.
    '181 - 270 Days Available',
    '181 - 270 Days Available %',
    '271 - 365 Days Available',
    '271 - 365 Days Available %',
    '181 - 270 Days Booked',
    '181 - 270 Days Booked %',
    '271 - 365 Days Booked',
    '271 - 365 Days Booked %',
    'Rental Activity Property Count',
    # Ratings, then the overall rating.
    'Accuracy',
    'Check In',
    'Cleanliness',
    'Communication',
    'Location',
    'Value',
    'Overall Rating',
    # Path of the saved map image for the market.
    'Map Url'
]

def full_screenshot(driver, save_path):
    """Save a full-page screenshot by scrolling and stitching viewport captures.

    The page is scrolled one viewport height at a time; each capture is kept
    as a fragment and the fragments are pasted top to bottom into one image.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        save_path: Destination file path; '.png' is appended when missing.

    Raises:
        ValueError: If the browser reports a non-positive viewport height,
            because scrolling by that step would never reach the page bottom.
    """
    if not save_path.endswith('.png'):
        save_path += '.png'

    # Viewport height in CSS pixels; this is the scroll step.
    viewport_height = driver.execute_script('return Math.max('
                                            'document.documentElement.clientHeight, window.innerHeight);')

    # Maximum scroll height of the page.
    # Ref--> https://stackoverflow.com/questions/17688595/finding-the-maximum-scroll-position-of-a-page
    page_height = driver.execute_script('return Math.max('
                                        'document.body.scrollHeight, '
                                        'document.body.offsetHeight, '
                                        'document.documentElement.clientHeight, '
                                        'document.documentElement.scrollHeight, '
                                        'document.documentElement.offsetHeight);')

    if not viewport_height or viewport_height <= 0:
        raise ValueError('Viewport height must be positive, got {!r}'.format(viewport_height))

    # Capture from top to bottom (always at least one fragment).
    # Ref--> https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb
    fragments = []
    offset = 0
    while True:
        driver.execute_script(f'window.scrollTo(0, {offset});')
        fragment = Image.open(BytesIO(driver.get_screenshot_as_png()))

        remaining = page_height - offset
        if offset > 0 and 0 < remaining < viewport_height:
            # The browser cannot scroll past the bottom, so this last capture
            # repeats rows already present in the previous fragment. Keep only
            # the bottom `remaining` CSS pixels, scaled to screenshot pixels
            # (the capture may be larger than the CSS viewport on HiDPI).
            frag_width, frag_height = fragment.size
            scale = frag_height / viewport_height
            keep = min(frag_height, max(1, round(remaining * scale)))
            fragment = fragment.crop((0, frag_height - keep, frag_width, frag_height))

        fragments.append(fragment)
        offset += viewport_height
        if offset >= page_height:
            break

    # Stitch the fragments into a single frame, top to bottom.
    total_height = sum(fragment.size[1] for fragment in fragments)
    stitched = Image.new('RGB', (fragments[0].size[0], total_height))
    paste_y = 0
    for fragment in fragments:
        stitched.paste(fragment, (0, paste_y))
        paste_y += fragment.size[1]
    stitched.save(save_path)

def write_data_to_csv(data, mode):
    """Write a single row to the market CSV file.

    Args:
        data: Sequence of cell values forming one CSV row.
        mode: File mode passed to open(); the script uses 'w' to start the
            file with the header row and 'a' to append a market row.
    """
    # newline='' lets the csv module control line endings itself.
    with open(market_csv_file, mode, newline='') as handle:
        csv.writer(handle).writerow(data)

def setUp():
    """Prepare the run: restore chromedriver, create folders, start the CSV.

    Steps:
      1. Rename chromedriver/chromedriver.rename to chromedriver.exe when the
         parked file is present (tearDown renames it back afterwards).
      2. Create the screenshot and map output folders if they are missing.
      3. Overwrite the CSV file with the header row.
    """
    parked_driver = r'chromedriver/chromedriver.rename'
    active_driver = r'chromedriver/chromedriver.exe'

    # Prep chromedriver. Best effort: a missing or un-renamable file is
    # reported but does not abort the run.
    if path.exists(parked_driver):
        try:
            os.rename(parked_driver, active_driver)
        except OSError as err:
            print("Notice: Could not rename {} to {}: {}".format(parked_driver, active_driver, err))
    else:
        print("Notice: The file chromedriver/chromedriver.rename does not exist")

    # Setup Screenshot Folders
    os.makedirs(screenshots_save_path, exist_ok=True)
    os.makedirs(maps_save_path, exist_ok=True)

    # CSV Header (Overwrite)
    write_data_to_csv(header, 'w')
    
def tearDown():
    """Rename chromedriver.exe back to chromedriver.rename and report completion."""
    active_driver = r'chromedriver/chromedriver.exe'
    parked_driver = r'chromedriver/chromedriver.rename'
    os.rename(active_driver, parked_driver)
    print('Completed')

def clean_int(string):
    """Return the first run of digits in *string* as an int, or 0 if none.

    Commas are stripped first so a thousands separator does not split the
    number ('1,234' is read as 1234).
    """
    first_number = re.search(r'\d+', string.replace(',', ''))
    if first_number is None:
        return 0
    return int(first_number.group())

def clean_int_with_pct(string):
    """Return every run of digits in *string* as a list of ints, in order.

    Commas are stripped first. The callers read index 0 as the count and
    index 1 as the percentage. An input without digits yields an empty list.
    """
    without_commas = string.replace(',', '')
    return [int(token) for token in re.findall(r'\d+', without_commas)]

def clean_float(string):
    """Parse *string* as a float, tolerating commas and surrounding text.

    Commas are stripped, then the whole text is tried with float(). When that
    fails (e.g. '4.8 / 5'), the first decimal number found in the text is
    used instead. Like clean_int, text without any number yields zero rather
    than an exception.

    Args:
        string: Text scraped from the page.

    Returns:
        The parsed float, or 0.0 for empty, whitespace-only or number-free text.
    """
    text = string.replace(',', '').strip()
    if not text:
        return 0.0
    try:
        return float(text)
    except ValueError:
        # Fall back to the first signed decimal number embedded in the text.
        number = re.search(r'[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)', text)
        return float(number.group()) if number else 0.0

def scrape_int(driver, element_id, by='xpath'):
    """Find one element and return the first integer in its text.

    Args:
        driver: Selenium WebDriver used for the lookup.
        element_id: XPath expression when ``by`` is 'xpath', otherwise a CSS
            class name.
        by: 'xpath' selects an XPath lookup; any other value selects a
            class-name lookup.

    Returns:
        The integer produced by clean_int from the element's text.
    """
    # find_element(By..., value) is used instead of the find_element_by_*
    # shortcuts, which current Selenium releases no longer provide.
    strategy = By.XPATH if by == 'xpath' else By.CLASS_NAME
    return clean_int(driver.find_element(strategy, element_id).text)

def scrape_float(driver, element_id, by='xpath'):
    """Find one element and return its text parsed as a float.

    Args:
        driver: Selenium WebDriver used for the lookup.
        element_id: XPath expression when ``by`` is 'xpath', otherwise a CSS
            class name.
        by: 'xpath' selects an XPath lookup; any other value selects a
            class-name lookup.

    Returns:
        The float produced by clean_float from the element's text.
    """
    # find_element(By..., value) is used instead of the find_element_by_*
    # shortcuts, which current Selenium releases no longer provide.
    strategy = By.XPATH if by == 'xpath' else By.CLASS_NAME
    return clean_float(driver.find_element(strategy, element_id).text)

def calc_basic_growth(initial, final):
    """Return the percentage change from *initial* to *final*, rounded.

    Args:
        initial: Starting value of the period.
        final: Ending value of the period.

    Returns:
        The growth in percent, rounded to the nearest integer. When *initial*
        is 0 the growth is undefined; 0 is returned instead of raising
        ZeroDivisionError.
    """
    if initial == 0:
        return 0
    return round(((final - initial) / initial) * 100)

def scrape_market_data(market='', maximize_screen_size=False):
    """Scrape one AirDNA market overview page and append a row to the CSV.

    With an empty *market*, every key of ``markets`` is scraped in turn, each
    in its own browser session. Otherwise the overview page of the given
    market slug is opened, the metrics are read in the column order of
    ``header``, a full-page screenshot and a cropped map image are saved, and
    the row is appended to the CSV.

    Args:
        market: URL slug of the market (e.g. 'dallas'); '' means all markets.
        maximize_screen_size: Maximize the browser window and use the XPaths
            that go with that layout.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an element that
            is read without a fallback is missing from the page. The browser
            session is still shut down in that case.
    """
    if not market:
        for key in markets:
            scrape_market_data(key, maximize_screen_size)
        return

    url = 'https://www.airdna.co/vacation-rental-data/app/us/texas/{}/overview'.format(market)

    active_rentals_class = 'section__header'
    rental_demand_class = 'market-health-section__right-axis-text'
    average_daily_rate_xpath = '//*[@id="app"]/div[2]/div[2]/div/div/div/div[2]/div/div[1]/div[1]/div[1]'
    occupancy_rate_xpath = '//*[@id="app"]/div[2]/div[2]/div/div/div/div[2]/div/div[2]/div[1]/div[1]'
    revenue_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[2]/div/div[3]/div[1]/div[1]'
    rental_type_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[1]/div[2]/div/div/div/p[2]/p'
    rental_size_tooltip_xpath = '//*[@id="app"]/div[2]/div[2]/div/div/div/div[3]/div[3]/div/div/div/div/p[2]/p'
    amenities_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[5]/div/div/div/div/p/p'
    rental_growth_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[4]/div/div/div/div/p[2]/p'
    days_available_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[7]/div/div[1]/div/div/div/p[2]'
    days_booked_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[7]/div/div[2]/div/div/div/p[2]'
    ratings_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[6]/div/div[1]/div/div/p/p'
    overall_rating_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[6]/div/div[2]/div[1]/span'

    # NOTE(review): executable_path is the older way of pointing Selenium at
    # the driver binary; newer Selenium 4 releases expect a Service object
    # instead - confirm against the installed Selenium version.
    driver = webdriver.Chrome(executable_path=chrome_driver_path)
    try:
        driver.get(url)

        if maximize_screen_size:
            driver.maximize_window()
            # These three elements sit at different XPaths in the maximized layout.
            average_daily_rate_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]/div[1]'
            occupancy_rate_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[2]/div/div[2]/div[1]/div[1]'
            rental_size_tooltip_xpath = '//*[@id="app"]/div[2]/div/div/div/div/div[3]/div[3]/div/div/div/div/p[2]/p'

        # Fixed pause before reading anything from the page.
        time.sleep(5)

        # Row under construction; values are appended in ``header`` order.
        market_data = [markets.get(market, market)]

        # Headline metrics.
        market_data.append(scrape_int(driver, active_rentals_class, 'class'))
        market_data.append(scrape_int(driver, rental_demand_class, 'class'))
        market_data.append(scrape_int(driver, average_daily_rate_xpath))
        market_data.append(scrape_int(driver, occupancy_rate_xpath))
        market_data.append(scrape_int(driver, revenue_xpath))

        def get_chart_value(charts, element_key, element_xpath, with_pct=False, float_values=False):
            """Hover chart element *element_key*, then read the tooltip at *element_xpath*.

            Returns a list of ints when *with_pct* is set (None if the tooltip
            element is missing), otherwise a single float or int.
            """
            try:
                ActionChains(driver).move_to_element(charts[element_key]).perform()
            except IndexError:
                # Fewer chart elements than expected: skip the hover and read
                # whatever the tooltip currently shows.
                pass

            if with_pct:
                try:
                    return clean_int_with_pct(driver.find_element(By.XPATH, element_xpath).text)
                except NoSuchElementException:
                    return None
            if float_values:
                return scrape_float(driver, element_xpath)
            return scrape_int(driver, element_xpath)

        def append_count_and_pct(values):
            """Append the (count, percentage) pair, padding missing parts with 0."""
            # Always emit exactly two cells so later columns stay aligned.
            market_data.extend((list(values or []) + [0, 0])[:2])

        # In the dicts below the keys are indices into the matching element
        # list; the values are labels kept for readability only.
        bar_charts = driver.find_elements(By.CLASS_NAME, "chart__bar--empty")

        rental_type = {
            7: 'Entire Home',
            6: 'Private Room',
            5: 'Shared Room',
        }
        for key in rental_type:
            append_count_and_pct(get_chart_value(bar_charts, key, rental_type_tooltip_xpath, True))

        rental_size = {
            8: 'Studio',
            9: '1 Bedroom',
            10: '2 Bedroom',
            11: '3 Bedroom',
            12: '4 Bedroom',
            13: '5+ Bedroom',
        }
        for key in rental_size:
            append_count_and_pct(get_chart_value(bar_charts, key, rental_size_tooltip_xpath, True))

        amenities = {
            23: 'Air Conditioning',
            22: 'Heating',
            21: 'Washer',
            20: 'Dryer',
            19: 'Kitchen',
            18: 'Parking',
            17: 'Internet',
            16: 'Cable TV',
            15: 'Pool',
            14: 'Hot Tub',
        }
        for key in amenities:
            market_data.append(get_chart_value(bar_charts, key, amenities_tooltip_xpath) or 0)

        # Rental growth: one value per quarter, plus a growth column after the
        # last quarter of each series (keys 10 and 12), measured from that
        # series' first quarter (keys 7 and 11).
        quarters = {
            7: 'Q1 19',
            8: 'Q2 19',
            9: 'Q3 19',
            10: 'Q4 19',
            11: 'Q1 20',
            12: 'Q2 20',
        }
        dot_charts = driver.find_elements(By.CLASS_NAME, "chart__dot")
        q1_count = 0
        for key in quarters:
            count = get_chart_value(dot_charts, key, rental_growth_tooltip_xpath)
            market_data.append(count or 0)
            if key in (7, 11):
                q1_count = count
            if key in (10, 12):
                # Growth from a zero baseline is undefined; record 0 instead
                # of dividing by zero.
                market_data.append(calc_basic_growth(q1_count, count) if q1_count else 0)

        # Rental Activity
        arc_charts = driver.find_elements(By.CLASS_NAME, "chart__arc")
        days_available = {
            2: '181 - 270 Days Available',
            3: '271 - 365 Days Available',
        }
        for key in days_available:
            append_count_and_pct(get_chart_value(arc_charts, key, days_available_tooltip_xpath, True))

        days_booked = {
            6: '181 - 270 Days Booked',
            7: '271 - 365 Days Booked',
        }
        for key in days_booked:
            append_count_and_pct(get_chart_value(arc_charts, key, days_booked_tooltip_xpath, True))

        # Property count: sum of the counts behind the first four arcs.
        rental_activity_prop_count = 0
        for arc_index in range(4):
            rental_activity_prop_count += get_chart_value(arc_charts, arc_index, days_available_tooltip_xpath)
        market_data.append(rental_activity_prop_count)

        ratings = {
            0: 'Accuracy',
            1: 'Check In',
            2: 'Cleanliness',
            3: 'Communication',
            4: 'Location',
            5: 'Value',
        }
        radar_points = driver.find_elements(By.CLASS_NAME, "chart__radar-point")
        for key in ratings:
            market_data.append(get_chart_value(radar_points, key, ratings_tooltip_xpath, False, True) or 0)

        # Overall Rating
        market_data.append(scrape_float(driver, overall_rating_xpath))

        # Get Full Page Screenshot (for logging purposes)
        driver.set_window_size(945, 1020)
        time.sleep(5)
        datestamp = date.today().strftime("%Y%m%d")
        entire_page_image = '{}{}_{}'.format(screenshots_save_path, market, datestamp)
        full_screenshot(driver, entire_page_image)

        # Get Map Screenshot: crop the map container out of the full-page image.
        map_container = driver.find_element(By.CLASS_NAME, "mapContainer")
        location = map_container.location
        size = map_container.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        market_map = '{}{}.png'.format(maps_save_path, market)
        # full_screenshot appended '.png' to the path it was given.
        with Image.open(entire_page_image + '.png') as page_image:
            map_image = page_image.crop((int(left), int(top), int(right), int(bottom)))
            map_image.save(market_map)
        market_data.append(market_map)

        write_data_to_csv(market_data, 'a')
    finally:
        # quit() ends the whole browser session, also when scraping failed.
        driver.quit()

# Run the full scrape: prepare the environment, scrape every market with a
# maximized window, and always undo the chromedriver rename afterwards - also
# when scraping raises part-way through.
setUp()
try:
    scrape_market_data('', True)
finally:
    tearDown()

Completed
