In [1]:
from bs4 import BeautifulSoup
import requests

In [180]:
def get_soup(url, timeout=30):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block the notebook
            kernel indefinitely.

    Returns:
        The ``BeautifulSoup`` document built from the response bytes with the
        ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout elapses.
    """
    # A browser-like User-Agent header is sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'}
    page = requests.get(url, headers=headers, timeout=timeout)
    # No ``prettify()`` call is needed: it only returns a formatted string and
    # leaves the parsed tree untouched.
    return BeautifulSoup(page.content, 'html.parser')

def clean_string(string: str) -> str:
    """Return ``string`` after passing it through ``clean_escapes``.

    Args:
        string: Text to clean.

    Returns:
        Whatever ``clean_escapes`` returns for ``string``.
    """
    return clean_escapes(string)

def clean_escapes(string: str) -> str:
    """Delete forward slashes, newlines and carriage returns from ``string``.

    Args:
        string: Text to clean.

    Returns:
        A copy of ``string`` with every forward slash, line feed and carriage
        return character removed. All other characters are left in place.
    """
    # NOTE(review): deleting every '/' may have been meant for backslashes --
    # confirm the intent; the existing behavior is kept unchanged here.
    # A separate pass for the two-character text '/n' is unnecessary: once all
    # '/' characters are gone, no '/n' sequence can remain.
    return string.translate(str.maketrans('', '', '/\n\r'))

def convert_li(li: list):
    """Placeholder: accepts ``li`` but does nothing with it yet.

    Args:
        li: A list; currently ignored.

    Returns:
        None.
    """
    return None

def goodtx_get_all_data(soup: BeautifulSoup) -> dict:
    """Run every ``goodtx_get_*`` extractor on ``soup`` and collect the results.

    Args:
        soup: Parsed profile page.

    Returns:
        A dict with one entry per extractor, keyed by ``name``,
        ``writing_sample``, ``issues``, ``orientations``, ``services``,
        ``ages``, ``professions``, ``credential``, ``license_status``,
        ``website``, ``address`` and ``phone``.
    """
    # Entries are evaluated top to bottom, so the extractors run in this order.
    return {
        'name': goodtx_get_name(soup),
        'writing_sample': goodtx_get_writing_sample(soup),
        'issues': goodtx_get_tx_issues(soup),
        'orientations': goodtx_get_orientations(soup),
        'services': goodtx_get_services(soup),
        'ages': goodtx_get_client_ages(soup),
        'professions': goodtx_get_professions(soup),
        'credential': goodtx_get_primary_credential(soup),
        'license_status': goodtx_get_license_status(soup),
        'website': goodtx_get_website(soup),
        'address': goodtx_get_address(soup),
        'phone': goodtx_get_phone(soup),
    }

def goodtx_get_name(soup: BeautifulSoup) -> str:
    """Extract the name from the ``<h1 id="profileTitle_id">`` heading.

    Args:
        soup: Parsed profile page.

    Returns:
        The text of the heading's second child node, passed through
        ``clean_escapes``.
    """
    title = soup.find('h1', id='profileTitle_id')
    # The name is read from the child at index 1 of the heading.
    name_node = title.contents[1]
    return clean_escapes(name_node.get_text())

def goodtx_get_writing_sample(soup: BeautifulSoup) -> str:
    """Concatenate the paragraph text found in the profile's left column.

    Only the first ``<div class="profileBottomLeft">`` is inspected. Within
    it, every ``<div class="text">`` is visited and the text of each direct
    ``<p>`` child is collected.

    Args:
        soup: Parsed profile page.

    Returns:
        The collected paragraph texts joined with no separator.
    """
    left_column = soup.find_all('div', class_='profileBottomLeft')[0]
    paragraphs = [
        node.get_text()
        for block in left_column.find_all('div', class_='text')
        for node in block.children
        if node.name == 'p'
    ]
    return ''.join(paragraphs)

def goodtx_get_tx_issues(soup: BeautifulSoup)-> list:
    """Return the child nodes of the first ``<ul id="issuesData">`` element.

    The nodes are returned as the parser yields them: no text is extracted
    and whitespace strings sitting between the ``<li>`` items are kept.

    Args:
        soup: Parsed profile page.

    Returns:
        A list of the element's direct children.
    """
    issues_ul = soup.find_all('ul', id='issuesData')[0]
    return [node for node in issues_ul.children]

def goodtx_get_orientations(soup: BeautifulSoup)-> list:
    """Return the child nodes of the first ``<ul id="modelsData">`` element.

    The nodes are returned as the parser yields them: no text is extracted
    and whitespace strings sitting between the ``<li>`` items are kept.

    Args:
        soup: Parsed profile page.

    Returns:
        A list of the element's direct children.
    """
    models_ul = soup.find_all('ul', id='modelsData')[0]
    return [node for node in models_ul.children]

def goodtx_get_services(soup: BeautifulSoup)-> list:
    """Return the child nodes of the first ``<ul id="servicesprovidedData">``.

    The nodes are returned as the parser yields them: no text is extracted
    and whitespace strings sitting between the ``<li>`` items are kept.

    Args:
        soup: Parsed profile page.

    Returns:
        A list of the element's direct children.
    """
    services_ul = soup.find_all('ul', id='servicesprovidedData')[0]
    return [node for node in services_ul.children]

def goodtx_get_client_ages(soup: BeautifulSoup) -> list:
    """Return the child nodes of the first ``<ul id="agesData">`` element.

    The nodes are returned as the parser yields them: no text is extracted
    and whitespace strings sitting between the ``<li>`` items are kept.

    Args:
        soup: Parsed profile page.

    Returns:
        A list of the element's direct children.
    """
    ages_ul = soup.find_all('ul', id='agesData')[0]
    return [node for node in ages_ul.children]

def goodtx_get_professions(soup: BeautifulSoup) -> list:
    """Split the ``<span id="professionsDefined">`` text into professions.

    Args:
        soup: Parsed profile page.

    Returns:
        The span's text split on commas, with surrounding whitespace removed
        from each piece.
    """
    raw_text = soup.find('span', id='professionsDefined').get_text()
    return list(map(str.strip, raw_text.split(',')))

def goodtx_get_primary_credential(soup: BeautifulSoup) -> str:
    """Extract the text of the ``<span id="licenceinfo1">`` element.

    Args:
        soup: Parsed profile page.

    Returns:
        The span's text passed through ``clean_escapes``.
    """
    # Only one name is bound here; a second alias for the same text would
    # never be read.
    credential = soup.find('span', id='licenceinfo1').get_text()

    return clean_escapes(credential)

def goodtx_get_license_status(soup: BeautifulSoup) -> str:
    """Extract the text of the ``<span id="license_status_id">`` element.

    Args:
        soup: Parsed profile page.

    Returns:
        The span's text passed through ``clean_escapes``.
    """
    status_span = soup.find('span', id='license_status_id')
    return clean_escapes(status_span.get_text())

def goodtx_get_website(soup: BeautifulSoup) -> str:
    """Return the ``href`` of the ``<a id="edit_website">`` link.

    Args:
        soup: Parsed profile page.

    Returns:
        The link target, or the string ``'None'`` when the page has no such
        link or the link carries no ``href`` attribute.
    """
    link = soup.find('a', id='edit_website')
    if link is None:
        # No website link on this profile.
        return 'None'
    try:
        return link['href']
    except KeyError:
        # The anchor exists but has no href attribute.
        return 'None'
    # Any other exception (including KeyboardInterrupt) is deliberately left
    # to propagate instead of being reported as a missing website.

def goodtx_get_address(soup: BeautifulSoup) -> dict:
    """Collect the address parts marked up with ``itemprop`` spans.

    Each part is taken from the first matching ``<span>`` anywhere in the
    document; the search is not restricted to a particular office container.

    Args:
        soup: Parsed profile page.

    Returns:
        A dict with the keys ``street``, ``city``, ``state`` and ``zip``,
        each holding the text of the corresponding span.
    """
    # Output key -> value of the ``itemprop`` attribute on the source span.
    itemprop_by_key = {
        'street': 'streetAddress',
        'city': 'addressLocality',
        'state': 'addressRegion',
        'zip': 'postalCode',
    }
    return {
        key: soup.find('span', itemprop=itemprop).get_text()
        for key, itemprop in itemprop_by_key.items()
    }

def goodtx_get_phone(soup: BeautifulSoup) -> str:
    """Extract the phone number from the ``itemprop="telephone"`` span.

    Args:
        soup: Parsed profile page.

    Returns:
        The text of the first child of the span's second child node.
    """
    phone_span = soup.find('span', itemprop='telephone')
    # The number sits one level down: child index 1, then its child index 0.
    wrapper = phone_span.contents[1]
    return wrapper.contents[0].get_text()

In [178]:
# Check the phone extractor against `soup`, the parsed profile page that is
# assigned by the `get_soup(url = url)` cell; that cell must have run first.
goodtx_get_phone(soup)

'303-431-5641 x 4'

In [124]:
# Profile pages used while developing the extractors.
url = 'https://www.goodtherapy.org/therapists/profile/clare-comstock-20170911'
therapists = ['https://www.goodtherapy.org/therapists/profile/andrea-risi-20130730']

# Main sample page: the other cells read it through the name `soup`.
soup = get_soup(url)

# Second sample page; presumably a profile without a website link, judging by
# the variable name -- confirm.
soup_no_website = get_soup(
    'https://www.goodtherapy.org/therapists/profile/julie-reichenberger-20200327'
)

In [182]:
# Gather every extracted field for the profile in `soup` into a single dict,
# then end the cell on the bare name so the notebook displays it.
therapist_info = goodtx_get_all_data(soup)
therapist_info

{'name': 'Clare Comstock',
 'writing_sample': 'As a therapist, I have found that there is always something trying to emerge in the symptoms we might experience. Something needing attention, or expression, or healing. You may experience problems or symptoms as disruptive thought patterns, emotional overwhelm, physical sensations, repetitive behaviors, disturbing dreams, flashbacks, or in other ways. For me, therapy is firstly a process of observing ourselves from a wider perspective. Witnessing so as to see what is needed. Noticing and naming the thoughts, emotions, and body sensations that are present so they can get worked through and unstuck; moving us back into the flow of life again. Sometimes there is a need to unhook from cultural conditioning or early childhood patterns that may be blocking our current functioning. Or a traumatic experience has disrupted our lives.\r Therapy is also about creating safe, supported ways to go into what is there, rather than continuing to use the p

In [187]:
# Run the individual extractors on the profile in `soup`.
name = goodtx_get_name(soup)
issues = goodtx_get_tx_issues(soup)
orientations = goodtx_get_orientations(soup)
services = goodtx_get_services(soup)
ages = goodtx_get_client_ages(soup)
writing_sample = goodtx_get_writing_sample(soup)

# Print each result under its own heading. The name comes from the `name`
# variable computed above rather than from `therapist_info`, so this cell
# depends only on `soup` and not on another cell's result.
print(f'Name:\n{name}\n')
print(f'Issues:\n{issues}\n')
print(f'Orientations:\n{orientations}\n')
print(f'Services:\n{services}\n')
print(f'Ages:\n{ages}\n')
print(f'Writing Sample:\n{writing_sample}\n')

Name:
Clare Comstock

Issues:
['\n', <li>Anxiety</li>, <li>Attachment Issues</li>, <li>Codependency / Dependency</li>, <li>Depression</li>, <li>Grief, Loss, and Bereavement</li>, <li>Individuation</li>, <li>Life Purpose / Meaning / Inner-Guidance</li>, <li>Midlife Crisis / Midlife Transition</li>, <li>Posttraumatic Stress / Trauma</li>, <li>Self-Esteem</li>, <li>Shame</li>, <li>Social Anxiety / Phobia</li>, ' ']

Orientations:
['\n', <li>Integration of different therapy models</li>, <li>Jungian Psychotherapy</li>, <li>Mindfulness-Based Interventions</li>, <li>Somatic Experiencing (SE)</li>, <li>Somatic Psychotherapy</li>, ' ']

Services:
['\n', <li>Individual Therapy &amp; Counseling</li>, ' ', <li>Telehealth</li>, ' ']

Ages:
['\n', <li>Adults</li>, ' ', <li>Elders</li>, ' ']

Writing Sample:
As a therapist, I have found that there is always something trying to emerge in the symptoms we might experience. Something needing attention, or expression, or healing. You may experience proble

# PostgreSQL database and table creation

CREAtE TABLE 