In [1]:

%pprint

Pretty printing has been turned OFF



---
# Load needed libraries and functions

In [2]:

import sys

# Insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../py')

In [3]:

%matplotlib inline
from datetime import datetime
from neo4j.exceptions import ServiceUnavailable
import humanize
import matplotlib.pyplot as plt
import time
import winsound

bin_count = 12
duration = 1000  # milliseconds
freq = 880  # Hz
height_inches = 3.0
width_inches = 18.0

In [4]:

t0 = time.time()

# Get the Neo4j driver
from storage import Storage
s = Storage()

from ha_utils import HeaderAnalysis
ha = HeaderAnalysis(verbose=False)

from scrape_utils import WebScrapingUtilities
wsu = WebScrapingUtilities(s=s)
uri = wsu.secrets_json['neo4j']['connect_url']
user =  wsu.secrets_json['neo4j']['username']
password = wsu.secrets_json['neo4j']['password']

# Get the neo4j object
from cypher_utils import CypherUtilities
cu = CypherUtilities(uri=uri, user=user, password=password, driver=None, s=s, ha=ha)

try:
    
    version_str = cu.driver.get_server_info().agent
    print(f'======== {version_str} ========')
    
    from hc_utils import HeaderCategories
    hc = HeaderCategories(cu=cu, verbose=False)
    
    # 400 6,094 37 minutes and 50 seconds
    # 800 6,094 44 minutes and 18 seconds
    from lr_utils import LrUtilities
    lru = LrUtilities(ha=ha, cu=cu, hc=hc, verbose=False)
    lru.build_isheader_logistic_regression_elements(verbose=False)
    lru.build_pos_logistic_regression_elements(sampling_strategy_limit=800, verbose=True)
    lru.build_isqualified_logistic_regression_elements(sampling_strategy_limit=5_000, verbose=False)
    
    from crf_utils import CrfUtilities
    crf = CrfUtilities(ha=ha, hc=hc, cu=cu, lru=lru, verbose=True)
    
    from section_utils import SectionUtilities
    su = SectionUtilities(s=s, ha=ha, cu=cu, crf=crf, verbose=False)
    
    import warnings
    warnings.filterwarnings('ignore')
except ServiceUnavailable as e:
    print('You need to start Neo4j as a console')
    raise
except Exception as e:
    print(f'{e.__class__}: {str(e).strip()}')
duration_str = humanize.precisedelta(time.time() - t0, minimum_unit='seconds', format='%0.0f')
winsound.Beep(freq, duration)
print(f'Utility libraries created in {duration_str}')
print(f'Last run on {datetime.now()}')

I have 6,094 labeled parts of speech in here
Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\job-hunting\saves\pkl\basic_quals_dict.pkl
Utility libraries created in 49 minutes and 32 seconds
Last run on 2023-03-07 21:35:20.145732


In [5]:

import enchant
from IPython.display import HTML

file_name = "aafda86facc69d43_Experimentation_Data_Scientist_Remote_Indeed_com.html"


---
# Training

In [10]:

# You need to run this again if you changed the qualification dictionary in another notebook
t0 = time.time()
basic_quals_dict = lru.sync_basic_quals_dict(sampling_strategy_limit=10_000, verbose=False)
lru.retrain_isqualified_classifier(verbose=True)
duration_str = humanize.precisedelta(time.time() - t0, minimum_unit='seconds', format='%0.0f')
print(f'Is-qualified classifer retrained in {duration_str}')

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\job-hunting\saves\pkl\basic_quals_dict.pkl
I have 11,659 hand-labeled qualification strings in here
Retraining complete
Is-qualified classifer retrained in 2 minutes and 12 seconds



----
# Prepare cover sheet

In [11]:

# Show what qualifications you have for this posting
ask_str = ''
child_strs_list = ha.get_child_strs_from_file(file_name=file_name)
is_header_list = []
for is_header, child_str in zip(ha.get_is_header_list(child_strs_list), child_strs_list):
    if is_header is None:
        probs_list = lru.ISHEADER_PREDICT_PERCENT_FIT(child_str)
        idx = probs_list.index(max(probs_list))
        is_header = [True, False][idx]
    is_header_list.append(is_header)
feature_tuple_list = []
for feature_dict in hc.get_feature_dict_list(ha.get_child_tags_list(child_strs_list), is_header_list, child_strs_list):
    feature_tuple_list.append(hc.get_feature_tuple(feature_dict, lru.pos_lr_predict_single))
crf_list = crf.CRF.predict_single(crf.sent2features(feature_tuple_list))
indices_list = su.find_basic_quals_section_indexes(child_strs_list=child_strs_list, crf_list=crf_list, file_name=file_name)
quals_list = [child_str for i, child_str in enumerate(child_strs_list) if i in indices_list]
prediction_list = list(lru.predict_job_hunt_percent_fit(quals_list))
_, qual_count = lru.get_quals_str(prediction_list, quals_list)
job_fitness = qual_count/len(prediction_list)
d = enchant.Dict('en_US')
job_title = ' '.join([w for w in file_name.replace('.html', '').replace('_Indeed_com', '').split('_') if d.check(w)])
met_str = f'<p>I only meet {job_fitness:.1%} of the minimum requirements for the {job_title} position, but I can explain:</p>'
ask_str += met_str
display(HTML(met_str))
for i, qual_str in enumerate(quals_list):
    if qual_str in basic_quals_dict:
        if basic_quals_dict[qual_str]:
            met_str = f'{i+1}) {qual_str}'
            ask_str += ' ' + met_str
            idx = qual_str.find('>')
            if idx == -1:
                display(HTML(met_str))
            else:
                display(HTML(f'{qual_str[:idx+1]}{i+1}) {qual_str[idx+1:]}'))

In [12]:

unmet_str = "<p>The minimum requirements that I don't meet are:</p>"
display(HTML(unmet_str))
for i, qual_str in enumerate(quals_list):
    if (qual_str not in basic_quals_dict) or not basic_quals_dict[qual_str]:
        met_str = f'{i+1}) {qual_str}'
        unmet_str += ' ' + met_str
        idx = qual_str.find('>')
        if idx == -1:
            display(HTML(met_str))
        else:
            display(HTML(f'{qual_str[:idx+1]}{i+1}) {qual_str[idx+1:]}'))

In [13]:

# This doesn't work unless you score all the O-PQs
db_pos_list = []
for navigable_parent in child_strs_list:
    db_pos_list = cu.append_parts_of_speech_list(navigable_parent, pos_list=db_pos_list)
pos_list = []
for i, (crf_symbol, db_symbol) in enumerate(zip(crf_list, db_pos_list)):
    if db_symbol in [None, 'O', 'H']:
        pos_list.append(crf_symbol)
    else:
        pos_list.append(db_symbol)
met_str = f"<p>The preferred requirements that I meet are:</p>"
display(HTML(met_str))
min_str = ''
pqs_list = [child_str for pos_str, child_str in zip(pos_list, child_strs_list) if (pos_str in ['O-PQ'])]
for i, qual_str in enumerate(pqs_list):
    if qual_str in basic_quals_dict:
        if basic_quals_dict[qual_str]:
            pref_str = f'{i+1}) {qual_str}'
            min_str += ' ' + pref_str
            idx = qual_str.find('>')
            if idx == -1:
                display(HTML(pref_str))
            else:
                display(HTML(f'{qual_str[:idx+1]}{i+1}) {qual_str[idx+1:]}'))
if min_str:
    ask_str += met_str + min_str
winsound.Beep(freq, duration)


----
# Write cover sheet

In [14]:

topic = 'cover'
if topic == 'recruiter':
    recruiter_name = 'Vena Burgess'
    youchat_str = f"Reply to the recruiter email that although you don't meet {1-job_fitness:.1%} of the requirements (" + unmet_str + f'), you do meet these criterion: {ask_str} and have applied for the job.'
    youchat_str += f' (Replace [Your Name] with Dave Babbitt, Replace [Recruiter] with {recruiter_name})'
elif topic == 'cover':
    import pandas as pd
    cypher_str = f"""
        MATCH (fn:FileNames {{file_name: "{file_name}"}})
        RETURN
            fn.role_primary_contact AS role_primary_contact,
            fn.role_primary_contact_email_id AS role_primary_contact_email_id,
            fn.role_title AS role_title
        ORDER BY fn.percent_fit DESC;"""
    cover_df = pd.DataFrame(cu.get_execution_results(cypher_str, verbose=False))
    recruiter_name = cover_df.role_primary_contact.squeeze()
    email_address = cover_df.role_primary_contact_email_id.squeeze()
    role_title = cover_df.role_title.squeeze()
    if (recruiter_name is None) or (email_address is None):
        suffix_str = ''
    else:
        suffix_str = f' to "{recruiter_name}" <{email_address}>'
    youchat_str = f'Write a cover letter email{suffix_str}, complete with subject, using this information: ' + ask_str + ' Replace [Your Name] with Dave Babbitt'
elif topic == 'zoom':
    interviewer_name = 'Dan, David, Alex, and Melinda'
    company_name = '3GIMBALS'
    youchat_str = f"Write a follow up thank you note for an interview using this information: a) Interviewer Name: {interviewer_name}, b) Position: {job_title}, c) Company Name {company_name}, "
    youchat_str += f"d) relevant skills: {ask_str}, e) Your Name: Dave Babbitt. Ask about going over the programming exercise."
elif topic == 'phone':
    interviewer_name = 'Dan, David, Alex, and Melinda'
    company_name = '3GIMBALS'
    interviewer_title = 'interview team'
    youchat_str = f"Write an email, complete with subject, to {interviewer_name} about what a pleasure it was to talk to them, the {interviewer_title},"
    youchat_str += f" on the phone about the {job_title} position with {company_name}. Replace [Your Name] with Dave Babbitt"
elif topic == 'interested':
    file_path = '../data/txt/resume.txt'
    with open(file_path, 'r') as file:
        resume_str = file.read().rstrip()
    task_strs_list = []
    for task_str in [child_str for pos_str, child_str in zip(pos_list, child_strs_list) if (pos_str in ['O-TS'])]:
        task_strs_list.append(task_str)
    company_name = child_strs_list[1]
    youchat_str = f"Explain in first person singular why I would be interested in this role at {company_name}, given\n\n1) this information about the task scope:\n\n{' '.join(task_strs_list)}"
    youchat_str += f"\n\nand, 2) my resume:\n\n{resume_str}"
elif topic == 'question':
    file_path = '../data/txt/resume.txt'
    with open(file_path, 'r') as file:
        resume_str = file.read().rstrip()
    task_strs_list = []
    for task_str in [child_str for pos_str, child_str in zip(pos_list, child_strs_list) if (pos_str in ['O-TS'])]:
        task_strs_list.append(task_str)
    company_name = child_strs_list[1]
    youchat_str = f"Pretend you have the competencies and experience listed on the resume. Explain in first person singular"
    youchat_str += f" how you manage projects, communicate with clients, discover their needs, make recommendations, stay in budget, manage changing requirements, and produce results,"
    youchat_str += f" given this resume:\n\n{resume_str[75:]}"
elif topic == 'rejected':
    job_title = 'Senior Data Engineering Analyst, Platform Engineering (Remote, Anywhere in US)'
    recruiting_team_name = 'Humana Recruiting Team'
    company_name = 'Humana'
    youchat_str = f"Write a reply to the {recruiting_team_name} rejection letter for the {job_title} position at {company_name},"
    youchat_str += ' persuading the recruiting team to explain in more detail why I was rejected for the role. Replace [Name] with Dave Babbitt'
print(youchat_str)

Write a cover letter email, complete with subject, using this information: <p>I only meet 100.0% of the minimum requirements for the Experimentation Data Scientist Remote position, but I can explain:</p> 1) <li>MS or Ph.D. or equivalent experience in a quantitative field or 6+ years of proven ability as a Data Scientist. Preferably in digital media or product fields.</li> 2) <li>4+ years applied experience in building sophisticated datasets and feature engineering</li> 3) <li>Experience working with structured and unstructured data stored in distributed files systems.</li> 4) <li>Strong proficiency with SQL. Deep experience in Python/Jupyter</li> 5) <li>Strong background and knowledge in statistics, probability, and data analysis.</li> 6) <li>Project ownership with an ability to condense &amp; communicate sophisticated concepts and analysis into clear and concise takeaways that drive action.</li> 7) <li>Curious about what drives business trends and. Demonstrated capacity to learn on th


----
## Unless you have written consent from the Generative AI and LLM CoE, you may not use generative AI tools while coding and cannot upload Accenture, ecosystem or client content or data to these tools.

In [15]:

import urllib.parse

driver = wsu.get_driver(verbose=False)
# youchat_str = 'Rewrite this sentence so it sounds like a minimum requirement: "Self-sustaining and proactive self-starters who can thrive in a large technological ecosystem with a myriad of tools and documentation."'
youchat_url = f'https://you.com/search?q={urllib.parse.quote_plus(youchat_str)}&fromSearchBar=true&tbm=youchat'
wsu.driver_get_url(driver, youchat_url, verbose=False)
print(youchat_str)
winsound.Beep(freq, duration)

Write a cover letter email, complete with subject, using this information: <p>I only meet 100.0% of the minimum requirements for the Experimentation Data Scientist Remote position, but I can explain:</p> 1) <li>MS or Ph.D. or equivalent experience in a quantitative field or 6+ years of proven ability as a Data Scientist. Preferably in digital media or product fields.</li> 2) <li>4+ years applied experience in building sophisticated datasets and feature engineering</li> 3) <li>Experience working with structured and unstructured data stored in distributed files systems.</li> 4) <li>Strong proficiency with SQL. Deep experience in Python/Jupyter</li> 5) <li>Strong background and knowledge in statistics, probability, and data analysis.</li> 6) <li>Project ownership with an ability to condense &amp; communicate sophisticated concepts and analysis into clear and concise takeaways that drive action.</li> 7) <li>Curious about what drives business trends and. Demonstrated capacity to learn on th


### Check the back FireFox window to make sure the chat writing has stopped before running this next cell.

In [16]:

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

css_selector = 'div[data-testid="youchat-text"]'
try:
    web_element = driver.find_element(By.CSS_SELECTOR, css_selector)
    print(web_element.text)
except NoSuchElementException as e:
    pass
except Exception as e:
    print(f'{e.__class__} error: {str(e).strip()}')
finally:
    driver.close()

Subject: Application for Experimentation Data Scientist Remote position - Dave Babbitt
Dear Hiring Team,
I am writing to apply for the Experimentation Data Scientist Remote position. I understand that I only meet 100.0% of the minimum requirements, but I believe my skills and experience make me an excellent candidate.
I have an MS or Ph.D. in a quantitative field, as well as 6+ years of proven ability as a Data Scientist, preferably in digital media or product fields. Additionally, I have 4+ years of applied experience in building sophisticated datasets and feature engineering, as well as experience working with structured and unstructured data stored in distributed files systems.
I am confident in my abilities with SQL, and have a deep experience in Python/Jupyter. My background and knowledge in statistics, probability, and data analysis is strong. I have project ownership with an ability to condense and communicate sophisticated concepts and analysis into clear and concise takeaways 