In [4]:
import spacy
from textacy import preprocessing
from textacy import extract

# Name of the spaCy pipeline package to load; later cells call the resulting
# `nlp` object on text.
SPACY_MODEL_NAME = "en_core_web_lg"
nlp = spacy.load(SPACY_MODEL_NAME)


In [5]:
# Sample input: the raw text of a job posting (an AWS product-management role).
# The later cells clean this string and extract n-grams and key terms from it.
text = "About the job\nDescription\n\nWe’re seeking an experienced product leader to join us in our Boston, MA office to lead product management for AWS’s newest hybrid storage service. AWS Outposts provide fully managed AWS infrastructure and services to customers in their on-premises data centers. Amazon S3 on Outposts, launched in 2020, provides hybrid object storage enabling customers to store and retrieve data on-premises using the S3 programming model and features.\n\nOur team is responsible for delivering this hybrid storage platform on Outposts. As part of the team, you will help shape object storage for the next generation hybrid computing platform.\n\nProduct Management at AWS is an opportunity to collaborate with engineering, design, and business development teams. We are looking for an entrepreneurial product leader who is passionate about delivering solutions to customers and excited about growing a new AWS business. Successful candidates will be able to build strategic roadmap for the business, dive into technical details working closely with the engineering team and drive the delivery of features that will delight our customers.\n\nYou will be joining an experienced team of engineers and product managers who have built and scaled services at Amazon and beyond. We’re looking for a new teammate who is enthusiastic, empathetic, curious, motivated, reliable, and able to collaborate effectively with a diverse team of peers.\n\nWork/Life Balance\n\nOur team puts a high value on work-life balance. Our entire team is co-located in the Boston Seaport office, but we’re also flexible when people occasionally need to work from home. We generally keep core in-office hours from 10am to 4pm.\n\nMentorship and Career Growth\n\nOur team is dedicated to supporting new team members. Our team has a broad mix of experience levels and Amazon tenures, and we’re building an environment that celebrates knowledge sharing and mentorship. 
Our senior members truly enjoy mentoring others through one-on-one sessions helping them with their career growth.\n\nInclusive Team Culture\n\nHere at AWS, we embrace our differences. We are committed to furthering our culture of inclusion. We have ten employee-led affinity groups, reaching 40,000 employees in over 190 chapters globally. We have innovative benefit offerings, and we host annual and ongoing learning experiences, including our Conversations on Race and Ethnicity (CORE) and AmazeCon (gender diversity) conferences. Amazon’s culture of inclusion is reinforced within our 14 Leadership Principles, which remind team members to seek diverse perspectives, learn and be curious, and earn trust\n\n\nBasic Qualifications\nBachelor’s Degree in Computer Science or related field.\n10+ years of product management experience including defining product vision, roadmap and driving product investment decisions.\n10+ years of work experience with launching large scale software products and driving adoption.\nDemonstrable experience delivering through large teams.\nDemonstrable experience delivering strategies and insights to VP-level leadership.\n\nPreferred Qualifications\n\nAmazon is committed to a diverse and inclusive workforce. Amazon is an equal opportunity employer and does not discriminate on the basis of race, ethnicity, gender, gender identity, sexual orientation, protected veteran status, disability, age, or other legally protected status. For individuals with disabilities who would like to request an accommodation, please visit https://www.amazon.jobs/en/disability/us\n\n\nCompany - Amazon Dev Center U.S., Inc.\n\nJob ID: A1584516"




In [9]:
def remove_stop_words(text: str) -> str:
    """Return `text` with spaCy stop-word tokens dropped.

    The text is split with the tokenizer of the module-level `nlp` pipeline,
    every token flagged `is_stop` is discarded, and the surviving token texts
    are joined with single spaces.

    Args:
        text: Raw input string.

    Returns:
        The remaining token texts joined by single spaces ("" if nothing
        remains). Original spacing between tokens is not preserved.
    """
    # Tokenize only: `is_stop` is a lexical attribute set by the tokenizer, so
    # the tagger, parser and NER components that `nlp(text)` would also run
    # are not needed to produce this result.
    tokens = nlp.make_doc(text)
    return " ".join(token.text for token in tokens if not token.is_stop)

In [11]:
# Cleaning steps; the pipeline passes the text through them one after another
# in the order listed here.
_cleaning_steps = (
    preprocessing.remove.html_tags,
    remove_stop_words,
    preprocessing.normalize.whitespace,
    preprocessing.replace.urls,
    preprocessing.replace.numbers,
    preprocessing.normalize.unicode,
)

# Compose the steps into a single callable: preproc(text) -> cleaned text.
preproc = preprocessing.make_pipeline(*_cleaning_steps)

In [14]:
text = preproc(text)

In [15]:
doc = nlp(text)



In [16]:
# Collect the bigrams of the parsed posting into a list
# (min_freq=1 keeps n-grams that occur only once).
bigram_spans = extract.basics.ngrams(doc, 2, min_freq=1)
ngrams = [*bigram_spans]

print(ngrams)

[seeking experienced, experienced product, product leader, leader join, join Boston, MA office, office lead, lead product, product management, management AWS, AWS newest, newest hybrid, hybrid storage, storage service, AWS Outposts, Outposts provide, provide fully, fully managed, managed AWS, AWS infrastructure, infrastructure services, services customers, premises data, data centers, Amazon S3, S3 Outposts, provides hybrid, hybrid object, object storage, storage enabling, enabling customers, customers store, store retrieve, retrieve data, premises S3, S3 programming, programming model, model features, team responsible, responsible delivering, delivering hybrid, hybrid storage, storage platform, platform Outposts, help shape, shape object, object storage, storage generation, generation hybrid, hybrid computing, computing platform, Product Management, Management AWS, AWS opportunity, opportunity collaborate, collaborate engineering, business development, development teams, looking entre

In [20]:
# TextRank key terms of the parsed posting, limited to the top 20 (term, score) pairs.
textrank_terms = extract.keyterms.textrank(doc, topn=20)
textrank_terms

[('team broad mix experience level Amazon tenure', 0.05733918061952075),
 ('product management AWS new hybrid storage service', 0.057301199018178844),
 ('experienced team engineer product manager', 0.04579972829531097),
 ('new team member', 0.035476930169832444),
 ('engineering team drive delivery', 0.034840512530384185),
 ('year product management experience', 0.033031493846633744),
 ('diverse team peer', 0.031041252051884963),
 ('business development team', 0.029618639321989347),
 ('object storage generation hybrid computing platform', 0.02818529526638929),
 ('year work experience', 0.027068080310591304),
 ('entire team co', 0.02654570651829166),
 ('large scale software product', 0.02645724070228912),
 ('large team', 0.02606526383911489),
 ('hybrid storage platform Outposts', 0.02582969426891928),
 ('team dedicated', 0.02429669915825536),
 ('team responsible', 0.024093444371154897),
 ('entrepreneurial product leader passionate', 0.023679327481375887),
 ('new AWS business', 0.02335371

In [23]:
# YAKE key terms of the parsed posting: called with normalize="lemma" and
# ngrams=2, limited to the top 30 (term, score) pairs. The scores in the
# output below are listed in ascending order.
yake_terms = extract.keyterms.yake(doc, normalize="lemma", ngrams=2, topn=30)
yake_terms

[('AWS Outposts', 0.33535029962362023),
 ('S3 Outposts', 0.3358823445298114),
 ('Management AWS', 0.47710677362521514),
 ('MA office', 0.4976193058355266),
 ('Amazon S3', 0.5499260756855417),
 ('Product Management', 0.5577982887721558),
 ('Boston Seaport', 0.594677896047488),
 ('platform Outposts', 0.6453287289960133),
 ('demonstrable experience', 0.6488832973133655),
 ('Life Balance', 0.6929220664343416),
 ('Mentorship Career', 0.7286004600001447),
 ('Basic Qualifications', 0.7469508637308947),
 ('Career Growth', 0.7576764069853977),
 ('aws infrastructure', 0.763161011077047),
 ('Inclusive Team', 0.7669879246918987),
 ('premise s3', 0.7875640799590194),
 ('Team Culture', 0.7974164201091544),
 ('AWS business', 0.8082069387685239),
 ('AWS new', 0.8333908809174942),
 ('Conversations Race', 0.8456097603283527),
 ('Race Ethnicity', 0.8456097603283527),
 ('Amazon Dev', 0.8517756659977193),
 ('Job ID', 0.8538776164331809),
 ('Bachelor Degree', 0.8643478581383885),
 ('Degree Computer', 0.8802