# "Ideal" STEM Career Project
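This notebook explores job postings returned by a 'Software Engineer' query. It loads the postings, groups job titles by seniority, parses the salary estimate and location columns, and flags job descriptions that mention a bachelor's degree.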

In [365]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Data Import

In [366]:
df_cs = pd.read_csv('softwareengineer.csv') # Load the 'Software Engineer' query results; the bare file name is resolved against the current working directory

## Data Description

##### Show data header

In [367]:
# Preview the first rows of the freshly loaded data
df_cs.head()

Unnamed: 0.1,Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors
0,0,Software Engineer,$60K-$63K (Glassdoor est.),Job Description\n\nCSI’s Payments Software Eng...,4.0,Computer Services\n4.0,"Valparaiso, IN","Paducah, KY",1001 to 5000 employees,1965,Company - Public,Financial Transaction Processing,Finance,$100 to $500 million (USD),-1
1,1,Software Engineer,$60K-$63K (Glassdoor est.),We are AAM. We have the POWER to move the worl...,3.3,American Axle & Manufacturing\n3.3,"Detroit, MI","Detroit, MI",10000+ employees,1994,Company - Public,Transportation Equipment Manufacturing,Manufacturing,$5 to $10 billion (USD),-1
2,2,Software Engineer,$60K-$63K (Glassdoor est.),Preferred Qualifications\nA strong foundation ...,3.9,Quicken Loans\n3.9,"Detroit, MI","Detroit, MI",10000+ employees,1985,Company - Private,Lending,Finance,$10+ billion (USD),"Citi, Bank of America, Wells Fargo"
3,3,Test Engineer,$60K-$63K (Glassdoor est.),"Secure our Nation, Ignite your Future\n\nBecom...",4.1,ManTech International Corporation\n4.1,"Clarksburg, WV","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1
4,4,Lead Embedded Software Engineer - Wearables,$60K-$63K (Glassdoor est.),Job Description\n\n\nBose Corporation’s Consum...,3.6,Bose\n3.6,"Framingham, MA","Framingham, MA",5001 to 10000 employees,1964,Company - Private,Consumer Products Manufacturing,Manufacturing,$2 to $5 billion (USD),-1


#### Data information

In [368]:
# List the column labels available for the wrangling steps
df_cs.columns

Index(['Unnamed: 0', 'Job Title', 'Salary Estimate', 'Job Description',
       'Rating', 'Company Name', 'Location', 'Headquarters', 'Size', 'Founded',
       'Type of ownership', 'Industry', 'Sector', 'Revenue', 'Competitors'],
      dtype='object')

In [369]:
# Remove the 'Unnamed: 0' column (presumably a row index written out with the
# CSV - verify against the file). errors='ignore' keeps this cell re-runnable:
# without it a second run raises KeyError because the column is already gone.
df_cs = df_cs.drop(columns='Unnamed: 0', errors='ignore')

#### Identify unique job titles

In [370]:
unique_jobs = df_cs['Job Title'].unique() # Distinct job titles found in the postings
unique_jobs[4:10] # Show a short slice (positions 4-9) instead of the whole array

array(['Senior Software Engineer', 'Jr. Software Engineer (JAVA, C/C++)',
       'Software Engineer - Remote, USA', 'Software Developer (Back-End)',
       'Full-Stack Software Engineer',
       'L1 Modem Verification and Release Software Engineer'],
      dtype=object)

#### Get senior and junior positions

In [371]:
def seniority(title):
    
    '''Classify a job title as 'senior', 'junior' or 'unspecified'.

    The title is lower-cased and split into words on every non-alphanumeric
    character, so 'Sr.' and 'Jr.' become the words 'sr' and 'jr'. A keyword
    only counts when it equals a whole word: plain substring matching would
    label "Internal Tools Engineer" or "International Payments" as junior
    because both contain 'intern'.

    Parameters
    ----------
    title : str
        Job title text. Any non-string value (e.g. a missing value) is
        reported as 'unspecified'.

    Returns
    -------
    str
        'senior' if any senior keyword is present, otherwise 'junior' if any
        junior keyword is present, otherwise 'unspecified'. Senior keywords
        take precedence when both kinds appear.
    '''
    
    senior_words = {'senior', 'sr', 'lead', 'expert', 'experienced', 'principal'}
    # 'internship' is listed explicitly because whole-word matching no longer
    # catches it through the 'intern' prefix.
    junior_words = {'junior', 'jr', 'intern', 'internship'}
    
    if not isinstance(title, str):
        return 'unspecified'
    
    # Replace punctuation with spaces, then split into a set of words.
    words = set(''.join(ch if ch.isalnum() else ' ' for ch in title.lower()).split())
    
    if words & senior_words:
        return 'senior'
    
    if words & junior_words:
        return 'junior'
    
    return 'unspecified'

In [372]:
df_cs['Seniority'] = df_cs['Job Title'].apply(seniority) # Label every posting with the group returned by seniority()
df_cs.sample(3) # Spot-check three random rows; no random_state is set, so the rows differ between runs

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,Seniority
427,Cyber Engineer Software Reverse Engineer,$80K-$103K (Glassdoor est.),"Job Description\n\nJob Description\n\nBITS, a ...",3.5,CACI International\n3.5,"Sterling, VA","Arlington, VA",10000+ employees,1962,Company - Public,Aerospace & Defense,Aerospace & Defense,$2 to $5 billion (USD),"CSC, ManTech, SAIC",unspecified
222,Software Engineer,$46K-$100K (Glassdoor est.),We're still hiring!\n\nDuring these unique cir...,3.9,AWeber\n3.9,"Chalfont, PA","Chalfont, PA",51 to 200 employees,1998,Company - Private,Internet,Information Technology,Unknown / Non-Applicable,-1,unspecified
19,Controls and Software Engineer,$60K-$63K (Glassdoor est.),Profi-Vision is a system integration company w...,-1.0,Profi Vision,"Allentown, PA","Allentown, PA",1 to 50 employees,-1,Company - Private,-1,-1,$1 to $5 million (USD),-1,unspecified


In [373]:
df_cs.Seniority.value_counts() # Number of postings in each seniority group

unspecified    608
senior         332
junior          60
Name: Seniority, dtype: int64

#### Wrangle salary column

In [374]:
# List the distinct salary strings to see which formats need to be parsed
df_cs.get('Salary Estimate').unique()

array(['$60K-$63K (Glassdoor est.)', '$48K-$102K (Glassdoor est.)',
       '$66K-$110K(Employer est.)', '$60K-$100K(Employer est.)',
       '$91K-$131K (Glassdoor est.)', '$41K-$86K (Glassdoor est.)',
       '$100K-$124K (Glassdoor est.)', '$46K-$100K (Glassdoor est.)',
       '$70K-$130K(Employer est.)', '$70K-$100K(Employer est.)',
       '$47K-$78K (Glassdoor est.)', '$104K-$148K (Glassdoor est.)',
       '$80K-$103K (Glassdoor est.)'], dtype=object)

In [375]:
def salary_simplified(salary):
    '''Turn a salary string such as '$60K-$63K (Glassdoor est.)' into (60, 63).

    Everything from the first '(' onwards is discarded, the 'K' and '$'
    characters are removed, and the remainder is split on '-'. The first two
    pieces are converted with int(), so the numbers keep the units of the
    original text (thousands, given the 'K' suffix).

    Returns a (minimum, maximum) tuple of ints.
    '''
    range_text = salary.partition('(')[0]
    bounds = range_text.translate(str.maketrans('', '', 'K$')).split('-')
    minimum = int(bounds[0])
    maximum = int(bounds[1])
    return minimum, maximum

In [376]:
salary_ranges = df_cs['Salary Estimate'].apply(salary_simplified) # One (minimum, maximum) tuple per posting

#### Add minimum and maximum salary estimate columns

In [377]:
def get_vals(salary_ranges):
    '''Separate (minimum, maximum) pairs into two parallel lists.

    salary_ranges is iterated twice: once collecting element 0 of every pair
    and once collecting element 1.

    Returns a tuple (min_values, max_values) of lists in input order.
    '''
    min_values = [pair[0] for pair in salary_ranges]
    max_values = [pair[1] for pair in salary_ranges]
    return min_values, max_values

In [378]:
# Unpack both lists from a single get_vals() call instead of running the
# whole extraction once per column.
df_cs['Minimum Estimate'], df_cs['Maximum Estimate'] = get_vals(salary_ranges)
df_cs.sample(3) # Spot-check three random rows

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,Seniority,Minimum Estimate,Maximum Estimate
604,Software Developer in Test (SDET),$80K-$103K (Glassdoor est.),IBT Apps is a financial software service provi...,3.6,IBT Apps\n3.6,"Cedar Park, TX","Cedar Park, TX",51 to 200 employees,1999,Company - Private,Banks & Credit Unions,Finance,Unknown / Non-Applicable,-1,unspecified,80,103
758,"Senior Software Engineer, Full Stack (Japanese...",$80K-$103K (Glassdoor est.),DOCOMO Innovations is a R&D arm of NTT DOCOMO ...,-1.0,"DOCOMO Innovations, Inc.","Palo Alto, CA",-1,-1,-1,-1,-1,-1,-1,-1,senior,80,103
521,"Junior R&D Software Engineer (Java, Python)",$80K-$103K (Glassdoor est.),Responsibilities\n\n\nReporting to the program...,3.4,Peraton\n3.4,"Chantilly, VA","Herndon, VA",1001 to 5000 employees,2017,Company - Private,Aerospace & Defense,Aerospace & Defense,$1 to $2 billion (USD),-1,junior,80,103


#### Wrangle location column

In [379]:
# Drop postings whose location is only 'United States' or 'Remote', since
# these cannot be split into a city and a state.
non_specific_loc = df_cs[df_cs['Location'].isin(['United States', 'Remote'])].index
df_cs = df_cs.drop(non_specific_loc)

In [380]:
def split_city_state(location):
    
    '''Split a 'City, ST' location string into (city, state).

    The city is the text before the first comma and the state is the text
    after the last comma. Both parts are stripped of surrounding whitespace;
    without that, 'Valparaiso, IN' yields the state ' IN' with a leading
    space. A location with no comma is returned unchanged as both city and
    state.

    Returns a (city, state) tuple of strings.
    '''
    
    parts = location.split(',')
    city = parts[0].strip()
    state = parts[-1].strip()
    
    return city, state

In [381]:
locations = df_cs['Location'].apply(split_city_state) # One (city, state) tuple per posting

#### Split locations into city and state columns

In [382]:
def get_city_state(locations):
    '''Separate (city, state) pairs into two parallel lists.

    locations is iterated twice: once collecting element 0 of every pair
    and once collecting element 1.

    Returns a tuple (cities, states) of lists in input order.
    '''
    cities = [pair[0] for pair in locations]
    states = [pair[1] for pair in locations]
    return cities, states

In [383]:
# Unpack both lists from a single get_city_state() call instead of running
# the whole extraction once per column.
df_cs['City'], df_cs['State'] = get_city_state(locations)
df_cs.sample(3) # Spot-check three random rows

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,Seniority,Minimum Estimate,Maximum Estimate,City,State
684,Software Developer(.NET),$80K-$103K (Glassdoor est.),"POSTION OVERVIEW\n\nNASCENT Technology, LLC. (...",2.6,Nascent Technology\n2.6,"Charlotte, NC","Charlotte, NC",51 to 200 employees,-1,Company - Private,-1,-1,Unknown / Non-Applicable,-1,unspecified,80,103,Charlotte,NC
721,AWS Data Engineer,$80K-$103K (Glassdoor est.),At Development InfoStructure Inc. (Devis) we a...,4.0,Development InfoStructure\n4.0,"Arlington, VA","Arlington, VA",51 to 200 employees,1992,Company - Private,IT Services,Information Technology,$10 to $25 million (USD),-1,unspecified,80,103,Arlington,VA
597,Senior Software Engineer (Backend),$80K-$103K (Glassdoor est.),Our cross-functional engineering team is growi...,3.3,Endurance International Group\n3.3,"Burlington, MA","Burlington, MA",1001 to 5000 employees,1997,Company - Public,Internet,Information Technology,$1 to $2 billion (USD),-1,senior,80,103,Burlington,MA


In [384]:
# Drop rows whose 'State' value is one of these three strings, none of which
# is a two-letter state abbreviation.
non_state_ab = df_cs[df_cs['State'].isin(['Phoenix', 'New Jersey', 'Wisconsin'])].index
df_cs = df_cs.drop(non_state_ab)

#### Parse through job descriptions for qualifications

In [340]:
text = df_cs.get('Job Description')[0].replace('\n', ' ') # Description stored under index label 0, with newlines turned into spaces
text

"Job Description  CSI’s Payments Software Engineers provide technical support for multiple finance applications.In this position you will provide highly technical solutions in the development of applications to solve basic to complex problems.You will work cross-functionally with other departments to evaluate product requirements and ensure that data and reports are processed accurately and in a timely manner for CSI customers.  Job Responsibilities Responsible for developing, implementing, and maintaining multiple applications Test and release updates to code in sprints according to Agile standards Develop and maintain a keen functional understanding of the supported applications Execute and document detailed test plans and contribute to peer code reviews Understand complexities of moving data among various interfacing applications and platforms Plan and report activities based on leadership directive Document programs according to Software Engineering Group standards Demonstrate an a

In [408]:
def bachelor(description):
    
    '''Report whether a job description mentions a bachelor's degree.

    The text is lower-cased and its newlines are replaced with spaces, then
    searched for the keywords 'bachelor' and ' bs ' (padded with spaces so it
    only matches the stand-alone abbreviation). Every keyword is checked: an
    early `return 'no'` inside the loop would stop after 'bachelor' and never
    test ' bs '.

    Note that this detects a mention only; it does not tell whether the
    degree is required or merely preferred.

    Parameters
    ----------
    description : str
        Job description text. Any non-string value (e.g. a missing value)
        is reported as 'no'.

    Returns
    -------
    str
        'yes' if any keyword occurs in the description, otherwise 'no'.
    '''
    
    if not isinstance(description, str):
        return 'no'
    
    # Normalise once, rather than once per character of the text.
    description = description.lower().replace('\n', ' ')
    
    bs_degree = ['bachelor', ' bs ']
    
    if any(keyword in description for keyword in bs_degree):
        return 'yes'
    
    return 'no'
            

In [409]:
bs_yes_no = df_cs.get('Job Description').apply(bachelor) # 'yes'/'no' flag per posting, as returned by bachelor()

In [412]:
df_cs['BS Required'] = bs_yes_no # Attach the 'yes'/'no' flags computed by bachelor()
df_cs.head(10) # Preview the first ten rows instead of rendering the entire DataFrame

Unnamed: 0,Job Title,Salary Estimate,Job Description,Rating,Company Name,Location,Headquarters,Size,Founded,Type of ownership,Industry,Sector,Revenue,Competitors,Seniority,Minimum Estimate,Maximum Estimate,City,State,BS Required
0,Software Engineer,$60K-$63K (Glassdoor est.),Job Description\n\nCSI’s Payments Software Eng...,4.0,Computer Services\n4.0,"Valparaiso, IN","Paducah, KY",1001 to 5000 employees,1965,Company - Public,Financial Transaction Processing,Finance,$100 to $500 million (USD),-1,unspecified,60,63,Valparaiso,IN,yes
1,Software Engineer,$60K-$63K (Glassdoor est.),We are AAM. We have the POWER to move the worl...,3.3,American Axle & Manufacturing\n3.3,"Detroit, MI","Detroit, MI",10000+ employees,1994,Company - Public,Transportation Equipment Manufacturing,Manufacturing,$5 to $10 billion (USD),-1,unspecified,60,63,Detroit,MI,yes
2,Software Engineer,$60K-$63K (Glassdoor est.),Preferred Qualifications\nA strong foundation ...,3.9,Quicken Loans\n3.9,"Detroit, MI","Detroit, MI",10000+ employees,1985,Company - Private,Lending,Finance,$10+ billion (USD),"Citi, Bank of America, Wells Fargo",unspecified,60,63,Detroit,MI,no
3,Test Engineer,$60K-$63K (Glassdoor est.),"Secure our Nation, Ignite your Future\n\nBecom...",4.1,ManTech International Corporation\n4.1,"Clarksburg, WV","Herndon, VA",5001 to 10000 employees,1968,Company - Public,Research & Development,Business Services,$1 to $2 billion (USD),-1,unspecified,60,63,Clarksburg,WV,no
4,Lead Embedded Software Engineer - Wearables,$60K-$63K (Glassdoor est.),Job Description\n\n\nBose Corporation’s Consum...,3.6,Bose\n3.6,"Framingham, MA","Framingham, MA",5001 to 10000 employees,1964,Company - Private,Consumer Products Manufacturing,Manufacturing,$2 to $5 billion (USD),-1,senior,60,63,Framingham,MA,no
5,Software Engineer II,$60K-$63K (Glassdoor est.),Software Engineer II\n\nThis Software Engineer...,3.0,Kryterion Inc\n3.0,"Phoenix, AZ","Phoenix, AZ",51 to 200 employees,2001,Company - Private,IT Services,Information Technology,$10 to $25 million (USD),"Pearson VUE, Prometric, PSI Services",unspecified,60,63,Phoenix,AZ,no
6,Software Engineer,$60K-$63K (Glassdoor est.),"Who We Are:\nBectran, Inc. was founded in 2010...",2.7,Bectran\n2.7,"Schaumburg, IL","Schaumburg, IL",51 to 200 employees,2010,Company - Private,Computer Hardware & Software,Information Technology,$10 to $25 million (USD),-1,unspecified,60,63,Schaumburg,IL,yes
7,Senior Software Engineer,$60K-$63K (Glassdoor est.),"Buildium is looking for smart, driven, enthusi...",4.8,Buildium\n4.8,"Boston, MA","Boston, MA",51 to 200 employees,2004,Company - Private,Computer Hardware & Software,Information Technology,Unknown / Non-Applicable,-1,senior,60,63,Boston,MA,yes
8,"Jr. Software Engineer (JAVA, C/C++)",$60K-$63K (Glassdoor est.),Overview\n\n\nPeraton is seeking a highly moti...,3.4,Peraton\n3.4,"Aurora, CO","Herndon, VA",1001 to 5000 employees,2017,Company - Private,Aerospace & Defense,Aerospace & Defense,$1 to $2 billion (USD),-1,junior,60,63,Aurora,CO,yes
11,Software Engineer,$60K-$63K (Glassdoor est.),"Overview\n\n\nWith over 10,000 online merchant...",4.4,ReCharge Payments\n4.4,"Phoenix, AZ","Santa Monica, CA",51 to 200 employees,2015,Company - Private,Enterprise Software & Network Solutions,Information Technology,Unknown / Non-Applicable,-1,unspecified,60,63,Phoenix,AZ,yes


In [394]:
job = df_cs.get('Job Description')[2] # Raw description stored under index label 2, newlines left in place
job

'Preferred Qualifications\nA strong foundation in programming fundamentals, design patterns, data structures, object-oriented design principles, unit testing, and modern version control flows\nWillingness to learn new languages and technologies\n2 years of development experience in a multilayered n-tier style architecture\nComfort with one or more of the following languages: C# (or other .NET language), JavaScript (either back-end, e.g., NodeJS, and/or front-end, such as Angular, React, or Vue), Java (or other JVM-based language), PHP, Python, Ruby, Progress, Erlang/Elixir, or others like these.\nExperience working with some form of distributed technology like gRPC, GraphQL, or REST over HTTP.\nUnderstanding of and experience working in a continuous integration and continuous delivery environment\nExperience working with software automation frameworks to do functional testing\nExperience developing applications in a cloud environment, such as AWS or Azure\nExperience using DevOps-focus