# Job Descriptions Processor (ﾉ>ω<)ﾉ

In [1]:
# Render plots inline so that their output is stored in the notebook.
%matplotlib inline

import sys
sys.path.insert(0, 'modules/')
from fintextmin import *

import nltk
import pandas
import matplotlib
import numpy as np
from collections import OrderedDict
from matplotlib import pyplot as plt
matplotlib.style.use('ggplot')

## Helper functions
1. Minimum/preferred degree requirements
2. Minimum/preferred work experience requirements
3. Minimum/preferred programming language requirements

In [2]:
# Degree keywords in priority order: the first one found in the text wins,
# so 'MBA' must stay ahead of 'BA'.
keywords = ['PhD', 'Master', 'MBA', 'BA', 'BS', 'Bachelor']


def get_degree(s: str) -> str:
    """Return the first degree keyword from ``keywords`` mentioned in *s*.

    Keywords are tried in list order, not in order of appearance in the
    text. A keyword only counts when it starts a token (it is not preceded
    by a letter) and is not immediately followed by an upper-case letter.
    This keeps "Masters", "Bachelor's" and "BSc" matching while rejecting
    accidental hits such as "DBA", "JOBS" or "BASIC".

    :param s: qualification text; any non-string value (e.g. a NaN cell)
        is treated as "no degree mentioned".
    :return: the matching keyword, or the string ``'None'`` when nothing
        matches (a string, not the ``None`` object).
    """
    if not isinstance(s, str):
        return 'None'

    # Local import: `re` is not among the file's top-level imports.
    import re

    for keyword in keywords:
        pattern = r'(?<![A-Za-z])' + re.escape(keyword) + r'(?![A-Z])'
        if re.search(pattern, s):
            return keyword
    return 'None'  # yes, return a string

In [3]:
def atoi(s):
    """
    Parse a leading base-10 integer from a string, in the style of C ``atoi``.

    Surrounding whitespace is ignored, a single optional ``+`` or ``-`` sign
    is accepted, and parsing stops at the first character that is not an
    ASCII digit. The result is clamped to the signed 32-bit range.
    Empty, whitespace-only, or digit-less input yields 0.

    :type s: str
    :rtype: int
    """
    if not s:
        return 0
    s = s.strip()
    # Whitespace-only input becomes empty after stripping; without this
    # guard the sign check below would raise IndexError on s[0].
    if not s:
        return 0

    sign = 1
    if s[0] == '-':
        sign = -1
        s = s[1:]
    elif s[0] == '+':
        s = s[1:]

    number = 0
    for c in s:
        if not '0' <= c <= '9':
            break
        number = 10 * number + ord(c) - ord('0')

    # Clamp to [-2**31, 2**31 - 1].
    return max(-2147483648, min(2147483647, sign * number))

def get_work_exp(s: str) -> int:
    """Return the first "N year" / "N+ year" figure mentioned in *s*.

    Only the first occurrence in the text is used. The pattern also matches
    "years", since it only requires the text to continue with " year".

    :param s: qualification text; any non-string value (e.g. a NaN cell)
        is treated as "no experience requirement".
    :return: the number of years as an int, or 0 when no figure is found.
    """
    if not isinstance(s, str):
        return 0

    import re

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # The capture group isolates the digits so int() can parse them directly.
    match = re.search(r'(\d+)\+? year', s)
    return int(match.group(1)) if match else 0

In [4]:
# Canonical language names, in the order they are reported.
langs = [
    'C++', 'Java', 'Python', 'Javascript', 'Go', 'PHP',
    'SQL', 'Ruby', 'Swift', 'Kotlin', 'C#', 'Objective C'
]

# Regex fragments for names that are commonly spelled more than one way,
# keyed by the canonical name used in `langs`.
_lang_variants = {
    'Javascript': r'Java[Ss]cript',
    'Objective C': r'Objective[- ]C',
}


def get_langs(s: str) -> str:
    """Return the languages from ``langs`` mentioned in *s*, comma-joined.

    A name only counts when it stands as its own token: it must not be
    preceded by a letter or digit, nor followed by a letter, digit, ``+``
    or ``#``. This prevents "Google" or "Good" from reporting ``Go`` and
    "JavaScript" from reporting ``Java``. A consequence is that a name
    embedded in a longer word (e.g. "MySQL") is not counted either.
    Matching is case-sensitive apart from the variants listed in
    ``_lang_variants``.

    :param s: qualification text; any non-string value (e.g. a NaN cell)
        is treated as "no languages mentioned".
    :return: matching names joined by ``','`` in ``langs`` order, or the
        string ``'None'`` when nothing matches.
    """
    if not isinstance(s, str):
        return 'None'

    # Local import: `re` is not among the file's top-level imports.
    import re

    found = []
    for lang in langs:
        core = _lang_variants.get(lang, re.escape(lang))
        pattern = r'(?<![A-Za-z0-9])' + core + r'(?![A-Za-z0-9+#])'
        if re.search(pattern, s):
            found.append(lang)
    return ','.join(found) if found else 'None'

## Append columns

In [7]:
def process_dataframe_from_csv(filename: str):
    """Read a job-postings CSV, add derived requirement columns, and save it.

    Six columns are appended, in this order: ``min_degree_req``,
    ``pref_degree_req``, ``min_work_exp_req``, ``pref_work_exp_req``,
    ``min_langs_req`` and ``pref_langs_req``. The result is written to
    ``filename + '_new.csv'``; nothing is returned.

    :param filename: path of the CSV to read. When it contains ``'apple'``,
        both degree columns are derived from the ``education&experience``
        column instead of ``minimum_qual`` / ``preferred_qual``.
    """
    frame = pandas.read_csv(filename)

    # Pick the source columns for the degree requirements.
    if 'apple' in filename:
        min_degree_src = pref_degree_src = 'education&experience'
    else:
        min_degree_src, pref_degree_src = 'minimum_qual', 'preferred_qual'

    # (new column, source column, extractor), applied in this exact order.
    derived_columns = (
        # `min_degree_req` and `pref_degree_req`
        ('min_degree_req', min_degree_src, get_degree),
        ('pref_degree_req', pref_degree_src, get_degree),
        # `min_work_exp_req` and `pref_work_exp_req`
        ('min_work_exp_req', 'minimum_qual', get_work_exp),
        ('pref_work_exp_req', 'preferred_qual', get_work_exp),
        # `min_langs_req` and `pref_langs_req`
        ('min_langs_req', 'minimum_qual', get_langs),
        ('pref_langs_req', 'preferred_qual', get_langs),
    )
    for target, source, extract in derived_columns:
        frame[target] = [extract(cell) for cell in frame[source]]

    frame.to_csv(filename + '_new.csv')

In [8]:
# Process each company's postings file under the csv/ directory.
for filename in ('google_jobs.csv', 'apple_jobs.csv', 'facebook_jobs.csv'):
    csv_path = 'csv/' + filename
    process_dataframe_from_csv(csv_path)