<a href="https://colab.research.google.com/github/ayundina/job_posts_analysis/blob/main/visualize_key_phrases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import ast

def str_to_dict(dict_str: str) -> dict:
  """Parse the text of a Python dict literal into a real dict.

  Parsing is best-effort: any input that cannot be evaluated as a literal,
  or that evaluates to something other than a dict, is reported on stdout
  and replaced by an empty dict so that callers always receive a dict.

  Args:
    dict_str: Text expected to contain a dict literal, e.g. "{'EDU': ['BSc']}".

  Returns:
    The parsed dict, or {} when the input is not a valid dict literal.
  """
  try:
    parsed = ast.literal_eval(dict_str)
  except Exception as e:
    # Deliberately broad: one malformed row must not abort the whole run.
    print(f"Exception in str_to_dict - {e}")
    print(f"dict_str - {dict_str}")
    return {}
  if not isinstance(parsed, dict):
    # literal_eval also accepts lists, numbers, strings, ...; honour the
    # declared return type instead of handing a non-dict to callers.
    print(f"Exception in str_to_dict - expected a dict, got {type(parsed).__name__}")
    print(f"dict_str - {dict_str}")
    return {}
  return parsed

In [None]:
# The DataFrame annotation is written as a string so that defining this
# function does not require `pd` to be imported yet (pandas is imported in a
# later cell of this notebook).
def get_phrases_from_df(df: "pd.DataFrame", col: str) -> list:
  """Convert every value of one DataFrame column into a dict.

  Args:
    df: Data frame holding the column to convert.
    col: Name of the column whose values are dict literals stored as text.

  Returns:
    A list with one dict per row, in row order. Rows that cannot be parsed
    become whatever `str_to_dict` returns for them.
  """
  return [str_to_dict(key_phrases) for key_phrases in df[col].tolist()]

In [None]:
import itertools
from collections import Counter, OrderedDict

def combine_requirements_by_topic(topic: str, key_requirements: list, top_n: int = 20) -> OrderedDict:
  """Count how often each phrase appears under `topic` across all postings.

  Args:
    topic: Dictionary key to collect phrases from (e.g. 'EDU').
    key_requirements: List of dicts mapping a topic to an iterable of
      phrases. Empty dicts are skipped, and dicts that lack `topic` (or map
      it to None) contribute no phrases.
    top_n: Number of most frequent phrases to keep; None keeps all of them.

  Returns:
    An OrderedDict of phrase -> count, ordered from most to least frequent.
  """
  # `or []` covers both a missing topic and an explicit None value, either of
  # which would otherwise make the chaining step fail on a non-iterable.
  phrases_per_posting = (req.get(topic) or [] for req in key_requirements if req)
  counted = Counter(itertools.chain.from_iterable(phrases_per_posting))
  return OrderedDict(counted.most_common(top_n))

# **Let's start here**
Load the CSV file into a pandas DataFrame.

In [None]:
import pandas as pd

# Location of the input data; `file_to_read` carries its own leading slash,
# so the two parts are joined by plain concatenation.
# NOTE(review): the path looks like a mounted Google Drive folder in Colab -
# confirm the drive is mounted before running this cell.
folder = "/content/drive/MyDrive/data_science_jobs"
file_to_read = "/data_science_jobs.csv"

# read_csv is a function of the pandas module (`pd`); calling it on `df`
# failed because `df` does not exist before this assignment.
df = pd.read_csv(f"{folder}{file_to_read}")
# Show the first two rows as a quick sanity check of the loaded data.
df.head(2)

Convert strings to dictionaries and group key phrases by topic

In [None]:
import operator
from collections import Counter, OrderedDict

# get_phrases_from_df requires the name of the column to parse; calling it
# with only `df` raises a TypeError.
# NOTE(review): "key_phrases" is an assumed column name - confirm it against
# the header of the CSV file and adjust if needed.
key_phrases_col = "key_phrases"

# One dict per job post, then one phrase -> count mapping per topic.
key_phrases = get_phrases_from_df(df, key_phrases_col)
edu = combine_requirements_by_topic('EDU', key_phrases)
exp = combine_requirements_by_topic('EXP', key_phrases)
tech = combine_requirements_by_topic('TECH', key_phrases)
tool = combine_requirements_by_topic('TOOL', key_phrases)

In [None]:
import matplotlib.pyplot as plt

def set_color(ax: plt.Axes, color: str) -> None:
  """Paint the x-axis label, the ticks and all four spines of `ax` in `color`.

  Only the x-axis label is recolored; the y-axis label is left untouched.

  Args:
    ax: Axes to restyle (modified in place).
    color: Color to apply.
  """
  ax.xaxis.label.set_color(color)
  # `colors` sets both the tick marks and the tick labels.
  for axis in ('x', 'y'):
    ax.tick_params(axis=axis, colors=color)
  for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color(color)

def set_subplt(ax: plt.Axes, x: list, y: list, title: str, *,
               color: str = 'grey', bar_color: str = 'lightgrey') -> None:
  """Draw a titled bar chart on `ax` and apply the shared color styling.

  Args:
    ax: Axes to draw on (modified in place).
    x: Bar labels, one per bar.
    y: Bar heights, in the same order as `x`.
    title: Title shown above the chart.
    color: Color of the title and, via `set_color`, of the axis styling.
    bar_color: Fill color of the bars.
  """
  ax.bar(x, y, color=bar_color)
  ax.set_title(title, color=color)
  # Tilt the x tick labels and right-align them.
  plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
  set_color(ax, color)
# **Finally! It's time to plot**

In [None]:
# A 2x2 grid of bar charts, one per topic.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10))

# Each entry pairs an axes with the phrase counts to draw on it and its title.
panels = (
  (ax1, edu, "Education"),
  (ax2, exp, "Experience"),
  (ax3, tech, "Technical Skills"),
  (ax4, tool, "Tools"),
)
for panel_ax, counts, panel_title in panels:
  set_subplt(panel_ax, list(counts.keys()), list(counts.values()), panel_title)

plt.tight_layout()
plt.show()