In [None]:
import json
from collections import Counter
from tabulate import tabulate
import matplotlib.pyplot as plt
import numpy as np

# Load the offers file; `data` is iterated below as a sequence of offer dicts.
# To inspect a single record: print(json.dumps(data[0], indent=4, ensure_ascii=False))
data = None
with open('./data/result_offers.json', 'r', encoding="utf-8") as f:
    text = f.read()
data = json.loads(text)

# Flatten the per-offer requirement lists into three notebook-wide lists.
musts, nices, langs = [], [], []
for item in data:
    requirements = item["requirements"]
    musts += requirements["musts"]
    nices += requirements["nices"]
    langs += requirements["languages"]


In [None]:
def get_avg_salary(item):
    """Return the mean of an offer's permanent-contract salary range.

    Args:
        item: Offer record. Reads ``item["essentials"]["salary"]["currency"]``
            and ``item["essentials"]["salary"]["types"]["permanent"]["range"]``.

    Returns:
        ``{"currency": <currency>, "avg": <mean of the range values>}``, or
        ``None`` when the permanent range is missing, empty, or contains
        values that cannot be converted with ``int``.

    Raises:
        KeyError: if the essentials/salary/currency keys are absent; that
            lookup is not guarded.
    """
    currency = item["essentials"]["salary"]["currency"]
    try:
        bounds = item["essentials"]["salary"]["types"]["permanent"]["range"]
        return {"currency": currency, "avg": sum(map(int, bounds)) / len(bounds)}
    except (LookupError, TypeError, ValueError, ArithmeticError):
        # Only data-shape problems mean "no usable salary": a missing key, a
        # None/non-container value, non-numeric bounds, or an empty range.
        # Anything else (e.g. KeyboardInterrupt) must propagate.
        return None


def plot_against_salary(data, get_attr, title):
    """Scatter-plot average salaries, with one x position per category.

    Args:
        data: Iterable of offer records accepted by ``get_avg_salary``.
        get_attr: Callable mapping an offer to its category label. Labels
            must be sortable; they are used as legend entries.
        title: Plot title.

    Raises:
        ValueError: if any offer with a usable salary is not in PLN.
    """
    # Keep every label paired with its own salary, so that dropping offers
    # without a usable salary cannot shift labels onto the wrong points.
    pairs = [(get_attr(item), get_avg_salary(item)) for item in data]
    kept = [(attr, salary) for attr, salary in pairs if salary is not None]
    print(f"Rejecting {len(pairs) - len(kept)} items.\nPlotting {len(kept)} items.")

    if any(salary["currency"] != "PLN" for _, salary in kept):
        raise ValueError("Found non PLN salary range")

    # Categories come from all offers, so x positions and legend entries stay
    # stable even if a category has no usable salaries.
    categories = sorted({attr for attr, _ in pairs})
    for i, category in enumerate(categories):
        # Only the salaries that belong to this category go into its column.
        ys = [salary["avg"] for attr, salary in kept if attr == category]
        xs = [i] * len(ys)
        plt.scatter(xs, ys, s=60, label=category, alpha=0.1, edgecolors='none')
    plt.ylabel('Average salary [PLN]')
    plt.legend()
    # One integer slot per category, with a one-unit margin on both sides.
    plt.xlim(-1, len(categories))
    plt.title(title)


# Split offers by whether the location is flagged as remote.
plot_against_salary(
    data,
    get_attr=lambda offer: "Yes" if offer["location"]["remote"] else "No",
    title="Salary vs remote work",
)

In [None]:
# Report how many entries the flattened must-have and nice-to-have lists hold
# (one entry per listed requirement, not per distinct skill).
print(f"Musts: {len(musts)}\nNices: {len(nices)}")

def plot_reqs(reqs, title, col="plasma", top_n=50):
    """Draw a horizontal bar chart of the most frequently listed skills.

    Args:
        reqs: Iterable of requirement dicts; only each entry's ``"value"``
            is read.
        title: Chart title.
        col: ``"plasma"`` selects the plasma colormap; any other value
            selects viridis.
        top_n: Maximum number of skills to show, most frequent first.
    """
    counter = Counter(req["value"] for req in reqs)
    # most_common() is ordered by descending count. barh puts its first entry
    # at the bottom, so reverse to get the most frequent skill on top.
    ranked = counter.most_common(top_n)[::-1]
    names = [name for name, _ in ranked]
    values = [count for _, count in ranked]

    cmap = plt.cm.plasma if col == "plasma" else plt.cm.viridis
    # Stop at 0.7 so only the lower part of the colormap is used.
    colors = cmap(np.linspace(0, 0.7, len(names)))

    # Roughly a quarter inch per bar. Clamp to 1 because integer division
    # would give a height of 0 for fewer than four skills.
    plt.figure(figsize=(12, max(1, len(names) // 4)))
    bars = plt.barh(names, values, color=colors)

    # Print each count just past the end of its bar.
    for bar, value in zip(bars, values):
        plt.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2, f'{value}',
                 va='center', ha='left')

    plt.margins(y=0.01)
    plt.xlabel('Appearances')
    plt.ylabel('Skill')
    plt.title(title)


# One chart per requirement list, each with its own colormap.
plot_reqs(reqs=musts, title="Must have skills appearances", col="plasma")
plot_reqs(reqs=nices, title="Nice to have skill appearances", col="viridis")

In [None]:
# Language requirements: overall counts, then a level-by-language table.
# With no offers loaded there is no meaningful average, so report NaN instead
# of dividing by zero.
avg_langs = len(langs) / len(data) if len(data) else float("nan")
print(len(langs), len(data), f"\nAvg lang per offer: {avg_langs:.3f}\n")
counter = Counter(lang["code"] for lang in langs)
# Columns are language codes, most frequent first.
names = sorted(counter, key=lambda code: -counter[code])
levels = ["NA", "A1", "A2", "B1", "B2", "C1", "C2", "NATIVE"]
# Count every (code, level) pair in a single pass; entries without a "level"
# key are counted under "NA".
level_counter = Counter((lang["code"], lang.get("level", "NA")) for lang in langs)
table = [[level] + [level_counter[(code, level)] for code in names] for level in levels]
# "Total" counts every entry per code, including any whose level is not in
# `levels`, so a column's rows may sum to less than its total.
table.append(["Total"] + [counter[code] for code in names])
print(tabulate(table, headers=[""] + names))