# FER ljetne prakse - duljine praksi

## Priprema

In [1]:
from pathlib import Path
import os

In [2]:
# The project root is taken to be the parent of the current working directory.
current_folder = Path.cwd()
root_folder = current_folder.parent

In [3]:
import datetime
import json
import sys

In [4]:
from tqdm import tqdm

## Učitavanje podataka

In [5]:
# Location of the edited JSON dump inside the project's "data" folder.
dump_path = root_folder / "data" / "20200518_205700_edited.json"

# Read the whole file as UTF-8 text; bytes that cannot be decoded are
# substituted (errors="replace") instead of raising, then parse the JSON.
data = json.loads(dump_path.read_text(encoding="utf8", errors="replace"))

In [6]:
# Report how many top-level entries (one per employer) the loaded dump holds.
print(f"Broj poslodavaca: {len(data)}")

Broj poslodavaca: 83


## Obrada

In [7]:
# Sentinel stored in place of a duration when a job has no usable date range.
UNDEFINED_DURATION = -1


def _parse_date(text):
    """Turn a dot-separated ``day.month.year`` string into a ``datetime.date``.

    Empty fragments between dots (for example the one produced by a trailing
    dot) are skipped before the numeric parts are converted.
    """
    parts = [int(part) for part in text.split(".") if part]
    # The text lists day first, while datetime.date expects year first.
    return datetime.date(*reversed(parts))


def _job_duration(job):
    """Return the number of days between a job's "start" and "end" dates.

    ``UNDEFINED_DURATION`` is returned when either date is missing or empty,
    and also when the end date precedes the start date.
    """
    start = job.get("start")
    end = job.get("end")

    if not start or not end:
        return UNDEFINED_DURATION

    days = (_parse_date(end) - _parse_date(start)).days

    # A negative span is not a real duration: a value of exactly -1 would be
    # indistinguishable from the sentinel and anything lower would match no
    # duration bucket at all, so inverted ranges are reported as undefined.
    return days if days >= 0 else UNDEFINED_DURATION


# Maps each employer name to the list of durations (in days) of its jobs.
features = {}

for company_name, properties in tqdm(
    data.items(), desc="Obrađujem poslodavce", file=sys.stdout
):
    jobs = properties.get("jobs")

    # Employers without a "jobs" entry are left out of the result entirely.
    if jobs is None:
        continue

    features[company_name] = [_job_duration(job) for job in jobs]

Obrađujem poslodavce: 100%|██████████| 83/83 [00:00<00:00, 46485.14it/s]


In [8]:
# Number of one-week buckets before everything longer is lumped together.
week_resolution = 8

# Day ranges as (low, high) pairs: the (-1, 0) pair holds the "undefined"
# sentinel, then one pair per week, then a catch-all whose upper bound
# (2**32) is simply a very large number of days.
buckets = [
    (-1, 0),
    *((7 * week, 7 * week + 7) for week in range(week_resolution)),
    (7 * week_resolution, 2**32),
]

# Display names, positionally aligned with `buckets`.
bucket_names = [
    "nedefinirano",
    "manje od tjedna",
    *(f"između {week} i {week + 1} tjedna" for week in range(1, week_resolution)),
    f"{week_resolution} tjedana ili više",
]

In [9]:
# Display name for every bucket, paired up by position.
bucket2name = dict(zip(buckets, bucket_names))
# Each bucket starts with its own empty set of employer names.
bucket2employee = {bucket: set() for bucket in buckets}

In [10]:
# Register every employer in each bucket that one of its job durations falls
# into; a bucket (low, high) matches when low <= days < high.
for company_name, days_list in features.items():
    for days in days_list:
        for low, high in buckets:
            if not (low <= days < high):
                continue
            bucket2employee[(low, high)].add(company_name)

In [11]:
# (bucket name, alphabetically sorted employers) pairs in ascending bucket
# order; buckets that collected no employers are omitted.
results = tuple(
    (bucket2name[bucket], sorted(bucket2employee[bucket]))
    for bucket in sorted(bucket2employee)
    if bucket2employee[bucket]
)


In [12]:
# Print each bucket as a heading followed by its tab-indented employers,
# with an empty line after every bucket.
for bucket_name, company_names in results:
    print(
        f"{bucket_name}:",
        *(f"\t{company_name}" for company_name in company_names),
        sep="\n",
    )
    print()

nedefinirano:
	ALTPRO
	ATP Projektiranje
	IN2
	InSky Solutions
	Xylon

manje od tjedna:
	INETEC - Institut za nuklearnu tehnologiju
	INTIS ENGINEERING DOO

između 2 i 3 tjedna:
	HŽ Infrastruktura
	KONČAR - INŽENJERING ZA ENERGETIKU I TRANSPORT D.D.
	KRAKEN
	S.C.A.N.
	Span

između 3 i 4 tjedna:
	8 Sigma
	APIS IT
	DOK-ING
	Hrvatska radiotelevizija (HRT)
	HŽ Infrastruktura
	Netgen
	Poslovna inteligencija
	Span

između 4 i 5 tjedna:
	Elpos
	GDi
	HEP-Operator distribucijskog sustava
	Hrvatski operator prijenosnog sustava
	Intea d.d.
	KONČAR - INŽENJERING ZA ENERGETIKU I TRANSPORT D.D.
	Kod Biro
	Sedam IT
	SevenofNine d.o.o. (Shape)
	Span

između 5 i 6 tjedna:
	AVL-AST
	MEGATREND POSLOVNA RJEŠENJA
	MICRO-LINK

između 6 i 7 tjedna:
	AVL-AST
	INFIGO IS
	Tehnozavod-Marušić
	Triple-Inovacije

između 7 i 8 tjedna:
	APIS IT
	Hrvatska radiotelevizija (HRT)
	Tehnozavod-Marušić
	divIT / Delta Reality

8 tjedana ili više:
	APIS IT
	ATOS Convergence Creators
	Ai Data Labs
	Artronic
	CROZ
	Comminus
	Cus