In [1]:
import json
from elasticsearch import Elasticsearch

In [2]:
# Read the cluster endpoint and the access key pair from a JSON file one
# directory up, so that no secret is written into a notebook cell.
with open('../credentials.json') as credentials_file:
    credentials = json.load(credentials_file)

# Pull out the three settings the client needs, in a fixed order.
bonsai_url, access_key, access_secret = (
    credentials[key] for key in ('bonsai_url', 'access_key', 'access_secret')
)


In [3]:
# Build the client against the configured endpoint using HTTP basic auth,
# sending an explicit JSON content type with every request.
es = Elasticsearch(
    hosts=[bonsai_url],
    basic_auth=(access_key, access_secret),
    headers={'Content-Type': 'application/json'},
)

# Ask the server for its info document and report the version it returns;
# this doubles as a quick check that the connection works.
info = es.info()
version_number = info['version']['number']
print(f"Elasticsearch version: {version_number}")

Elasticsearch version: 7.2.0


In [24]:
# Skill ids to search for.
terms = [1, 2]

# Painless script evaluated per matching document: it counts how many of
# `params.terms` occur in the document's `skill_ids` doc values and returns
# that count divided by the number of requested terms.
script_source = """
double count = 0.0; 
for (long term : params.terms) { 
  if (doc['skill_ids'].contains(term)) { 
    count++; 
    } 
  } 
return count/ (double) params.terms.length;
"""

# Candidate selection: documents whose `skill_ids` contain any requested term.
skill_filter = {"terms": {"skill_ids": terms}}

# Scoring function: the script above, given the same term list as a parameter.
overlap_score = {
    "script_score": {
        "script": {
            "source": script_source,
            "params": {"terms": terms},
        }
    }
}

# Final request body: a function_score query combining the filter and scorer.
query = {
    "query": {
        "function_score": {
            "query": skill_filter,
            "functions": [overlap_score],
        }
    }
}

In [25]:
# Index that the search below runs against.
# NOTE(review): the numeric suffix looks like a creation timestamp — confirm this is still the live index.
index_name = "learning_opportunities_production_20230811135215998"

In [31]:
# Execute the scored skill query against the chosen index, requesting up to
# 1000 hits in a single response.
response = es.search(
    index=index_name,
    body=query,
    size=1000,
)

In [36]:
# List the field names present in the first hit's `_source` document.
first_source = response['hits']['hits'][0]['_source']
for field_name in first_source:
    print(field_name)

course
provider
lo_provider_name
certificate
learnin_opportunity
en_topics
de_topics
en_study_areas
de_study_areas
en_skills
de_skills
en_impact_levers
de_impact_levers
learning_targets_description
structure_description
countries
states
job_skill_levels
study_area_ids
language_ids
impact_lever_ids
topic_ids
tag_ids
target_group_ids
lo_type_id
certificate_type_id
learning_type_ids
provider_type_id
price
schedule_type_id
required_number_years_of_experience
primary_education_id
career_level_id
provider_id
week_effort_type_id
total_effort_type_id
duration_type_id
duration
location
learning_opportunity_skills
skill_ids
latest_start_date
active
bookable
has_ref_url
has_reviews
level_id
internal
provider_group_id
es_learning_type


In [34]:
# Gather the distinct `level_id` values across all returned hits; falsy
# values (such as a missing level) are skipped.
# For ad-hoc inspection, each hit also exposes `_score` and, in `_source`,
# `course`, `skill_ids` and `job_skill_levels`.
levels = {
    hit['_source']['level_id']
    for hit in response['hits']['hits']
    if hit['_source']['level_id']
}
print(levels)

{1, 2, 3}


In [17]:
# Locations of the raw evrlearn JSON exports (job profiles and job skills),
# relative to the notebook's working directory.
evrlearn_job_profiles_path = "../data/raw/evrlearn/job_profiles.json"
evrlearn_job_skills_path = "../data/raw/evrlearn/job_skills.json"

In [18]:
# Load both raw exports into memory.
# Each file is opened in a `with` block so its handle is closed as soon as
# parsing finishes, instead of lingering until garbage collection.
# The encoding is set explicitly because the data contains non-ASCII text
# (German umlauts) and the platform default encoding is not always UTF-8.
with open(evrlearn_job_profiles_path, "r", encoding="utf-8") as profiles_file:
    job_profiles = json.load(profiles_file)

with open(evrlearn_job_skills_path, "r", encoding="utf-8") as skills_file:
    job_skills = json.load(skills_file)

In [23]:
# Display the loaded skill catalogue; the entries shown carry `id`, `name`,
# `skill_group` and `skill_group_id`.
job_skills

[{'id': 1,
  'name': 'Kommunikation',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 2,
  'name': 'Selbstmanagement/Führen der eigenen Person',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 3,
  'name': 'Soziale Kompetenzen',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 4,
  'name': 'Gestaltung und Problemlösung',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 5,
  'name': 'Umgang mit Komplexität',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 6,
  'name': 'Abstraktionsvermögen',
  'skill_group': 'Personal skills',
  'skill_group_id': 1},
 {'id': 7,
  'name': 'Betriebswirtschaftliche Zusammenhänge',
  'skill_group': 'Social skills',
  'skill_group_id': 2},
 {'id': 8,
  'name': 'Management der ICT',
  'skill_group': 'Professional skills',
  'skill_group_id': 3},
 {'id': 9,
  'name': 'Projektmanagement',
  'skill_group': 'Professional skills',
  'skill_group_id': 3},
 {'id': 10,
 

In [30]:
# Inspect a single job profile (list index 7) to see its top-level fields and
# the nested `job_profile_skills` entries.
job_profiles[7]

{'id': 8,
 'name': 'ICT System Specialist',
 'level_id': 3,
 'level': 'Senior',
 'job_profile_skills': [{'id': 113,
   'job_profile_id': 8,
   'job_skill_id': 1,
   'level_id': 1,
   'created_at': '2023-07-25T09:51:25.050+02:00',
   'updated_at': '2023-07-25T09:51:25.050+02:00',
   'certificate': False},
  {'id': 114,
   'job_profile_id': 8,
   'job_skill_id': 2,
   'level_id': 2,
   'created_at': '2023-07-25T09:51:25.052+02:00',
   'updated_at': '2023-07-25T09:51:25.052+02:00',
   'certificate': False},
  {'id': 115,
   'job_profile_id': 8,
   'job_skill_id': 3,
   'level_id': 2,
   'created_at': '2023-07-25T09:51:25.054+02:00',
   'updated_at': '2023-07-25T09:51:25.054+02:00',
   'certificate': False},
  {'id': 116,
   'job_profile_id': 8,
   'job_skill_id': 4,
   'level_id': 3,
   'created_at': '2023-07-25T09:51:25.056+02:00',
   'updated_at': '2023-07-25T09:51:25.056+02:00',
   'certificate': False},
  {'id': 117,
   'job_profile_id': 8,
   'job_skill_id': 5,
   'level_id': 2,
   '