# Testing the preprocessor: extracting keywords from the mail dataset and training a mail classifier on them

In [144]:
import json
from preprocessor import Preprocessor

# Load the raw mails, run each one through the preprocessor, and write the
# preprocessed records to disk for the cells that follow.
with open('data.json', mode='r', encoding='utf-8') as raw_file:
    mails = json.load(raw_file)

preprocessor = Preprocessor()

# One preprocessed record per input mail, in the original order.
preprocessed_mails = [preprocessor.preprocess(mail) for mail in mails]

with open('./data_preprocessed.json', mode='w', encoding='utf-8') as preprocessed_file:
    json.dump(preprocessed_mails, preprocessed_file, indent=4)

In [145]:
# Re-read the preprocessed mails from disk, so later cells work from the
# saved file rather than from the in-memory result of the preprocessing run.
with open('./data_preprocessed.json', mode='r', encoding='utf-8') as preprocessed_file:
    mails = json.load(preprocessed_file)

In [146]:
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
# Distinct keywords across all mails, as a sorted list.
keywords = sorted({keyword for mail in mails for keyword in mail['keywords']})

# The vectorizer is fitted on ONE document that contains every keyword once,
# so it effectively only learns the vocabulary.
# NOTE(review): with a single fitted document every term presumably gets the
# same IDF weight, and TfidfVectorizer's default token pattern keeps only
# tokens of two or more characters (keywords such as '3' or 'b' would not
# become features) — confirm both are intended.
keywords_text = ' '.join(keywords)
vectorizer = TfidfVectorizer().fit([keywords_text])

# Persist the fitted vectorizer and use the reloaded copy from here on.
joblib.dump(vectorizer, 'tfidf_vectorizer.joblib')
loaded_vectorizer = joblib.load('tfidf_vectorizer.joblib')

In [147]:
# Reduce every mail to an [item_id, keywords, label] triple; the later cells
# read the keywords from index 1 and the label from index 2.
training_data = [
    [mail['item_id'], mail['keywords'], mail['label']]
    for mail in mails
]

# Show a short preview only: displaying the whole list floods the notebook
# output with every keyword of every mail.
training_data[:3]

[[0,
  ['bi',
   'data',
   'design',
   'etl',
   'hr',
   'model',
   'ms',
   'nc',
   'nv',
   'power',
   'project',
   'specialist'],
  'BI_ENGINEER'],
 [1,
  ['3',
   'able',
   'analyse',
   'architecture',
   'b',
   'bi',
   'bo',
   'central',
   'customer',
   'dashboard',
   'data',
   'design',
   'develop',
   'engineer',
   'experience',
   'integration',
   'language',
   'learn',
   'main',
   'make',
   'model',
   'oa',
   'platform',
   'pro',
   'process',
   'project',
   'quality',
   'safe',
   'security',
   'sets',
   'soft',
   'solution',
   'sql',
   'strategic',
   'tool',
   'use',
   'warehouse',
   'work'],
  'BI_ENGINEER'],
 [2,
  ['3',
   'alm',
   'azure',
   'bi',
   'data',
   'databricks',
   'dax',
   'desktop',
   'devops',
   'dimensional',
   'e',
   'ecosystem',
   'excel',
   'experience',
   'factory',
   'g',
   'integration',
   'lake',
   'language',
   'microsoft',
   'model',
   'power',
   'query',
   'security',
   'service',
   'sq

In [148]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.metrics import classification_report
import numpy as np


# Label <-> integer class id mappings: integers for training, strings for reporting.
class_mapping = {'BI_ENGINEER': 0, 'DATA_ENGINEER': 1, 'IRRELEVANT': 2}
reverse_class_mapping = {v: k for k, v in class_mapping.items()}

# Vectorize all mails in a single call. Each mail's keyword list is joined
# into one space-separated document, and the sparse result is densified once
# instead of going through todense()/tolist() for every row.
documents = [' '.join(item[1]) for item in training_data]
X = loaded_vectorizer.transform(documents).toarray()
# A label missing from class_mapping raises KeyError here.
y = [class_mapping[item[2]] for item in training_data]

# NOTE(review): the split is not stratified, so a rare class can end up with
# very few test samples; consider stratify=y once every class is known to
# have enough members — confirm against the dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Bagging ensemble whose members are AdaBoost classifiers.
base_estimator = AdaBoostClassifier(random_state=42, learning_rate=0.8, n_estimators=50)
bagged_adaboost = BaggingClassifier(estimator=base_estimator, random_state=42, n_estimators=100)
bagged_adaboost.fit(X_train, y_train)
y_pred_bagged_adaboost = bagged_adaboost.predict(X_test)

# Translate the integer class ids back to label names so the report is readable.
y_test_mapped = [reverse_class_mapping[label] for label in y_test]
y_pred_mapped_bagged_adaboost = [reverse_class_mapping[label] for label in y_pred_bagged_adaboost]
print("Bagged AdaBoost Classifier Report:")
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting an undefined-metric warning for each affected average.
print(classification_report(y_test_mapped, y_pred_mapped_bagged_adaboost, zero_division=0))

Bagged AdaBoost Classifier Report:
               precision    recall  f1-score   support

  BI_ENGINEER       1.00      0.88      0.93        16
DATA_ENGINEER       0.00      0.00      0.00         3
   IRRELEVANT       0.74      1.00      0.85        14

     accuracy                           0.85        33
    macro avg       0.58      0.62      0.59        33
 weighted avg       0.80      0.85      0.81        33


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [149]:
# Optionally persist the model trained in this session, then switch to the
# saved copy when one exists on disk.
import os

MODEL_PATH = 'adaboost_model.joblib'

want_save = False  # set to True to overwrite the saved model with the current one
if want_save:
    joblib.dump(bagged_adaboost, MODEL_PATH)

# An unconditional load raises FileNotFoundError when no model has been saved
# yet (e.g. a fresh checkout with want_save left at False); in that case keep
# using the in-memory model instead of aborting the notebook run.
# NOTE(review): joblib.load unpickles the file, so only load model files that
# come from a trusted source.
if os.path.exists(MODEL_PATH):
    bagged_adaboost = joblib.load(MODEL_PATH)
else:
    print(f"{MODEL_PATH} not found; keeping the model trained in this session.")

In [150]:
m1 = {
        "item_id": 2,
        "sender": "fa1893951762d28976f3feadb80a14241951b5cd7c56e5866c6a89fc5abc26b9",
        "sender_email": "eab1a00874aa0b0455d0b763499e74df6b97bbcdf221d9db2a6614c8bb4abd52",
        "datetime_received": 1701077485000,
        "sensitivity": "Normal",
        "subject": "Kandidaat  senior Data Engineer  - Data BNP Paribas Fortis?",
        "text_body": "Dear,\r\n\r\nRole category -\r\nRole profile\r\n S5 - Data\r\n Data Engineer - Senior\r\nJob title\r\n Data Engineer\r\nWork location\r\n BRU.-MONTAGNE DU PARC/WARANDEBERG\r\n MONTAGNE DU PARC 3\r\n 1000 BRUXELLES\r\nStart date\r\n 01/12/2023\r\nEnd date\r\n 29/11/2024\r\nDescription\r\n\r\nMission context\r\n\r\nData Engineer\r\n\r\nThe Data Hub tribe is one of the key components of the Data department, to help developing and rolling out the bank-wide BI and Data strategy at BNP Paribas Fortis.  Its mission is to service all metiers and functions of the bank with transversal BI solutions (front and backend developments) & tools. In doing so, the Data Hub team will contribute  to the spreading of the new data governance and \u201cKnow your data\u201d culture throughout BNPP Fortis.\r\nFunction  description\r\n\r\n\r\n- As a data engineer / data modeller you establish, improve  and deploy modelling standard&guidelines for the whole Datehub tribe by coaching and assisting the data engineers in the usage of these S&G.\r\n\r\n\r\n?- As a data engineer / data modeller  you will assess on a regular basis the datamodels on the correct application of the rules/guidelines/standards by the data engineers.  This involves a close collaboration with the data architects and data managers and a detailed knowledge of  the BNP Paribas Fortis standards.\r\n\r\n\r\n?- As a data engineer / data modeller you will create awareness on data modelling by promoting techniques that make sense via chapter meetings or by giving trainings to functional and technical data engineers\r\n\r\n\r\n?- As a data engineer /  data modeller  you will act as a DBA in the NON-PROD environment by supporting the technical data engineers in the preparation , in the validation and in the execution of the ORACLE scripts. 
Also support the data engineers in optimizing performance on existing databases.\r\n\r\n\r\n?- As a data engineer /  data modeller you follow  and develop yourself in the recent  IT-and DATA evolutions (internal and external) and propose innovative data solutions that can help  improve business activities.\r\n\r\n\r\n\r\nWith the squad, the data engineer works on solution description and implementation of different projects and enhancements within the big data hub-environment .  Main technologies used on the data hub platform are Scala, Java, Kafka, Flink, Hadoop, Spark, Hive, ... Having experience in building applicative framework and generic components on the datahub is a must have.\r\n\r\nThis mission will focus on what we call the run the  bank activities where the focus will be monitoring the operational environments, keep them in a good shape, identify tracks of continuous improvement.  Also the deployments on the dathub  in a the NON-PROD and PROD environment is an important activity in this mission. Accuracy in the preparation and the execution of the deployments as well as good communication are crucial for this mission.\r\n Language requirements\r\nDutch\r\n\r\n\r\n  *   Preferable , but not mandatory\r\nFrench\r\n\r\n\r\n  *   Preferable , but not mandatory\r\nEnglish\r\n\r\n\r\n  *   Sound knowledge\r\nEducation\r\n\r\n\r\n  *   Bachelor/Master \u2018s degree in IT or engineering or equivalent by professional experience\r\nCertification\r\n\r\nTravel\r\n\r\nAgile requirements\r\nAn analyst involved in Agile projects must have the \"Agile mindset\" which implies:\r\n\r\n  *   a positive attitude and pragmatism\r\n  *   thirst for knowledge: Agile is about learning and adapting. Knowledge sharing is key to success.\r\n  *   The goal of team success: Agile is about the success of the team, no individual success or heroic behavior. 
It is more important for the team to succeed than for the individual to have completed his/her tasks.\r\n  *   There is no failure, only feedback: Agile is about taking everything as lessons, adjusting actions based on the feedback, resulting in continuous improvement.\r\n\r\n\r\nBeyond the roles:  Agile teams are cross-functional.  All required disciplines are represented in the team (analysis, development, testing, \u2026 ). However, although team members have a primary role representing a discipline, they are expected to take on other roles and contribute to other disciplines  whenever it helps towards reaching the sprint goal.\r\nRequired experience / knowledge\r\nAt least 5 years of relevant experience\r\nTechnical experience\r\nmandatory\r\n\r\n\r\n  *\r\n\r\n?You have at least 5 years of experience with data in any of the financial domain (product related, business related).  Having a significant experience in Big Data technologies and platforms is an advantage.\r\n\r\n\r\n  *\r\n\r\n?You have practical experience with Datavault modelling.  You are familiar with Powerdesigner or any other modelling tool.\r\n\r\npreferable\r\n\r\n\r\n  *   Knowing and experienced DEVOPS-practices.\r\nBusiness experience\r\nmandatory\r\nExperience with working in a complex organization\r\npreferable\r\nSpecific requirement for agile: practical experience with Scrum\r\nSoft skills\r\n\r\n\u00b7       Good analytical and problem solving skills.\r\n\u00b7       Quick self-starter, pro-active attitude\r\n\u00b7       Team player  always ready to share, train or provide support to others.\r\n\u00b7       Autonomy, commitment and perseverance\r\n\u00b7       Flexible, accurate & control minded.\r\n\u00b7       Ability to work in a dynamic and multi-cultural environment\r\n\u00b7       Ability to deliver results respecting deadlines\r\n\u00b7       Good oral and written communication. 
Ability to turn advanced technical topics and industry jargon into easy-to-understand information .\r\n\u00b7       Quality driven and cost avoidance minded\r\n\u00b7       Able to handle changing priorities\r\n\u00b7       Customer focus\r\n\u00b7       Passionate about data\r\n\r\n\r\n\r\n  Please submit your candidate in the attached template!\r\n\r\nTx!\r\n\r\n\r\n[CRONOS-logo]\r\n\r\nKind regards,\r\nKristin\r\nBusiness development & Account Management\r\n\r\n##################@email.com<mailto:##################@email.com>\r\nhttps://www.cronos.be<https://safelink.com/?url=https%3A%2F%2Fwww.cronos.be%2F&data=05%7C01%##################%40email.com%7Cd8c67f4344984886f16d08dbef2b95d5%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638366742848525374%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=YjKRof3uoBKafY8wMZnkLzgfxR3%2FZe6VGTvvIrjPSis%3D&reserved=0>\r\nVeldkant 33A | B-2550 Kontich\r\n\r\n\r\n\r\n\r\n",
        "label": "DATA_ENGINEER",
        "keywords": [
            "3",
            "able",
            "act",
            "advanced",
            "analyst",
            "application",
            "b",
            "bi",
            "big",
            "bnp",
            "bnpp",
            "culture",
            "customer",
            "data",
            "develop",
            "devops",
            "dynamic",
            "engineer",
            "environment",
            "experience",
            "g",
            "handle",
            "hub",
            "java",
            "language",
            "main",
            "make",
            "oracle",
            "platform",
            "pro",
            "prod",
            "product",
            "quality",
            "requirement",
            "scala",
            "sense",
            "service",
            "soft",
            "solution",
            "strategy",
            "tool",
            "work",
            "works"
        ]
} # DATA ENGINEER

m2 = {
        "item_id": 0,
        "sender": "3cc523fa094d021b3061df403de12d83235b5cda585faede10ba0f7ab3b308eb",
        "sender_email": "fe2c9a4131b00cfe30ad4197c301d33bc8670daf865588d9bbff61436ff2e1cc",
        "datetime_received": 1707731005000,
        "sensitivity": "Normal",
        "subject": "Gezocht: Project Manager (Overheidsklant)",
        "text_body": "Beste collega\u2019s,\r\n\r\nVoor een overheidsklant regio Brussel (Anderlecht) zijn we op zoek naar een Senior Project Manager. Meer info:\r\n\r\nGewenste startdatum: 04/03/2024\r\nVoorlopige einddatum: 31/12/2024\r\nRegime: Voltijds\r\nLocatie: Brussel (Anderlecht)\r\nTalen: NL of FR + ENG\r\nRemote/onsite: 2d onsite, 3d remote\r\n\r\nOmschrijving\r\nThe primary role of the Project Manager is to work closely with customers to drive the successful delivery of IT projects\r\n\r\n  *   1st, 2nd Line Support and Technical Field services\r\n  *   End user Services: Engineering and Device Platform Management\r\n  *   Problem- & Change Management and Business Service Management\r\n  *   Request, Fulfillment, Invoicing, Tendering\r\n  *   Quality Assurance & Continuous Improvement\r\n  *   Facility Security Management for Safety, Security and Building technologies\r\n  *   Telecom: Telephony solutions, Radio Solutions\r\n  *   Infrastructure: Infrastructure, Nework and Computing\r\nThe Project Manager will facilitate the management of scope for the customer\u2019s requirements, plan implementations and deliver to the planned scope and budget.\r\n\r\nVerantwoordelijkheden\r\n\r\n  *   Leads project delivery from beginning to end;\r\n  *   Define project scope, goals and deliverables that support business goals in collaboration with management and stakeholders;\r\n  *   Develop full-scale project plans and associated communications documents following the company\u2019s standard;\r\n  *   Draft and submit budget proposals, and recommend subsequent budget changes where necessary as per defined process;\r\n  *   Ensure that project meets it\u2019s deliverables in agreed schedule, as per the scope, within stipulated budget and as per the quality standards;\r\n  *   Effectively communicate project expectations to team lead and stakeholders in a timely and clear fashion;\r\n  *   Ensures project or service scope and deliverables are defined and agreed with 
all relevant parties;\r\n  *   Liaise with client and IT Teams on an ongoing basis\r\n  *   Diagnoses and evaluates potential risks and issues throughout the project and executes appropriate plans to mitigate them. Reports regularly to the Team Lead and Stakeholders with regard to risks,\r\n  *   Be a key member of the overall Operations Team\r\n\r\nKennis & ervaring\r\n\r\n  *   15 + years relevant experience with project management in different domains\r\n  *   Strong affinity with enterprise technology and its complexity is an added value, like active directory, Computing, storage, cloud, network\r\n  *   Relevant experience and proven record in one of the following domains as PM :\r\n     *   Windows platform (8, 10, 11)\r\n     *   Mobile devices (smartphones, tablets,\u2026)\r\n     *   Infrastructure (network, storage, computing, cloud)\r\n  *   Experience within Public Sector will also be considered an asset\r\n  *   Recognises and applies our Core Values: Professionalism, Respect, Entrepreneurship, Corporation and Leadership\r\n  *   Demonstrated \u201ccan do\u201d work ethic coupled with effective time management and ability to overcome ambiguously or under-defined problems\r\n  *   Knowledge of Agile delivery methodology and SAFe (Essentials) are considered an asset\r\n  *   ITIL Certification is a MUST. 
Project Methodology Certification is a MUST.\r\n  *   Can facilitate Steerco, project update meetings and guide resources.\r\n  *   Possesses the ability to communicate, present and produce documentation on various levels for differing audiences\r\n  *   Proficient in Dutch or French, and English.\r\n\r\nAls je een geschikte kandidaat hebt dan hoor ik het graag.\r\n\r\nMet vriendelijke groeten,\r\nWout\r\n\r\nCRONOS PUBLIC SERVICES\r\n\r\n\r\n[Image]\r\nAccount Coordinator\r\n##################@email.com<mailto:##################@email.com>\r\nhttps://www.linkedin.com/in/wout-van-bavel/<https://safelink.com/?url=https%3A%2F%2Fwww.linkedin.com%2Fin%2Fwout-van-bavel%2F&data=05%7C02%##################%40email.com%7C6b1373add3ae4ea6f39808dc2baf04ce%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638433278052966887%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=4%2Bexz6W6C10sbO0uFWhKsKwJ4pjv%2F5D6yP142mcbT2U%3D&reserved=0>\r\n<tel:+324## ## ## 
##>\r\n\r\n[website]<https://safelink.com/?url=https%3A%2F%2Fwww.cronos-public-services.be%2F&data=05%7C02%##################%40email.com%7C6b1373add3ae4ea6f39808dc2baf04ce%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638433278052980763%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=c%2BZwgbMsJFfXaghWEFqE6LQp8QbfDbEcZ4McrnSgheo%3D&reserved=0>\r\n[linkedin]<https://safelink.com/?url=https%3A%2F%2Fwww.linkedin.com%2Fcompany%2Fcronos-public-services-nv%2F&data=05%7C02%##################%40email.com%7C6b1373add3ae4ea6f39808dc2baf04ce%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638433278052988751%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=qNA0kwQjHLyndd2W15Z556lFC8xQJt5Tyyk5RSh8KKs%3D&reserved=0>\r\n[Nieuwsbrief]<https://safelink.com/?url=https%3A%2F%2Fwww.cronos-public-services.be%2Fnieuwsbrief%2F&data=05%7C02%##################%40email.com%7C6b1373add3ae4ea6f39808dc2baf04ce%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638433278052994865%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=7GbwOjFsQmKhs7xPWMtUKpsoG4zXzWsmrOEVervCAqQ%3D&reserved=0>\r\n\r\nDe inhoud van deze e-mail en eventuele bijlagen bevat mogelijks confidenti\u00eble informatie. De informatie is uitsluitend bestemd voor de geadresseerde. Indien u dit bericht ten onrechte ontvangt, wordt u verzocht de inhoud niet te gebruiken maar de afzender direct te informeren door het bericht te retourneren en het daarna te verwijderen. 
De afzender van deze e-mail is niet aansprakelijk voor enige directe of indirecte schade die voortvoeit uit fouten, onnauwkeurigheden of verlies van informatie in geval van ongeoorloofd(e) gebruik, openbaarmaking, duplicatie of wijziging van het bericht.\r\n\r\n[Afbeelding met tekst, Lettertype, schermopname, logo  Automatisch gegenereerde beschrijving]<https://safelink.com/?url=https%3A%2F%2Fcronos-public-services.be%2Fportfolio%2Fde-cronos-groep-wordt-door-data-news-erkend-als-customer-centric-it-company-of-the-year&data=05%7C02%##################%40email.com%7C6b1373add3ae4ea6f39808dc2baf04ce%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C638433278053001076%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=RvcX8ifvl0ZTMZkPU%2BdBsGzUgs%2BKXNh07uopw%2FFGxBo%3D&reserved=0>\r\n\r\n",
        "label": "IRRELEVANT",
        "keywords": [
            "asset",
            "cloud",
            "customer",
            "data",
            "develop",
            "e",
            "eng",
            "experience",
            "fr",
            "infrastructure",
            "line",
            "nv",
            "per",
            "platform",
            "process",
            "project",
            "quality",
            "safe",
            "security",
            "service",
            "tel",
            "value",
            "work"
        ]
    } # IRRELEVANT

m3 =  {
        "item_id": 41,
        "sender": "595496ee7241feb8216578b851a064ff0a8e6792d0eeabf9bca9139acc3aa741",
        "sender_email": "dbfc75f08bcb02130e9cc481a63b8fe41e3d7ea9869ec89b326020e368809365",
        "datetime_received": 1662661327000,
        "sensitivity": "Normal",
        "subject": "Business/Funct. Analyst (1. Junior (1-3)) for SWIFT - SWI000578  - Reactiedatum verlengd! ",
        "text_body": "ENKEL RECHTSTREEKS, GEEN TUSSENPARTIJEN AUB\r\n\r\n\r\n\r\nHallo collega\u2019s,\r\n\r\n\r\n\r\nVoor Swift zoeken we Business/Funct. Analyst (1. Junior (1-3)) SWI000578  die voldoet aan volgende beschrijving:\r\nUiterste reactiedatum: 30/09/2022\r\nGewenste startdatum: 01/09/2022\r\nEinddatum: 31/12/2022\r\nReferentie: SWI000578\r\nTitel: Business/Funct. Analyst (1. Junior (1-3)\r\nLocatie: The Netherlands - Zoeterwoude (Energieweg 33, 2382 NC Zoeterwoude, Nederland)\r\nStatus: Gepubliceerd\r\nType contract: Time & material\r\n\r\nAantal personen: 1\r\nAfdeling: Technology Platform (TP)\r\n\r\nOmschrijving\r\n\r\nAbout the Role\r\n\r\nReporting to the Manager of the Cloud Business Office, the Cloud and Hosting Services Analyst for SWIFT will be helping to collect and analyze data to provide Cloud financial insights critical to our success as a business as it relates to an Infrastructure and Operations organization.\r\n\r\nResponsibilities:\r\n\r\n-Produce insightful reports that can be shared with and actioned by Management\r\n-Work effectively with cross-functional teams globally, assimilating requirements, driving results in the form of data insights and building strategic partnerships\r\n-Support the FinOps team with setting goals/KPIs, retrieving data and modeling\r\n-Act as the technical expert for the Cost Transparency tools/dashboards. 
This includes configuring functionality, creating/enhancing visualizations, maintaining design and guidance documentation, and providing support\r\n-Partner heavily with our Cloud Governance team to map data process flows, allowing FinOps to gather a single source of data source\r\n-Analyze and forecast cloud hosting costs\r\n-Provide cost optimization recommendations\r\n-Create and maintain month-end and quarter-end reports\r\n-Use Agile methodology for planning and execution\r\n-Leverage the Cloud Financial Management (CFM) tools, interpret the findings, and make technical recommendations to the business application teams to help reduce ongoing cost\r\n\r\nProfessional Skills:\r\n\r\n-Experience with automation technologies for CI/CD (Jenkins, Azure DevOps, AWS Code pipeline)\r\n-Experience with visualization tools (e.g., QuickSight, Power BI)\r\n-Knowledge of cloud cost products (e.g., Cost Explorer, Azure Portal)\r\n-Experience with Python, JSON, YAML, ARM, Cloud Formation\r\n-Knowledge of cloud management and governance tools (e.g., Cloud Custodian, Trusted Advisor)\r\n-Familiarity with monitoring tools such as CloudWatch, CloudTrail, etc.\r\n-Detail-oriented with excellent documentation skills/methodologies, who can successfully manage multiple priorities\r\n-Demonstrable high level of intellectual curiosity, external perspective, and innovation interest\r\n-Strong communication skills, including the ability to articulate complex technical topics to a non-technical or leadership audience\r\n\r\nQualifications:\r\n\r\n-Bachelor\u2019s degree in Information Technology, Computer Science or related field\r\n-2+ years working in cloud computing or IT infrastructure/platforms or Finance role\r\n-2+ years of experience with AWS and/or Azure\r\n\r\n\r\n\r\n\r\nVaardigheden\r\n\r\nSPECIFIEKE VAARDIGHEDEN\r\n\r\nAWS or Azure certification:   Ja\r\nCloud computing or IT infrastructure/platforms or Finance role:   Junior (1-3) ( 2,00 jaar )\r\nCloud cost management 
tools:   Junior (1-3) ( 2,00 jaar )\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nIndien jullie geschikte kandidaten hebben ontvang ik graag hun beschikbaarheid, CV en kostprijs.\r\n\r\nAlvast hartelijk bedankt.\r\n\r\n\r\n\r\n\r\n\r\nKind regards,\r\n\r\n[http://media.gutsglory.be/cronos/signatures/201606_cronos/cronos-groep.png]\r\nEsma \r\nKey Account Manager\r\n\r\n##################@email.com<mailto:##################@email.com>\r\n\r\nBusiness Park King Square\r\nVeldkant 33A | B-2550 Kontich\r\nW cronos-groep.be <https://safelink.com/?url=http%3A%2F%2Fcronos-groep.be%2F&data=05%7C01%##################%40email.com%7C27bf594cce214bbfd94c08da91c6f7b8%7C49c3d703357947bfa8887c913fbdced9%7C0%7C0%7C637982581269308292%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=LNZWbUJMz1WW8%2FZKbDHRMLdslopMzNtVpDseSsO4axI%3D&reserved=0> | T \r\n\r\n[cid:image002.jpg@01D8C3C0.88DEF580]\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n",
        "label": "BI_ENGINEER",
        "keywords": [
            "3",
            "act",
            "analyst",
            "application",
            "arm",
            "automation",
            "aws",
            "azure",
            "b",
            "bi",
            "cd",
            "ci",
            "cloud",
            "code",
            "data",
            "design",
            "devops",
            "e",
            "experience",
            "form",
            "g",
            "infrastructure",
            "innovation",
            "level",
            "maintain",
            "make",
            "manage",
            "map",
            "nc",
            "optimization",
            "pipeline",
            "platform",
            "power",
            "process",
            "strategic",
            "tp",
            "use",
            "w",
            "work"
        ]
    } # BI ENGINEER

In [151]:
# Classify sample mail m1 from its keywords. Expected label: DATA_ENGINEER
# The keywords are joined into one document and vectorized into a dense
# single-row feature array.
X = loaded_vectorizer.transform([' '.join(m1['keywords'])]).toarray()

y_pred = bagged_adaboost.predict(X)
y_pred_mapped = [reverse_class_mapping[class_id] for class_id in y_pred]
y_pred_mapped

['DATA_ENGINEER']

In [152]:
# Classify sample mail m2 from its keywords. Expected label: IRRELEVANT
# The keywords are joined into one document and vectorized into a dense
# single-row feature array.
X = loaded_vectorizer.transform([' '.join(m2['keywords'])]).toarray()

y_pred = bagged_adaboost.predict(X)
y_pred_mapped = [reverse_class_mapping[class_id] for class_id in y_pred]
y_pred_mapped

['IRRELEVANT']

In [153]:
# Classify sample mail m3 from its keywords. Expected label: BI_ENGINEER
# The keywords are joined into one document and vectorized into a dense
# single-row feature array.
X = loaded_vectorizer.transform([' '.join(m3['keywords'])]).toarray()

y_pred = bagged_adaboost.predict(X)
y_pred_mapped = [reverse_class_mapping[class_id] for class_id in y_pred]
y_pred_mapped

['BI_ENGINEER']