In [1]:
import pandas as pd
import random

import joblib

In [2]:
# Restore the two persisted artifacts, classifier first and vectorizer second.
# joblib.load unpickles its input, which can execute arbitrary code, so these
# files must come from a trusted source.
model, vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in ("model.pkl", "tfidf_vectorizer.pkl")
)

In [3]:
def smart_predict(skills_text, top1_threshold=0.3, min_prob_threshold=0.027):
    """Rank candidate classes for a free-text skills description.

    The text is vectorized with the module-level ``vectorizer`` and scored
    with the module-level ``model``; both must be loaded before calling.

    Selection rules, applied in order:

    1. If the most probable class reaches ``top1_threshold``, return it alone.
    2. Otherwise return every class whose probability reaches
       ``min_prob_threshold``, most probable first.
    3. If no class reaches ``min_prob_threshold``, fall back to the (up to)
       three most probable classes, each with its own probability.

    Args:
        skills_text: Skills description; passed to ``vectorizer.transform``
            as a single document.
        top1_threshold: Probability (0-1) at which the best class is
            returned on its own.
        min_prob_threshold: Probability (0-1) a class must reach to be
            listed when the best class is below ``top1_threshold``.

    Returns:
        A list of ``(class_label, probability_percent)`` tuples ordered from
        most to least probable; the percentage is rounded to two decimals.
    """
    X_vec = vectorizer.transform([skills_text])

    proba = model.predict_proba(X_vec)[0]
    classes = model.classes_

    # Class indices ordered from most to least probable.
    top_indices = proba.argsort()[::-1]

    def as_result(idx):
        # Always pair a label with the probability of that same class.
        return (classes[idx], round(proba[idx] * 100, 2))

    if proba[top_indices[0]] >= top1_threshold:
        return [as_result(top_indices[0])]

    result = [
        as_result(idx) for idx in top_indices if proba[idx] >= min_prob_threshold
    ]

    if not result:
        # Nothing cleared the floor: report the best few candidates anyway.
        # Slicing (rather than indexing [1] and [2]) keeps this safe for
        # models with fewer than three classes.
        result = [as_result(idx) for idx in top_indices[:3]]

    return result

In [4]:
# Example query: Python / machine learning / Pandas / statistics skills.
# The literals below were mojibake (UTF-8 read as Mac Roman); they are
# restored to Cyrillic so the vectorizer receives the intended tokens.
user_skills = "Python, Машинное обучение, Pandas, Статистика,"

# data scientist - data methodologist
# data analyst - identifies patterns based on data
# data engineer - data specialist
# Prediction
predictions = smart_predict(user_skills)

# Header reads: "Prediction based on the entered skills:"
print("Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")

Предсказание на основе введенных навыков:
- Инженер машинного обучения (6.35%)
- Data Scientist (6.08%)
- Аналитик данных (3.57%)


In [5]:
# Example query: Kotlin / Android skill set.
user_skills = "Kotlin, Android Studio, Jetpack Compose"

predictions = smart_predict(user_skills)

# The header literal was mojibake (UTF-8 read as Mac Roman); restored.
# It reads: "Prediction based on the entered skills:"
print("\n🎯 Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")


🎯 Предсказание на основе введенных навыков:
- Мобильный разработчик (3.71%)


In [6]:
# Example query: data-science skills mixed with soft skills
# (teamwork, creativity).
# The literals below were mojibake (UTF-8 read as Mac Roman); they are
# restored to Cyrillic so the vectorizer receives the intended tokens.
user_skills = "Python, Машинное обучение, Анализ данных, Статистика, Работа в команде, Креативность"

# Prediction
predictions = smart_predict(user_skills)

# Header reads: "Prediction based on the entered skills:"
print("\n🎯 Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")


🎯 Предсказание на основе введенных навыков:
- Инженер машинного обучения (7.38%)
- Data Scientist (3.67%)
- Продуктовый менеджер (IT) (3.14%)


In [7]:
# Example query: project planning / Agile / sprint planning skills.
# The literals below were mojibake (UTF-8 read as Mac Roman); they are
# restored to Cyrillic so the vectorizer receives the intended tokens.
user_skills = "Планирование проектов, Agile, Планирование спринтов"

# Prediction
predictions = smart_predict(user_skills)

# Header reads: "Prediction based on the entered skills:"
print("\n🎯 Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")


🎯 Предсказание на основе введенных навыков:
- Руководитель проектов (12.83%)


In [8]:
# Example query: accounting / financial analysis / financial reporting skills.
# The literals below were mojibake (UTF-8 read as Mac Roman); they are
# restored to Cyrillic so the vectorizer receives the intended tokens.
user_skills = "Бухгалтерский учет, Финансовый анализ, Финансовая отчетность"

# TODO: split and look at the result skill by skill
# Prediction
predictions = smart_predict(user_skills)

# Header reads: "Prediction based on the entered skills:"
print("\n🎯 Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")


🎯 Предсказание на основе введенных навыков:
- Бухгалтер (7.01%)


In [9]:
# Example query: case management / crisis intervention / client advocacy.
# The literals below were mojibake (UTF-8 read as Mac Roman); they are
# restored to Cyrillic so the vectorizer receives the intended tokens.
user_skills = "Управление случаями, Кризисное вмешательство, Защита интересов клиента"

# lawyer (crisis management, crisis manager)
# Prediction
predictions = smart_predict(user_skills)

# Header reads: "Prediction based on the entered skills:"
print("\n🎯 Предсказание на основе введенных навыков:")
for profession, probability in predictions:
    print(f"- {profession} ({probability}%)")


🎯 Предсказание на основе введенных навыков:
- Социальный работник (8.02%)
- Психолог (6.35%)
