# In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import pickle

# --- Step 1: Load and Prepare Your Data ---
# Read the labelled resumes. Any failure here makes the rest of the script
# meaningless, so stop with SystemExit carrying the message: that yields a
# non-zero exit status, whereas the interactive `exit()` helper reports
# success (and is only meant for the interpreter shell).
try:
    data = pd.read_csv('resume_data.csv')
    if 'Resume' not in data.columns or 'Category' not in data.columns:
        raise ValueError("CSV file must contain 'Resume' and 'Category' columns.")
except FileNotFoundError:
    raise SystemExit(
        "Error: 'resume_data.csv' not found. Please make sure the file exists."
    ) from None
except ValueError as e:
    raise SystemExit(f"Error loading data: {e}") from None

# Rows with a missing resume or category cannot be vectorized or encoded
# later on, so drop them here while the two columns are still aligned.
data = data.dropna(subset=['Resume', 'Category'])
if data.empty:
    raise SystemExit("Error loading data: 'resume_data.csv' contains no usable rows.")

# Cast to str so a stray non-text cell (e.g. a number) is still tokenizable.
X = data['Resume'].astype(str)  # Resume text
y = data['Category']  # Job category

# --- Step 2: Convert Categories to Numbers ---
# Learn the category -> integer mapping, then apply it to the labels.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# --- Step 3: Convert Resume Text to Numbers (using TF-IDF) ---
# English stop words are removed while the vocabulary is built.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = tfidf_vectorizer.fit_transform(X)

# --- Step 4: Train the Machine Learning Model ---
# fit() hands back the estimator itself, so build and train in one step.
model = MultinomialNB().fit(X_tfidf, y_encoded)

# --- Step 5: Save the Trained Model and Tools ---
# The classifier is stored together with the fitted vectorizer and label
# encoder so that one file is enough to score new resumes later.
pickle_filepath = 'resume_matcher_model.pkl'
with open(pickle_filepath, 'wb') as file:
    pickle.dump(
        {
            'model': model,
            'vectorizer': tfidf_vectorizer,
            'label_encoder': label_encoder,
        },
        file,
    )

print(f"Model trained and saved to '{pickle_filepath}'")

# --- Step 6: Load the Trained Model and Tools for Prediction ---
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load a pickle you produced yourself; never point this at a file from
# an untrusted source.
try:
    with open(pickle_filepath, 'rb') as file:
        loaded_data = pickle.load(file)
    # Unpack once the file is closed; a missing key is still reported below.
    loaded_model = loaded_data['model']
    loaded_vectorizer = loaded_data['vectorizer']
    loaded_label_encoder = loaded_data['label_encoder']
except FileNotFoundError:
    # SystemExit with a message exits non-zero, unlike the interactive
    # `exit()` helper, which would report success.
    raise SystemExit(f"Error: '{pickle_filepath}' not found.") from None
except Exception as e:
    # Top-level boundary: unpickling can fail in many ways (corrupt file,
    # missing key, incompatible library version), and all of them are fatal.
    raise SystemExit(f"Error loading model: {e}") from None
else:
    print("Model loaded successfully.")

# --- Step 7: Get Alignment Score for a New Resume and Category ---
def get_alignment_score(resume_text, target_category):
    """Return the model's probability that a resume belongs to a category.

    Uses the module-level ``loaded_vectorizer``, ``loaded_label_encoder`` and
    ``loaded_model`` objects.

    Args:
        resume_text: Raw resume text to score.
        target_category: Category label to score against; it must be one of
            the labels the label encoder was fitted on.

    Returns:
        The predicted probability for ``target_category``, or ``None`` (after
        printing an error message) when the category is unknown or scoring
        fails.
    """
    # Resolve the label on its own so that only a genuinely unknown category
    # produces the "not found" message; a ValueError raised while scoring
    # must not be misreported as a missing category.
    try:
        category_index = loaded_label_encoder.transform([target_category])[0]
    except ValueError:
        print(f"Error: Category '{target_category}' not found in the trained categories.")
        return None

    try:
        processed_text = loaded_vectorizer.transform([resume_text])
        probabilities = loaded_model.predict_proba(processed_text)[0]
        # predict_proba columns follow the order of loaded_model.classes_, so
        # find the column there instead of assuming it equals the encoded label.
        column = list(loaded_model.classes_).index(category_index)
        return probabilities[column]
    except Exception as e:
        print(f"Error calculating alignment score: {e}")
        return None



# Output:
# Model trained and saved to 'resume_matcher_model.pkl'
# Model loaded successfully.
