In [None]:
# Importing Libraries

import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fetch every NLTK resource the notebook relies on:
#   - 'stopwords' backs stopwords.words('english');
#   - the Punkt tokenizer models back nltk.word_tokenize(). Older NLTK
#     releases look for 'punkt', newer ones for 'punkt_tab', so both are
#     requested. nltk.download() skips packages that are already up to date.
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)



# Step 1: Data collection
# Assuming you have two lists: 'job_descriptions' containing job descriptions and 'candidate_profiles' containing candidate profiles.

In [None]:
# Toy corpus of job postings; one free-text string per posting.
job_descriptions = [
    "Software engineer with experience in Python and Java",
    "Data scientist proficient in machine learning algorithms",
    "Web developer skilled in HTML, CSS, and JavaScript",
]

# Toy corpus of candidate profiles; one free-text string per candidate.
candidate_profiles = [
    "Experienced software engineer specializing in Python development",
    "Machine learning enthusiast with a strong background in data science",
    "Front-end web developer with expertise in HTML5, CSS3, and JavaScript frameworks",
]

# Step 2: Data preprocessing

In [None]:
# English stop words held in a set so the membership test in
# preprocess_text is a hash lookup rather than a list scan.
stop_words = set(stopwords.words('english'))


def preprocess_text(text):
    """Normalise one document into a space-separated string of kept tokens.

    The text is lower-cased and split with ``nltk.word_tokenize``. A token is
    kept only when it is purely alphabetic (``str.isalpha``) and is not in
    the module-level ``stop_words`` set.

    Args:
        text: Raw document string.

    Returns:
        The surviving tokens, in their original order, joined by single spaces.
    """
    kept = []
    for word in nltk.word_tokenize(text.lower()):
        if not word.isalpha():
            continue  # drops tokens containing digits or punctuation
        if word in stop_words:
            continue  # drops stop words
        kept.append(word)
    return ' '.join(kept)


# Apply the same normalisation to both corpora.
preprocessed_job_desc = list(map(preprocess_text, job_descriptions))
preprocessed_profiles = list(map(preprocess_text, candidate_profiles))

# Step 3: Feature extraction

In [None]:
# Vectorise job descriptions AND candidate profiles together so that X has
# exactly one row per label in y. Fitting on the job descriptions alone gave
# X fewer rows than y has labels, which train_test_split rejects with
# "inconsistent numbers of samples".
vectorizer = TfidfVectorizer()
corpus = preprocessed_job_desc + preprocessed_profiles
X = vectorizer.fit_transform(corpus)

# Labels follow the row order of `corpus`: 1 = job description, 0 = candidate profile.
y = [1] * len(preprocessed_job_desc) + [0] * len(preprocessed_profiles)

# Step 4: Labeling and training data preparation

In [None]:
# Hold out 20% of the samples for validation. stratify=y preserves the class
# proportions on both sides of the split, so the validation set cannot end up
# containing a single class on a data set this small. random_state pins the
# shuffle so the split is reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 5: ML algorithm selection and training

# Logistic regression with scikit-learn's default settings, fitted on the
# training portion only.
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Model evaluation

In [None]:
# Score the fitted model on the held-out validation rows: accuracy is the
# fraction of validation labels that the model predicts exactly.
val_predictions = model.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=val_predictions)
print(f"Validation Accuracy: {accuracy}")

# Step 7: Predictions

In [None]:
# Raw text of the candidate to score. This name was previously used without
# ever being defined, so the cell raised NameError on a fresh kernel.
# Replace the sample text with the real candidate profile.
new_profile = "Python developer with experience building machine learning models"

# Run the same preprocessing and the already-fitted vectorizer (transform,
# not fit_transform) so the features line up with the training columns.
new_candidate_profile = [preprocess_text(new_profile)]
X_test = vectorizer.transform(new_candidate_profile)
prediction = model.predict(X_test)

# NOTE(review): label 1 was assigned to job descriptions when y was built, so
# this branch fires when the profile is classified as job-description-like.
# Confirm that this is the intended meaning of "suitable".
if prediction[0] == 1:
    print("Candidate is suitable for the job.")
else:
    print("Candidate is not suitable for the job.")