In [15]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

df = pd.read_csv('Resume.csv')
print(df.head())

         ID                                         Resume_str  \
0  16852973           HR ADMINISTRATOR/MARKETING ASSOCIATE\...   
1  22323967           HR SPECIALIST, US HR OPERATIONS      ...   
2  33176873           HR DIRECTOR       Summary      Over 2...   
3  27018550           HR SPECIALIST       Summary    Dedica...   
4  17812897           HR MANAGER         Skill Highlights  ...   

                                         Resume_html Category  
0  <div class="fontsize fontface vmargins hmargin...       HR  
1  <div class="fontsize fontface vmargins hmargin...       HR  
2  <div class="fontsize fontface vmargins hmargin...       HR  
3  <div class="fontsize fontface vmargins hmargin...       HR  
4  <div class="fontsize fontface vmargins hmargin...       HR  


In [16]:

stop_words = [
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", 
    "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", 
    "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", 
    "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", 
    "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", 
    "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", 
    "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", 
    "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", 
    "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", 
    "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", 
    "don", "should", "now"
]
 
    
def preprocess_text(text):
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text


df['cleaned_resume'] = df['Resume_str'].apply(preprocess_text)
df[['Resume_str', 'cleaned_resume']].head()


Unnamed: 0,Resume_str,cleaned_resume
0,HR ADMINISTRATOR/MARKETING ASSOCIATE\...,hr administratormarketing associate hr adminis...
1,"HR SPECIALIST, US HR OPERATIONS ...",hr specialist us hr operations summary versati...
2,HR DIRECTOR Summary Over 2...,hr director summary years experience recruitin...
3,HR SPECIALIST Summary Dedica...,hr specialist summary dedicated driven dynamic...
4,HR MANAGER Skill Highlights ...,hr manager skill highlights hr skills hr depar...


In [17]:
job_requirements = [
    "Python, Machine Learning, Deep Learning, Bachelor's in CS",
    "Data Analysis, SQL, Master's in Data Science, Statistics",
]

job_requirements = [preprocess_text(job) for job in job_requirements]


In [18]:
all_text = df['cleaned_resume'].tolist() + job_requirements
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_text)
print("TF-IDF Vektörlerinin Boyutu:", tfidf_matrix.shape)

resume_tfidf = tfidf_matrix[:len(df)]
job_tfidf = tfidf_matrix[len(df):]

similarity_scores = cosine_similarity(resume_tfidf, job_tfidf)


TF-IDF Vektörlerinin Boyutu: (2486, 49162)


In [12]:
matches = []
for i in range(len(similarity_scores)):
    best_match_index = similarity_scores[i].argmax()
    best_match_score = similarity_scores[i][best_match_index]
    matches.append((df.iloc[i]['ID'], best_match_index, best_match_score))

for match in matches:
    print(f"Resume ID: {match[0]}, Benzerlik skoru: {match[2]:.2f}")


Resume ID: 16852973, Benzerlik skoru: 0.08
Resume ID: 22323967, Benzerlik skoru: 0.00
Resume ID: 33176873, Benzerlik skoru: 0.01
Resume ID: 27018550, Benzerlik skoru: 0.00
Resume ID: 17812897, Benzerlik skoru: 0.01
Resume ID: 11592605, Benzerlik skoru: 0.01
Resume ID: 25824789, Benzerlik skoru: 0.00
Resume ID: 15375009, Benzerlik skoru: 0.03
Resume ID: 11847784, Benzerlik skoru: 0.02
Resume ID: 32896934, Benzerlik skoru: 0.02
Resume ID: 29149998, Benzerlik skoru: 0.02
Resume ID: 11480899, Benzerlik skoru: 0.02
Resume ID: 23155093, Benzerlik skoru: 0.00
Resume ID: 11763983, Benzerlik skoru: 0.01
Resume ID: 27490876, Benzerlik skoru: 0.01
Resume ID: 32977530, Benzerlik skoru: 0.01
Resume ID: 93002334, Benzerlik skoru: 0.02
Resume ID: 24184357, Benzerlik skoru: 0.08
Resume ID: 73077810, Benzerlik skoru: 0.02
Resume ID: 13879043, Benzerlik skoru: 0.03
Resume ID: 30163002, Benzerlik skoru: 0.00
Resume ID: 18827609, Benzerlik skoru: 0.01
Resume ID: 25676643, Benzerlik skoru: 0.02
Resume ID: 