In [1]:
import pandas as pd
import nltk
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Fetch the NLTK data packages used later in this notebook:
#   - 'punkt' / 'punkt_tab': tokenizer models for nltk.word_tokenize
#     (both are requested, presumably so tokenization works across NLTK
#     versions -- TODO confirm which one the installed version needs)
#   - 'stopwords': the English stop-word list
#   - 'wordnet': the lexical database behind WordNetLemmatizer
# Looping keeps the package list in one place and avoids echoing the return
# value of the last call as stray cell output.
for resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [3]:
# Load the resume dataset and keep only the label and text columns.
# The CSV must exist at the absolute path below (a Colab-style /content
# location); adjust the path when running elsewhere.
raw_df = pd.read_csv('/content/UpdatedResumeDataSet.csv')

# Build the working frame without mutating in place, so re-running this cell
# always starts from the untouched raw data. Rows missing either the category
# or the resume text are dropped.
# NOTE(review): consider checking raw_df.duplicated().sum() -- if the file
# contains repeated resumes, copies can land on both sides of the later
# train/test split and inflate the reported accuracy. TODO confirm.
df = raw_df[['Category', 'Resume']].dropna()


In [4]:
# Shared resources for text preprocessing, created once and reused for every
# document instead of being rebuilt on each call.
lemmatizer = WordNetLemmatizer()
# Stored as a set so the per-token membership test in preprocessing is cheap.
stop_words = set(stopwords.words('english'))

In [5]:
def preprocess(text):
    """Normalize one resume string into a space-separated bag of lemmas.

    Steps: lowercase the text, tokenize it with ``nltk.word_tokenize``, keep
    only tokens that are purely alphabetic and not in ``stop_words``, then
    lemmatize each surviving token with the shared ``lemmatizer``.

    Note that the ``str.isalpha`` filter discards any token containing a digit
    or punctuation character, not just standalone punctuation.

    Args:
        text: Raw resume text (must be a ``str``; ``.lower()`` is called on it).

    Returns:
        The cleaned tokens joined by single spaces.
    """
    candidates = nltk.word_tokenize(text.lower())
    # Filter first, lemmatize second: the stop-word check is applied to the
    # surface form of the token, not to its lemma.
    kept = (tok for tok in candidates if tok.isalpha() and tok not in stop_words)
    return ' '.join(lemmatizer.lemmatize(tok) for tok in kept)


In [6]:
# Clean every resume with preprocess() and store the result in a new column,
# leaving the original 'Resume' text untouched.
cleaned_resumes = df['Resume'].apply(preprocess)
df['Cleaned_Resume'] = cleaned_resumes

In [7]:

# Targets: the job category of each resume.
y = df['Category']

# Features: TF-IDF weights over the cleaned text, with the vocabulary capped
# at 5000 terms. The fitted vectorizer is kept so new text can be transformed
# with the same vocabulary later.
vectorizer = TfidfVectorizer(max_features=5000)
tfidf_matrix = vectorizer.fit_transform(df['Cleaned_Resume'])
# NOTE(review): the vectorizer is fitted on every row here, before the
# train/test split, so the vocabulary and IDF weights are learned from rows
# that later end up in the test set. Fitting on the training split only would
# avoid that leakage.
X = tfidf_matrix.toarray()  # convert the sparse result to a dense array

In [8]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit a logistic-regression classifier on the TF-IDF training features.
# max_iter is set explicitly to 1000 to give the solver more iterations to
# converge than the library default.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

In [10]:
# Score the held-out split: overall accuracy first, then per-class metrics.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = round(accuracy * 100, 2)  # fraction -> percentage, 2 decimals

# Output results
print("Model Accuracy:", accuracy_pct, "%")
print("\nClassification Report:\n")
report = classification_report(y_test, y_pred)
print(report)

Model Accuracy: 99.48 %

Classification Report:

                           precision    recall  f1-score   support

                 Advocate       1.00      1.00      1.00         3
                     Arts       1.00      1.00      1.00         6
       Automation Testing       1.00      1.00      1.00         5
               Blockchain       1.00      1.00      1.00         7
         Business Analyst       1.00      1.00      1.00         4
           Civil Engineer       1.00      1.00      1.00         9
             Data Science       1.00      1.00      1.00         5
                 Database       1.00      1.00      1.00         8
          DevOps Engineer       1.00      0.93      0.96        14
         DotNet Developer       1.00      1.00      1.00         5
            ETL Developer       1.00      1.00      1.00         7
   Electrical Engineering       1.00      1.00      1.00         6
                       HR       1.00      1.00      1.00        12
            

In [11]:
# Predict the job category of a single resume supplied by the user
def predict_user_resume(text):
    """Predict the job category for one raw resume string.

    The text is cleaned with ``preprocess`` -- the same function that built the
    training column -- so the inference-time cleaning cannot drift from the
    training-time cleaning. It is then vectorized with the already-fitted
    TF-IDF ``vectorizer`` and classified by the trained ``model``.

    If none of the cleaned tokens are in the fitted vocabulary, the feature
    vector is all zeros; the model still returns a label, but it carries no
    information about the input.

    Args:
        text: Raw resume text.

    Returns:
        The predicted category label (the single element of ``model.predict``
        for this one-row input).
    """
    cleaned_text = preprocess(text)
    # transform(), not fit_transform(): reuse the vocabulary and IDF weights
    # learned when the vectorizer was fitted.
    vectorized_input = vectorizer.transform([cleaned_text]).toarray()
    return model.predict(vectorized_input)[0]


In [12]:
# Prompt for a resume, classify it, and show the predicted category.
# NOTE(review): input() returns a single line of text, so a pasted multi-line
# resume may be cut at the first newline depending on the front end --
# TODO confirm in the target environment.
resume_prompt = "Paste your resume text here:\n"
user_resume = input(resume_prompt)
prediction = predict_user_resume(text=user_resume)

print("\nPredicted Job Category:", prediction)

Paste your resume text here:
sql

Predicted Job Category: DotNet Developer
