In [None]:
"""
We load the preprocessed data and the parsed resume data and combine them into a single dataset.
We then split the dataset into training and testing sets using the train_test_split function from scikit-learn.
We vectorize the text data using the TfidfVectorizer class from scikit-learn;
TfidfVectorizer converts text into numerical features that can be used by machine learning algorithms.
We then train three machine learning models - Logistic Regression,
Support Vector Machine, and Random Forest Classifier - using the training data.
Finally, we predict the labels for the test data and calculate the accuracy of each model using the accuracy_score function from scikit-learn.
"""
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the two CSV inputs: the preprocessed data and the parsed resume data.
preprocessed_data, parsed_resume_data = (
    pd.read_csv(csv_path)
    for csv_path in ('preprocessed_data.csv', 'parsed_resume_data.csv')
)

# Join the two frames side by side (column-wise) into a single dataset.
# NOTE(review): a column-wise concat aligns rows on the index instead of
# stacking them; confirm the two files are row-aligned and do not share
# column names such as 'Text' or 'Label'.
data = pd.concat([preprocessed_data, parsed_resume_data], axis='columns')

# Hold out 20% of the rows for testing. Features are every column except
# 'Label'; the fixed random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns='Label'),
    data['Label'],
    test_size=0.2,
    random_state=42,
)

# Vectorize the 'Text' column with TF-IDF. The vectorizer is fitted on the
# training split only and then reused, unchanged, on the test split.
# Missing values are replaced with empty strings first: pd.read_csv turns
# empty cells into NaN, and TfidfVectorizer raises a ValueError when it is
# given a NaN document.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train['Text'].fillna(''))
X_test = vectorizer.transform(X_test['Text'].fillna(''))

# Train and evaluate the three classifiers on the same TF-IDF features.
# Each model stays bound to its own module-level name so it can be reused
# after this section runs.
lr_model = LogisticRegression()
svm_model = SVC()
# Seed the forest so its reported accuracy is reproducible from run to run;
# an unseeded forest gives a different score on every execution.
rf_model = RandomForestClassifier(random_state=42)

# One shared fit / predict / score loop replaces three copies of the same
# code. After the loop, y_pred and accuracy hold the random forest's results,
# because it is evaluated last.
for model_name, model in (
    ('Logistic Regression', lr_model),
    ('Support Vector Machine', svm_model),
    ('Random Forest Classifier', rf_model),
):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{model_name} Accuracy:', accuracy)


