In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Load the raw dataset from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer="data.csv")



In [2]:
# Feature column (fed to the text vectorizers below) and target column.
X, y = df['interview_title'], df['offer_acceptance']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)



In [3]:
def train_model(vectorizer, classifier,
                train_texts=None, test_texts=None,
                train_labels=None, test_labels=None):
    """Vectorize text, fit a classifier, and return its held-out accuracy.

    Parameters
    ----------
    vectorizer : object with ``fit_transform`` / ``transform``
        Fitted on the training texts only, then applied to the test texts.
    classifier : object with ``fit`` / ``predict``
        Fitted on the vectorized training texts.
    train_texts, test_texts, train_labels, test_labels : optional
        Data to use. Any argument left as ``None`` falls back to the
        notebook-level ``X_train`` / ``X_test`` / ``y_train`` / ``y_test``,
        so existing ``train_model(vectorizer, classifier)`` calls behave as
        before. Pass all four together when supplying your own data so
        texts and labels stay aligned.

    Returns
    -------
    The value of ``accuracy_score(test_labels, predictions)``.

    Notes
    -----
    The vectorizer is fit once on the whole training split before
    ``classifier.fit`` is called, so any cross-validation performed inside
    ``classifier`` sees features built from that full training split.
    """
    # Resolve defaults lazily so the notebook globals are only required when
    # the caller does not provide explicit data.
    if train_texts is None:
        train_texts = X_train
    if test_texts is None:
        test_texts = X_test
    if train_labels is None:
        train_labels = y_train
    if test_labels is None:
        test_labels = y_test

    # Learn the vocabulary from the training texts only; the test texts are
    # merely projected onto it.
    train_features = vectorizer.fit_transform(train_texts)
    test_features = vectorizer.transform(test_texts)

    # Train the model
    classifier.fit(train_features, train_labels)

    # Score predictions on the held-out data
    predictions = classifier.predict(test_features)
    return accuracy_score(test_labels, predictions)



In [4]:
# Multinomial Naive Bayes: grid-search alpha with cv=5 on raw token counts,
# then report accuracy from train_model.
nb = MultinomialNB()
nb_params = dict(alpha=[0.1, 0.5, 1])
nb_grid = GridSearchCV(estimator=nb, param_grid=nb_params, cv=5)
nb_acc_score = train_model(vectorizer=CountVectorizer(), classifier=nb_grid)
print("Multinomial Naive Bayes Accuracy:", nb_acc_score)



Multinomial Naive Bayes Accuracy: 0.5811138014527845


In [5]:
# Logistic Regression: grid-search C with cv=5 on raw token counts.
# max_iter is raised to 10,000 iterations.
lr = LogisticRegression(max_iter=10_000)
lr_params = dict(C=[0.1, 1, 10])
lr_grid = GridSearchCV(estimator=lr, param_grid=lr_params, cv=5)
lr_acc_score = train_model(vectorizer=CountVectorizer(), classifier=lr_grid)
print("Logistic Regression Accuracy:", lr_acc_score)



Logistic Regression Accuracy: 0.5828433068142511


In [10]:
# Support Vector Machines (SVM): grid-search C and the kernel type with cv=5,
# this time on TF-IDF features rather than raw counts.
svm = SVC()
svm_params = dict(C=[0.1, 1, 10], kernel=['linear', 'rbf'])
svm_grid = GridSearchCV(estimator=svm, param_grid=svm_params, cv=5)
svm_acc_score = train_model(vectorizer=TfidfVectorizer(), classifier=svm_grid)
print("SVM Accuracy:", svm_acc_score)



SVM Accuracy: 0.5873400207540643


In [11]:
# Random Forest
# The forest is randomized; without a fixed seed the reported accuracy
# changes on every run. Seed it with the same value used for the
# train/test split so a Restart & Run All reproduces the same number.
rf = RandomForestClassifier(random_state=42)
rf_params = {'n_estimators': [100, 200, 300]}
rf_grid = GridSearchCV(rf, rf_params, cv=5)
rf_acc_score = train_model(CountVectorizer(), rf_grid)
print("Random Forest Accuracy:", rf_acc_score)



Random Forest Accuracy: 0.5728121757177447


In [12]:
# Train the final model on every row (train and test splits combined).
# NOTE(review): C=10 / linear kernel are hard-coded here; presumably they were
# read off the SVM grid search above - confirm before reusing.
final_vectorizer = TfidfVectorizer()
X_vec = final_vectorizer.fit_transform(X)

final_classifier = SVC(kernel='linear', C=10)
final_classifier.fit(X_vec, y)



SVC(C=10, kernel='linear')

In [13]:
# Make predictions on new data: reuse the already-fitted vectorizer
# (transform only, no refit) so the features match what the classifier saw.
new_data = [
    'I had a great interview and received an offer!',
    'Unfortunately, I did not receive an offer after my interview.',
]
new_data_vec = final_vectorizer.transform(new_data)
new_data_pred = final_classifier.predict(new_data_vec)
print(new_data_pred)

['No Offer' 'No Offer']
