#IMPORTING LIBRARIES

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import warnings


#READ DATASET

In [None]:
# Survey export to analyse. Point CAREER_CSV_PATH at your own copy and adjust
# the column names used further down if your CSV differs.
CAREER_CSV_PATH = '/content/temp_career.csv'
career = pd.read_csv(CAREER_CSV_PATH)

#DOWNLOAD NLTK RESOURCES

In [None]:
# Silence warnings. Note that this filter is global: it hides every warning
# raised for the rest of the session, not only those from the NLTK download.
warnings.filterwarnings("ignore")

# Fetch the NLTK resources used by preprocess_skills (no-op when already present).
# 'punkt' serves older NLTK releases; newer releases make word_tokenize look up
# 'punkt_tab' instead, so both are requested to keep a fresh kernel working.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

True

#ANALYZING DATASET

In [None]:
# List every survey question (column) as a bulleted line followed by a blank line.
print("\nFeatures (columns) in the dataset:\n\n")
for column_name in career.columns:
    print(f"o {column_name} \n")



Features (columns) in the dataset:


o What is your name? 

o What is your gender? 

o What was your course in UG? 

o What is your UG specialization? Major Subject (Eg; Mathematics) 

o What are your interests? 

o What are your skills ? (Select multiple if necessary) 

o What was the average CGPA or Percentage obtained in under graduation? 

o Did you do any certification courses additionally? 

o If yes, please specify your certificate course title. 

o Are you working? 

o If yes, then what is/was your first Job title in your current field of work? If not applicable, write NA.                

o Have you done masters after undergraduation? If yes, mention your field of masters.(Eg; Masters in Mathematics) 

o Recommended Career 



In [None]:
# Preview the first two records under a heading, padded with blank lines.
first_rows = career.head(2)
print("First few rows of the dataset:\n\n\n", first_rows, '\n\n\n', sep='\n')

First few rows of the dataset:



  What is your name? What is your gender? What was your course in UG?  \
0        A.Uha Priya               Female                        B.Sc   
1              Aadil                 Male                         B.E   

  What is your UG specialization? Major Subject (Eg; Mathematics)  \
0                              Computer Applications                
1                       Computer Science Engineering                

  What are your interests?  \
0          Cloud computing   
1               Technology   

  What are your skills ? (Select multiple if necessary)  \
0                                    Python;SQL;Java      
1  Critical Thinking, Analytic Thinking, SQL, Pro...      

   What was the average CGPA or Percentage obtained in under graduation?  \
0                                               85.0                       
1                                               66.5                       

  Did you do any certification courses a

In [None]:
# Preview the last two records under a heading, padded with blank lines.
last_rows = career.tail(2)
print("\nLast few rows of the dataset: \n\n\n", last_rows, '\n\n\n', sep='\n')


Last few rows of the dataset: 



   What is your name? What is your gender? What was your course in UG?  \
47       Ajit Cherian                 Male                      B.Tech   
48         Ajit Kumar                 Male                      B.Tech   

   What is your UG specialization? Major Subject (Eg; Mathematics)  \
47             Electrical and Electronics Engineering                
48                       Computer Science Engineering                

             What are your interests?  \
47  Information Technology,Blockchain   
48                       Software Job   

   What are your skills ? (Select multiple if necessary)  \
47  \nDocker;Kubernetes;Amazon Web Services (AWS);...      
48                  Full stack, java, python, reactjs      

    What was the average CGPA or Percentage obtained in under graduation?  \
47                                              74.67                       
48                                              74.10                   

In [None]:

# Descriptive statistics as computed by DataFrame.describe(); kept in `summary`.
summary = career.describe()
print("\nSummary of the dataset:\n\n\n", summary, '\n\n\n', sep='\n')


Summary of the dataset:



       What was the average CGPA or Percentage obtained in under graduation?
count                                          49.000000                    
mean                                           72.545510                    
std                                             8.364305                    
min                                            51.300000                    
25%                                            69.000000                    
50%                                            73.150000                    
75%                                            76.000000                    
max                                            92.000000                    






In [None]:
# DataFrame.shape is (rows, columns): rows are survey records, columns are features.
total_records, number_of_features = career.shape

print("\nTotal Records:    ", total_records)
print("Number of Features: ", number_of_features, '\n\n\n', sep='\n') if False else (
    print("Number of Features: ", number_of_features),
    print('\n\n\n'),
)


Total Records:     49
Number of Features:  13






In [None]:

# Show the dtype pandas inferred for each column.
column_dtypes = career.dtypes
print("\nData types of each column:", column_dtypes, '\n\n\n', sep='\n')


Data types of each column:
What is your name?                                                                                                           object
What is your gender?                                                                                                         object
What was your course in UG?                                                                                                  object
What is your UG specialization? Major Subject (Eg; Mathematics)                                                              object
What are your interests?                                                                                                     object
What are your skills ? (Select multiple if necessary)                                                                        object
What was the average CGPA or Percentage obtained in under graduation?                                                       float64
Did you do any certification courses additionall

In [None]:

# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called on its own; wrapping it in print() would add a stray "None" line.
print("\n basic info of the dataset:\n\n\n")
career.info()
print('\n\n\n')


 basic info of the dataset:



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 13 columns):
 #   Column                                                                                                                    Non-Null Count  Dtype  
---  ------                                                                                                                    --------------  -----  
 0   What is your name?                                                                                                        49 non-null     object 
 1   What is your gender?                                                                                                      49 non-null     object 
 2   What was your course in UG?                                                                                               49 non-null     object 
 3   What is your UG specialization? Major Subject (Eg; Mathematics)                                          

#APPLYING PREPROCESSING

In [None]:
# Count the null entries in every column; the per-column totals are kept in `res`.
res = career.isna().sum()
print("Missing values in the dataset:\n", res, '\n\n\n', sep='\n')

Missing values in the dataset:

What is your name?                                                                                                           0
What is your gender?                                                                                                         0
What was your course in UG?                                                                                                  0
What is your UG specialization? Major Subject (Eg; Mathematics)                                                              0
What are your interests?                                                                                                     0
What are your skills ? (Select multiple if necessary)                                                                        0
What was the average CGPA or Percentage obtained in under graduation?                                                        0
Did you do any certification courses additionally?                             

In [None]:
# Count rows that exactly repeat an earlier row; the total is kept in `duplicates`.
duplicates = career.duplicated().sum()
print("Number of duplicate rows in the dataset:\n", duplicates, '\n\n\n', sep='\n')

Number of duplicate rows in the dataset:

0






In [None]:
# Drop exact duplicate rows, keeping the first occurrence of each, then preview
# the first five remaining records.
career = career.drop_duplicates(keep='first')
deduplicated_preview = career.head(5)
print("Dataset after removing duplicates:\n", deduplicated_preview, '\n\n\n', sep='\n')

Dataset after removing duplicates:

  What is your name? What is your gender? What was your course in UG?  \
0        A.Uha Priya               Female                        B.Sc   
1              Aadil                 Male                         B.E   
2            Aakriti               Female                          BA   
3     Aanchal sharma               Female                         MBA   
4   Aangkeeta Sarkar               Female                      B.Tech   

  What is your UG specialization? Major Subject (Eg; Mathematics)  \
0                              Computer Applications                
1                       Computer Science Engineering                
2                                         Psychology                
3                                           Commerce                
4                        Instrumentation Engineering                

                            What are your interests?  \
0                                    Cloud computing  

In [None]:
# Function to preprocess skills
def preprocess_skills(skills):
    """Normalise a free-text skills answer into a space-separated token string.

    The text is lower-cased and tokenised with NLTK's ``word_tokenize``; tokens
    that are not purely alphanumeric or that are English stop words are dropped.

    Args:
        skills: Raw skills text. Non-string values (for example the float NaN
            pandas uses for a blank CSV cell) are treated as "no skills given".

    Returns:
        str: The surviving tokens joined by single spaces; ``''`` when nothing
        survives or the input is not a string.
    """
    if not isinstance(skills, str):
        # A missing cell has no .lower(); return an empty document instead of
        # raising inside Series.apply.
        return ''

    tokens = word_tokenize(skills.lower())
    stop_words = _english_stop_words()
    return ' '.join(
        token for token in tokens
        if token.isalnum() and token not in stop_words
    )


_ENGLISH_STOP_WORDS = None  # filled on first use by _english_stop_words()


def _english_stop_words():
    """Return the NLTK English stop-word set, reading the corpus only once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


In [None]:
# NOTE(review): X was not defined anywhere above, so this cell only ran thanks to
# leftover kernel state. The recorded output (49, 12) matches the 13-column frame
# minus the target column, so X is rebuilt that way here - confirm the intent.
X = career.drop(columns=['Recommended Career'])
X.shape

(49, 12)

In [None]:
# NOTE(review): y was not defined anywhere above (leftover kernel state). The
# functions below use the 'Recommended Career' column as the target and the
# recorded output is (49,), so y is rebuilt from that column - confirm the intent.
y = career['Recommended Career']
y.shape

(49,)

#RECOMMENDING CAREER FUNCTION

In [None]:
# Function to recommend careers based on user skills using Logistic Regression
def recommend_careers(user_skills):
    """Predict a career for ``user_skills`` with Logistic Regression.

    A binary bag-of-words matrix is built from the skills column of the
    module-level ``career`` frame, a ``LogisticRegression`` model is fitted
    against the 'Recommended Career' column, and the model is applied to the
    user's preprocessed skills. The model is refitted on every call.

    Args:
        user_skills: Raw skills text entered by the user.

    Returns:
        The array returned by ``classifier.predict`` (one label for the single
        input row), or an empty list when none of the user's tokens occur in
        the vocabulary learned from the dataset.
    """
    skills_column = 'What are your skills ? (Select multiple if necessary)'

    # Build the vocabulary and feature matrix from the dataset's skills answers.
    all_skills = career[skills_column].apply(preprocess_skills)
    vectorizer = CountVectorizer(binary=True)
    X = vectorizer.fit_transform(all_skills)

    # Encode the user's skills with the same vocabulary.
    user_skills_processed = preprocess_skills(user_skills)
    user_skills_vector = vectorizer.transform([user_skills_processed])

    if user_skills_vector.nnz == 0:
        # No overlap with any known skill: an all-zero row would still get a
        # label from the model, but that label would not reflect the input.
        # Report "no match" so the caller can say so.
        return []

    # Target variable
    y = career['Recommended Career']

    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X, y)

    return classifier.predict(user_skills_vector)


#CALCULATING ACCURACY FUNCTION

In [None]:
# Function to calculate accuracy of different models
def calculate_accuracy(X, y, model, test_size=None, random_state=42):
    """Fit ``model``, report its accuracy and plot the per-class confusion matrix.

    Args:
        X: Feature matrix accepted by ``model.fit``.
        y: Target labels aligned with the rows of ``X``.
        model: Estimator exposing ``fit`` and ``predict``.
        test_size: When ``None`` (default) the model is fitted and scored on the
            same data, so the result is *training* accuracy and is optimistic.
            Otherwise the value is forwarded to ``train_test_split`` and the
            model is scored on the held-out part only.
        random_state: Seed forwarded to ``train_test_split`` when a split is made.

    Returns:
        tuple: ``(acc, y_pred)`` - the accuracy score and the predictions for
        the rows that were scored (all rows when ``test_size`` is ``None``).
    """
    if test_size is None:
        X_train, X_eval, y_train, y_eval = X, X, y, y
    else:
        X_train, X_eval, y_train, y_eval = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_eval)
    acc = accuracy_score(y_eval, y_pred)

    # Use the real class labels, in one fixed order, for both the matrix and
    # the axis ticks. Collapsing the matrix to cm[0..1][0..1] would ignore every
    # class after the second and fail outright when only one class is present.
    labels = sorted(set(y_eval) | set(y_pred))
    cm = confusion_matrix(y_eval, y_pred, labels=labels)

    # Grow the figure with the number of classes so annotations stay legible.
    side = max(4, 0.5 * len(labels))
    plt.figure(figsize=(side, side))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

    return acc, y_pred


#MAIN FUNCTION

In [None]:
# Interactive input example
def _follow_up_if_yes(answer, follow_up_prompt):
    """Ask ``follow_up_prompt`` only when ``answer`` is "yes" (any case); otherwise return "NA"."""
    if answer.strip().lower() == 'yes':
        return input(follow_up_prompt)
    return "NA"


def _report_model_accuracy():
    """Menu option 1: let the user pick a classifier and print its accuracy."""
    print("Please select a classifier:")
    print("1. Logistic Regression")
    print("2. Support Vector Machine (SVM)")
    print("3. Random Forest")
    print("4. Decision Tree")

    classifier_option = input("Enter your choice (1, 2, 3, or 4): ").strip()

    # Factories rather than instances, so only the chosen model is constructed.
    # The tree-based models are seeded so repeated runs report the same accuracy.
    model_factories = {
        '1': lambda: LogisticRegression(max_iter=1000),
        '2': lambda: SVC(),
        '3': lambda: RandomForestClassifier(random_state=42),
        '4': lambda: DecisionTreeClassifier(random_state=42),
    }
    make_model = model_factories.get(classifier_option)
    if make_model is None:
        # Reject a bad choice before doing any vectorisation work.
        print("Invalid choice. Please try again.")
        return

    # Binary bag-of-words features from the dataset's skills answers.
    all_skills = career['What are your skills ? (Select multiple if necessary)'].apply(preprocess_skills)
    vectorizer = CountVectorizer(binary=True)
    X = vectorizer.fit_transform(all_skills)

    # Target variable
    y = career['Recommended Career']

    accuracy, _ = calculate_accuracy(X, y, make_model())
    print(f"Accuracy of the model: {accuracy}")


def _guide_career():
    """Menu option 2: collect the user's profile and print a career prediction."""
    print("Please enter your information:")
    name = input("Name: ")
    gender = input("Gender: ")
    course = input("Course in UG: ")
    specialization = input("Specialization: ")
    interests = input("Interests: ")
    skills = input("Skills (separated by ';'): ")  # Adjust delimiter as per your input format
    grades = input("Average CGPA or Percentage: ")
    any_cert_courses = input("Did you do any certification courses additionally? (Yes/No): ")
    cert_course_title = _follow_up_if_yes(any_cert_courses, "Certificate course title: ")
    working = input("Are you working? (Yes/No): ")
    job_title = _follow_up_if_yes(working, "First job title in current field: ")
    masters_done = input("Have you done masters after undergraduation? (Yes/No): ")
    masters_desc = _follow_up_if_yes(masters_done, "Field of masters: ")

    # Full profile as entered; only the 'Skills' entry is passed to the model below.
    user_profile = {
        'Name': name,
        'Gender': gender,
        'Course': course,
        'Specialization': specialization,
        'Interest': interests,
        'Skills': skills,
        'Grades': grades,
        'Any_Add_Cert_Courses': any_cert_courses,
        'Cert_Courses_Desc': cert_course_title,
        'Working?': working,
        'Job_Title': job_title,
        'Masters_Desc': masters_desc
    }

    predictions = recommend_careers(user_profile['Skills'])

    # Test the length explicitly: the result may be a NumPy array, whose truth
    # value is not a reliable emptiness check (recent NumPy raises for empty arrays).
    if len(predictions) > 0:
        print(f"\nPredicted career based on your skills: {predictions[0]}")
    else:
        print("\nNo matching career found based on your skills.")


def main():
    """Interactive entry point: show the top-level menu and run the chosen option.

    Option '1' reports the accuracy of a user-selected classifier; option '2'
    collects a user profile and prints a predicted career. Anything else prints
    an "Invalid choice" message and returns.
    """
    print("Please select an option:")
    print("1. Find accuracy of the model")
    print("2. Guide a career")

    option = input("Enter your choice (1 or 2): ").strip()

    if option == '1':
        _report_model_accuracy()
    elif option == '2':
        _guide_career()
    else:
        print("Invalid choice. Please try again.")


if __name__ == "__main__":
    main()


Please select an option:
1. Find accuracy of the model
2. Guide a career
Enter your choice (1 or 2): 2
Please enter your information:
Name: sf
Gender: sf
Course in UG: sf
Specialization: sf
Interests: sf
Skills (separated by ';'): People management;Communication skills
Average CGPA or Percentage: sf
Did you do any certification courses additionally? (Yes/No): sf
Are you working? (Yes/No): sf
Have you done masters after undergraduation? (Yes/No): sf

Predicted career based on your skills: Human Resources Specialist
