In [None]:
try:
    import tensorflow as tf
    from tensorflow.python import keras
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    %matplotlib inline
    from sklearn.ensemble import VotingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix,classification_report
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import LabelEncoder
    print('Library Loaded .........')
except:
    print('One or More Library was not Found ! ')

In [2]:
# Load the raw dataset from a CSV file located relative to the working directory.
df = pd.read_csv("career_pred.csv")

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Bar chart of how often each target label ('Suggested Job Role') occurs.
role_counts = df['Suggested Job Role'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
role_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Suggested Job Roles')
ax.set_xlabel('Job Role')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Count missing entries per column and print the resulting Series.
null_values = df.isna().sum()
print(null_values)

In [None]:
# Print the distinct values found in every column of df.
for column, values in df.items():
    print(f"Unique elements in '{column}':")
    print(values.unique())
    print("\n")

In [None]:
# Summary statistics, transposed so each described column becomes a row.
df.describe().T

In [None]:
# Restrict to object-dtype columns and print a describe() summary for each.
object_columns = df.select_dtypes(include='object')

for column, values in object_columns.items():
    print(f"Description of '{column}':")
    print(values.describe())
    print("\n")

In [None]:
# Columns of df that are replaced, in place, by integer codes.
columns_to_encode = [
    'can work long time before system?',
    'self-learning capability?',
    'Extra-courses did',
    'talenttests taken?',
    'olympiads',
    'In a Realtionship?',
    'Gentle or Tuff behaviour?',
    'Management or Technical',
    'Salary/work',
    'hard/smart worker',
    'worked in teams ever?',
    'Introvert',
]

# One encoder instance is reused; it is re-fitted for each column, so after
# the loop it only remembers the classes of the last column.
label_encoder = LabelEncoder()
for column in columns_to_encode:
    raw_values = df[column]
    df[column] = label_encoder.fit(raw_values).transform(raw_values)

# Show the first rows so the encoded columns can be inspected.
df.head()

In [None]:
# Features: every column except the target label.
X = df.drop(columns='Suggested Job Role')

# Target: the label column on its own.
y = df['Suggested Job Role']

# Report both shapes as a sanity check.
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


In [None]:
# Feature columns that are replaced by integer codes.
label_encode_columns = [
    'reading and writing skills',
    'memory capability score',
    'Job/Higher Studies?',
    'Taken inputs from seniors or elders',
    'interested in games',
    'Salary Range Expected',
]

# Feature columns that are expanded into one indicator column per category.
one_hot_encode_columns = [
    'certifications',
    'workshops',
    'Interested subjects',
    'Type of company want to settle in?',
    'Interested Type of Books',
]

# The encoder is re-fitted on each column in turn.
label_encoder = LabelEncoder()
for column in label_encode_columns:
    raw_values = X[column]
    X[column] = label_encoder.fit(raw_values).transform(raw_values)

# Rebinds X to the one-hot-expanded frame; the listed source columns are
# consumed by get_dummies and no longer exist afterwards.
X = pd.get_dummies(data=X, columns=one_hot_encode_columns)

# Show the first rows to verify the result.
X.head()

In [None]:
# Print the distinct values of every feature column after encoding.
for column, values in X.items():
    print(f"Unique elements in '{column}':")
    print(values.unique())
    print("\n")

In [15]:
# Encode the target labels as integers and build a role -> code lookup table.
label_encoder = LabelEncoder()

# Integer-encoded target for every row.
Y = label_encoder.fit_transform(df['Suggested Job Role'])

# Distinct role names in order of first appearance, plus the code each one maps to.
role_names = df['Suggested Job Role'].unique()
numeric = label_encoder.transform(role_names)

encoded_df = pd.DataFrame({'Suggested Job Role': role_names, 'Associated Number': numeric})

In [None]:
# Display the integer-encoded target array.
Y

In [None]:
# Sort the DataFrame by 'Associated Number'
# (rebinds encoded_df to the sorted copy; re-running the cell yields the same order)
encoded_df = encoded_df.sort_values(by='Associated Number')

# Display the role-name / code lookup table
encoded_df

In [18]:
# Encode whatever non-numeric feature columns are still left, ONCE, on the
# full feature matrix and BEFORE splitting.
#
# Fitting a fresh LabelEncoder separately on X_train and on X_test gives each
# split its own value -> code mapping, so the same raw value can receive
# different codes in train and test. It also replaces genuinely numeric
# features with per-split ranks. Encoding up front keeps one consistent
# mapping and leaves numeric / indicator columns untouched.
X_encoded = X.copy()
remaining_categorical = X_encoded.select_dtypes(exclude=['number', 'bool']).columns
for column in remaining_categorical:
    X_encoded[column] = LabelEncoder().fit_transform(X_encoded[column])

# Use one float dtype so every downstream estimator receives a homogeneous
# numeric matrix (indicator columns may otherwise be bool).
X_encoded = X_encoded.astype('float32')

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, Y, test_size=0.2, random_state=42)

# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:
# Fit the forest on the training split and predict the held-out split.
y_pred = rf_classifier.fit(X_train, y_train).predict(X_test)

# Report the confusion matrix and the per-class metrics.
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
# Overall accuracy of the Random Forest predictions on the test split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of RandomForest model: {accuracy * 100:.2f}%")

In [21]:
# Initialize the SVM classifier (linear kernel, fixed random_state)
svm_classifier = SVC(kernel='linear', random_state=42)

In [None]:
# Fit the SVM on only the first 10% of the training rows.
subset_size = int(0.1 * X_train.shape[0])

# Positional slices: the leading `subset_size` rows and their labels.
X_train_subset = X_train.iloc[:subset_size]
y_train_subset = y_train[:subset_size]

svm_classifier.fit(X_train_subset, y_train_subset)

In [None]:
# Predict the test split with the fitted SVM.
y_pred_svm = svm_classifier.predict(X_test)

# Overall accuracy on the test split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print(f"Accuracy of SVM model: {accuracy * 100:.2f}%")

In [None]:
# Define the Sequential model
model = Sequential()

# Add layers to the model
model.add(Dense(128, input_dim=X.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model's architecture
model.summary()

In [25]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Fit the model
# 50 epochs, batches of 32; validation_split=0.2 reserves 20% of the supplied
# training data for per-epoch validation. The returned History object is kept
# for the plot below.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

In [None]:
# Training vs. validation accuracy per epoch, taken from the fit history.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Training Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_title('Model Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Evaluate on the test split without progress output, then print the entry at
# index 1 of the results as a percentage, labelled with the matching name
# from model.metrics_names.
# NOTE(review): index 1 is presumably the accuracy metric (index 0 the loss) — confirm for the installed Keras version.
scores = model.evaluate(X_test, y_test, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [29]:
# Initialize the classifiers
# These rebind rf_classifier and svm_classifier to fresh, unfitted estimators,
# replacing the ones fitted in earlier cells. The SVC is created with
# probability=True this time.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
svm_classifier = SVC(kernel='linear', probability=True, random_state=42)

In [30]:
# Combine the Random Forest and the SVM into a soft-voting ensemble.
voting_members = [
    ('rf', rf_classifier),
    ('svm', svm_classifier),
]
ensemble_model = VotingClassifier(estimators=voting_members, voting='soft')

In [None]:
# Fit the ensemble on the training split, then predict the test split.
y_pred_ensemble = ensemble_model.fit(X_train, y_train).predict(X_test)

In [None]:
# Overall accuracy of the ensemble predictions on the test split.
accuracy_ensemble = accuracy_score(y_true=y_test, y_pred=y_pred_ensemble)
print(f"Accuracy of Ensemble model: {accuracy_ensemble * 100:.2f}%")