In [1]:
import numpy as np
import pandas as pd
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/tech-skills-onet/TECH_SKILLS.csv


In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

df = pd.read_csv("/kaggle/input/tech-skills-onet/TECH_SKILLS.csv")

X_train = df[['title', 'commodity_title', 'tech_skill']]
y_train = df['O*NET-SOC Code']

# Tokenize words
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train['tech_skill'])
total_words = len(tokenizer.word_index) + 1

# Convert words to sequences
sequences = tokenizer.texts_to_sequences(X_train['tech_skill'])
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, padding='post')

# Convert job profiles to one-hot encoding
labels = pd.get_dummies(y_train)

X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=47)

# Build the LSTM model
model = Sequential()
model.add(Embedding(input_dim=total_words, output_dim=100))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(64)))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(labels.columns), activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

model.fit(X_train, y_train, epochs=25, batch_size=32, validation_split=0.01)

2024-03-02 04:14:32.248800: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-02 04:14:32.248903: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-02 04:14:32.416538: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Epoch 1/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 11ms/step - accuracy: 0.0093 - loss: 6.4454 - val_accuracy: 0.0298 - val_loss: 6.2195
Epoch 2/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.0256 - loss: 5.7689 - val_accuracy: 0.0298 - val_loss: 5.9839
Epoch 3/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.0752 - loss: 5.1822 - val_accuracy: 0.0426 - val_loss: 5.8993
Epoch 4/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.1388 - loss: 4.6698 - val_accuracy: 0.0383 - val_loss: 5.9639
Epoch 5/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.1803 - loss: 4.3571 - val_accuracy: 0.0511 - val_loss: 6.0658
Epoch 6/25
[1m727/727[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.2140 - loss: 4.1093 - val_accuracy: 0.0426 - val_loss: 6.5062
Epoch 7/25
[1m727/72

In [1]:
new_data = ['Pyhton Numpy Pandas Sklearn and Scikit-learn ML Algorithms C HTML/CSS/JS C++ Java SQL Data Structure and Algorithms ReactJS NodeJS MERN Stack Git/Github Communication Skills Problem Solving VS Code ACHIEVEMENTS Google KickStart Best Rank: 1507th Awarded for Academic Excellence Certificate for excellent academic record COMPETITIVE CODING PROFILES Codeforces Max Rating: 1408 (Specialist) | Solved 550+ Problems CodeChef 3 Star Rated | Rating: 1658 Hacker Rank 5 star (Gold Badge) in Problem Solving Leetcode Rating: 1641 , Top 20% LANGUAGES English/Hindi/Gujarati Professional Working Proficiency INTERESTS Learning Tech Analytics AI/ML/DL WebDev Competitive Coding']

new_sequences = tokenizer.texts_to_sequences(new_data)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_sequence_length, padding='post')
predictions = model.predict(new_padded_sequences)

# Convert predictions to class labels
predicted_labels = [qnet_to_title[labels.columns[np.argmax(prediction)]] for prediction in predictions]

print("Predicted Job Profiles:", predicted_labels)

NameError: name 'tokenizer' is not defined