In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [3]:
# load data from excel 
def data_from_excel(file_path):
    """Read the "text" and "label" columns of an Excel workbook.

    Parameters
    ----------
    file_path : str or path-like
        Location of the workbook; passed straight to ``pd.read_excel``.

    Returns
    -------
    tuple[list, list]
        ``(text, label)`` -- the values of the "text" column and of the
        "label" column as plain Python lists, in row order.

    Raises
    ------
    KeyError
        If the sheet has no "text" or no "label" column.
    """
    sheet = pd.read_excel(file_path)
    # Pull the two columns out in a fixed order: text first, then label.
    texts, labels = (sheet[column].tolist() for column in ("text", "label"))
    return texts, labels

In [4]:
# Workbook that holds the "text" and "label" columns.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this drive is mounted.
file_path = r"T:\My Drive\000 ihsan\02 Courses\003GIT\Neural Networks and Deep Learning\10.Artificial Neural Networks with Keras\data.xlsx"

# Read both columns into Python lists, then show them as the cell output.
text, label = data_from_excel(file_path)
(text, label)

(['Remuneration program',
  'Inter net bill',
  'Refreshment MIlk,Snaks&Nuts',
  'petrol for grass cutting'],
 ['GVAC Academic Program expense',
  'GVAC Mobile and internet expenses',
  'GVAC Refreshment Expense',
  'GVIC Cleaning Expenses'])

In [5]:
# Map every distinct category name in `label` to an integer class id.
label_encoder = LabelEncoder()
# Store the class ids as a torch.long tensor -- integer class indices are the
# target format PyTorch classification losses work with.
y = torch.tensor(
    label_encoder.fit_transform(label),
    dtype=torch.long,
)

# Turn each text into a TF-IDF (Term Frequency-Inverse Document Frequency) feature vector.
# NOTE(review): the vectorizer is fitted on every row of `text`, i.e. before the
# train/test split further down, so held-out rows influence the learned features.
vectorizer = TfidfVectorizer()
# fit_transform returns a sparse matrix; densify it before building the float32 tensor.
x = torch.tensor(
    vectorizer.fit_transform(text).toarray(),
    dtype=torch.float32,
)

In [6]:
# Show the TF-IDF feature matrix (one row per input text); these rows are the model's input features.
x

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.7071, 0.0000, 0.7071, 0.0000],
        [0.5774, 0.0000, 0.0000, 0.0000, 0.5774, 0.0000, 0.5774, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000, 0.0000, 0.5000, 0.0000,
         0.0000, 0.5000, 0.0000, 0.5000],
        [0.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5000,
         0.0000, 0.0000, 0.0000, 0.0000]])

In [7]:
# Train/test split (20% of the rows held out, fixed seed for reproducibility).
# train_test_split returns the pieces grouped per input array, i.e.
# (x_train, x_test, y_train, y_test). Unpacking them as
# (x_train, y_train, x_test, y_test) would silently put test features into
# `y_train` and training labels into `x_test`.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)