# Text Classification Neural Network with PyTorch

In [67]:
import pandas as pd 
import numpy as np

In [68]:
# Load the raw contact-form messages into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path, so the notebook
# only runs on the author's machine; consider a configurable, relative data directory.
df=pd.read_csv(r"C:\Users\vivek\Data science\Deep learning\1_foduu_contacts_message.csv")
# Preview the first 10 rows.
df.head(10)



Unnamed: 0,message,labels
0,"Hello,\n\nI Am Here For Providing Cheap and Pr...",spam
1,"Hello,\n\nI Am Here For Providing Cheap and Pr...",spam
2,"Hi,\n \nGet more leads for your business. Pay ...",spam
3,Hi \n\t\nI went through your website and we wo...,enquiry
4,sefgers,
5,demo enquiry,enquiry
6,demo enquiry,enquiry
7,fsddfsdfsdfsda,
8,qeqeqqeqe,
9,wqsqdqdqd,


In [69]:
# Summarize column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1760 entries, 0 to 1759
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   message  1741 non-null   object
 1   labels   105 non-null    object
dtypes: object(2)
memory usage: 27.6+ KB


In [70]:
# Count missing values per column.
df.isna().sum()

message      19
labels     1655
dtype: int64

In [71]:
# Drop every row that lacks a message or a label, so only labelled messages remain.
# NOTE(review): this mutates df in place; re-running the cell needs a fresh reload of the CSV
# to start again from the raw data.
df.dropna(inplace=True)

In [72]:
# Number of rows that exactly repeat an earlier row.
df.duplicated().sum()

np.int64(2)

In [73]:
# Show the rows flagged as duplicates (the repeated occurrences, not the first ones).
df[df.duplicated()]

Unnamed: 0,message,labels
1,"Hello,\n\nI Am Here For Providing Cheap and Pr...",spam
6,demo enquiry,enquiry


In [74]:
# Remove the repeated rows in place, keeping one copy of each.
df.drop_duplicates(inplace=True)

In [75]:
# Snapshot of the cleaned frame, used later to spot-check predictions.
# .copy() makes it independent of df: plain assignment only binds a second name to
# the same object, so columns added to df afterwards would also appear in check_df.
check_df=df.copy()

In [76]:
# label encoding

from sklearn.preprocessing import LabelEncoder

# Map each string label to an integer class id. The fitted encoder is kept in `le`
# so that predicted ids can be decoded back to label names later.
le=LabelEncoder()
df['label_encoded']=le.fit_transform(df['labels'])

# classes_ lists the label names in id order (position i is the name of class id i).
print(le.classes_)
df.head()


['enquiry' 'job' 'spam']


Unnamed: 0,message,labels,label_encoded
0,"Hello,\n\nI Am Here For Providing Cheap and Pr...",spam,2
2,"Hi,\n \nGet more leads for your business. Pay ...",spam,2
3,Hi \n\t\nI went through your website and we wo...,enquiry,0
5,demo enquiry,enquiry,0
19,- for www.hpills.com-\n\nOpen cart\n\nThere ar...,enquiry,0


In [77]:
# Class distribution of the labelled messages.
df['labels'].value_counts()

labels
enquiry    58
job        26
spam       19
Name: count, dtype: int64

In [78]:
# Convert text to numerical vector

from sklearn.feature_extraction.text import TfidfVectorizer
# Cap the vocabulary at 500 terms; the fitted vectorizer is reused when predicting new messages.
vectorizer=TfidfVectorizer(max_features=500)
# Dense TF-IDF feature matrix, one row per message.
x=vectorizer.fit_transform(df['message']).toarray()
# Integer class ids used as training targets (still a pandas Series carrying df's index).
y=df['label_encoded']

In [79]:
# Inspect the TF-IDF feature matrix.
x

array([[0.        , 0.        , 0.        , ..., 0.14672734, 0.        ,
        0.        ],
       [0.        , 0.23243237, 0.23243237, ..., 0.15795752, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.36669898, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.08770386, 0.        ,
        0.        ],
       [0.04110025, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.05889119, 0.        ,
        0.        ]])

In [80]:
# PyTorch Dataset
import torch
from torch.utils.data import DataLoader, Dataset

class text_dataset(Dataset):
    """In-memory dataset pairing feature rows with integer class labels.

    Args:
        x: 2-D array-like of numeric features, one row per sample. Stored as float32.
        y: 1-D array-like of integer class ids, one per sample. May be a pandas
            Series, a NumPy array or a plain list. Stored as int64 (torch.long).

    Raises:
        ValueError: if x and y do not contain the same number of samples.
    """
    def __init__(self,x,y):
        self.x=torch.tensor(x,dtype=torch.float32)
        # np.asarray accepts Series, ndarrays and lists alike, and for a Series it
        # discards the (possibly non-contiguous) index so only positions matter.
        self.y=torch.tensor(np.asarray(y) , dtype=torch.long)
        # Fail early: a length mismatch would otherwise surface as an IndexError
        # somewhere in the middle of an epoch.
        if len(self.x)!=len(self.y):
            raise ValueError(
                f'x and y must have the same number of samples, '
                f'got {len(self.x)} and {len(self.y)}'
            )
    
    def __len__(self):
        # Number of samples.
        return len(self.x)
    
    def __getitem__(self, idx):
        # (features, label) pair for one sample.
        return self.x[idx] , self.y[idx]
    
# Wrap the whole feature matrix and its labels, served in shuffled mini-batches of 16.
# NOTE(review): no random seed is set anywhere visible, so batch order differs between runs.
dataset=text_dataset(x,y)
dataloader=DataLoader(dataset,batch_size=16, shuffle=True)


In [81]:
# Neural Network Model Design

import torch.nn as nn

class text_classifier(nn.Module):
    """Two-layer feed-forward classifier: linear -> ReLU -> linear.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of the hidden layer.
        num_clases: number of output classes (one raw score per class).

    The forward pass returns unnormalized scores (logits); no softmax is applied.
    """
    def __init__(self, input_size , hidden_size , num_clases):
        super().__init__()
        # Input projection, non-linearity, then the output projection.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_clases)

    def forward(self , x):
        """Return class logits for a batch of feature rows."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Size the network from the data instead of hard-coding it: TfidfVectorizer's
# max_features is only an upper bound, so the actual feature count is x.shape[1],
# and the output width must equal the number of classes the label encoder found.
model=text_classifier(input_size=x.shape[1] , hidden_size=128 , num_clases=len(le.classes_))


In [82]:
# Loss + Optimizer + Train Loop

import torch.optim.adam


# Cross-entropy on raw logits, optimized with Adam.
criterion =nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=0.01)

num_epochs=20

# Make sure the model is in training mode before optimizing.
model.train()
for epoch in range(num_epochs):
    # Sample-weighted running totals so the reported loss is the mean over the
    # whole epoch, not just whatever the last (shuffled, possibly short) batch gave.
    epoch_loss=0.0
    seen=0
    for batch_x , batch_y in dataloader:
        outputs=model(batch_x)
        loss=criterion(outputs,batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so scale by batch size before summing.
        epoch_loss+=loss.item()*batch_x.size(0)
        seen+=batch_x.size(0)

    if (epoch+1)%5==0:
        # An empty dataloader yields NaN instead of a NameError/ZeroDivisionError.
        mean_loss=epoch_loss/seen if seen else float('nan')
        print(f'Epoch : [{epoch+1}/{num_epochs}] , Loss : {mean_loss:.4f}')

Epoch : [5/20] , Loss : 0.0064
Epoch : [10/20] , Loss : 0.0002
Epoch : [15/20] , Loss : 0.0014
Epoch : [20/20] , Loss : 0.0006


In [83]:
#  Predict & Evaluate

# Put the model in inference mode before scoring (matters as soon as layers such
# as dropout or batch-norm are added to the network).
model.eval()
# NOTE(review): x and y are the same rows the model was trained on, so this number is
# training accuracy, not an estimate of performance on unseen messages.
with torch.no_grad():
    preds=model(torch.tensor(x,dtype=torch.float32))
    # Predicted class id = index of the largest logit in each row.
    predicted=torch.argmax(preds,dim=1)
    accuracy = (predicted==torch.tensor(y.values)).float().mean()
    print(f'Accuracy : {accuracy.item():.4f}')

Accuracy : 1.0000


In [84]:
# Decode Labels

# Turn the predicted class ids back into their original label strings.
decoded_labels=le.inverse_transform(predicted.numpy())
# Show the first 10 decoded predictions.
print(decoded_labels[:10])

['spam' 'spam' 'enquiry' 'enquiry' 'enquiry' 'enquiry' 'enquiry' 'spam'
 'spam' 'spam']


# Check That the Model Works

In [88]:
def predict_message(msg):
    """Classify one message and return its decoded label.

    Uses the module-level fitted `vectorizer`, the trained `model` and the
    label encoder `le`.

    Args:
        msg: the message text to classify.

    Returns:
        The label produced by `le.inverse_transform` for the highest-scoring class.
    """
    # TF-IDF features for a one-document batch, densified so torch can consume them.
    features = vectorizer.transform([msg]).toarray()
    batch = torch.tensor(features, dtype=torch.float32)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(batch)

    # Index of the largest logit is the predicted class id.
    class_id = torch.argmax(logits, dim=1).item()

    # Map the class id back to its label name.
    return le.inverse_transform([class_id])[0]


In [99]:

# Spot-check: print every message in check_df followed by the label the model predicts for it.
# NOTE(review): this prints the full text of every message, so the saved output may contain
# personal data; review or clear it before sharing the notebook.
for message in check_df['message']:
    print(f'{message}')
    print('----------------------')
    print(f'Prediction: {predict_message(message)}')
    print('----------------------')


Hello,

I Am Here For Providing Cheap and Premium Paid Guest Poston High-Quality Websites.

Why should you choose my service?

- Your Post Will Be Published on time.

- You Will Get Do-Follow Backlinks.

- Your Post Will Be Permanent.

- Your Post Will Be Google Indexed.

- Provide unique content if needed.


Here are the sample Sites:

https://www.techtimes.com DA 84
https://www.sciencetimes.com DA 65
https://widetopics.world.edu DA 73
https://techbeloved.com DA 64
https://roseatehouselondon.com DA 39

If you are interested then let me know if I can share our full list of sites.
----------------------
Prediction: spam
----------------------
Hi,
 
Get more leads for your business. Pay us per lead. We generate leads for all types of business. Just reply us with your needs and business details. We will give the best leads.

Let me know if we could have a quick demo on the same or schedule a meeting at you’re office?

Start growing your business today.

Thanks & Regards,
Crystal Rose 
Sr.