In [32]:
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Activation, Embedding, Bidirectional,SimpleRNN

# Download the necessary datasets
nltk.download('punkt')  # For word_tokenize
nltk.download('stopwords')  # For stopwords
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /home/doaa/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/doaa/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/doaa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
df = pd.read_csv('/home/doaa/programming/ML/DEPI/twitter_training.csv')

df

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...
...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74681 entries, 0 to 74680
Data columns (total 2 columns):
 #   Column                                                 Non-Null Count  Dtype 
---  ------                                                 --------------  ----- 
 0   Positive                                               74681 non-null  object
 1   im getting on borderlands and i will murder you all ,  73995 non-null  object
dtypes: object(2)
memory usage: 1.1+ MB


In [10]:
df['Positive'].value_counts()

Positive
Negative      22542
Positive      20831
Neutral       18318
Irrelevant    12990
Name: count, dtype: int64

In [11]:
df.drop(columns=['Borderlands', '2401'], inplace=True)

In [13]:
target = df['Positive']
text = df['im getting on borderlands and i will murder you all ,']
text

0        I am coming to the borders and I will kill you...
1        im getting on borderlands and i will kill you ...
2        im coming on borderlands and i will murder you...
3        im getting on borderlands 2 and i will murder ...
4        im getting into borderlands and i can murder y...
                               ...                        
74676    Just realized that the Windows partition of my...
74677    Just realized that my Mac window partition is ...
74678    Just realized the windows partition of my Mac ...
74679    Just realized between the windows partition of...
74680    Just like the windows partition of my Mac is l...
Name: im getting on borderlands and i will murder you all ,, Length: 74681, dtype: object

In [15]:
df.isnull().sum()

Positive                                                   0
im getting on borderlands and i will murder you all ,    686
dtype: int64

In [16]:
df.dropna(inplace=True)
df.isnull().sum()

Positive                                                 0
im getting on borderlands and i will murder you all ,    0
dtype: int64

In [17]:
df.columns=['labels','text']
df

Unnamed: 0,labels,text
0,Positive,I am coming to the borders and I will kill you...
1,Positive,im getting on borderlands and i will kill you ...
2,Positive,im coming on borderlands and i will murder you...
3,Positive,im getting on borderlands 2 and i will murder ...
4,Positive,im getting into borderlands and i can murder y...
...,...,...
74676,Positive,Just realized that the Windows partition of my...
74677,Positive,Just realized that my Mac window partition is ...
74678,Positive,Just realized the windows partition of my Mac ...
74679,Positive,Just realized between the windows partition of...


In [19]:
df['labels'].value_counts()

labels
Negative      22358
Positive      20654
Neutral       18108
Irrelevant    12875
Name: count, dtype: int64

In [20]:
def clean(text):
    sms = re.sub('[^a-zA-Z]', ' ', text)
    sms = sms.lower()
    sms = sms.split()
    sms = ' '.join(sms)
    return sms

df['clean_Text'] = df['text'].apply(clean)
df

Unnamed: 0,labels,text,clean_Text
0,Positive,I am coming to the borders and I will kill you...,i am coming to the borders and i will kill you...
1,Positive,im getting on borderlands and i will kill you ...,im getting on borderlands and i will kill you all
2,Positive,im coming on borderlands and i will murder you...,im coming on borderlands and i will murder you...
3,Positive,im getting on borderlands 2 and i will murder ...,im getting on borderlands and i will murder yo...
4,Positive,im getting into borderlands and i can murder y...,im getting into borderlands and i can murder y...
...,...,...,...
74676,Positive,Just realized that the Windows partition of my...,just realized that the windows partition of my...
74677,Positive,Just realized that my Mac window partition is ...,just realized that my mac window partition is ...
74678,Positive,Just realized the windows partition of my Mac ...,just realized the windows partition of my mac ...
74679,Positive,Just realized between the windows partition of...,just realized between the windows partition of...


In [21]:
nltk.download('punkt_tab')
nltk.download('stopwords')

df['token_Text'] = df.apply(lambda row: nltk.word_tokenize(row['clean_Text']),axis= 1)

df

[nltk_data] Downloading package punkt_tab to /home/doaa/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /home/doaa/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,labels,text,clean_Text,token_Text
0,Positive,I am coming to the borders and I will kill you...,i am coming to the borders and i will kill you...,"[i, am, coming, to, the, borders, and, i, will..."
1,Positive,im getting on borderlands and i will kill you ...,im getting on borderlands and i will kill you all,"[im, getting, on, borderlands, and, i, will, k..."
2,Positive,im coming on borderlands and i will murder you...,im coming on borderlands and i will murder you...,"[im, coming, on, borderlands, and, i, will, mu..."
3,Positive,im getting on borderlands 2 and i will murder ...,im getting on borderlands and i will murder yo...,"[im, getting, on, borderlands, and, i, will, m..."
4,Positive,im getting into borderlands and i can murder y...,im getting into borderlands and i can murder y...,"[im, getting, into, borderlands, and, i, can, ..."
...,...,...,...,...
74676,Positive,Just realized that the Windows partition of my...,just realized that the windows partition of my...,"[just, realized, that, the, windows, partition..."
74677,Positive,Just realized that my Mac window partition is ...,just realized that my mac window partition is ...,"[just, realized, that, my, mac, window, partit..."
74678,Positive,Just realized the windows partition of my Mac ...,just realized the windows partition of my mac ...,"[just, realized, the, windows, partition, of, ..."
74679,Positive,Just realized between the windows partition of...,just realized between the windows partition of...,"[just, realized, between, the, windows, partit..."


In [22]:
def remove_stopW(text):
    stop_word = set(stopwords.words("english"))
    filtering = [word for word in text if word not in stop_word]
    return filtering

df['RSW_Text'] = df['token_Text'].apply(remove_stopW)
df

Unnamed: 0,labels,text,clean_Text,token_Text,RSW_Text
0,Positive,I am coming to the borders and I will kill you...,i am coming to the borders and i will kill you...,"[i, am, coming, to, the, borders, and, i, will...","[coming, borders, kill]"
1,Positive,im getting on borderlands and i will kill you ...,im getting on borderlands and i will kill you all,"[im, getting, on, borderlands, and, i, will, k...","[im, getting, borderlands, kill]"
2,Positive,im coming on borderlands and i will murder you...,im coming on borderlands and i will murder you...,"[im, coming, on, borderlands, and, i, will, mu...","[im, coming, borderlands, murder]"
3,Positive,im getting on borderlands 2 and i will murder ...,im getting on borderlands and i will murder yo...,"[im, getting, on, borderlands, and, i, will, m...","[im, getting, borderlands, murder]"
4,Positive,im getting into borderlands and i can murder y...,im getting into borderlands and i can murder y...,"[im, getting, into, borderlands, and, i, can, ...","[im, getting, borderlands, murder]"
...,...,...,...,...,...
74676,Positive,Just realized that the Windows partition of my...,just realized that the windows partition of my...,"[just, realized, that, the, windows, partition...","[realized, windows, partition, mac, like, year..."
74677,Positive,Just realized that my Mac window partition is ...,just realized that my mac window partition is ...,"[just, realized, that, my, mac, window, partit...","[realized, mac, window, partition, years, behi..."
74678,Positive,Just realized the windows partition of my Mac ...,just realized the windows partition of my mac ...,"[just, realized, the, windows, partition, of, ...","[realized, windows, partition, mac, years, beh..."
74679,Positive,Just realized between the windows partition of...,just realized between the windows partition of...,"[just, realized, between, the, windows, partit...","[realized, windows, partition, mac, like, year..."


In [24]:
lemmatizer = WordNetLemmatizer()

def wordLemmatize(text):
    lemma = [lemmatizer.lemmatize(word,pos='v') for word in text]
    return lemma

df['lemmaText'] = df['RSW_Text'].apply(wordLemmatize)

df

Unnamed: 0,labels,text,clean_Text,token_Text,RSW_Text,lemmaText
0,Positive,I am coming to the borders and I will kill you...,i am coming to the borders and i will kill you...,"[i, am, coming, to, the, borders, and, i, will...","[coming, borders, kill]","[come, border, kill]"
1,Positive,im getting on borderlands and i will kill you ...,im getting on borderlands and i will kill you all,"[im, getting, on, borderlands, and, i, will, k...","[im, getting, borderlands, kill]","[im, get, borderlands, kill]"
2,Positive,im coming on borderlands and i will murder you...,im coming on borderlands and i will murder you...,"[im, coming, on, borderlands, and, i, will, mu...","[im, coming, borderlands, murder]","[im, come, borderlands, murder]"
3,Positive,im getting on borderlands 2 and i will murder ...,im getting on borderlands and i will murder yo...,"[im, getting, on, borderlands, and, i, will, m...","[im, getting, borderlands, murder]","[im, get, borderlands, murder]"
4,Positive,im getting into borderlands and i can murder y...,im getting into borderlands and i can murder y...,"[im, getting, into, borderlands, and, i, can, ...","[im, getting, borderlands, murder]","[im, get, borderlands, murder]"
...,...,...,...,...,...,...
74676,Positive,Just realized that the Windows partition of my...,just realized that the windows partition of my...,"[just, realized, that, the, windows, partition...","[realized, windows, partition, mac, like, year...","[realize, windows, partition, mac, like, years..."
74677,Positive,Just realized that my Mac window partition is ...,just realized that my mac window partition is ...,"[just, realized, that, my, mac, window, partit...","[realized, mac, window, partition, years, behi...","[realize, mac, window, partition, years, behin..."
74678,Positive,Just realized the windows partition of my Mac ...,just realized the windows partition of my mac ...,"[just, realized, the, windows, partition, of, ...","[realized, windows, partition, mac, years, beh...","[realize, windows, partition, mac, years, behi..."
74679,Positive,Just realized between the windows partition of...,just realized between the windows partition of...,"[just, realized, between, the, windows, partit...","[realized, windows, partition, mac, like, year...","[realize, windows, partition, mac, like, years..."


In [26]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['lemmaText'])

In [28]:
sequence = tokenizer.texts_to_sequences(df['lemmaText'])
word_index = tokenizer.word_index
pad_sequence = pad_sequences(sequence, padding='post')

In [30]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
df["labels"] = label_encoder.fit_transform(df["labels"])
df

Unnamed: 0,labels,text,clean_Text,token_Text,RSW_Text,lemmaText
0,3,I am coming to the borders and I will kill you...,i am coming to the borders and i will kill you...,"[i, am, coming, to, the, borders, and, i, will...","[coming, borders, kill]","[come, border, kill]"
1,3,im getting on borderlands and i will kill you ...,im getting on borderlands and i will kill you all,"[im, getting, on, borderlands, and, i, will, k...","[im, getting, borderlands, kill]","[im, get, borderlands, kill]"
2,3,im coming on borderlands and i will murder you...,im coming on borderlands and i will murder you...,"[im, coming, on, borderlands, and, i, will, mu...","[im, coming, borderlands, murder]","[im, come, borderlands, murder]"
3,3,im getting on borderlands 2 and i will murder ...,im getting on borderlands and i will murder yo...,"[im, getting, on, borderlands, and, i, will, m...","[im, getting, borderlands, murder]","[im, get, borderlands, murder]"
4,3,im getting into borderlands and i can murder y...,im getting into borderlands and i can murder y...,"[im, getting, into, borderlands, and, i, can, ...","[im, getting, borderlands, murder]","[im, get, borderlands, murder]"
...,...,...,...,...,...,...
74676,3,Just realized that the Windows partition of my...,just realized that the windows partition of my...,"[just, realized, that, the, windows, partition...","[realized, windows, partition, mac, like, year...","[realize, windows, partition, mac, like, years..."
74677,3,Just realized that my Mac window partition is ...,just realized that my mac window partition is ...,"[just, realized, that, my, mac, window, partit...","[realized, mac, window, partition, years, behi...","[realize, mac, window, partition, years, behin..."
74678,3,Just realized the windows partition of my Mac ...,just realized the windows partition of my mac ...,"[just, realized, the, windows, partition, of, ...","[realized, windows, partition, mac, years, beh...","[realize, windows, partition, mac, years, behi..."
74679,3,Just realized between the windows partition of...,just realized between the windows partition of...,"[just, realized, between, the, windows, partit...","[realized, windows, partition, mac, like, year...","[realize, windows, partition, mac, like, years..."


In [None]:
X_train, X_test, y_train, y_test = train_test_split(pad_sequence, df["labels"], test_size=0.2, random_state=42)



In [33]:

model = Sequential()
model.add(Embedding(20000,64 ))
model.add(Dropout(0.2))
model.add(SimpleRNN(units = 64))
model.add(Dropout(0.2))
model.add(Dense(6,activation='softmax'))

model.summary()

2025-03-19 22:50:05.416743: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 64)          1280000   
                                                                 
 dropout (Dropout)           (None, None, 64)          0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 6)                 390       
                                                                 
Total params: 1288646 (4.92 MB)
Trainable params: 1288646 (4.92 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [38]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.fit(X_train, y_train, epochs=10, verbose=1)