## **Atheism/Science Text Classifier**

In [None]:
# Everything this cell needs from scikit-learn, gathered in one place.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import Perceptron

# The two newsgroups the classifier has to tell apart.
categories = ['alt.atheism', 'sci.med']
train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)

# Feature extraction: raw token counts first, then TF-IDF weighting on top.
# Both fitted objects are kept as globals because they are reused whenever
# new documents are transformed for prediction.
cv = CountVectorizer()
tfidf_tf = TfidfTransformer()
X_train_counts = cv.fit_transform(train.data)
X_train_tfidf = tfidf_tf.fit_transform(X_train_counts)

# Linear perceptron trained on the TF-IDF features.
clf = Perceptron(max_iter=100)
clf.fit(X_train_tfidf, train.target)

# A few hand-written sentences to sanity-check the trained model.
test_docs = [
    'Religion is widespread, even in modern times',
    'His kidney failed',
    'The pope is a controversial leader',
    'White blood cells fight off infections',
    'The reverend had a heart attack in church',
]

# New text must go through the already-fitted vectorizer and transformer
# (transform, not fit_transform) so the feature columns match training.
X_test_counts = cv.transform(test_docs)
X_test_tfidf = tfidf_tf.transform(X_test_counts)
pred = clf.predict(X_test_tfidf)

# Each prediction is an index into train.target_names.
for doc, category in zip(test_docs, pred):
    print('%r => %s' % (doc, train.target_names[category]))

'Religion is widespread, even in modern times' => alt.atheism
'His kidney failed' => alt.atheism
'The pope is a controversial leader' => sci.med
'White blood cells fight off infections' => sci.med
'The reverend had a heart attack in church' => sci.med


In [None]:
# A second, larger batch of hand-written sentences for the classifier.
# Relies on `cv`, `tfidf_tf`, `clf` and `train` from the training cell.
test_docs = [
    "Doctors recommend regular exercise for a healthy heart.",
    "Atheism is often misunderstood by many religious groups.",
    "The patient was diagnosed with diabetes and high blood pressure.",
    "The debate between science and religion continues to this day.",
    "A new breakthrough in cancer treatment was announced by scientists.",
    "The church organized a fundraiser for the local community.",
    "Medical students must study anatomy extensively.",
    "Many religious texts discuss the origin of life.",
    "Vaccination rates are increasing across the globe.",
    "Spiritual leaders gathered for an interfaith dialogue.",
]

# Same two-step featurisation as at training time: counts, then TF-IDF.
X_test_counts = cv.transform(test_docs)
X_test_tfidf = tfidf_tf.transform(X_test_counts)
pred = clf.predict(X_test_tfidf)

# Report every sentence next to the name of its predicted newsgroup.
for doc, category in zip(test_docs, pred):
    print('%r => %s' % (doc, train.target_names[category]))


'Doctors recommend regular exercise for a healthy heart.' => sci.med
'Atheism is often misunderstood by many religious groups.' => alt.atheism
'The patient was diagnosed with diabetes and high blood pressure.' => sci.med
'The debate between science and religion continues to this day.' => alt.atheism
'A new breakthrough in cancer treatment was announced by scientists.' => sci.med
'The church organized a fundraiser for the local community.' => alt.atheism
'Medical students must study anatomy extensively.' => sci.med
'Many religious texts discuss the origin of life.' => alt.atheism
'Vaccination rates are increasing across the globe.' => sci.med
'Spiritual leaders gathered for an interfaith dialogue.' => alt.atheism


## **Movie Sentiment Analyzer**

In [None]:
# 10 hidden layers using the sigmoid activation function.
#
# Prerequisite: "sentiment_dataset.tsv" is written by the dataset-preparation
# cells further down in this notebook; on a fresh kernel run those first.

# Import every Keras name from the same package (tensorflow.keras) so the
# model class and the layers are guaranteed to come from one Keras version.
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dense, Activation, Input

import pandas as pd
import sys
import tensorflow as tf

# Seed Python, NumPy and TensorFlow so weight initialisation is repeatable.
tf.keras.utils.set_random_seed(42)

data = pd.read_csv("sentiment_dataset.tsv", sep='\t')

docs = data["text"]

# Learn the vocabulary from the corpus, then encode every document as one
# binary bag-of-words row.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(docs)

X_train = tokenizer.texts_to_matrix(docs, mode='binary')
y_train = to_categorical(data["label"])  # one-hot encoded labels

input_dim = X_train.shape[1]
nb_classes = y_train.shape[1]

model = Sequential()

# Declare the input shape with an explicit Input layer. Passing `input_dim`
# to the first Dense layer is deprecated and is what produced the Keras
# warning visible in the recorded output of this cell.
model.add(Input(shape=(input_dim,)))

# Ten identical hidden blocks: Dense(128) followed by a sigmoid.
for layer_idx in range(10):
    model.add(Dense(128))
    model.add(Activation('sigmoid'))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Softmax output + one-hot targets pair with categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print("Training...")

# shuffle=False keeps the rows in file order; the TSV was already shuffled
# when it was created. validation_split=0.1 holds out 10% of the rows.
# In the run recorded below, accuracy stayed near 0.50 for every epoch.
model.fit(X_train,
          y_train,
          epochs=10,
          batch_size=32,
          validation_split=0.1,
          shuffle=False,
          verbose=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training...
Epoch 1/10
300/300 - 18s - 60ms/step - accuracy: 0.5014 - loss: 0.6972 - val_accuracy: 0.4911 - val_loss: 0.6954
Epoch 2/10
300/300 - 16s - 54ms/step - accuracy: 0.5054 - loss: 0.6954 - val_accuracy: 0.4911 - val_loss: 0.6983
Epoch 3/10
300/300 - 21s - 70ms/step - accuracy: 0.5033 - loss: 0.6949 - val_accuracy: 0.4911 - val_loss: 0.6982
Epoch 4/10
300/300 - 19s - 64ms/step - accuracy: 0.5043 - loss: 0.6948 - val_accuracy: 0.4911 - val_loss: 0.6979
Epoch 5/10
300/300 - 10s - 35ms/step - accuracy: 0.5033 - loss: 0.6947 - val_accuracy: 0.4911 - val_loss: 0.6978
Epoch 6/10
300/300 - 21s - 71ms/step - accuracy: 0.5029 - loss: 0.6947 - val_accuracy: 0.4911 - val_loss: 0.6977
Epoch 7/10
300/300 - 21s - 69ms/step - accuracy: 0.5029 - loss: 0.6947 - val_accuracy: 0.4911 - val_loss: 0.6976
Epoch 8/10
300/300 - 20s - 68ms/step - accuracy: 0.5014 - loss: 0.6947 - val_accuracy: 0.4911 - val_loss: 0.6976
Epoch 9/10
300/300 - 20s - 66ms/step - accuracy: 0.5012 - loss: 0.6947 - val_accurac

<keras.src.callbacks.history.History at 0x78d8ddafd3f0>

ReLU(x) = max(0, x): negative inputs are clamped to zero, positive inputs pass through unchanged.

In [None]:
# 2 hidden layers using the ReLU activation function, sigmoid output.
#
# Prerequisite: "sentiment_dataset.tsv" is written by the dataset-preparation
# cells further down in this notebook; on a fresh kernel run those first.

# Import every Keras name from the same package (tensorflow.keras) so the
# model class and the layers are guaranteed to come from one Keras version.
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dense, Activation, Input

import pandas as pd
import sys
import tensorflow as tf

# Seed Python, NumPy and TensorFlow so weight initialisation is repeatable.
tf.keras.utils.set_random_seed(42)

data = pd.read_csv("sentiment_dataset.tsv", sep='\t')

docs = data["text"]

# Learn the vocabulary from the corpus, then encode every document as one
# binary bag-of-words row.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(docs)

X_train = tokenizer.texts_to_matrix(docs, mode='binary')
y_train = to_categorical(data["label"])  # one-hot encoded labels

input_dim = X_train.shape[1]
nb_classes = y_train.shape[1]

model = Sequential()

# Declare the input shape with an explicit Input layer. Passing `input_dim`
# to the first Dense layer is deprecated and is what produced the Keras
# warning visible in the recorded output of this cell.
model.add(Input(shape=(input_dim,)))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(128))
model.add(Activation('relu'))
# NOTE(review): this third Dense(128) has no activation, so it is a purely
# linear layer feeding the linear output layer below. The cell title says
# "2 layers" - confirm whether it is intentional or a missing activation.
model.add(Dense(128))

# One sigmoid unit per class, trained against the one-hot targets with
# binary cross-entropy (each unit is scored independently).
model.add(Dense(nb_classes))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print("Training...")

# shuffle=False keeps the rows in file order; the TSV was already shuffled
# when it was created. validation_split=0.1 holds out 10% of the rows.
# In the run recorded below, training accuracy reached 1.0 while val_loss
# kept rising after the first epoch.
model.fit(X_train,
          y_train,
          epochs=10,
          batch_size=32,
          validation_split=0.1,
          shuffle=False,
          verbose=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training...
Epoch 1/10
300/300 - 12s - 40ms/step - accuracy: 0.7128 - loss: 0.5483 - val_accuracy: 0.7732 - val_loss: 0.4746
Epoch 2/10
300/300 - 11s - 38ms/step - accuracy: 0.9305 - loss: 0.1630 - val_accuracy: 0.7535 - val_loss: 0.7675
Epoch 3/10
300/300 - 20s - 66ms/step - accuracy: 0.9917 - loss: 0.0227 - val_accuracy: 0.7610 - val_loss: 1.1936
Epoch 4/10
300/300 - 11s - 36ms/step - accuracy: 0.9982 - loss: 0.0048 - val_accuracy: 0.7591 - val_loss: 1.5600
Epoch 5/10
300/300 - 9s - 31ms/step - accuracy: 0.9993 - loss: 0.0025 - val_accuracy: 0.7591 - val_loss: 1.7354
Epoch 6/10
300/300 - 11s - 38ms/step - accuracy: 1.0000 - loss: 2.7384e-04 - val_accuracy: 0.7535 - val_loss: 1.8746
Epoch 7/10
300/300 - 10s - 35ms/step - accuracy: 1.0000 - loss: 2.8699e-05 - val_accuracy: 0.7545 - val_loss: 1.9240
Epoch 8/10
300/300 - 21s - 71ms/step - accuracy: 1.0000 - loss: 1.9826e-05 - val_accuracy: 0.7545 - val_loss: 1.9670
Epoch 9/10
300/300 - 13s - 42ms/step - accuracy: 1.0000 - loss: 1.4718e-0

<keras.src.callbacks.history.History at 0x78d8ddf8b580>

In [None]:
# 2 hidden layers using the ReLU activation function, softmax output.
#
# Prerequisite: "sentiment_dataset.tsv" is written by the dataset-preparation
# cells further down in this notebook; on a fresh kernel run those first.

# Import every Keras name from the same package (tensorflow.keras) so the
# model class and the layers are guaranteed to come from one Keras version.
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dense, Activation, Input

import pandas as pd
import sys
import tensorflow as tf

# Seed Python, NumPy and TensorFlow so weight initialisation is repeatable.
tf.keras.utils.set_random_seed(42)

data = pd.read_csv("sentiment_dataset.tsv", sep='\t')

docs = data["text"]

# Learn the vocabulary from the corpus, then encode every document as one
# binary bag-of-words row.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(docs)

X_train = tokenizer.texts_to_matrix(docs, mode='binary')
y_train = to_categorical(data["label"])  # one-hot encoded labels

input_dim = X_train.shape[1]
nb_classes = y_train.shape[1]

model = Sequential()

# Declare the input shape with an explicit Input layer. Passing `input_dim`
# to the first Dense layer is deprecated and is what produced the Keras
# warning visible in the recorded output of this cell.
model.add(Input(shape=(input_dim,)))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(128))
model.add(Activation('relu'))
# NOTE(review): this third Dense(128) has no activation, so it is a purely
# linear layer feeding the linear output layer below - confirm whether it
# is intentional or a missing activation.
model.add(Dense(128))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Softmax output + one-hot targets pair with categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print("Training...")

# shuffle=False keeps the rows in file order; the TSV was already shuffled
# when it was created. validation_split=0.1 holds out 10% of the rows.
# In the run recorded below, training accuracy reached 1.0 while val_loss
# kept rising after the first epoch.
model.fit(X_train,
          y_train,
          epochs=10,
          batch_size=32,
          validation_split=0.1,
          shuffle=False,
          verbose=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training...
Epoch 1/10
300/300 - 11s - 38ms/step - accuracy: 0.7140 - loss: 0.5480 - val_accuracy: 0.7760 - val_loss: 0.4716
Epoch 2/10
300/300 - 10s - 32ms/step - accuracy: 0.9307 - loss: 0.1637 - val_accuracy: 0.7619 - val_loss: 0.7332
Epoch 3/10
300/300 - 11s - 35ms/step - accuracy: 0.9908 - loss: 0.0240 - val_accuracy: 0.7676 - val_loss: 1.1550
Epoch 4/10
300/300 - 10s - 35ms/step - accuracy: 0.9995 - loss: 0.0021 - val_accuracy: 0.7666 - val_loss: 1.5560
Epoch 5/10
300/300 - 10s - 34ms/step - accuracy: 1.0000 - loss: 1.9827e-04 - val_accuracy: 0.7676 - val_loss: 1.6844
Epoch 6/10
300/300 - 14s - 46ms/step - accuracy: 1.0000 - loss: 8.0125e-05 - val_accuracy: 0.7676 - val_loss: 1.7751
Epoch 7/10
300/300 - 17s - 56ms/step - accuracy: 1.0000 - loss: 4.7104e-05 - val_accuracy: 0.7676 - val_loss: 1.8491
Epoch 8/10
300/300 - 9s - 30ms/step - accuracy: 1.0000 - loss: 3.0667e-05 - val_accuracy: 0.7685 - val_loss: 1.9128
Epoch 9/10
300/300 - 11s - 38ms/step - accuracy: 1.0000 - loss: 2.122

<keras.src.callbacks.history.History at 0x78d8dddc34c0>

## **Tokenization**

In [None]:
# Minimal demonstration of the Keras Tokenizer on two short reviews.
# Relies on `Tokenizer` imported in the sentiment-model cells.
docs = ['smart and alert , thirteen conversations about one thing is a smallgem','not very smart movie']
tok=Tokenizer()
tok.fit_on_texts(docs)
# Encode the two documents themselves. The original passed `doc`, a single
# string left over from an earlier for-loop, so the tokenizer iterated over
# its characters and returned one row per character instead of one row per
# document. With `docs` the result has one row per review; column 0 is
# unused because word indices start at 1.
tok.texts_to_matrix(docs, mode='binary')

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.

In [None]:
# Show the word -> integer index mapping held by the fitted tokenizer `tok`.
tok.word_index

{'smart': 1,
 'and': 2,
 'alert': 3,
 'thirteen': 4,
 'conversations': 5,
 'about': 6,
 'one': 7,
 'thing': 8,
 'is': 9,
 'a': 10,
 'smallgem': 11,
 'not': 12,
 'very': 13,
 'movie': 14}

In [None]:
# Colab-only: upload a file from the local machine into the runtime.
# In the recorded run the uploaded file was rt-polaritydata.tar.gz, which
# the following cells list and extract.
from google.colab import files #Uploading Files
uploaded = files.upload()

Saving rt-polaritydata.tar.gz to rt-polaritydata.tar.gz


In [None]:
# List the current working directory to confirm the uploaded archive is there.
import os
print(os.listdir())

['.config', 'rt-polaritydata.tar.gz', 'sample_data']


In [None]:
# Unpack the uploaded gzip-compressed tar archive into ./extracted_data.
import tarfile

file_name = "rt-polaritydata.tar.gz"

with tarfile.open(file_name, "r:gz") as tar:
    # The archive comes from a manual upload, so treat it as untrusted:
    # the "data" extraction filter rejects members that would escape the
    # target directory (absolute paths, "..", unsafe links). Older Python
    # builds do not have the filter argument, hence the feature check.
    if hasattr(tarfile, "data_filter"):
        tar.extractall("extracted_data", filter="data")
    else:
        tar.extractall("extracted_data")
    print("Extraction complete!")

Extraction complete!


In [None]:
# Inspect the top level of the extraction directory.
# Relies on `os` imported in an earlier cell.
extracted_path = "extracted_data"
print(os.listdir(extracted_path))

['rt-polaritydata.README.1.0.txt', 'rt-polaritydata']


In [None]:
# Point `extracted_path` at the dataset sub-folder and list its files.
# The dataset-loading cell below joins the .pos/.neg file names onto this value.
extracted_path = "extracted_data/rt-polaritydata"
print(os.listdir(extracted_path))

['rt-polarity.neg', 'rt-polarity.pos']


In [None]:
# Build one labelled, shuffled DataFrame from the positive and negative
# review files. Relies on `extracted_path` set by the previous cell.
import pandas as pd
import os

neg_file = os.path.join(extracted_path, "rt-polarity.neg")
pos_file = os.path.join(extracted_path, "rt-polarity.pos")


def _read_sentences(path):
    """Return the non-blank lines of ``path`` without their line terminator.

    The file is decoded as latin-1. Stripping the trailing newline keeps
    line breaks out of the ``text`` column, so every record stays on a single
    physical line when the frame is later written to a TSV file.
    """
    with open(path, 'r', encoding='latin-1') as f:
        return [line.rstrip('\n') for line in f if line.strip()]


positive_sentences = _read_sentences(pos_file)
negative_sentences = _read_sentences(neg_file)

data = pd.DataFrame({
    'text': positive_sentences + negative_sentences,
    'label': [1] * len(positive_sentences) + [0] * len(negative_sentences)  # 1 = Positive, 0 = Negative
})

# Shuffle the rows with a fixed seed so positives and negatives are mixed
# reproducibly, then renumber the index from 0.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

print(data.head())


                                                text  label
0  a dark , dull thriller with a parting shot tha...      0
1  director chris eyre is going through the paces...      0
2  although it lacks the detail of the book , the...      1
3  the script by david koepp is perfectly service...      1
4  an exciting and involving rock music doc , a s...      1


In [None]:
# Write the DataFrame to a tab-separated file without the index column.
# This is the file the "Movie Sentiment Analyzer" cells load with
# pd.read_csv("sentiment_dataset.tsv", sep='\t').
data.to_csv("sentiment_dataset.tsv", sep='\t', index=False)
print("Dataset saved as 'sentiment_dataset.tsv'")

Dataset saved as 'sentiment_dataset.tsv'


In [None]:
# Colab-only: send the saved TSV to the browser as a download.
# Relies on `files` imported from google.colab in the upload cell.
files.download('sentiment_dataset.tsv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>