<a href="https://colab.research.google.com/github/dhikaa2/bebas/blob/main/untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Attach Google Drive to the Colab runtime; the dataset is read from a
# path under this mount point later in the notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import glob

def read_imdb_data(data_dir):
    """Read IMDb reviews from a ``<split>/<sentiment>/*.txt`` directory tree.

    Args:
        data_dir: Root directory containing ``train`` and ``test`` folders,
            each with ``pos`` and ``neg`` sub-folders of ``.txt`` files.

    Returns:
        A ``(data, labels)`` tuple of nested dicts keyed first by split
        (``'train'``/``'test'``) and then by sentiment (``'pos'``/``'neg'``).
        ``data[split][sentiment]`` is the list of review texts and
        ``labels[split][sentiment]`` repeats the sentiment string once per
        review. A missing folder simply yields empty lists.
    """
    data = {}
    labels = {}
    for data_type in ('train', 'test'):
        data[data_type] = {}
        labels[data_type] = {}
        for sentiment in ('pos', 'neg'):
            pattern = os.path.join(data_dir, data_type, sentiment, '*.txt')
            reviews = []
            # glob returns matches in arbitrary, filesystem-dependent order;
            # sorting makes the corpus order reproducible across runs.
            for file_path in sorted(glob.glob(pattern)):
                with open(file_path, encoding='utf-8') as review:
                    reviews.append(review.read())
            data[data_type][sentiment] = reviews
            labels[data_type][sentiment] = [sentiment] * len(reviews)
    return data, labels

# Load every split from Drive and report how many reviews each one holds.
data, labels = read_imdb_data('/content/drive/MyDrive/data')
split_sizes = [
    len(data[split][sentiment])
    for split in ('train', 'test')
    for sentiment in ('pos', 'neg')
]
print("IMDb reviews: train = {} pos / {} neg, test = {} pos / {} neg".format(*split_sizes))


IMDb reviews: train = 12500 pos / 12500 neg, test = 12500 pos / 12500 neg


In [None]:
# Show the first training review of each class as a quick sanity check.
for heading, sentiment in (("Review positif:\n", 'pos'), ("\nReview negatif:\n", 'neg')):
    print(heading)
    print(data['train'][sentiment][0])


Review positif:

I read so many comments that I, too, shared about remembering this movie and wanting so badly to see it again but I didn't know the name of the movie. Thankfully, because of doing a search and finding the title on this site, I read the comments left here and realized that this was the movie I remembered. I then did a search and did find the movie and was so thrilled to be able to watch the movie once more 40 years later. Because of this site and your comments, you helped me and so I want to thank all of you. I want to share how I was able to find this movie for all of you who were looking for a copy as well. It was on the VHS version of Wonderful World of Disney's "Call it Courage" which contained 2 movies, the second one being "The Legend of the Boy and the Eagle." It touched me now as much as it did 40 years ago and now I own my own copy of it. I think it is only available on VHS. I found it on ebay and I have seen several copies of it there. Enjoy it, I know I did!<

In [None]:
# Flatten each split into one list of texts (positives first, then negatives)
# with a parallel list of integer labels: 1 = positive, 0 = negative.
train_pos, train_neg = data['train']['pos'], data['train']['neg']
train_texts = train_pos + train_neg
train_labels = [1] * len(train_pos) + [0] * len(train_neg)

test_pos, test_neg = data['test']['pos'], data['test']['neg']
test_texts = test_pos + test_neg
test_labels = [1] * len(test_pos) + [0] * len(test_neg)


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocab_size = 10000
max_length = 500

# The vocabulary is fitted on the training texts only; the test texts are
# encoded with that same vocabulary.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(train_texts)

# Both padding and truncation happen at the end of each sequence.
# NOTE(review): a recurrent layer that does not mask the padding value will
# process all trailing padding steps before emitting its final state.
pad_options = dict(maxlen=max_length, padding='post', truncating='post')
X_train = pad_sequences(tokenizer.texts_to_sequences(train_texts), **pad_options)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_texts), **pad_options)

y_train, y_test = train_labels, test_labels


In [None]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

embedding_dim = 100

# Embedding -> LSTM -> sigmoid binary sentiment classifier.
# The inputs are padded with zeros at the END of each review up to max_length.
# Without masking, the LSTM keeps stepping through that trailing padding and
# its final state carries little signal from the actual words (the recorded
# run plateaued at ~50% validation accuracy). mask_zero=True marks index 0 as
# padding so the LSTM skips those timesteps and returns the state from the
# last real token.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length, mask_zero=True),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()




In [None]:
import numpy as np

# Keras expects array-like targets, so convert the label lists to NumPy arrays.
y_train, y_test = np.array(y_train), np.array(y_test)

# NOTE(review): the test split is also used as validation data here, so the
# val_* metrics are not independent of the final test evaluation.
fit_options = {
    'epochs': 5,
    'batch_size': 128,
    'validation_data': (X_test, y_test),
}
history = model.fit(X_train, y_train, **fit_options)



Epoch 1/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 49ms/step - accuracy: 0.5006 - loss: 0.6936 - val_accuracy: 0.5002 - val_loss: 0.6933
Epoch 2/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 48ms/step - accuracy: 0.5091 - loss: 0.6921 - val_accuracy: 0.5039 - val_loss: 0.6951
Epoch 3/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 47ms/step - accuracy: 0.5172 - loss: 0.6805 - val_accuracy: 0.4990 - val_loss: 0.7045
Epoch 4/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 48ms/step - accuracy: 0.5283 - loss: 0.6552 - val_accuracy: 0.4989 - val_loss: 0.7229
Epoch 5/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 61ms/step - accuracy: 0.5374 - loss: 0.6466 - val_accuracy: 0.4997 - val_loss: 0.7369


In [None]:
# Score the trained model on the test split; the result unpacks into the
# loss followed by the single compiled metric (accuracy).
test_scores = model.evaluate(X_test, y_test)
loss, acc = test_scores
print(f"Akurasi test: {acc * 100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8138 - loss: 0.7136
Akurasi test: 49.97%


In [None]:
def predict_review(text):
    """Print the predicted sentiment of a single review string.

    The text is encoded with the notebook's fitted ``tokenizer``, padded to
    ``max_length`` and scored by ``model``. A score above 0.5 is reported as
    "Positif", anything else as "Negatif".

    Args:
        text: Raw review text.

    Returns:
        None. The label and score are printed.
    """
    seq = tokenizer.texts_to_sequences([text])
    # Use the same padding AND truncation side as the training data. The
    # pad_sequences default truncates from the front, which would feed the
    # model the tail of long reviews although it was trained on their head.
    padded = pad_sequences(seq, maxlen=max_length, padding='post', truncating='post')
    pred = model.predict(padded)[0][0]
    label = "Positif" if pred > 0.5 else "Negatif"
    print(f"Prediksi: {label} ({pred:.2f})")

# Try the classifier on one clearly positive and one clearly negative review.
for sample_review in (
    "This movie was amazing! I loved the acting and the story.",
    "Terrible movie. Waste of time and money.",
):
    predict_review(sample_review)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
Prediksi: Positif (0.52)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Prediksi: Positif (0.52)
