In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from keras import regularizers

In [13]:
data = pd.read_csv('netflix_titles.csv', sep=",")
data

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [15]:
# data.drop(columns=["show_id", "title", "director", "cast", "country", "date_added", 
#                    "release_year", "rating", "duration", "listed_in"], inplace=True)
data = data.rename(columns={"type":"genre",
                     "description":"sinopsis"})
data

Unnamed: 0,genre,sinopsis
0,Movie,"As her father nears the end of his life, filmm..."
1,TV Show,"After crossing paths at a party, a Cape Town t..."
2,TV Show,To protect his family from a powerful drug lor...
3,TV Show,"Feuds, flirtations and toilet talk go down amo..."
4,TV Show,In a city of coaching centers known to train I...
...,...,...
8802,Movie,"A political cartoonist, a crime reporter and a..."
8803,TV Show,"While living alone in a spooky town, a young g..."
8804,Movie,Looking to survive in a world taken over by zo...
8805,Movie,"Dragged from civilian life, a former superhero..."


In [16]:
kategori = pd.get_dummies(data["genre"])
data = pd.concat([data, kategori], axis=1).drop(columns="genre")
data

Unnamed: 0,sinopsis,Movie,TV Show
0,"As her father nears the end of his life, filmm...",1,0
1,"After crossing paths at a party, a Cape Town t...",0,1
2,To protect his family from a powerful drug lor...,0,1
3,"Feuds, flirtations and toilet talk go down amo...",0,1
4,In a city of coaching centers known to train I...,0,1
...,...,...,...
8802,"A political cartoonist, a crime reporter and a...",1,0
8803,"While living alone in a spooky town, a young g...",0,1
8804,Looking to survive in a world taken over by zo...,1,0
8805,"Dragged from civilian life, a former superhero...",1,0


In [17]:
X = data["sinopsis"].values
y = data.iloc[:, 1:].values

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [19]:
tokenizer = Tokenizer(num_words=5000, oov_token='x')
tokenizer.fit_on_texts(X_train) 
tokenizer.fit_on_texts(X_test)
 
kalimat_train = tokenizer.texts_to_sequences(X_train)
kalimat_test = tokenizer.texts_to_sequences(X_test)
 
pading_train = pad_sequences(kalimat_train) 
pading_test = pad_sequences(kalimat_test)
pading_test.shape

(1762, 46)

# FUNGSI DROPOUT DAN REGULARIZERS 

In [20]:
# Fungsi Dropout dan regularizers mencegah overfitting model
model = Sequential([
    layers.Embedding(input_dim=5000, output_dim=16),
    layers.LSTM(64),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.02)),
    layers.Dropout(0.3), 
    layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.02)),
    layers.Dropout(0.4),
    layers.Dense(2, activation='softmax')
])

In [21]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [22]:
history = model.fit(pading_train, y_train, epochs=30, batch_size=128
                   validation_data=(pading_test, y_test), verbose=2)

Epoch 1/30
221/221 - 5s - loss: 1.2893 - accuracy: 0.6938 - val_loss: 0.6701 - val_accuracy: 0.6913 - 5s/epoch - 21ms/step
Epoch 2/30
221/221 - 3s - loss: 0.5606 - accuracy: 0.6974 - val_loss: 0.5541 - val_accuracy: 0.6913 - 3s/epoch - 13ms/step
Epoch 3/30
221/221 - 3s - loss: 0.4461 - accuracy: 0.8204 - val_loss: 0.5851 - val_accuracy: 0.7384 - 3s/epoch - 14ms/step
Epoch 4/30
221/221 - 3s - loss: 0.3802 - accuracy: 0.8705 - val_loss: 0.6757 - val_accuracy: 0.7333 - 3s/epoch - 14ms/step
Epoch 5/30
221/221 - 3s - loss: 0.3324 - accuracy: 0.8952 - val_loss: 0.6450 - val_accuracy: 0.7151 - 3s/epoch - 14ms/step
Epoch 6/30
221/221 - 3s - loss: 0.2996 - accuracy: 0.9097 - val_loss: 0.8003 - val_accuracy: 0.7321 - 3s/epoch - 14ms/step
Epoch 7/30
221/221 - 3s - loss: 0.2963 - accuracy: 0.9086 - val_loss: 0.6906 - val_accuracy: 0.6969 - 3s/epoch - 14ms/step
Epoch 8/30
221/221 - 3s - loss: 0.2646 - accuracy: 0.9202 - val_loss: 0.6705 - val_accuracy: 0.7174 - 3s/epoch - 14ms/step
Epoch 9/30
221/2

In [23]:
model.evaluate(pading_test,y_test)



[1.3758419752120972, 0.6997730135917664]