In [1]:
# Limits of synchronous recognition:
# ---------------------------------
# Audio length: at most 1 minute
# Audio size:   at most 10 MB
# ---------------------------------

# Import Library
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech
from google.oauth2 import service_account


# Function transkrip audio Synchronous
def transcribe_file_v2(
    project_id: str,
    audio_file: str,
    credentials_file: str = 'gcloud_apikey.json',
    language_codes: tuple = ("en-US",),
    model: str = "long",
) -> cloud_speech.RecognizeResponse:
    """Transcribe a local audio file with a synchronous Speech-to-Text v2 request.

    The whole file is read into memory and sent inline, so it has to respect
    the synchronous limits noted at the top of this notebook (1 minute, 10 MB).
    The top transcript of every result is printed as a side effect.

    Args:
        project_id: Google Cloud project used to build the recognizer path.
        audio_file: Path of the audio file to transcribe.
        credentials_file: Service-account JSON key used to authenticate.
        language_codes: Language codes passed to the recognition config.
        model: Recognition model name passed to the recognition config.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Authenticate with the service-account key and create the client.
    credentials = service_account.Credentials.from_service_account_file(credentials_file)
    client = SpeechClient(credentials=credentials)

    # Read the audio as raw bytes; the request carries it inline.
    with open(audio_file, "rb") as f:
        content = f.read()

    # Let the service detect the audio encoding on its own.
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=list(language_codes),
        model=model,
    )

    # "_" selects the project's default recognizer in the global location.
    request = cloud_speech.RecognizeRequest(
        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
        config=config,
        content=content,
    )

    response = client.recognize(request=request)

    for result in response.results:
        # A result can come back without any alternative; skip it rather
        # than fail with IndexError after the request has already been paid for.
        if not result.alternatives:
            continue
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response

In [2]:
%%HTML
<!-- Preview player for the source video together with its subtitle track. -->
<!-- NOTE(review): the video is loaded from speech_data/ while the subtitle file
     is referenced next to the notebook; confirm Anime.srt really lives there.
     The track has no srclang attribute, which HTML requires for
     kind="subtitles", and browsers generally only parse WebVTT (.vtt) for
     track elements, so this .srt file may not be displayed. -->
<video width="500" controls>
    <source src="speech_data/Anime.mp4" type="video/mp4">
    <track src="Anime.srt" kind="subtitles">
</video>

In [3]:
# Import Library
from moviepy.editor import VideoFileClip

# Input video and output audio paths
mp4_file = "speech_data/Anime.mp4"
mp3_file = "speech_data/Anime.mp3"

# Open the video in a context manager so the clip (and the audio reader it
# owns) is closed even when the extraction below fails.
with VideoFileClip(mp4_file) as video_clip:
    # `audio` is None for a video without an audio track; fail with a clear
    # message instead of an AttributeError on None.
    audio_clip = video_clip.audio
    if audio_clip is None:
        raise ValueError(f"{mp4_file} has no audio track to extract")

    # Write the audio track to the output file
    audio_clip.write_audiofile(mp3_file)

MoviePy - Writing audio in speech_data/Anime.mp3


                                                                                                                       

MoviePy - Done.




In [4]:
## Transcribe the extracted audio and keep the full response.
# Reuse `mp3_file` from the extraction cell so the file that was written and
# the file that is transcribed cannot drift apart.
hasil_transkrip = transcribe_file_v2(
    project_id='data-science-programming-ti24',
    audio_file=mp3_file,
)

Transcript: she know that I I just wanted to cheer you see we you and I are the same I can't say the things that I truly feel just stop it and you're wrong we're nothing alike
Transcript:  it's really embarrassed me
Transcript:  have you considered my feelings at all everyone just hurt this I seriously hate pushy people like you
Transcript:  what if I told you in person just leave me alone didn't you hear me
Transcript:  but I hate you
Transcript:  I see
Transcript:  you hate me that much okay well then
Transcript:  hey


In [5]:
# Dump the whole response: every result with its alternatives, confidence,
# end offset and language code.
print(hasil_transkrip)

results {
  alternatives {
    transcript: "she know that I I just wanted to cheer you see we you and I are the same I can\'t say the things that I truly feel just stop it and you\'re wrong we\'re nothing alike"
    confidence: 0.877077401
  }
  result_end_offset {
    seconds: 13
    nanos: 880000000
  }
  language_code: "en-US"
}
results {
  alternatives {
    transcript: " it\'s really embarrassed me"
    confidence: 0.944916487
  }
  result_end_offset {
    seconds: 17
    nanos: 140000000
  }
  language_code: "en-US"
}
results {
  alternatives {
    transcript: " have you considered my feelings at all everyone just hurt this I seriously hate pushy people like you"
    confidence: 0.959103167
  }
  result_end_offset {
    seconds: 25
    nanos: 920000000
  }
  language_code: "en-US"
}
results {
  alternatives {
    transcript: " what if I told you in person just leave me alone didn\'t you hear me"
    confidence: 0.890649438
  }
  result_end_offset {
    seconds: 31
    nanos: 4600

In [6]:
# Collect the top transcript of every recognised segment. A result may come
# back without any alternative, so those are skipped instead of raising
# IndexError.
list_hasil_transkrip = [
    hasil.alternatives[0].transcript
    for hasil in hasil_transkrip.results
    if hasil.alternatives
]

list_hasil_transkrip

["she know that I I just wanted to cheer you see we you and I are the same I can't say the things that I truly feel just stop it and you're wrong we're nothing alike",
 " it's really embarrassed me",
 ' have you considered my feelings at all everyone just hurt this I seriously hate pushy people like you',
 " what if I told you in person just leave me alone didn't you hear me",
 ' but I hate you',
 ' I see',
 ' you hate me that much okay well then',
 ' hey']

# PREDICT

In [20]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import TextVectorization #tokenization|
import pickle

In [21]:
# Load the saved Keras model from 'toxic-v1.h5' (path is relative to the
# notebook's working directory).
model = tf.keras.models.load_model('toxic-v1.h5')



In [22]:
# Indonesian display names, matched by position to the model's output columns.
# English column name -> label used here (presumably the training dataset's
# column names and order; TODO confirm against the training notebook):
#   toxic         -> toxic
#   severe_toxic  -> sangat_toxic
#   obscene       -> cabul
#   threat        -> ancaman
#   insult        -> menyinggung
#   identity_hate -> penghinaan
# NOTE(review): "penghinaan" usually translates to "insult", yet it sits in the
# sixth (identity_hate) slot while "menyinggung" takes the insult slot;
# confirm this naming is intended.
labels = ['toxic', 'sangat_toxic', 'cabul', 'ancaman', 'menyinggung', 'penghinaan']

In [23]:
# Read back the pickled TextVectorization config and vocabulary.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file; only load pickles that come from a trusted source.
with open('vectorizer_config.pkl', 'rb') as f:
    vectorizer_config = pickle.load(f)
with open('vectorizer_vocab.pkl', 'rb') as f:
    vectorizer_vocab = pickle.load(f)

In [24]:
# Rebuild the TextVectorization layer from the saved config, then install the
# saved vocabulary so no adapt() pass over text is needed here.
vectorizer = TextVectorization.from_config(vectorizer_config)
vectorizer.set_vocabulary(vectorizer_vocab)

In [25]:
# The transcribed sentences are the texts to classify (same list object, not a copy).
input_data = list_hasil_transkrip

In [26]:
# Vectorize each input text in the list (one vectorizer call per sentence)
vectorized_texts = [vectorizer(text) for text in input_data]

# Pad the sequences to the same length
# NOTE(review): pad_sequences pads and truncates at the front by default
# (padding='pre', truncating='pre'). Whether that changes anything depends on
# the output_sequence_length stored in the pickled vectorizer config, which is
# not visible here; confirm it matches how the model was trained.
# 1800 is presumably the sequence length the model expects (TODO confirm).
padded_texts = tf.keras.preprocessing.sequence.pad_sequences(vectorized_texts, maxlen=1800)

In [27]:
# Score every padded sentence with the model.
predictions = model.predict(padded_texts)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 682ms/step


In [28]:
# Turn the scores into 0/1 flags: 1 where the score is strictly above 0.5.
binary_predictions = (predictions > 0.5).astype(int)
binary_predictions

array([[1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [29]:
# Report, sentence by sentence, the 0/1 flag of every label
for row_idx, sentence in enumerate(input_data):
    flags = binary_predictions[row_idx]
    report = [f"{name}: {flag}" for name, flag in zip(labels, flags)]

    print("Text:", sentence)
    for line in report:
        print(line)
    print()

Text: she know that I I just wanted to cheer you see we you and I are the same I can't say the things that I truly feel just stop it and you're wrong we're nothing alike
toxic: 1
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  it's really embarrassed me
toxic: 0
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  have you considered my feelings at all everyone just hurt this I seriously hate pushy people like you
toxic: 1
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  what if I told you in person just leave me alone didn't you hear me
toxic: 0
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  but I hate you
toxic: 1
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  I see
toxic: 0
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  you hate me that much okay well then
toxic: 0
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0

Text:  h

In [30]:
# Column sums of the 0/1 matrix: how many sentences were flagged per label.
total_predictions = binary_predictions.sum(axis=0)
total_predictions

array([3, 0, 0, 0, 0, 0])

In [31]:
# Print the number of flagged sentences for each label.
# (The enumerate index was never used, so the labels and totals are zipped directly.)
for label, total in zip(labels, total_predictions):
    print(f"{label}: {total}")

toxic: 3
sangat_toxic: 0
cabul: 0
ancaman: 0
menyinggung: 0
penghinaan: 0


In [32]:
# For every label, the indices of the sentences flagged with it.
label_index = {label: [] for label in labels}

# Per-label number of flagged sentences (column sums of the 0/1 matrix).
total_predictions = binary_predictions.sum(axis=0)

for i, (prediction, text) in enumerate(zip(binary_predictions, input_data)):
    # (column, label) pairs whose flag is set for this sentence.
    hits = [
        (j, label)
        for j, (label, pred) in enumerate(zip(labels, prediction))
        if pred == 1
    ]
    if not hits:
        # Nothing flagged: print nothing, not even a blank separator line.
        continue

    # Show the sentence once, followed by every label it triggered and the
    # raw score behind the decision.
    print(f"Text: {text}")
    for j, label in hits:
        print(f"Prediction: {label} Value: {predictions[i][j]}")
        label_index[label].append(i)  # remember which sentence was flagged
    print()

# Summary per label: total count and the indices of the flagged sentences.
for label, total in zip(labels, total_predictions):
    print(f"Total {label} predictions: {total}")
    print(f"Index Kalimat yang terdeteksi {label}: {label_index[label]}")
    print()

Text: she know that I I just wanted to cheer you see we you and I are the same I can't say the things that I truly feel just stop it and you're wrong we're nothing alike
Prediction: toxic Value: 0.7711834907531738


Text:  have you considered my feelings at all everyone just hurt this I seriously hate pushy people like you
Prediction: toxic Value: 0.7438677549362183


Text:  but I hate you
Prediction: toxic Value: 0.7965867519378662




Total toxic predictions: 3
Index Kalimat yang terdeteksi toxic: [0, 2, 4]

Total sangat_toxic predictions: 0
Index Kalimat yang terdeteksi sangat_toxic: []

Total cabul predictions: 0
Index Kalimat yang terdeteksi cabul: []

Total ancaman predictions: 0
Index Kalimat yang terdeteksi ancaman: []

Total menyinggung predictions: 0
Index Kalimat yang terdeteksi menyinggung: []

Total penghinaan predictions: 0
Index Kalimat yang terdeteksi penghinaan: []

