In [1]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Embedding
import pandas as pd
import face_recognition

# Vocabulary cap passed to TextVectorization(max_tokens=...); the embedding
# layer is sized from the same constant (MAX_FEATURES + 1 rows).
MAX_FEATURES = 200_000
# Length every vectorized comment is forced to (output_sequence_length).
SEQUENCE_LENGTH = 1_800



In [2]:
# Load the labelled comments and build the tf.data input pipeline.
data = pd.read_csv("./data/toxicity.csv")
X = data["comment_text"]
# Every column from the third one onward is used as the label matrix.
y = data[data.columns[2:]].values

# Learn the vocabulary from the corpus, then map each comment to a
# fixed-length sequence of integer token ids.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_sequence_length=SEQUENCE_LENGTH,
    output_mode="int",
)
vectorizer.adapt(X.values)
vectorized_text = vectorizer(X.values)

dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# Shuffle ONCE with a fixed seed. By default shuffle() draws a new permutation
# on every iteration; because the train/val/test splits are carved out of this
# dataset with take()/skip(), a per-iteration reshuffle would let the same
# example land in different splits on different passes (data leakage).
# If per-epoch reshuffling of the training data is wanted, apply another
# shuffle() to the training split after it has been taken.
dataset = dataset.shuffle(160000, seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(32)
dataset = dataset.prefetch(8)  # prepare upcoming batches ahead of time to reduce input bottlenecks

2023-11-13 14:11:23.583687: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2023-11-13 14:11:23.583709: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-11-13 14:11:23.583713: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-11-13 14:11:23.583907: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-13 14:11:23.583924: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-11-13 14:11:23.678110: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [3]:
# Split the batched dataset 70 / 20 / 10 into train / validation / test.
# Boundaries are computed once and the test split takes *everything* that
# remains: truncating 0.7, 0.2, 0.9 and 0.1 of the batch count independently
# can leave a batch between val and test, or at the tail, assigned to no split.
# NOTE: the splits are only disjoint if `dataset` yields its batches in the
# same order on every iteration.
n_batches = len(dataset)
n_train = int(n_batches * 0.7)
n_val = int(n_batches * 0.2)

train = dataset.take(n_train)
val = dataset.skip(n_train).take(n_val)
test = dataset.skip(n_train + n_val)

In [4]:
# Multi-label classifier: token ids -> embedding -> BiLSTM -> dense stack ->
# six independent sigmoid outputs.
model = Sequential(
    [
        # Token-id lookup table with 32-dimensional vectors.
        Embedding(MAX_FEATURES + 1, 32),
        # Reads the sequence in both directions.
        Bidirectional(LSTM(32, activation="tanh")),
        # Fully connected feature extractor.
        Dense(128, activation="relu"),
        Dense(256, activation="relu"),
        Dense(128, activation="relu"),
        # One sigmoid unit per label.
        Dense(6, activation="sigmoid"),
    ]
)
model.compile(loss="BinaryCrossentropy", optimizer="Adam")
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          6400032   
                                                                 
 bidirectional (Bidirection  (None, 64)                16640     
 al)                                                             
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                        

In [5]:
# Train for two passes over `train`, evaluating on `val` after each epoch.
model.fit(train, validation_data=val, epochs=2)

Epoch 1/2


2023-11-13 14:11:32.539878: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:11:33.330695: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:11:33.364044: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:11:36.636313: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:11:36.657175: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-11-13 14:26:31.564225: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:26:32.668202: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:26:32.716931: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


<keras.src.callbacks.History at 0x2c0c10d90>

In [6]:
# Persist the trained classifier to an HDF5 file.
model.save(filepath='./models/toxicity.h5')

  saving_api.save_model(


In [7]:
# Reload the classifier from disk to check that the saved file is usable.
saved_model = tf.keras.models.load_model(filepath='./models/toxicity.h5')

In [8]:
# Score one hand-written comment with the reloaded model.
sample_comment = "Stupid peace of shit stop deleting my stuff asshole go die and fall in a hole go to hell"
vectorized_text = vectorizer(sample_comment)
# predict() works on batches, so add a leading batch axis to the single sample.
vectorized_text_batch = vectorized_text[tf.newaxis, ...]
prediction = saved_model.predict(vectorized_text_batch)
prediction

2023-11-13 14:40:57.346621: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:40:57.676663: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-11-13 14:40:57.689112: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




array([[0.99155957, 0.25211388, 0.98192936, 0.00539784, 0.9731204 ,
        0.12561078]], dtype=float32)

In [2]:
# Encode every face found in the first image.
image_path = "./data/girl.jpg"
face = face_recognition.load_image_file(image_path)
# Locate the faces first, then hand those boxes to the encoder.
face_bbs = face_recognition.face_locations(face)
face_encodings = face_recognition.face_encodings(
    face,
    known_face_locations=face_bbs,
)
face_encodings

[array([-0.02318662,  0.04750833,  0.00431102, -0.08871661, -0.14002404,
         0.00737369, -0.07007979, -0.05127694,  0.1598185 , -0.16487238,
         0.17806137, -0.12473015, -0.15408543,  0.01889231, -0.05740419,
         0.20068543, -0.13724126, -0.12305908, -0.02857233, -0.01233063,
         0.03412107,  0.0400502 , -0.04964302,  0.10450687, -0.154278  ,
        -0.35581103, -0.07382981, -0.06047833,  0.00114598, -0.05441512,
        -0.04542955,  0.09264226, -0.15211549, -0.03438062,  0.08836637,
         0.08995709,  0.01059248, -0.06045713,  0.22146197,  0.04968353,
        -0.27947888,  0.06699341,  0.15163529,  0.27715811,  0.1120538 ,
         0.02231513,  0.02394987, -0.14067842,  0.15735002, -0.17129619,
         0.00047126,  0.11536092,  0.08402877,  0.03520328,  0.04029964,
        -0.0751263 ,  0.03819021,  0.13985337, -0.18513756,  0.01779577,
         0.09413364, -0.0131179 , -0.01442161, -0.11468244,  0.26571783,
         0.05658165, -0.10386717, -0.19298883,  0.1

In [3]:
# Encode every face found in the second image (same steps as for the first).
image_path_2 = "./data/tao.jpg"
face_2 = face_recognition.load_image_file(image_path_2)
# Locate the faces first, then hand those boxes to the encoder.
face_bbs_2 = face_recognition.face_locations(face_2)
face_encodings_2 = face_recognition.face_encodings(
    face_2,
    known_face_locations=face_bbs_2,
)
face_encodings_2

[array([-0.18274091,  0.10431059,  0.03544009, -0.05163359, -0.13263607,
        -0.05203699, -0.0683785 , -0.17487426,  0.09165816, -0.08242857,
         0.14939368, -0.04716298, -0.17745048, -0.08504643,  0.0071188 ,
         0.16217616, -0.13293228, -0.17457567, -0.01870844, -0.02509348,
         0.04737847, -0.05183195,  0.02724834,  0.09172902, -0.14209355,
        -0.33288419, -0.08233404, -0.13409221,  0.01565576, -0.01170006,
        -0.07980458,  0.01359297, -0.15512082, -0.08839157,  0.03394389,
         0.08710045,  0.04183759,  0.00238242,  0.25263813, -0.01977065,
        -0.28238991,  0.0697407 ,  0.08753251,  0.3109062 ,  0.17822708,
         0.0022672 ,  0.06235423, -0.09582599,  0.15775195, -0.14862597,
         0.0623254 ,  0.12225942,  0.08732249,  0.00049225, -0.00210157,
        -0.15735161,  0.04461724,  0.11039841, -0.21978556,  0.07117806,
         0.13847235, -0.07548745, -0.00853674, -0.03913815,  0.27089804,
         0.10136581, -0.11365812, -0.16451485,  0.1

In [7]:
# Compare the first face of the second image against every face encoded from
# the first image; the result holds one boolean per entry in `face_encodings`.
probe_encoding = face_encodings_2[0]
face_recognition.compare_faces(face_encodings, probe_encoding)

[True]

In [16]:
import whisper

# Detect the spoken language of an audio file with Whisper.
# The Whisper model gets its own name: binding it to `model` would overwrite
# the Keras toxicity classifier defined earlier in this notebook, so re-running
# the fit/save cells afterwards would operate on the wrong object.
whisper_model = whisper.load_model("base")
# NOTE(review): absolute, machine-specific path; consider a path relative to
# the project's data directory so the notebook runs on other machines.
audio = whisper.pad_or_trim(whisper.load_audio("/Users/admin/Downloads/out.mp3"))
# The spectrogram is moved to the device the model lives on.
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
# Only the language-probability mapping is needed. The first return value is
# bound to a named throwaway rather than `_`, because assigning to `_` in
# IPython shadows its "last output" variable for the rest of the session.
_language_tokens, probs = whisper_model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

Detected language: en
