# Video 1

In [1]:
import speech_recognition as sr
import moviepy.editor as mp

# Extract the audio track of the video into a WAV file that the
# speech_recognition library can read.
clip = mp.VideoFileClip(r"Test1.mp4")
try:
    # NOTE(review): presumably clip.audio is None for a video without an audio
    # track, in which case this line raises AttributeError -- confirm.
    clip.audio.write_audiofile(r"result1.wav")
finally:
    # Release the reader resources held by the clip even if the export fails.
    clip.close()

# Read the whole WAV file into a single in-memory audio object.
r = sr.Recognizer()
audio = sr.AudioFile("result1.wav")
with audio as source:
    audio_file = r.record(source)

# Transcribe the recording with the recognizer's Google backend.
result = r.recognize_google(audio_file)

print(result)

# Persist the transcript for the classification cells below. The encoding is
# explicit because pandas reads text files as UTF-8 by default, whereas open()
# would otherwise fall back to the platform's locale encoding.
with open('result_text1.txt', mode='w', encoding='utf-8') as file:
    file.write(result)


MoviePy - Writing audio in result1.wav


                                                                                                                       

MoviePy - Done.
hello I don't like you I hate you


# Type 1

In [2]:
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle

# Load the transcript as plain text, one sample per non-blank line.
# It is deliberately not read through pd.read_table(..., delimiter=';'):
# that parser splits a line on any ';' and applies type inference, so a
# transcript such as "20" becomes an integer and "null" becomes NaN, both of
# which break the regex-based preprocessing further down.
with open('result_text1.txt', encoding='utf-8') as transcript_file:
    transcript_lines = [line.rstrip('\r\n') for line in transcript_file]
test_data = pd.DataFrame({"text": [line for line in transcript_lines if line.strip()]})
if test_data.empty:
    # Fail early, as the CSV parser did for an empty file.
    raise ValueError("result_text1.txt contains no transcript text")

# Load the encoder
# SECURITY: pickle.load executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as encoder_file:
    loaded_encoder = pickle.load(encoder_file)

# Load the CountVectorizer (same pickle caveat as above)
with open("CountVectorizer.pkl", "rb") as cv_file:
    loaded_cv = pickle.load(cv_file)

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

# Run every transcript row through the text-cleaning function defined above.
test_data['text'] = test_data['text'].apply(preprocess_test_data)

# Bag-of-words features from the vectorizer loaded from CountVectorizer.pkl,
# converted to a dense array before being passed to the model.
X_test = loaded_cv.transform(test_data['text']).toarray()

# Restore the trained classifier and score the sample(s).
loaded_model = load_model("my_model.h5")
predictions = loaded_model.predict(X_test)

# Class names come from the encoder; column i of `predictions` is reported
# under labels[i].
labels = loaded_encoder.classes_

# Start the report from the cleaned text, then append one score column per class.
results_df = pd.DataFrame({'Text': test_data['text']})
for class_position, label in enumerate(labels):
    results_df[label] = predictions[:, class_position]

# Winning class per row, decoded back to its label.
predicted_label_indices = predictions.argmax(axis=1)
predicted_labels = loaded_encoder.inverse_transform(predicted_label_indices)
results_df['Predicted'] = predicted_labels

# NOTE: 'Accuracy' holds the highest model output of each row (the score of the
# predicted class); it is not an accuracy measured against ground truth.
results_df['Accuracy'] = predictions.max(axis=1)

# Show the full report.
print(results_df)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
              Text     anger      fear       joy      love   sadness  \
0  hello like hate  0.278652  0.006498  0.021774  0.000637  0.689167   

   surprise Predicted  Accuracy  
0  0.003273   sadness  0.689167  


# Type 2

In [3]:
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle

# Load the transcript as plain text, one sample per non-blank line.
# It is deliberately not read through pd.read_table(..., delimiter=';'):
# that parser splits a line on any ';' and applies type inference, so a
# transcript such as "20" becomes an integer and "null" becomes NaN, both of
# which break the regex-based preprocessing further down.
with open('result_text1.txt', encoding='utf-8') as transcript_file:
    transcript_lines = [line.rstrip('\r\n') for line in transcript_file]
test_data = pd.DataFrame({"text": [line for line in transcript_lines if line.strip()]})
if test_data.empty:
    # Fail early, as the CSV parser did for an empty file.
    raise ValueError("result_text1.txt contains no transcript text")

# Load the encoder
# SECURITY: pickle.load executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as encoder_file:
    loaded_encoder = pickle.load(encoder_file)

# Load the CountVectorizer (same pickle caveat as above)
with open("CountVectorizer.pkl", "rb") as cv_file:
    loaded_cv = pickle.load(cv_file)

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

# Run every transcript row through the text-cleaning function defined above.
test_data['text'] = test_data['text'].apply(preprocess_test_data)

# Transform text data into numerical representation using CountVectorizer
X_test = loaded_cv.transform(test_data['text']).toarray()

# Load the trained model
loaded_model = load_model("my_model.h5")

# Make predictions (one row of per-class scores per sample)
predictions = loaded_model.predict(X_test)

# Convert predictions to labels
predicted_label_indices = np.argmax(predictions, axis=1)  # index of max value

# Extract the score of the predicted class for each sample
predicted_probabilities = np.max(predictions, axis=1)  # actual max value

predicted_labels = loaded_encoder.inverse_transform(predicted_label_indices)

# Build one score column per class from the encoder's own class list rather
# than hard-coding six names and positions: the column headers then always
# follow the same order that inverse_transform uses, whatever classes the
# encoder holds. Labels are capitalized so the printed headers keep the
# "Anger", "Fear", ... form.
probability_columns = {
    str(label).capitalize(): predictions[:, class_index]
    for class_index, label in enumerate(loaded_encoder.classes_)
}
results_df_all_probs = pd.DataFrame({'Text': test_data['text'], **probability_columns})

# Print the first sample's score for each emotion label
for emotion in results_df_all_probs.columns[1:]:
    print(emotion)
    print(results_df_all_probs[emotion].iloc[0])
    print()

# Create the final DataFrame without the per-class scores.
# NOTE: 'Accuracy' is the score of the predicted class, not an accuracy
# measured against ground truth.
results_df = pd.DataFrame({
    'Text': test_data['text'],
    'Predicted': predicted_labels,
    'Accuracy': predicted_probabilities
})

# Print the final DataFrame
print(results_df)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
Anger
0.27865157

Fear
0.006497684

Joy
0.021774383

Love
0.000636529

Sadness
0.68916726

Surprise
0.0032725264

              Text Predicted  Accuracy
0  hello like hate   sadness  0.689167


# Video 2

In [4]:
import speech_recognition as sr
import moviepy.editor as mp

# Extract the audio track of the video into a WAV file that the
# speech_recognition library can read.
clip = mp.VideoFileClip(r"Test2.mp4")
try:
    # NOTE(review): presumably clip.audio is None for a video without an audio
    # track, in which case this line raises AttributeError -- confirm.
    clip.audio.write_audiofile(r"result2.wav")
finally:
    # Release the reader resources held by the clip even if the export fails.
    clip.close()

# Read the whole WAV file into a single in-memory audio object.
r = sr.Recognizer()
audio = sr.AudioFile("result2.wav")
with audio as source:
    audio_file = r.record(source)

# Transcribe the recording with the recognizer's Google backend.
result = r.recognize_google(audio_file)

print(result)

# Persist the transcript for the classification cells below. The encoding is
# explicit because pandas reads text files as UTF-8 by default, whereas open()
# would otherwise fall back to the platform's locale encoding.
with open('result_text2.txt', mode='w', encoding='utf-8') as file:
    file.write(result)


MoviePy - Writing audio in result2.wav


                                                                                                                       

MoviePy - Done.
hi I am heshika and I love doing this project


# Type 1

In [5]:
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle

# Load the transcript as plain text, one sample per non-blank line.
# It is deliberately not read through pd.read_table(..., delimiter=';'):
# that parser splits a line on any ';' and applies type inference, so a
# transcript such as "20" becomes an integer and "null" becomes NaN, both of
# which break the regex-based preprocessing further down.
with open('result_text2.txt', encoding='utf-8') as transcript_file:
    transcript_lines = [line.rstrip('\r\n') for line in transcript_file]
test_data = pd.DataFrame({"text": [line for line in transcript_lines if line.strip()]})
if test_data.empty:
    # Fail early, as the CSV parser did for an empty file.
    raise ValueError("result_text2.txt contains no transcript text")

# Load the encoder
# SECURITY: pickle.load executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as encoder_file:
    loaded_encoder = pickle.load(encoder_file)

# Load the CountVectorizer (same pickle caveat as above)
with open("CountVectorizer.pkl", "rb") as cv_file:
    loaded_cv = pickle.load(cv_file)

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

# Run every transcript row through the text-cleaning function defined above.
test_data['text'] = test_data['text'].apply(preprocess_test_data)

# Transform text data into numerical representation using CountVectorizer
X_test = loaded_cv.transform(test_data['text']).toarray()

# Load the trained model
loaded_model = load_model("my_model.h5")

# Make predictions (one row of per-class scores per sample)
predictions = loaded_model.predict(X_test)

# Convert predictions to labels
predicted_label_indices = np.argmax(predictions, axis=1)  # index of max value

# Extract the score of the predicted class for each sample
predicted_probabilities = np.max(predictions, axis=1)  # actual max value

predicted_labels = loaded_encoder.inverse_transform(predicted_label_indices)

# Build one score column per class from the encoder's own class list rather
# than hard-coding six names and positions: the column headers then always
# follow the same order that inverse_transform uses, whatever classes the
# encoder holds. Labels are capitalized so the printed headers keep the
# "Anger", "Fear", ... form.
probability_columns = {
    str(label).capitalize(): predictions[:, class_index]
    for class_index, label in enumerate(loaded_encoder.classes_)
}
results_df_all_probs = pd.DataFrame({'Text': test_data['text'], **probability_columns})

# Print the first sample's score for each emotion label
for emotion in results_df_all_probs.columns[1:]:
    print(emotion)
    print(results_df_all_probs[emotion].iloc[0])
    print()

# Create the final DataFrame without the per-class scores.
# NOTE: 'Accuracy' is the score of the predicted class, not an accuracy
# measured against ground truth.
results_df = pd.DataFrame({
    'Text': test_data['text'],
    'Predicted': predicted_labels,
    'Accuracy': predicted_probabilities
})

# Print the final DataFrame
print(results_df)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Anger
0.01864444

Fear
3.2643595e-05

Joy
0.27540162

Love
0.64958507

Sadness
0.05633547

Surprise
7.51576e-07

                      Text Predicted  Accuracy
0  hi heshika love project      love  0.649585


# For Continuous Video

In [3]:
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle
import cv2

# Restore the label encoder from its pickle.
# SECURITY: unpickling executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as handle:
    loaded_encoder = pickle.loads(handle.read())

# Restore the CountVectorizer the same way (same pickle caveat).
with open("CountVectorizer.pkl", "rb") as handle:
    loaded_cv = pickle.loads(handle.read())

# Restore the trained Keras model from its HDF5 file.
loaded_model = load_model("my_model.h5")

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

# Function to process video frames
def process_video(video_path):
    """Classify the text obtained from every frame of a video and print the label.

    For each decoded frame the text returned by perform_emotion_recognition()
    is cleaned with preprocess_test_data(), vectorized with loaded_cv, scored
    with loaded_model and decoded with loaded_encoder. One line is printed per
    frame; nothing is returned.

    Args:
        video_path: Path of the video file handed to cv2.VideoCapture.

    Note:
        If the capture cannot be opened the loop body never runs, so nothing
        is printed and no error is raised.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    # try/finally guarantees the capture is released even when a step inside
    # the loop (text extraction, vectorizing, prediction) raises.
    try:
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                # No more frames (or a read failure): stop.
                break

            # Obtain the text to classify for this frame
            text_from_frame = perform_emotion_recognition(frame)

            # Preprocess the text
            preprocessed_text = preprocess_test_data(text_from_frame)

            # Transform text data into numerical representation
            X_frame = loaded_cv.transform([preprocessed_text]).toarray()

            # Make predictions
            prediction = loaded_model.predict(X_frame)

            # Index of the largest score, decoded back to its label
            predicted_label_index = np.argmax(prediction)
            predicted_label = loaded_encoder.inverse_transform([predicted_label_index])[0]

            # Print the result for this frame
            print(f"Frame {frame_count}: Predicted Emotion - {predicted_label}")

            frame_count += 1
    finally:
        cap.release()
        cv2.destroyAllWindows()

def perform_emotion_recognition(frame):
    """Return the text to classify for one video frame (placeholder).

    The frame is currently ignored and a fixed string is returned. A real
    implementation would extract on-screen text from the frame, for example
    with OpenCV text detection or a pre-trained model.

    Args:
        frame: A single decoded video frame (unused for now).

    Returns:
        The constant placeholder string.
    """
    placeholder_text = "Text from the frame"
    return placeholder_text

# Run the per-frame pipeline on the sample video; process_video prints one
# prediction line for every frame it reads.
video_path = "Test3.mp4"
process_video(video_path)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
Frame 0: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Frame 1: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Frame 2: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Frame 3: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Frame 4: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Frame 5: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Frame 6: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Frame 7: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Frame 8: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Frame 152: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Frame 153: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Frame 154: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Frame 155: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Frame 156: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Frame 157: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Frame 158: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Frame 159: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Frame 160: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Frame 228: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Frame 229: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Frame 230: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Frame 231: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Frame 232: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Frame 233: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Frame 234: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Frame 235: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Frame 236: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Frame 304: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
Frame 305: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Frame 306: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Frame 307: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Frame 308: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Frame 309: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Frame 310: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Frame 311: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Frame 312: Predicted Emotion - fear
[1m1/1[0m [32m━━━━━━━━━━━

In [6]:
pip install pytesseract


Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Tesseract (via pytesseract) can be used to extract text from video frames. (Google)

In [7]:
# If you prefer not to use Tesseract OCR, there are alternative ways to extract text from video frames.
# One alternative is to use pre-trained deep learning models: EAST (an Efficient and Accurate Scene Text detector) for text detection and CRNN (Convolutional Recurrent Neural Network) for text recognition.

In [10]:
import cv2  # Or your preferred video library
import speech_recognition as sr  # Or your preferred audio library
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle

# Restore the label encoder from its pickle.
# SECURITY: unpickling executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as handle:
    loaded_encoder = pickle.loads(handle.read())

# Restore the CountVectorizer the same way (same pickle caveat).
with open("CountVectorizer.pkl", "rb") as handle:
    loaded_cv = pickle.loads(handle.read())

# Restore the trained Keras model from its HDF5 file.
loaded_model = load_model("my_model.h5")

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

def extract_video_and_audio(cap, chunk_size):
    """Read the next chunk of frames from a capture, plus a placeholder audio value.

    Args:
        cap: Capture object providing get() and read() (a cv2.VideoCapture in
            this notebook).
        chunk_size: Chunk length in seconds.

    Returns:
        A (frames, audio_chunk) tuple. `frames` holds up to
        int(fps * chunk_size) frames and is empty once the capture yields no
        more frames; `audio_chunk` is a fixed placeholder string because audio
        extraction is not implemented yet.
    """
    frames = []

    # Number of frames covering chunk_size seconds at the reported frame rate.
    frames_per_chunk = int(cap.get(cv2.CAP_PROP_FPS) * chunk_size)
    for _ in range(frames_per_chunk):
        ret, frame = cap.read()
        if not ret:
            # Capture exhausted (or read failed): return what was collected.
            break
        frames.append(frame)

    # Extract audio for chunk_size duration
    # Implement your audio extraction logic here
    # This is just a placeholder for demonstration
    audio_chunk = "Placeholder audio chunk"

    return frames, audio_chunk

def process_video_chunk(video_chunk, audio_chunk):
    """Predict the emotion label for one chunk from its transcribed audio.

    The audio is transcribed with transcribe_audio(), cleaned with
    preprocess_test_data(), vectorized with loaded_cv and scored with
    loaded_model; the index of the largest score is decoded with
    loaded_encoder.

    Args:
        video_chunk: Frames of the chunk (not used by the current pipeline).
        audio_chunk: Audio of the chunk, passed to transcribe_audio().

    Returns:
        The decoded label of the highest-scoring class.
    """
    transcript = transcribe_audio(audio_chunk)  # speech-to-text
    features = loaded_cv.transform([preprocess_test_data(transcript)]).toarray()
    class_scores = loaded_model.predict(features)

    # argmax over the whole score array gives the index of the largest score.
    best_class = np.argmax(class_scores)
    return loaded_encoder.inverse_transform([best_class])[0]

def transcribe_audio(audio_chunk):
    """Return the transcript for an audio chunk (placeholder).

    The argument is currently ignored and a fixed string is returned; replace
    the body with real speech-to-text logic.

    Args:
        audio_chunk: Audio data for one chunk (unused for now).

    Returns:
        The constant placeholder transcript.
    """
    placeholder_transcript = "Placeholder text from audio"
    return placeholder_transcript

video_path = "Test3.mp4"
cap = cv2.VideoCapture(video_path)
# ... Set up video processing, audio extraction with your chosen library

chunk_size = 5  # Seconds

# try/finally guarantees the capture is released even if a chunk fails.
try:
    if not cap.isOpened():
        # Without this check an unreadable path just ends the loop at once,
        # and the cell finishes silently with no predictions.
        raise IOError(f"Could not open video: {video_path}")

    while True:
        video_chunk, audio_chunk = extract_video_and_audio(cap, chunk_size)

        if not video_chunk:  # End of video
            break

        emotion = process_video_chunk(video_chunk, audio_chunk)
        print("Emotion detected:", emotion)
finally:
    cap.release()
    cv2.destroyAllWindows()








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Emotion detected: fear


In [11]:
import cv2  # Or your preferred video library
import speech_recognition as sr  # Or your preferred audio library
import numpy as np
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.models import load_model
import pickle

# Restore the label encoder from its pickle.
# SECURITY: unpickling executes arbitrary code from the file; only load
# artifacts that you produced yourself.
with open("encoder.pkl", "rb") as handle:
    loaded_encoder = pickle.loads(handle.read())

# Restore the CountVectorizer the same way (same pickle caveat).
with open("CountVectorizer.pkl", "rb") as handle:
    loaded_cv = pickle.loads(handle.read())

# Restore the trained Keras model from its HDF5 file.
loaded_model = load_model("my_model.h5")

# Preprocess the test data
def preprocess_test_data(text):
    """Normalize raw text into a space-separated string of stemmed tokens.

    Every character outside a-z/A-Z is replaced by a space, the text is
    lower-cased and split on whitespace, NLTK English stop words are dropped,
    and each remaining token is reduced with the Porter stemmer.

    Args:
        text: Raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces ("" if none remain).
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call: evaluating
    # stopwords.words('english') inside the comprehension re-fetched the whole
    # list for every token, and membership tests on a list are linear.
    stop_words = set(stopwords.words('english'))
    review = re.sub('[^a-zA-Z]', ' ', text)  # leave only characters from a to z
    review = review.lower().split()  # lower the text, then turn it into a list of words
    return " ".join(ps.stem(word) for word in review if word not in stop_words)

def extract_video_and_audio(cap, chunk_size):
    """Read the next chunk of frames from a capture, plus a placeholder audio value.

    Args:
        cap: Capture object providing get() and read() (a cv2.VideoCapture in
            this notebook).
        chunk_size: Chunk length in seconds.

    Returns:
        A (frames, audio_chunk) tuple. `frames` holds up to
        int(fps * chunk_size) frames and is empty once the capture yields no
        more frames; `audio_chunk` is a fixed placeholder string because audio
        extraction is not implemented yet.
    """
    frames = []

    # Number of frames covering chunk_size seconds at the reported frame rate.
    frames_per_chunk = int(cap.get(cv2.CAP_PROP_FPS) * chunk_size)
    for _ in range(frames_per_chunk):
        ret, frame = cap.read()
        if not ret:
            # Capture exhausted (or read failed): return what was collected.
            break
        frames.append(frame)

    # Extract audio for chunk_size duration
    # Implement your audio extraction logic here
    # This is just a placeholder for demonstration
    audio_chunk = "Placeholder audio chunk"

    return frames, audio_chunk

def process_video_chunk(video_chunk, audio_chunk):
    """Predict the emotion label for one chunk from its transcribed audio.

    The audio is transcribed with transcribe_audio(), cleaned with
    preprocess_test_data(), vectorized with loaded_cv and scored with
    loaded_model; the index of the largest score is decoded with
    loaded_encoder.

    Args:
        video_chunk: Frames of the chunk (not used by the current pipeline).
        audio_chunk: Audio of the chunk, passed to transcribe_audio().

    Returns:
        The decoded label of the highest-scoring class.
    """
    transcript = transcribe_audio(audio_chunk)  # speech-to-text
    features = loaded_cv.transform([preprocess_test_data(transcript)]).toarray()
    class_scores = loaded_model.predict(features)

    # argmax over the whole score array gives the index of the largest score.
    best_class = np.argmax(class_scores)
    return loaded_encoder.inverse_transform([best_class])[0]

def transcribe_audio(audio_chunk):
    """Return the transcript for an audio chunk (placeholder).

    The argument is currently ignored and a fixed string is returned; replace
    the body with real speech-to-text logic.

    Args:
        audio_chunk: Audio data for one chunk (unused for now).

    Returns:
        The constant placeholder transcript.
    """
    placeholder_transcript = "Placeholder text from audio"
    return placeholder_transcript

video_path = "long.mp4"
cap = cv2.VideoCapture(video_path)
# ... Set up video processing, audio extraction with your chosen library

chunk_size = 5  # Seconds

# try/finally guarantees the capture is released even if a chunk fails.
try:
    if not cap.isOpened():
        # Without this check an unreadable path just ends the loop at once,
        # and the cell finishes silently with no predictions.
        raise IOError(f"Could not open video: {video_path}")

    while True:
        video_chunk, audio_chunk = extract_video_and_audio(cap, chunk_size)

        if not video_chunk:  # End of video
            break

        emotion = process_video_chunk(video_chunk, audio_chunk)
        print("Emotion detected:", emotion)
finally:
    cap.release()
    cv2.destroyAllWindows()








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Emotion detected: fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Emotion detected: fear
