In [None]:
# Install the third-party packages this notebook relies on. pydub,
# SpeechRecognition and transformers are imported in the analysis cell below;
# torch is not imported directly -- presumably it is the backend used by the
# transformers pipeline (TODO confirm).
!pip install pydub
!pip install speechrecognition
!pip install transformers
!pip install torch


    """This is the jupyter notebook I ran on colab to extract the analysis on a few july 14th video I wanted to push this for record reasons"""


In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# accessed through ordinary filesystem paths from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import json
import speech_recognition as sr
from pydub import AudioSegment
from transformers import pipeline
from collections import Counter
import pandas as pd
from tqdm import tqdm

def extract_audio_from_video(video_file, output_audio_file):
    """Write the audio track of ``video_file`` to ``output_audio_file`` as WAV.

    The media file is loaded with pydub's ``AudioSegment.from_file`` and
    exported with ``format="wav"``.

    Args:
        video_file: Path of the media file to read.
        output_audio_file: Path the WAV export is written to.

    Returns:
        ``True`` if loading and exporting both succeeded, ``False`` if any
        exception was raised. Failures are printed, never propagated.
    """
    try:
        AudioSegment.from_file(video_file).export(output_audio_file, format="wav")
    except Exception as err:
        # Best-effort by design: report the problem and let the caller skip
        # this file instead of aborting the whole batch.
        print(f"Error extracting audio from {video_file}: {err!s}")
        return False
    return True

def convert_audio_to_text(audio_file):
    """Transcribe an audio file with ``speech_recognition``.

    The whole file is read through ``sr.AudioFile`` and handed to
    ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The recognized text, or an empty string if reading the file or
        recognition raised an exception (the error is printed).
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            # record() with no extra arguments captures the source in full.
            return recognizer.recognize_google(recognizer.record(source))
    except Exception as err:
        print(f"Error in speech recognition: {err!s}")
    # Reached only on failure; callers treat "" as "no transcript".
    return ""

def classify_emotions_in_text(transcript, classifier):
    """Return the highest-scoring emotion label for ``transcript``.

    Args:
        transcript: Text to classify. A falsy value (``""`` or ``None``)
            short-circuits to ``None`` without calling the classifier.
        classifier: Callable invoked as ``classifier(transcript)``. Its first
            result element must be an iterable of dicts that each carry a
            ``'label'`` and a ``'score'`` key.

    Returns:
        The ``'label'`` of the entry with the largest ``'score'``, or ``None``
        when there is no transcript or classification raised an exception
        (the error is printed).
    """
    if not transcript:
        return None

    def score_of(candidate):
        return candidate['score']

    try:
        candidates = classifier(transcript)[0]
        best = max(candidates, key=score_of)
        return best['label']
    except Exception as err:
        print(f"Error in emotion classification: {err!s}")
        return None

def analyze_videos_in_folder(folder_path):
    """Transcribe and emotion-classify every video file in ``folder_path``.

    For each file whose name ends in ``.mp4``, ``.mov``, ``.avi`` or ``.mkv``
    (case-insensitive) the audio is extracted to a temporary WAV file,
    transcribed, and the transcript is classified.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list of dicts with the keys ``'video'`` (file name),
        ``'transcript'`` (possibly ``""``) and ``'emotion'`` (label or
        ``None``), ordered by file name. Videos whose audio could not be
        extracted are omitted. An empty list is returned, without loading the
        model, when the folder contains no matching files.

    Raises:
        OSError: If ``folder_path`` cannot be listed (e.g. it does not exist).
    """
    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')

    # List the folder before building the classifier so a bad path fails fast
    # instead of after an expensive model load. Sorting makes the result order
    # reproducible, since os.listdir() returns entries in arbitrary order.
    video_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(video_extensions)
    )
    if not video_files:
        return []

    classifier = pipeline("text-classification",
                          model="michellejieli/emotion_text_classifier",
                          return_all_scores=True)

    results = []
    temp_audio_path = "temp_audio.wav"

    for video_file in tqdm(video_files, desc="Processing videos"):
        video_path = os.path.join(folder_path, video_file)
        try:
            if not extract_audio_from_video(video_path, temp_audio_path):
                continue

            transcript = convert_audio_to_text(temp_audio_path)
            emotion = classify_emotions_in_text(transcript, classifier)

            results.append({
                'video': video_file,
                'transcript': transcript,
                'emotion': emotion
            })
        finally:
            # Always remove the scratch file, including when extraction failed
            # part-way or a later step raised, so a stale or partial WAV is
            # never left behind or picked up by the next iteration.
            if os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)

    return results

def create_summary_table(results):
    """Build an emotion-frequency table and a per-video detail table.

    Args:
        results: List of dicts that each have an ``'emotion'`` key; entries
            whose emotion is falsy (e.g. ``None``) are left out of the counts.

    Returns:
        A ``(df_summary, df_details)`` tuple. ``df_summary`` has the columns
        ``'Emotion'`` and ``'Count'`` sorted by count, largest first.
        ``df_details`` is ``results`` converted to a DataFrame as-is.
    """
    tally = Counter()
    for record in results:
        label = record['emotion']
        if label:
            tally[label] += 1

    rows = list(tally.items())
    df_summary = pd.DataFrame(rows, columns=['Emotion', 'Count']).sort_values(
        'Count', ascending=False
    )

    return df_summary, pd.DataFrame(results)

def main(folder_path="/content/drive/MyDrive/July 13th files"):
    """Analyze a folder of videos, print the emotion summary and save CSVs.

    Args:
        folder_path: Directory containing the videos to analyze. Defaults to
            the Google Drive folder this notebook was originally run against,
            so calling ``main()`` with no arguments behaves as before.

    Side effects:
        Writes ``emotion_summary.csv`` and ``analysis_details.csv`` to the
        current working directory and prints progress and the summary table.
    """
    print("Starting video analysis...")
    results = analyze_videos_in_folder(folder_path)

    summary_table, details_table = create_summary_table(results)

    print("\nEmotion Distribution Summary:")
    print(summary_table.to_string(index=False))

    summary_table.to_csv('emotion_summary.csv', index=False)
    details_table.to_csv('analysis_details.csv', index=False)

    print("\nResults have been saved to 'emotion_summary.csv' and 'analysis_details.csv'")

# Run the analysis only when this code is executed directly (module name is
# "__main__"), not when it is imported from another module.
if __name__ == "__main__":
    main()