In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import random
import os

In [None]:
# Load the training annotations; the CSV is decoded as ISO-8859-1.
train_df = pd.read_csv('/kaggle/input/ml-hackathon-ec-campus-set-1/train.csv', encoding='ISO-8859-1')
# Directory that get_video_clip_path() joins the clip filenames onto.
# NOTE(review): later cells read the training clips from '.../train' rather
# than '.../train_videos' — confirm which directory exists in the dataset.
video_dir = '/kaggle/input/ml-hackathon-ec-campus-set-1/train_videos'


# Function to get video file path from IDs
def get_video_clip_path(row, base_dir=None):
    """Build the path of the clip that belongs to one annotation row.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing
            'Dialogue_ID' and 'Utterance_ID'.
        base_dir: Directory containing the clips. When omitted, the
            module-level ``video_dir`` is read at call time, which is the
            original behaviour.

    Returns:
        str: ``<base_dir>/dia<Dialogue_ID>_utt<Utterance_ID>.mp4``.

    Raises:
        ValueError: If an ID cannot be converted to an integer (e.g. NaN).
    """
    if base_dir is None:
        base_dir = video_dir
    # Cast to int so IDs parsed as floats (1.0) still give "dia1_utt2.mp4"
    # rather than "dia1.0_utt2.0.mp4"; VideoFrameDataset names clips the same way.
    dialogue_id = int(row['Dialogue_ID'])
    utterance_id = int(row['Utterance_ID'])
    filename = f"dia{dialogue_id}_utt{utterance_id}.mp4"
    return os.path.join(base_dir, filename)

# Resolve the clip location for every training row and store it next to the IDs.
train_clip_paths = train_df.apply(get_video_clip_path, axis=1)
train_df['video_clip_path'] = train_clip_paths

# Sanity-check the first few generated paths.
train_preview_columns = ['Dialogue_ID', 'Utterance_ID', 'video_clip_path']
print(train_df[train_preview_columns].head())

In [None]:
# Display (n_rows, n_columns) of the training frame as the cell output.
train_df.shape

In [None]:
# Load the test annotations (decoded as ISO-8859-1).
# NOTE(review): the generic name `df` is rebound to the training CSV in a later cell.
df = pd.read_csv('/kaggle/input/ml-hackathon-ec-campus-set-1/test.csv', encoding='ISO-8859-1')
# Rebind the module-level clip directory so get_video_clip_path() now resolves test clips.
video_dir = '/kaggle/input/ml-hackathon-ec-campus-set-1/test_videos'


# Function to get video file path from IDs
def get_video_clip_path(row, base_dir=None):
    """Build the path of the clip that belongs to one annotation row.

    NOTE(review): this repeats the definition from the training cell; a
    single shared definition would be enough.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing
            'Dialogue_ID' and 'Utterance_ID'.
        base_dir: Directory containing the clips. When omitted, the
            module-level ``video_dir`` is read at call time, which is the
            original behaviour.

    Returns:
        str: ``<base_dir>/dia<Dialogue_ID>_utt<Utterance_ID>.mp4``.

    Raises:
        ValueError: If an ID cannot be converted to an integer (e.g. NaN).
    """
    if base_dir is None:
        base_dir = video_dir
    # Cast to int so IDs parsed as floats (1.0) still give "dia1_utt2.mp4"
    # rather than "dia1.0_utt2.0.mp4"; VideoFrameDataset names clips the same way.
    dialogue_id = int(row['Dialogue_ID'])
    utterance_id = int(row['Utterance_ID'])
    filename = f"dia{dialogue_id}_utt{utterance_id}.mp4"
    return os.path.join(base_dir, filename)

# Resolve the clip location for every test row and store it next to the IDs.
test_clip_paths = df.apply(get_video_clip_path, axis=1)
df['video_clip_path'] = test_clip_paths

# Sanity-check the first few generated paths.
test_preview_columns = ['Dialogue_ID', 'Utterance_ID', 'video_clip_path']
print(df[test_preview_columns].head())

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller, kpss

In [None]:
# Labelled training text: the raw utterance plus its sentiment string.
sentences = pd.DataFrame(
    train_df[["Utterance", "Sentiment"]].values,
    columns=['Text', 'Sentiment'],
)
print(sentences)

# Unlabelled test text, exposed under the same 'Text' column name.
sentences_test = pd.DataFrame(df["Utterance"].values, columns=['Text'])
print(sentences_test)

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Encode sentiments into numeric labels: negative=0, positive=1, neutral=2.
SENTIMENT_TO_ID = {'positive': 1, 'negative': 0, 'neutral': 2}

# Re-running this cell used to map the already-numeric column a second time,
# which turned every label into NaN; only encode while the column is still text.
if not pd.api.types.is_numeric_dtype(sentences['Sentiment']):
    encoded_sentiment = sentences['Sentiment'].map(SENTIMENT_TO_ID)
    # .map() yields NaN for any value missing from the table; fail loudly
    # instead of passing NaN labels on to the model.
    unmapped = list(sentences.loc[encoded_sentiment.isna(), 'Sentiment'].unique())
    if unmapped:
        raise ValueError(f"Unexpected sentiment labels: {unmapped}")
    sentences['Sentiment'] = encoded_sentiment.astype('int64')

# Split the labelled data: 70% for training, 30% held out (fixed seed).
train_data, test_data = train_test_split(sentences, test_size=0.3, random_state=42)

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create custom Dataset class
class SentimentDataset(Dataset):
    """Torch dataset yielding tokenized tensors for labelled sentences.

    Args:
        sentences: DataFrame with a 'Text' column and a 'Sentiment' column
            holding integer class ids.
        tokenizer: Callable tokenizer (e.g. ``BertTokenizer``) accepting the
            keyword arguments used in ``__getitem__``.
        max_length: Every sample is padded or truncated to this many tokens.
    """

    def __init__(self, sentences, tokenizer, max_length=128):
        self.sentences = sentences
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Plain arrays give positional access, so the DataFrame's index
        # labels do not matter for lookups in __getitem__.
        self.texts = sentences['Text'].values
        self.labels = sentences['Sentiment'].values

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return a dict with 'input_ids', 'attention_mask' and 'labels' for row ``idx``."""
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Call the tokenizer directly; encode_plus() is the deprecated
        # spelling of the same operation.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # flatten() drops the leading batch axis so the DataLoader can stack
        # samples into (batch, max_length).
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Prepare DataLoader
train_dataset = SentimentDataset(train_data, tokenizer)
test_dataset = SentimentDataset(test_data, tokenizer)  # held-out part of the labelled data

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Load pre-trained BERT model for sequence classification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimizer: the AdamW exported by `transformers` is deprecated (and missing
# from recent releases), so use the PyTorch implementation instead.
# NOTE(review): the `AdamW` name in this cell's `from transformers import ...`
# line is no longer needed and should be dropped from that import.
# weight_decay=0.0 keeps the default of the implementation this replaces.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.0)

# Training loop
epochs = 3
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    for batch in train_dataloader:
        optimizer.zero_grad()

        # Move batch to GPU if available
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass (passing labels makes the model return the loss)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()

        # Optimizer step
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs} completed.")
    # Mean batch loss gives a quick signal of whether training is converging.
    print(f"Mean training loss: {epoch_loss / max(len(train_dataloader), 1):.4f}")

# Evaluation on the held-out split
model.eval()
predictions, true_labels = [], []
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        # Only the logits are needed here, so the labels are not sent to the
        # model (which would compute an unused loss).
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=1)

        predictions.extend(preds.cpu().numpy())
        true_labels.extend(batch['labels'].numpy())

# Print classification report for 3 sentiment classes.
# labels=[0, 1, 2] ties each row to its target name and avoids a ValueError
# when a class appears in neither the targets nor the predictions.
print(classification_report(
    true_labels,
    predictions,
    labels=[0, 1, 2],
    target_names=['negative', 'positive', 'neutral'],
))


In [None]:
# `sentences_test` holds the unlabelled test utterances in a single 'Text'
# column; there is no 'Sentiment' column to evaluate against here.

test_sentences = sentences_test['Text'].values  # Only the text for prediction

# Create a dataset for test data without labels
class TestSentimentDataset(Dataset):
    """Torch dataset yielding tokenized tensors for unlabelled texts.

    Args:
        sentences: Indexable sequence of texts (e.g. a NumPy array of strings).
        tokenizer: Callable tokenizer (e.g. ``BertTokenizer``) accepting the
            keyword arguments used in ``__getitem__``.
        max_length: Every sample is padded or truncated to this many tokens.
    """

    def __init__(self, sentences, tokenizer, max_length=128):
        self.sentences = sentences
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.texts = sentences

    def __len__(self):
        """Number of texts in the dataset."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return a dict with 'input_ids' and 'attention_mask' for text ``idx``."""
        text = str(self.texts[idx])

        # Call the tokenizer directly; encode_plus() is the deprecated
        # spelling of the same operation.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # flatten() drops the leading batch axis so the DataLoader can stack
        # samples into (batch, max_length).
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten()
        }

# Wrap the unlabelled texts so they can be batched like the training data.
test_dataset = TestSentimentDataset(test_sentences, tokenizer)
test_dataloader = DataLoader(test_dataset, batch_size=16)

# Run the model over every batch and collect the arg-max class ids.
model.eval()
predictions = []
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = logits.argmax(dim=1)

        predictions += list(preds.cpu().numpy())

# Class id -> label string; the list position is the class id
# (0=negative, 1=positive, 2=neutral).
sentiment_labels = ['negative', 'positive', 'neutral']
predicted_sentiments = [sentiment_labels[class_id] for class_id in predictions]
all_preds = predicted_sentiments

# One row per test text, with IDs numbered from 1 in input order.
submission_ids = range(1, len(test_sentences) + 1)
submission_df = pd.DataFrame({'ID': submission_ids, 'Emotion': all_preds})

# Write the submission file without the DataFrame index.
submission_df.to_csv("submission.csv", index=False)


In [None]:
!pip install moviepy

In [None]:
# moviepy 1.x exposes VideoFileClip via `moviepy.editor`; 2.x removed that
# module and exports the class from the package root instead.
try:
    from moviepy.editor import VideoFileClip
except ImportError:
    from moviepy import VideoFileClip
import os

# Path to the video directory
# NOTE(review): the first cell uses '.../train_videos' for the same clips —
# confirm which directory exists in the dataset.
video_dir = "/kaggle/input/ml-hackathon-ec-campus-set-1/train"  # Your video folder path

# Create a directory to store audio files
audio_dir = "/kaggle/working/audio_files"
os.makedirs(audio_dir, exist_ok=True)

# Iterate through all video files in the directory (sorted for a reproducible order)
for filename in sorted(os.listdir(video_dir)):
    if not filename.endswith(('.mp4', '.avi', '.mov')):  # Adjust extensions as needed
        continue

    video_path = os.path.join(video_dir, filename)
    audio_filename = os.path.splitext(filename)[0] + '.mp3'  # You can also use .wav
    audio_path = os.path.join(audio_dir, audio_filename)

    # The context manager closes the clip even when extraction fails
    # part-way, so its resources are not leaked.
    with VideoFileClip(video_path) as video_clip:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # A clip without an audio track has `.audio` set to None.
            print(f"Skipping {filename}: no audio track")
            continue
        audio_clip.write_audiofile(audio_path)

print("Audio extraction complete!")

In [None]:
import librosa
import numpy as np
import os

# Path to your audio files directory
audio_dir = "/kaggle/working/audio_files"  # Update with your path


def extract_audio_features(audio_file, n_mfcc=13):
    """Summarise one audio file as its time-averaged MFCC vector.

    Args:
        audio_file: Path handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute (default 13, the
            value that was previously hard-coded).

    Returns:
        numpy.ndarray: Mean of the MFCC matrix over axis 1, i.e. one value
        per coefficient.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    samples, sample_rate = librosa.load(audio_file, sr=None)
    mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1)


# Extract features for all audio files; the two lists stay index-aligned.
audio_features = []
audio_filenames = []

# os.listdir() returns entries in arbitrary order; sorting makes the row
# order of the feature matrix the same on every run.
for filename in sorted(os.listdir(audio_dir)):
    if filename.endswith('.mp3'):  # Adjust file extensions as needed
        audio_file_path = os.path.join(audio_dir, filename)
        audio_features.append(extract_audio_features(audio_file_path))
        audio_filenames.append(filename)

# Convert to numpy array
audio_features = np.array(audio_features)

In [None]:
import pandas as pd

# Read the training annotations again, decoded as ISO-8859-1.
file_path = "/kaggle/input/ml-hackathon-ec-campus-set-1/train.csv"  # Replace with your actual CSV file path
df = pd.read_csv(file_path, encoding='ISO-8859-1')

# First rows, to check which columns are present.
print(df.head())

# Count of missing values per column.
print(df.isna().sum())

# Keep only rows where both the utterance and its sentiment are present.
required_columns = ['Utterance', 'Sentiment']
df = df.dropna(subset=required_columns)

# First rows of the cleaned frame.
print(df[required_columns].head())

In [None]:
from transformers import pipeline

# Initialize the sentiment analysis pipeline with an explicitly named checkpoint.
# Without `model=` the library picks a default and warns about it; naming the
# checkpoint keeps results reproducible. This is a DistilBERT model fine-tuned
# on SST-2 that emits only "POSITIVE" / "NEGATIVE" labels.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Function to analyze sentiment of the utterance
def analyze_text_sentiment(text):
    """Return the label string of the analyzer's first prediction for ``text``."""
    top_prediction = sentiment_analyzer(text)[0]
    return top_prediction['label']  # "POSITIVE" or "NEGATIVE"

# Run the analyzer on every utterance, one element at a time.
df['Predicted_Sentiment'] = df['Utterance'].map(analyze_text_sentiment)

# Map BERT sentiment output to numeric labels (positive: 1, negative: 0, neutral: 2)
def sentiment_to_label(sentiment):
    """Translate a pipeline label into the numeric class id used in this notebook.

    'NEGATIVE' -> 0, 'POSITIVE' -> 1, anything else -> 2 (treated as neutral).
    """
    # The position in the tuple is the class id.
    for class_id, pipeline_label in enumerate(('NEGATIVE', 'POSITIVE')):
        if sentiment == pipeline_label:
            return class_id
    return 2  # Neutral

# Numeric version of the predicted label, for comparison with the ground truth.
df['Predicted_Sentiment_Label'] = df['Predicted_Sentiment'].map(sentiment_to_label)

# Show the text, the annotated sentiment and both prediction columns side by side.
prediction_preview_columns = ['Utterance', 'Sentiment', 'Predicted_Sentiment', 'Predicted_Sentiment_Label']
print(df[prediction_preview_columns].head())

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Convert actual sentiment to numeric labels (positive: 1, negative: 0, neutral: 2)
def actual_sentiment_to_label(sentiment):
    """Convert a ground-truth sentiment string to its numeric class id.

    Matching ignores letter case and surrounding whitespace, mirroring how
    VideoFrameDataset normalises the same column. Without this, a value such
    as ' Positive ' would silently be counted as neutral.

    Args:
        sentiment: Raw value from the 'Sentiment' column.

    Returns:
        int: 1 for 'positive', 0 for 'negative', 2 (neutral) for anything else.
    """
    normalized = str(sentiment).strip().lower()
    if normalized == 'positive':
        return 1
    if normalized == 'negative':
        return 0
    return 2  # Neutral

# Numeric ground-truth labels, encoded the same way as the predictions.
df['Actual_Sentiment_Label'] = df['Sentiment'].map(actual_sentiment_to_label)

y_true = df['Actual_Sentiment_Label']
y_pred = df['Predicted_Sentiment_Label']

# Share of utterances whose predicted class id equals the annotated one.
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Per-class precision / recall / F1.
report = classification_report(y_true, y_pred)
print("Classification Report:\n", report)

In [None]:
!pip install moviepy
import os
from moviepy.editor import VideoFileClip


input_directory = '/kaggle/input/ml-hackathon-ec-campus-set-1/train/'  
output_directory = '/kaggle/working/train_wav/'  


if not os.path.exists(output_directory):
    os.makedirs(output_directory)


for filename in os.listdir(input_directory):
    if filename.endswith('.mp4'):
        
        mp4_path = os.path.join(input_directory, filename)
        
      
        wav_path = os.path.join(output_directory, filename.replace('.mp4', '.wav'))
        
        
        video_clip = VideoFileClip(mp4_path)
        audio_clip = video_clip.audio
        
       
        audio_clip.write_audiofile(wav_path, codec='pcm_s16le')
       
        audio_clip.close()
        video_clip.close()
        
        print(f'Converted {filename} to WAV format and saved as {wav_path}.')
!pip install librosa
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import os
from PIL import Image
class VideoFrameDataset(Dataset):
    """Dataset of fixed-length frame stacks read from per-utterance video clips.

    Each item is ``(frames, label)``. ``frames`` stacks ``num_frames`` frames
    taken from the start of the clip ``dia<Dialogue_ID>_utt<Utterance_ID>.mp4``
    and ``label`` is an int looked up in ``sentiment_map``. A clip that is
    missing, cannot be opened, or yields no frame produces an all-zero stack
    and keeps the label from the annotations.
    """

    def __init__(self, csv_file, video_dir, transform=None, num_frames=30, frame_size=(224, 224)):
        """
        Args:
            csv_file (string): Path to the CSV file with annotations
            video_dir (string): Directory with all the videos
            transform (callable, optional): Optional transform to be applied on frames.
                It must return tensors for the frames to be stackable.
            num_frames (int): Number of frames per item (clips are cropped or zero-padded).
            frame_size (tuple): (height, width) of the zero placeholder used when a
                clip yields no frames; should match the transform's output size.
        """
        # Try reading with utf-8 encoding and fall back to latin1 if needed
        try:
            self.data = pd.read_csv(csv_file, encoding='utf-8')
        except UnicodeDecodeError:
            print("utf-8 encoding failed, trying 'latin1'")
            self.data = pd.read_csv(csv_file, encoding='latin1')

        self.video_dir = video_dir
        self.transform = transform
        self.num_frames = num_frames
        self.frame_size = tuple(frame_size)
        # NOTE(review): the text cells in this notebook encode
        # negative=0 / positive=1 / neutral=2; the ids here differ.
        self.sentiment_map = {'negative': 0, 'neutral': 1, 'positive': 2}

    def extract_frames(self, video_path, max_frames=30):
        """Extract a fixed number of frames from the beginning of a video.

        Args:
            video_path: Path of the clip to read.
            max_frames: Upper bound on the number of frames returned.

        Returns:
            A list of at most ``max_frames`` frames (RGB PIL images, passed
            through ``self.transform`` when one is set), or None when the file
            is missing, cannot be opened, or yields no frame.
        """
        if not os.path.exists(video_path):
            print(f"Warning: Video file not found: {video_path}")
            return None

        frames = []
        cap = None
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Warning: Could not open video: {video_path}")
                return None

            while cap.isOpened() and len(frames) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; convert before building the PIL image.
                frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if self.transform:
                    frame = self.transform(frame)
                frames.append(frame)

        except Exception as e:
            # Best effort: keep whatever was decoded before the failure.
            print(f"Warning: Error processing video {video_path}: {e}")

        finally:
            # Release the capture on every exit path, including the early return.
            if cap is not None:
                cap.release()

        return frames if frames else None

    def __len__(self):
        """Number of annotation rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for annotation row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Look the row up once instead of once per field.
        row = self.data.iloc[idx]
        sentiment = str(row['Sentiment']).lower().strip()
        label = self.sentiment_map.get(sentiment, 1)  # Default to neutral if unknown

        # Dynamically construct the filename
        video_name = f"dia{int(row['Dialogue_ID'])}_utt{int(row['Utterance_ID'])}.mp4"
        video_path = os.path.join(self.video_dir, video_name)

        # extract_frames() returns None for missing and unreadable clips alike.
        # Both cases keep the annotated label; previously a missing clip was
        # given a hard-coded neutral label regardless of its annotation.
        frames = self.extract_frames(video_path, max_frames=self.num_frames)
        if frames is None:
            return torch.zeros((self.num_frames, 3, *self.frame_size)), label

        # Pad short clips with zero frames shaped like the real ones.
        if len(frames) < self.num_frames:
            padding = torch.zeros_like(frames[0])
            frames = frames + [padding] * (self.num_frames - len(frames))

        return torch.stack(frames[:self.num_frames]), label
class SentimentResNet(nn.Module):
    """ResNet-50 frame encoder followed by a two-layer classification head.

    The input is a 5-D batch ``(batch, frames, channels, height, width)``.
    Each frame is encoded separately, the frame features are averaged per
    clip, and the head maps the result to ``num_classes`` logits.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        # Keep every child module except the last one (the classifier layer).
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(2048, 512)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch of clips."""
        batch_size = x.size(0)
        num_frames = x.size(1)

        # Fold the frame axis into the batch axis so the backbone sees plain images.
        per_frame = x.view(-1, x.size(2), x.size(3), x.size(4))
        frame_features = self.pool(self.resnet(per_frame))

        # Restore the (batch, frames, features) layout and average over frames.
        clip_features = frame_features.view(batch_size, num_frames, -1).mean(dim=1)

        hidden = self.dropout(self.relu(self.fc1(clip_features)))
        return self.fc2(hidden)
                
from torch.cuda.amp import autocast, GradScaler

def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    """Train ``model`` in place and report running and per-epoch metrics.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to (string or ``torch.device``).

    Returns:
        list[dict]: One ``{'loss': mean batch loss, 'accuracy': percent}``
        entry per epoch.
    """
    # Mixed precision is only enabled on CUDA; elsewhere autocast and the
    # scaler are created disabled, so the loop runs in full precision.
    use_amp = torch.device(device).type == 'cuda' and torch.cuda.is_available()
    scaler = GradScaler(enabled=use_amp)
    history = []

    model.train()
    for epoch in range(num_epochs):
        # Windowed counters feed the every-100-steps log line; the epoch
        # counters cover all batches, including a trailing partial window.
        running_loss, correct, total = 0.0, 0, 0
        epoch_loss, epoch_correct, epoch_total, num_batches = 0.0, 0, 0, 0

        for i, (frames, labels) in enumerate(train_loader):
            frames, labels = frames.to(device), labels.to(device)
            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = model(frames)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            batch_loss = loss.item()
            _, predicted = torch.max(outputs.data, 1)
            batch_correct = (predicted == labels).sum().item()
            batch_size = labels.size(0)

            running_loss += batch_loss
            correct += batch_correct
            total += batch_size
            epoch_loss += batch_loss
            epoch_correct += batch_correct
            epoch_total += batch_size
            num_batches += 1

            if i % 100 == 99:
                print(f'Epoch [{epoch + 1}/{num_epochs}], '
                      f'Step [{i + 1}/{len(train_loader)}], '
                      f'Loss: {running_loss / 100:.4f}, '
                      f'Accuracy: {100 * correct / total:.2f}%')
                running_loss, correct, total = 0.0, 0, 0

        # Per-epoch summary, so short loaders (< 100 batches) also report progress.
        mean_loss = epoch_loss / num_batches if num_batches else 0.0
        accuracy = 100 * epoch_correct / epoch_total if epoch_total else 0.0
        print(f'Epoch [{epoch + 1}/{num_epochs}] finished, '
              f'Mean loss: {mean_loss:.4f}, Accuracy: {accuracy:.2f}%')
        history.append({'loss': mean_loss, 'accuracy': accuracy})

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return history
try:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Resize to 224x224, convert to a tensor, then normalise each channel
    # with the mean/std values given below.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # NOTE(review): the first cell reads the training clips from
    # '.../train_videos' — confirm which directory exists in the dataset.
    dataset = VideoFrameDataset(
        csv_file='/kaggle/input/ml-hackathon-ec-campus-set-1/train.csv',
        video_dir='/kaggle/input/ml-hackathon-ec-campus-set-1/train',
        transform=transform
    )

    train_loader = DataLoader(
        dataset,
        batch_size=4,
        shuffle=True,
        num_workers=4,
        pin_memory=torch.cuda.is_available()
    )

    # NOTE(review): this rebinds the module-level `model`, which an earlier
    # cell uses for the BERT classifier.
    model = SentimentResNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=device)

    torch.save(model.state_dict(), 'sentiment_resnet.pth')
    print("Training completed successfully!")

except Exception as e:
    # Report the failure, then re-raise: swallowing the exception hid the
    # traceback and let the cell look as if it had succeeded.
    print(f"Error in main execution: {e}")
    raise