# Extracting the First Frame of each Video as the Thumbnail

In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import clip
from tqdm import tqdm  


# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# CLIP returns the network together with its matching image transform.
model, preprocess = clip.load("ViT-L/14@336px", device=device)  


def load_images_and_extract_features(folder, model, preprocess, device):
    """Encode every image found in *folder* and return the features as a table.

    Args:
        folder: Directory scanned (non-recursively) for files whose names end
            in ``.png``, ``.jpg`` or ``.jpeg``.
        model: Object exposing ``encode_image(batch)``; called under
            ``torch.no_grad()``.
        preprocess: Callable that turns a PIL image into a tensor. A leading
            batch axis is added here before the tensor is encoded.
        device: Device the preprocessed tensor is moved to.

    Returns:
        A ``pandas.DataFrame`` with one row per image, indexed by file name,
        and columns ``feature_0 .. feature_{d-1}`` where ``d`` is the length
        of the flattened vector returned by the model.
    """
    # Column count used only when no image was encoded, so an empty folder
    # still yields the historical 768-column layout.
    default_feature_dim = 768

    image_names = []
    features = []
    # os.listdir gives no ordering guarantee; sorting makes the row order
    # reproducible across runs and file systems.
    file_list = sorted(
        f for f in os.listdir(folder) if f.endswith(('.png', '.jpg', '.jpeg'))
    )

    for file_name in tqdm(file_list, desc="Processing images", unit="image"):
        image_path = os.path.join(folder, file_name)
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(image_path) as raw_image:
            image = preprocess(raw_image.convert("RGB")).unsqueeze(0).to(device)
        with torch.no_grad():
            feature = model.encode_image(image).cpu().numpy().flatten()
        image_names.append(file_name)
        features.append(feature)

    # Take the width from the data instead of assuming one specific backbone.
    feature_dim = len(features[0]) if features else default_feature_dim
    columns = [f"feature_{i}" for i in range(feature_dim)]
    return pd.DataFrame(features, index=image_names, columns=columns)


# Directory containing the thumbnail images to encode.
image_folder = "./icable-news-scraping/thumbnails_analysis/thumbnails"  

# Encode every thumbnail, name the index "image_name", and print a sample.
df_features = load_images_and_extract_features(
    folder=image_folder,
    model=model,
    preprocess=preprocess,
    device=device,
)
df_features = df_features.rename_axis("image_name")
print(df_features.head())  

# Persist the feature table so later steps can reload it without re-encoding.
df_features.to_pickle("clip_features.pkl")

Processing images: 100%|██████████| 37843/37843 [26:01<00:00, 24.23image/s]


            feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
image_name                                                                     
0.jpg        0.378174   0.776367  -0.139771  -0.042175  -0.418457  -0.729004   
1.jpg        0.219116   0.103516   0.297363   2.056641   0.383057  -0.051331   
100.jpg      0.366211   0.254639   0.304443   0.751465  -0.732910   0.181152   
1000.jpg     0.377441   1.109375   0.409668   0.067505  -0.665527  -0.292236   
10000.jpg    0.506836  -0.531250   0.203979   0.172363  -0.760742   1.029297   

            feature_6  feature_7  feature_8  feature_9  ...  feature_758  \
image_name                                              ...                
0.jpg        0.275879  -0.557617   0.355957  -0.012924  ...     0.018524   
1.jpg       -0.192505   0.036285   0.273926   0.054596  ...     0.379395   
100.jpg     -0.967285   0.604004   0.493896  -0.218628  ...     0.587402   
1000.jpg     0.194336  -0.181641   0.719727   0.189697  ...

# Manually Labelling 2,000 Thumbnails as "News Reporter Present" or "News Reporter NOT Present"

In [None]:
import os
import pandas as pd
from tkinter import Tk, Label, Button
from PIL import Image, ImageTk


# Thumbnails to label: only files with a common raster-image extension.
image_folder = "./icable-news-scraping/thumbnails_analysis/thumbnails"
image_files = [
    name
    for name in os.listdir(image_folder)
    if name.endswith(('.png', '.jpg', '.jpeg'))
]

# Resume from an earlier session when a label file exists; otherwise start
# with every image unlabeled.
label_file = "labels.csv"
if not os.path.exists(label_file):
    df = pd.DataFrame({"image_name": image_files, "is_reporter": None})  
else:
    df = pd.read_csv(label_file, index_col=0)  

# Index of the first row still missing a label, or -1 when none is left.
unlabeled_indices = df.index[df["is_reporter"].isna()].tolist()
if unlabeled_indices:
    current_index = unlabeled_indices[0]
else:
    current_index = -1


class LabelingApp:
    """Tkinter window for labelling images one at a time as True or False.

    The app reads the module-level ``df`` (columns ``image_name`` and
    ``is_reporter``), ``current_index``, ``image_folder`` and ``label_file``.
    Every label is written to ``label_file`` immediately.

    Keyboard shortcuts: ``j`` marks True, ``k`` marks False, ``h`` goes back
    one image and clears its label.
    """

    def __init__(self, root):
        """Build the widgets inside *root* and show the first unlabeled image."""
        self.root = root
        self.root.title("Image Labeling Tool")

        # Start from the session state prepared at module level.
        self.current_index = current_index
        self.df = df

        self.image_label = Label(root)
        self.image_label.pack()

        button_font = ("Helvetica", 16, "bold")
        self.true_button = Button(root, text="True", command=self.mark_true, width=30, height=2, bg="lightgreen", font=button_font)
        self.true_button.pack(side="left", padx=20, pady=20)

        self.false_button = Button(root, text="False", command=self.mark_false, width=30, height=2, bg="lightcoral", font=button_font)
        self.false_button.pack(side="right", padx=20, pady=20)

        self.root.bind("<j>", lambda event: self.mark_true())
        self.root.bind("<k>", lambda event: self.mark_false())
        self.root.bind("<h>", lambda event: self.go_back())

        self.update_image()

    def update_image(self):
        """Display the image at ``current_index``, or the completion message."""
        if self.current_index == -1:
            # A Tk Label shows its image in preference to its text, so the
            # last picture has to be removed for the message to be visible.
            self.image_label.config(image="", text="All images have been labeled!")
            self.image_label.image = None
            self.true_button.config(state="disabled")
            self.false_button.config(state="disabled")
            return

        # The buttons may have been disabled by a previous completion state.
        self.true_button.config(state="normal")
        self.false_button.config(state="normal")

        # Labels are written with .loc, so the row is read with .loc as well.
        img_path = os.path.join(image_folder, self.df.loc[self.current_index, "image_name"])
        with Image.open(img_path) as source:
            resized = source.resize((854, 480))
        photo = ImageTk.PhotoImage(resized)

        self.image_label.config(image=photo, text="")
        # Keep a reference on the widget so the photo is not garbage-collected.
        self.image_label.image = photo
        self.root.title(f"Image {self.current_index + 1}/{len(self.df)}")

    def mark_true(self):
        """Label the current image True and move to the next unlabeled one."""
        self._record_label(True)

    def mark_false(self):
        """Label the current image False and move to the next unlabeled one."""
        self._record_label(False)

    def _record_label(self, value):
        """Store *value* for the current row, unless nothing is left to label."""
        # The key bindings remain active after completion; assigning to
        # .loc[-1] at that point would append a bogus row to the table.
        if self.current_index == -1:
            return
        self.df.loc[self.current_index, "is_reporter"] = value
        self.save_and_next()

    def go_back(self):
        """Step back one image and clear its label so it can be redone.

        From the completion state this returns to the last row of the table.
        The cleared label is written to disk on the next save.
        """
        if self.current_index == -1:
            if self.df.empty:
                return
            previous_index = self.df.index[-1]
        elif self.current_index > 0:
            previous_index = self.current_index - 1
        else:
            return

        self.current_index = previous_index
        self.df.loc[previous_index, "is_reporter"] = None
        self.update_image()

    def save_and_next(self):
        """Write all labels to ``label_file`` and advance to the next unlabeled row."""
        self.df.to_csv(label_file)

        unlabeled_indices = self.df[self.df["is_reporter"].isna()].index.tolist()
        self.current_index = unlabeled_indices[0] if unlabeled_indices else -1

        self.update_image()


# Create the main window, attach the labelling UI to it, and run the Tk event
# loop; mainloop() blocks until the window is closed.
root = Tk()
app = LabelingApp(root)
root.mainloop()


# Using a Simple Neural Network to Classify the Videos as "News Reporter Present" or "News Reporter NOT Present"

In [None]:
import pandas as pd

In [None]:
# Reload the feature table saved as "clip_features.pkl" by the extraction step.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
clip_features = pd.read_pickle("clip_features.pkl")

In [None]:
# Show the first row to check the index name and the feature columns.
print(clip_features.head(1))

            feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
image_name                                                                     
0.jpg        0.378174   0.776367  -0.139771  -0.042175  -0.418457  -0.729004   

            feature_6  feature_7  feature_8  feature_9  ...  feature_758  \
image_name                                              ...                
0.jpg        0.275879  -0.557617   0.355957  -0.012924  ...     0.018524   

            feature_759  feature_760  feature_761  feature_762  feature_763  \
image_name                                                                    
0.jpg         -0.908691    -0.226929     0.220825    -0.154175     0.348389   

            feature_764  feature_765  feature_766  feature_767  
image_name                                                      
0.jpg         -0.486816     0.259521     0.489746     0.989746  

[1 rows x 768 columns]


In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import StandardScaler


class ComplexClassifier(nn.Module):
    """One-hidden-layer network that outputs a value in (0, 1) per input row.

    Layer order: Linear -> ReLU -> Dropout -> Linear -> Sigmoid.
    """

    def __init__(self, input_dim, hidden_dim=512, output_dim=1, dropout_rate=0.5):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which parameters are initialised.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature rows to sigmoid-activated outputs."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)
        logits = self.fc2(hidden)
        return self.sigmoid(logits)



# Train and evaluate the reporter / no-reporter classifier on the labelled
# thumbnails, then save the trained weights.
from sklearn.metrics import accuracy_score, classification_report
from torch.utils.data import Subset

# One seed drives the train/test split, the weight initialisation, dropout and
# batch shuffling, so a rerun reproduces the same numbers.
SEED = 42
torch.manual_seed(SEED)

# Labels come from the labelling tool; features from the CLIP extraction step.
labels_df = pd.read_csv("labels.csv", index_col=0)
features_df = clip_features

# Join on the image file name and keep only rows that actually carry a label.
data = labels_df.merge(features_df, on="image_name")
data = data.dropna(subset=["is_reporter"])

# Use whatever feature columns the table provides rather than a fixed width.
feature_columns = list(features_df.columns)
X = data[feature_columns].values
y = data["is_reporter"].astype(int).values

# Decide the 80/20 split BEFORE scaling, so the scaler statistics are computed
# from training rows only and nothing about the test rows leaks into training.
n_samples = len(data)
train_size = int(0.8 * n_samples)
test_size = n_samples - train_size
split_generator = torch.Generator().manual_seed(SEED)
shuffled_indices = torch.randperm(n_samples, generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
test_indices = shuffled_indices[train_size:]

scaler = StandardScaler()
scaler.fit(X[train_indices])
X = scaler.transform(X)

X_tensor = torch.tensor(X, dtype=torch.float32)
# BCELoss expects targets shaped like the model output: (batch, 1).
y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

dataset = TensorDataset(X_tensor, y_tensor)
train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

input_dim = X.shape[1]
model = ComplexClassifier(input_dim=input_dim, hidden_dim=512, output_dim=1, dropout_rate=0.5)
criterion = nn.BCELoss()  # the model already applies a sigmoid
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

# Evaluate on the held-out rows, with dropout disabled and no gradients.
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        predictions = (outputs > 0.5).float()
        # Flatten (batch, 1) to plain floats so the metrics get 1-D sequences.
        y_true.extend(targets.view(-1).tolist())
        y_pred.extend(predictions.view(-1).tolist())

print("Classification Report:")
print(classification_report(y_true, y_pred))
print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")

# NOTE: only the network weights are saved. Inputs must be transformed with
# the same fitted `scaler` before being passed to a reloaded model.
torch.save(model.state_dict(), "complex_classifier.pth")
print("Model saved as complex_classifier.pth")


Epoch 1/100, Loss: 0.1613
Epoch 2/100, Loss: 0.0444
Epoch 3/100, Loss: 0.0228
Epoch 4/100, Loss: 0.0129
Epoch 5/100, Loss: 0.0075
Epoch 6/100, Loss: 0.0063
Epoch 7/100, Loss: 0.0038
Epoch 8/100, Loss: 0.0026
Epoch 9/100, Loss: 0.0028
Epoch 10/100, Loss: 0.0019
Epoch 11/100, Loss: 0.0017
Epoch 12/100, Loss: 0.0009
Epoch 13/100, Loss: 0.0008
Epoch 14/100, Loss: 0.0007
Epoch 15/100, Loss: 0.0006
Epoch 16/100, Loss: 0.0008
Epoch 17/100, Loss: 0.0007
Epoch 18/100, Loss: 0.0006
Epoch 19/100, Loss: 0.0005
Epoch 20/100, Loss: 0.0004
Epoch 21/100, Loss: 0.0004
Epoch 22/100, Loss: 0.0004
Epoch 23/100, Loss: 0.0002
Epoch 24/100, Loss: 0.0002
Epoch 25/100, Loss: 0.0002
Epoch 26/100, Loss: 0.0002
Epoch 27/100, Loss: 0.0002
Epoch 28/100, Loss: 0.0002
Epoch 29/100, Loss: 0.0002
Epoch 30/100, Loss: 0.0001
Epoch 31/100, Loss: 0.0001
Epoch 32/100, Loss: 0.0002
Epoch 33/100, Loss: 0.0001
Epoch 34/100, Loss: 0.0001
Epoch 35/100, Loss: 0.0001
Epoch 36/100, Loss: 0.0002
Epoch 37/100, Loss: 0.0001
Epoch 38/1