In [None]:
import pandas as pd
import json
import os
import argparse
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk
import numpy as np

def load_json_file(file_path):
    """Parse the UTF-8 JSON document at ``file_path``.

    Returns the decoded Python object. If opening or parsing fails for any
    reason, the error is printed and ``None`` is returned instead of raising.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as err:
        # Best-effort loader: callers are expected to check for None.
        print(f"Error loading {file_path}: {str(err)}")
        return None
    return parsed

def extract_video_name(path):
    """Return the final path component of ``path`` with its last extension removed."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem

In [13]:
prediction_path = 'output/batch_video_results.json'
references_path = 'datasets/video_prompts.json'

# Load JSON files
predictions_data = load_json_file(prediction_path)
references_data = load_json_file(references_path)

# load_json_file returns None when a file cannot be read or parsed. Stop the
# cell here: continuing would only fail later with an unrelated
# "'NoneType' object is not iterable" error.
if predictions_data is None or references_data is None:
    raise RuntimeError("Failed to load one or both JSON files. Exiting.")

# Create a dictionary of reference prompts keyed by video name
reference_dict = {}
for item in references_data:
    # Adapt this according to your reference JSON structure.
    # The reference prompt is read from the second conversation turn.
    if isinstance(item, dict) and 'video' in item and 'conversations' in item:
        reference_dict[item['video']] = item['conversations'][1]['value']

# NOTE(review): references are keyed by item['video'] verbatim, while the
# lookup below uses pred['video_name'] verbatim. If one of them carries a
# directory or file extension and the other does not, every lookup silently
# falls back to ''. extract_video_name() is defined in this notebook but is
# not applied here -- confirm the two key formats agree.

# One comparison record per prediction; a missing reference or prediction
# becomes an empty string.
comparison_data = [
    {
        'video_name': pred['video_name'],
        'reference': reference_dict.get(pred['video_name'], ''),
        'prediction': pred.get('prediction', ''),
    }
    for pred in predictions_data
]

# Convert to DataFrame
df = pd.DataFrame(comparison_data)
#Save to parquet
df.to_parquet('output/compare_evalutaion_results.parquet', index=False)

In [1]:
import pandas as pd
import re

# Reload the comparison table (video_name / reference / prediction) from disk.
# The misspelled "evalutaion" is intentional here: it must match the filename
# used when the table was written.
result_df = pd.read_parquet('output/compare_evalutaion_results.parquet')


def clean_sentence(s):
    """Normalise a prompt string before scoring.

    Replaces full-width commas with ASCII commas, removes every substring made
    of a dash, lowercase letters, optional whitespace and digits (presumably
    generator flags such as "-ar 16" -- confirm), then trims surrounding
    whitespace.
    """
    with_ascii_commas = s.replace("，", ",")
    without_flags = re.sub(r"-[a-z]+\s*\d+", "", with_ascii_commas)
    return without_flags.strip()

# Cleaned predictions as a flat list of strings.
predictions = list(map(clean_sentence, result_df['prediction']))
# Cleaned references, each wrapped in its own single-element list.
references = [[clean_sentence(ref)] for ref in result_df['reference']]


# Evaluation For Video

## BERTScore

In [1]:
import pandas as pd

# 'XXX' is a placeholder: replace it with the path of the parquet file that
# holds the prompt comparison table. Later cells read the columns 'reference',
# 'prediction', 'video_name' and 'RL_generation' from this frame.
compare_promts = pd.read_parquet('XXX')

In [3]:
from bert_score import score

def bert_sim_eval(candidate_sentence, target_sentence):
    """Score one candidate sentence against one target sentence with BERTScore.

    Returns a dict with the keys "P", "R" and "F1", each a Python float taken
    from the corresponding tensor returned by ``score``.
    """
    # ``score`` works on parallel lists, so each sentence is wrapped in a
    # one-element list.
    precision, recall, f1 = score(
        [candidate_sentence],
        [target_sentence],
        lang='en',
        verbose=True,
        model_type='bert-base-uncased',
    )
    return {"P": precision.item(), "R": recall.item(), "F1": f1.item()}

In [None]:
from tqdm import tqdm
# Per-row BERTScore precision / recall / F1 of each prediction against its reference.
P_ours = []
R_ours = []
F1_ours = []
for index, item in tqdm(compare_promts.iterrows(), total=len(compare_promts)):
    # The prediction is the candidate and the reference is the target.
    # Passing them the other way round swaps the reported precision and
    # recall (F1 is symmetric and unaffected).
    res = bert_sim_eval(
        candidate_sentence=item['prediction'],
        target_sentence=item['reference'],
    )
    P_ours.append(res['P'])
    R_ours.append(res['R'])
    F1_ours.append(res['F1'])

In [None]:
# Print the mean of P_ours, R_ours and F1_ours, in that order.
for collected in (P_ours, R_ours, F1_ours):
    print(sum(collected)/len(collected))

## X-CLIP Video Similarity

In [None]:
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
import gc


def gen_pic(compare_df):
    """Generate one CogVideoX video per row of ``compare_df``.

    Args:
        compare_df: DataFrame with an ``RL_generation`` column (the text
            prompt) and a ``video_name`` column (used to name the output).

    Each video is written to ``output/RL_genvideos/RL_gen_<video_name>`` at
    8 fps. A failure on one row is printed and the loop moves on to the next
    row. Returns ``None``.
    """
    import os  # local import so the function does not depend on another cell

    output_dir = "output/RL_genvideos"
    # Without the directory every export would fail and be swallowed below.
    os.makedirs(output_dir, exist_ok=True)

    pipe = CogVideoXPipeline.from_pretrained(
        "THUDM/CogVideoX-2b",
        torch_dtype=torch.float16
    ).to("cuda")

    # pipe.enable_model_cpu_offload()
    # pipe.enable_sequential_cpu_offload()
    pipe.vae.enable_slicing()
    pipe.vae.enable_tiling()

    total = len(compare_df)
    # Count rows with enumerate: the DataFrame index is not guaranteed to be
    # a 0-based integer range.
    for position, (_, row) in enumerate(compare_df.iterrows(), start=1):
        prompt = row['RL_generation']
        video_name = row['video_name']

        print(f"[{position}/{total}] Generating: RL_gen_{video_name}")

        # Bind the name up front so the cleanup in ``finally`` is always valid,
        # even when the pipeline call itself raises.
        video = None
        try:
            video = pipe(
                prompt=prompt,
                num_videos_per_prompt=1,
                num_inference_steps=50,
                num_frames=49,
                guidance_scale=6,
                # Same seed for every prompt.
                generator=torch.Generator(device="cuda").manual_seed(42),
            ).frames[0]

            export_to_video(video, f"{output_dir}/RL_gen_{video_name}", fps=8)

        except Exception as e:
            print(f"❌ Error generating {video_name}: {e}")

        finally:
            # Explicitly release GPU memory
            video = None
            torch.cuda.empty_cache()
            gc.collect()


In [None]:
# Generate a video for every row of the comparison table (needs a CUDA device).
gen_pic(compare_promts)

In [None]:
import av
import torch
import numpy as np
from transformers import AutoProcessor, AutoModel
from huggingface_hub import hf_hub_download
from torch.nn.functional import cosine_similarity
import pandas as pd
from tqdm import tqdm
import json

def read_video_pyav(container, indices):
    """Decode the frames of ``container`` whose positions appear in ``indices``.

    The container is rewound to the start, its first video stream is decoded
    in order, and decoding stops once the position passes ``indices[-1]``.
    Returns the selected frames converted to RGB24 arrays and stacked with
    ``np.stack`` (presumably shaped (num_frames, height, width, 3)).
    """
    container.seek(0)
    first_wanted, last_wanted = indices[0], indices[-1]

    selected = []
    for position, frame in enumerate(container.decode(video=0)):
        if position > last_wanted:
            break
        if position < first_wanted or position not in indices:
            continue
        selected.append(frame)

    return np.stack([frame.to_ndarray(format="rgb24") for frame in selected])

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    """Pick ``clip_len`` evenly spaced frame indices from a random window.

    The window spans ``int(clip_len * frame_sample_rate)`` frames and its end
    is drawn with ``np.random.randint`` from [window length, seg_len), so the
    result depends on NumPy's global random state. Returns an int64 array.
    """
    window = int(clip_len * frame_sample_rate)
    window_end = np.random.randint(window, seg_len)
    window_start = window_end - window
    positions = np.linspace(window_start, window_end, num=clip_len)
    # Clip so the last index stays strictly below the window end.
    return np.clip(positions, window_start, window_end - 1).astype(np.int64)

_XCLIP_CHECKPOINT = "microsoft/xclip-base-patch32"
# Filled on first use so the checkpoint is loaded once per kernel session,
# not once per video.
_XCLIP_CACHE = {}


def _load_xclip():
    """Return the cached (processor, model) pair, loading it on first call."""
    if "model" not in _XCLIP_CACHE:
        # Load both before storing either, so a failed load leaves the cache empty.
        processor = AutoProcessor.from_pretrained(_XCLIP_CHECKPOINT)
        model = AutoModel.from_pretrained(_XCLIP_CHECKPOINT)
        _XCLIP_CACHE["processor"] = processor
        _XCLIP_CACHE["model"] = model
    return _XCLIP_CACHE["processor"], _XCLIP_CACHE["model"]


def get_video_features(video_path, clip_len=8):
    """Extract features from a video file using XCLIP model

    Args:
        video_path: path of the video file to open with PyAV.
        clip_len: number of frames to sample from the video (default 8).

    Returns:
        The tensor produced by the model's ``get_video_features`` for the
        sampled frames, computed without gradient tracking.

    Frame positions are chosen by ``sample_frame_indices``, which uses NumPy's
    global random state, so repeated calls may sample different frames.
    """
    # Close the container as soon as the frames are decoded; leaving it open
    # leaks a file handle for every video processed.
    with av.open(video_path) as container:
        indices = sample_frame_indices(clip_len=clip_len, frame_sample_rate=1,
                                       seg_len=container.streams.video[0].frames)
        video = read_video_pyav(container, indices)

    processor, model = _load_xclip()

    inputs = processor(videos=list(video), return_tensors="pt")
    # Inference only: no autograd graph is needed for feature extraction.
    with torch.no_grad():
        video_features = model.get_video_features(**inputs)
    return video_features

def compute_video_similarity(video1_path, video2_path):
    """Return the cosine similarity of the two videos' features as a float.

    Features are extracted with ``get_video_features``, first for
    ``video1_path`` and then for ``video2_path``.
    """
    first_features, second_features = [
        get_video_features(path) for path in (video1_path, video2_path)
    ]
    return cosine_similarity(first_features, second_features).item()

def compare_videos(compare_df):
    """Compare original videos with generated videos

    Args:
        compare_df: DataFrame with the columns ``video_name``, ``reference``,
            ``Origin_generation``, ``DI_generation`` and ``similarity_score``.

    Returns:
        A tuple ``(result_path, results_df, similarity_scores)``:
        ``result_path`` is the JSON file the results are written to,
        ``results_df`` holds one row per successfully compared video sorted by
        ``video_similarity_score`` (highest first), and ``similarity_scores``
        lists the video similarities in processing order.

    A row that fails for any reason is reported and skipped. The JSON file is
    rewritten after every successful row, so it is only created once at least
    one comparison has succeeded.
    """
    results = []
    similarity_scores = []
    # Defined before the loop so it is always bound at return time, even when
    # no comparison succeeds.
    result_path = 'output/RL_gen_video_comparison_results.json'
    total = len(compare_df)

    # Count rows with enumerate: the DataFrame index is not guaranteed to be
    # a 0-based integer range.
    progress = tqdm(compare_df.iterrows(), total=total)
    for position, (_, row) in enumerate(progress, start=1):
        original_video = row['video_name']
        generated_video = f"RL_gen_{original_video}"

        try:
            # NOTE(review): gen_pic() writes its videos to output/RL_genvideos/,
            # but this reads from output/genvideos/ -- confirm which directory
            # is intended.
            similarity = compute_video_similarity(
                f"vidprom/cog_videos_example/{original_video}",
                f"output/genvideos/{generated_video}"
            )

            result = {
                'video_name': original_video,
                'generated_video': generated_video,
                'reference': row['reference'],
                'origin': row['Origin_generation'],
                'DI_prediction': row['DI_generation'],
                'video_similarity_score': similarity,
                'promt_similarity_score': row['similarity_score']
            }

            # Append to results list
            results.append(result)
            similarity_scores.append(similarity)

            # Checkpoint: rewrite the JSON file with everything collected so far
            with open(result_path, 'w') as f:
                json.dump(results, f, indent=4)

            print(f"Processed {position}/{total} videos. Current similarity: {similarity:.4f}")

        except Exception as e:
            print(f"Error comparing videos {original_video}: {e}")

    # Create DataFrame with results
    results_df = pd.DataFrame(results)
    # Sort on the column that actually exists in the result records; an empty
    # frame has no columns at all, so leave it as is.
    if not results_df.empty:
        results_df = results_df.sort_values('video_similarity_score', ascending=False)

    return result_path,results_df,similarity_scores

# Example usage:
# result_path, results_df, similarity_scores = compare_videos(compare_df)
# print(results_df.head())


In [None]:
# Run the comparison over the whole table: returns the JSON results path,
# the results DataFrame and the list of video similarity scores.
result_path,results,video_cos_sim_scores = compare_videos(compare_promts)
print(results.head())

In [None]:
import pandas as pd

# Reload the results from the JSON file written by compare_videos.
# result_path comes from the compare_videos call in the previous cell.
vid_sim_res = pd.read_json(result_path)
print(vid_sim_res.shape)
vid_sim_res.head()