In [118]:
import pandas as pd
import re
import csv
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

In [121]:
def extract_video_id(link):
    """Return the value of the ``v=`` parameter found in ``link``.

    The ID is the run of letters, digits, ``_`` and ``-`` that directly
    follows the first ``v=`` and is terminated by ``&``, ``?``, ``#`` or the
    end of the string.

    Args:
        link: URL text to search. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell) are treated as "no ID".

    Returns:
        The extracted ID as a string, or None when ``link`` is not a string
        or contains no matching ``v=`` parameter.
    """
    # re.search raises TypeError on NaN/None; report "no ID" instead so one
    # empty cell cannot abort processing of an entire column.
    if not isinstance(link, str):
        return None

    # '#' is accepted as a terminator so links carrying a fragment
    # (e.g. "...watch?v=ID#t=10") still yield their ID.
    pattern = r"(?<=v=)[a-zA-Z0-9_-]+(?=&|\?|#|$)"
    match = re.search(pattern, link)
    return match.group(0) if match else None

def generate_video_ids(csv_file):
    """Add a ``video_id`` column to ``<csv_file>.csv`` and rewrite it in place.

    The CSV is read, ``extract_video_id`` is applied to every value of its
    ``link`` column, rows for which no ID could be extracted are dropped, and
    the result overwrites the original file.

    Args:
        csv_file: Path of the CSV file without the ".csv" extension.

    Returns:
        None. Any failure (missing file, missing ``link`` column, ...) is
        caught and reported with a printed message rather than raised.
    """
    file = csv_file + ".csv"
    try:
        df = pd.read_csv(file, encoding='latin-1')
        df['video_id'] = df['link'].apply(extract_video_id)
        df = df.dropna(subset=['video_id'])
        # Write with the same encoding used for reading. latin-1 maps every
        # byte to exactly one character, so this round trip preserves the
        # file's bytes; the default UTF-8 would re-encode non-ASCII text on
        # every pass and later latin-1 reads would see it garbled.
        df.to_csv(file, index=False, encoding='latin-1')
        print("Video IDs generated successfully and saved to", file)
    except Exception as e:
        # Best-effort by design: report the problem and let the caller go on.
        print(f"An error occurred: {str(e)}")
        
def csv_json(file):
    """Convert ``<file>.csv`` into ``<file>.json`` as a list of row records.

    Every CSV row becomes one JSON object keyed by column name; all columns
    present in the CSV are exported.

    Args:
        file: Path of the CSV file without the ".csv" extension. The JSON
            file is written with the same base name and a ".json" extension.

    Raises:
        KeyError: If the CSV lacks any of the "link", "title", "author" or
            "video_id" columns.
    """
    filename = file + ".csv"
    print(filename)
    data = pd.read_csv(filename, encoding='latin-1')

    # Fail before writing anything if an expected column is absent (for
    # instance when the video_id column was never generated for this file).
    required = ("link", "title", "author", "video_id")
    missing = [column for column in required if column not in data.columns]
    if missing:
        raise KeyError(f"{filename} is missing required column(s): {missing}")

    json_filename = file + ".json"
    # to_dict(orient='records') already yields one dict per row, so no manual
    # loop is needed and the file is written exactly once.
    json_dict = data.to_dict(orient='records')
    with open(json_filename, "w") as jsonfile:
        json.dump(json_dict, jsonfile, indent=4)

    print(f"CSV data converted to JSON and saved to {json_filename}")

In [130]:
# Base names of the per-emotion CSV files to process ("<emotion>.csv").
emotions = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
    'surprise',
]

# For each emotion: first add the video_id column to its CSV, then export
# that CSV to a JSON file with the same base name.
for emotion in emotions:
    generate_video_ids(emotion)
    csv_json(emotion)


Video IDs generated successfully and saved to angry.csv
angry.csv
CSV data converted to JSON and saved to angry.json
Video IDs generated successfully and saved to disgust.csv
disgust.csv
CSV data converted to JSON and saved to disgust.json
Video IDs generated successfully and saved to fear.csv
fear.csv
CSV data converted to JSON and saved to fear.json
Video IDs generated successfully and saved to happy.csv
happy.csv
CSV data converted to JSON and saved to happy.json
Video IDs generated successfully and saved to neutral.csv
neutral.csv
CSV data converted to JSON and saved to neutral.json
Video IDs generated successfully and saved to sad.csv
sad.csv
CSV data converted to JSON and saved to sad.json
Video IDs generated successfully and saved to surprise.csv
surprise.csv
CSV data converted to JSON and saved to surprise.json
