In [1]:
!pip install youtube-transcript-api

Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.3-py3-none-any.whl.metadata (17 kB)
Downloading youtube_transcript_api-0.6.3-py3-none-any.whl (622 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m622.3/622.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.3


In [4]:
import os
import requests
import pandas as pd
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
from bs4 import BeautifulSoup
import json

# ... (Existing code) ...

# TRIPADVISOR: Get Post Content (Newly added function)
def get_tripadvisor_content(url):
    """Fetches content from Tripadvisor URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            # Assuming Tripadvisor content is within a specific div
            content = soup.find("div", class_="_T FKffI").text.strip() if soup.find("div", class_="_T FKffI") else 'No content found'
            # Replace with actual class name for content
            return {"URL": url, "Content": content}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}



# YOUTUBE: Get Video Transcripts
def get_video_id(urls):
    """Extracts YouTube video IDs from a list of URLs."""
    video_ids = []
    for url in urls:
        parsed_url = urlparse(url)
        if "youtu.be" in parsed_url.netloc:
            video_id = parsed_url.path.lstrip("/")
        elif "youtube.com" in parsed_url.netloc and "watch" in parsed_url.path:
            video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        else:
            video_id = None

        if video_id:
            video_ids.append(video_id)
        else:
            print(f"Skipping unsupported URL: {url}")

    return video_ids

def get_youtube_transcripts(video_ids):
    """Fetches transcripts for a list of YouTube video IDs."""
    transcripts = {}
    for video_id in video_ids:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            transcripts[video_id] = {"Transcript": "\n".join([entry["text"] for entry in transcript])}
        except Exception as e:
            transcripts[video_id] = {"Error": f"Error: {e}"}
    return transcripts

# TIKTOK: Get Video Titles & Descriptions
def get_tiktok_video_details(url):
    """Fetches video title & description from TikTok using web scraping."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            title = soup.find("title").text
            return {"URL": url, "Title": title}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

# FACEBOOK: Get Post Content
def get_facebook_post_details(url):
    """Fetches post title and content from a Facebook post URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            content = soup.find("meta", {"property": "og:description"})
            description = content['content'] if content else 'No description available'
            return {"URL": url, "Description": description}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

# INSTAGRAM: Get Post Caption
def get_instagram_post_details(url):
    """Fetches post caption from an Instagram post URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            script_tag = soup.find("script", text=lambda text: text and 'window._sharedData' in text)
            json_data = script_tag.string.split('= ', 1)[1].rstrip(';')
            data = json.loads(json_data)
            caption = data['entry_data']['PostPage'][0]['graphql']['shortcode_media']['edge_media_to_caption']['edges'][0]['node']['text']
            return {"URL": url, "Caption": caption}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

#  REDDIT: Get Post Title
def get_reddit_post_details(url):
    """Fetches post title from a Reddit URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            title = soup.find("h1", class_="yh1m6r").text if soup.find("h1", class_="yh1m6r") else 'No title found'
            return {"URL": url, "Title": title}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

# TRAVELINGTED & GODIGIT: Get Main Content
def get_travelingted_content(url):
    """Fetches content from TravelingTed URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            content = soup.find("div", class_="entry-content").text.strip() if soup.find("div", class_="entry-content") else 'No content found'
            return {"URL": url, "Content": content}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

def get_godigit_content(url):
    """Fetches content from Godigit URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            content = soup.find("div", class_="article-body").text.strip() if soup.find("div", class_="article-body") else 'No content found'
            return {"URL": url, "Content": content}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

def get_repec_content(url):
    """Fetches content from Repec URL."""
    try:
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            # Find the relevant content area on the Repec page
            # Replace 'repec_content_class' with the actual class name
            content = soup.find("div", class_="repec_content_class").text.strip() if soup.find("div", class_="repec_content_class") else "No content found"
            return {"URL": url, "Content": content}
        else:
            return {"URL": url, "Error": "Failed to fetch"}
    except Exception as e:
        return {"URL": url, "Error": str(e)}

def get_outlooktraveller_content(url):
  try:
    response = requests.get(url)
    if response.status_code == 200:
      soup = BeautifulSoup(response.text, "html.parser")
      content = soup.find("div", class_="article-body").text.strip() if soup.find("div", class_="article-body") else "No content found"
      return {"URL": url, "Content": content}
    else:
      return {"URL": url, "Error": "Failed to fetch"}
  except Exception as e:
    return {"URL": url, "Error": str(e)}

# MAIN EXECUTION
if __name__ == "__main__":
    # YouTube URLs
    youtube_urls = [
        "https://youtu.be/7ToBt0KWJw8?si=fE264vIp28GGSUk9",
        "https://youtu.be/12ViUYWuZ6Y?si=Ykr3jwCQohqvxb0x",
        "https://youtu.be/ux56nq7f3Rs?si=RIn5VW4ZchIQT9IU",
        "https://youtu.be/TJY-vcYczjc?si=bxTM-MgcmNZGZi_v",
        "https://youtu.be/Msl7BdI2Mys?si=tST2WMxAEmQ5NQJN",
        "https://youtu.be/nuYldIP_qIo?si=ArYGtCN-KJ0n7ecF",
        "https://youtu.be/NVaMi1E8-9s?si=kPVtBEe7nYyFnA4v",
        "https://youtu.be/bkSkjoS0AkY?si=_SkierzHYtrD0A5z",
        "https://youtu.be/De-9d_GIqCY?si=kxZPjJxQSE4kX_Ys",
        "https://youtu.be/OylS_5L0dAg?si=D7ogRAVHoXteRoyo",
        "https://youtu.be/R3vIgmzZx4o?si=YLNkR4aO6xZ_1B95",
        "https://youtu.be/r0EwuTqmhTI?si=cH-mE1Fq-_8YmGzj",
        "https://youtu.be/FQPiDkEUDfw?si=l7UQ6aBxLeiMDa8A",
        "https://youtu.be/FQPiDkEUDfw?si=CnbXcpSOQH0cTHXo",
        "https://youtu.be/Wpt_7rQ5Feo?si=HtwFTFfyoNSjFBed",
        "https://youtu.be/AGI_ZcqvAB0?si=qSNvcq9b9OAsagSH",
        "https://youtu.be/Ik7KqKZoI8A?si=nXlXUz_Nnbbai7N3",
        "https://youtu.be/PxeF-681Yi0?si=7zw5Q_eFgxXzT09O",
        "https://youtu.be/-aA0RLJptiU?si=k02MeXuNTcc7Hcpa",
        "https://youtu.be/-aA0RLJptiU?si=RdA7tlF7SibUxqYx",
        "https://youtu.be/wxE2SribvWk?si=HiHZ3yqVnIa5KeFz",
        "https://youtu.be/q7PU0iWXhhM?si=WizC4Y5KFQWBBBmt",
        "https://youtu.be/De7l7XoSlM0?si=xSX2z997VhtLmGaM",
        "https://youtu.be/tMOr50wt5pM?si=tiYvEgR2X1RS8OfC",
        "https://youtu.be/bkSkjoS0AkY?si=sKHLaLPf1SESjvhd",
        "https://youtu.be/Iv7z_5f7CqQ?si=19V7Vh6Ilh35aO8F",
        "https://youtu.be/yMe6SE9Vef4?si=6H3WrVkkaiTZsmAv",
        "https://youtu.be/SGiHLYwtTUU?si=9I67T723tZwDS-P7",
        "https://youtu.be/8woWFDiFYpQ?si=6JQefDxiMvTRqi78",
        "https://youtu.be/EiHxfypxMwU?si=KWmnSRYY0E7xP4rb",
        "https://youtu.be/khdA5j_MYGU?si=9yjUNM08PhwNRq_J",
        "https://youtu.be/ietY8qQ_2_M?si=VMVXfTkkF6KEOl2t",
        "https://youtu.be/VzoLNqRyuBs?si=WL6kU0h2LDzkbIRX",
        "https://youtu.be/zpSg7am1xZI?si=NGXNP0RgV8moUyIS",
        "https://youtu.be/3v8mUoQBbSs?si=l_ayffsMjyK9X-Rx",
        "https://youtu.be/qLO5PSoonjQ?si=ofXYJlWOQwL5gMUt",
        "https://youtu.be/bQIe_cWTfrM?si=NRnWR0nKLBaZYFX6",
        "https://youtu.be/NtVqZd2oIxo?si=wiWs_U6_zu-2d6LH",
        "https://youtu.be/S7d07mFjXaY?si=F62fWCRHdPJ_1pMs",
        "https://youtu.be/J5SdnBPBvRM?si=Lq3irjG4Z1lRE--c",
        "https://youtu.be/H-lxIBR-u-Q?si=XogN8_bOjruhpgYG",
        "https://youtu.be/Oxptm7fx9XA?si=nvc-hEmHd9iGK5h7",
        "https://youtu.be/4XR-UGvU1yM?si=o0UQ79PV1QLgphfP",
        "https://youtu.be/N_xl9Z75W80?si=FbO0X7W269Dw-Hu7",
        "https://youtu.be/cFtfR4pxkbM?si=M0fKDim64_vHQtEy",
        "https://youtu.be/0fLVc5_2oLs?si=BE2w0CM0NXqIccxp",
        "https://youtu.be/Z-AFPgX1n9c?si=DnOY0J-8_PIkpHsH",
        "https://youtube.com/shorts/0jFrNow4FIM?si=NHEzAZTx03B_sxzG",
        "https://youtu.be/a4ra88NcmUY?si=xbSuNjkPE0BkVL4m",
        "https://youtu.be/S7d07mFjXaY?si=i7i8H6p2a6jSv7Se",
        "https://youtu.be/Wpt_7rQ5Feo?si=LzyBehWaeb_uLC4U",
        "https://youtu.be/ietY8qQ_2_M?si=NFNElLp71nlcT5hw",
        "https://youtu.be/-aA0RLJptiU?si=xzBwjGl2l3zSPpnp",
        "https://youtu.be/NVaMi1E8-9s?si=UACKd2EcEibdaDEo",
        "https://youtu.be/wxE2SribvWk?si=WgR0SEDK-oB4yDvl",
        "https://youtu.be/VzoLNqRyuBs?si=v-Mun3S3fswDL5i5",
        "https://youtu.be/PxeF-681Yi0?si=8tNa5Pf00JkeOWCv",
        "https://youtu.be/bQIe_cWTfrM?si=ZxqLkEBkW9Vt8bvm",
        "https://youtu.be/wVktdXS_mkY?si=MaLepWAl0U0EXtLt",
        "https://youtu.be/Ik7KqKZoI8A",
        "https://youtu.be/O-9vhxhs1YE",
        "https://youtu.be/7ToBt0KWJw8",
        "https://youtu.be/JiwEYrlD92M?t=59",
        "https://youtu.be/7PxxVJ40ZXM?si=fNGOKhNyYk4Pwt1t",
        "https://youtu.be/Pp2flRi-ZHQ?si=fu07YG1aMlxqmRxw",
        "https://youtu.be/fOTGMsFAVpI?si=GqzokJQv_oW5XSi_",
        "https://youtu.be/YmAAz3s8A8c?si=T-Y0EJRCOgoAlDAR",
        "https://youtu.be/EY5seEH0fTo?si=cU5bHhWebaE0wOYB",
        "https://youtu.be/6wL9wA09W_4?si=ilpr5ol9ZMYsoq2i",
        "https://youtu.be/f_W5LOk9UDY?si=OCxNs6FQdt3Rm2v_",
        "https://youtu.be/OylS_5L0dAg?si=Rda1jZwodF8d5bWn",
        "https://youtu.be/UGdL4eIwc8o?si=PCJkgrixfGRzbSyK",
        "https://youtu.be/pTOmiZOW-ys?si=qtnYDN8dcnJa4q-S",
        "https://youtu.be/HBsKOYuai3U?si=zpnXLXfhKbFECbb",
        "https://youtu.be/wKgx-_9mP4U?si=BQmIDRqGv-x2dhFC",
        "https://youtu.be/_6mw1tM5MoQ?si=KbryHUXRtGoCSN4M",
        "https://youtu.be/AbGouj-_p_s?si=ilndArQTttE-Atcx",
        "https://youtu.be/gMb4PmQab-c?si=pc1nj03XJkgVeAhx",
        "https://youtu.be/fU0e98oSluk?si=26tc42Lh-eNmIfb3",
        "https://youtu.be/8R1sUrhTiGU?si=H5qqJapIU6_dcFwq",
        "https://youtu.be/lQwoix0KRvo?si=DLLa5Tw5RRbND3tG",
        "https://youtu.be/18GtEQRUfdg?si=fxhjY-eGDSTM3m_M",
        "https://youtu.be/UqFPmhSNioE?si=FXde2cWoBa6Iv4oT",
        "https://youtu.be/Xo0bKpWzwNg?si=WBlu1v3IdsK7nFmg",
        "https://youtu.be/ehWrrR9aKsg?si=PY6X36unUTw1zf1b",
        "https://youtu.be/B6afu7Tpl30?si=L9vVJ8jF692lfkDm",
        "https://youtu.be/4O4tALBRRTY?si=Mfid-ijQDDN4ttCK",
        "https://youtu.be/O-zqSfpc3R8?si=0afWJGwSR_ZjCnXw",
        "https://youtu.be/C-unk9O32T0?si=iuMV2PxrwtDR-FyR",
        "https://youtube.com/shorts/3dFfYUnztcM?si=d02yQZxd-dxG0Y2M",
        "https://youtu.be/Ihv04VaoKI8?si=680DuqZU_YNMQ8th",
        "https://youtu.be/Z-AFPgX1n9c?si=vkHTe4zzGPpt7eck",
        "https://youtu.be/KNeyLMw1XUY?si=Q06T6jnoz7LpS0z-",
        "https://youtu.be/wKgx-_9mP4U?si=nEiVbfgmlNM36RIY",
        "https://youtu.be/twAPhBna7_I?si=6ccJ0OrTv_2gayHR",
        "https://youtu.be/dlF95WCuoXY?si=zAimCAZMcoZq-n2Y",
        "https://youtu.be/0bFs7ZYUxGQ?si=tO7bJdi857M5G1O0",
        "https://youtu.be/VyAn4wgUchs?si=aOVtosNbUb-oxBfz",
        "https://youtu.be/5nbI3p-28SM?si=WY6q8b7dtaZEEYOz",
        "https://youtube.com/shorts/bXmccB9Ga74?si=KDYI5d3Z-Tt6kJp5",
        "https://youtu.be/xR98d5JPJbY?si=DA1rNlxafSoTHnfO",
        "https://youtube.com/shorts/Iv7z_5f7CqQ?si=ooDaDmMAl2dBRQLe",
        "https://youtu.be/mAzJquRtN7k?si=3DIUYd0_Ihofyc9E",
        "https://youtu.be/5YicpQphqWw?si=6kMft49Bn03qRNKx",
        "https://youtu.be/l2fef7sAKXE?si=AJIVkXVDmIdim-TN",
        "https://youtu.be/WZ_ypoWYduw?si=KBfbHKP0mZfdquhG",
        "https://youtube.com/shorts/_6u5NotHGD4?si=X0fH1tbDt3tOrh6K",
        "https://youtu.be/gaLWS85uCzA?si=hCyLGYmgJxKgYKOw",
        "https://youtube.com/shorts/eo4j_3EAAPU?si=vNW4BLQmgahUAZaL",
        "https://youtu.be/oeVHVa1KDM0?si=rJHU-r5D8O7f4m16",
        "https://youtu.be/IgikWpbM1_0?si=xryayNNbt09GMjBj",
        "https://youtu.be/-eNSdFdyw5A?si=KdcWa66eiafFO1sT",
        "https://youtube.com/shorts/9rlte0bI18E?si=fFXXW9EoaDDYZ_RL",
        "https://youtu.be/jpx07kI6FTs?si=gqjn8L7b_c6Yh-Ad",
        "https://youtu.be/a1EDBt4ya44?si=o9OwcaOKCieB-8Vu",
        "https://youtube.com/shorts/fywpx8nihHo?si=pwzZ0cYeNd6_efZN",
        "https://youtu.be/lflY_l0wZFQ?si=tLJQqzTr38p-gogB",
        "https://youtu.be/sV1Z2LXtHqc?si=gNNjTHWbSzlKDG3O",
        "https://youtu.be/kFpqL9placI?si=nrdP4SPkzlBycOaI",
        "https://youtu.be/7PxxVJ40ZXM?si=Rl5sxMQq2jmSH5eW",
        "https://youtu.be/4ORpgihjExs?si=fceWDYX9a9UYpJvM",
        "https://youtu.be/Zc2TNy6411w?si=8vtCGP-jKU8SHd7Z",
        "https://youtu.be/qcJXEI8-FFo?si=4TUanlDcXs22eUj3",
        "https://youtu.be/c52XZfPK5YE?si=upodXmkN9zZ84HpY",
        "https://youtu.be/N_xl9Z75W80?si=wKrdOiAInahFvecP",
        "https://youtu.be/0tyHQkz2rwg?si=RRynyTNF0n2qIvTP",
        "https://youtu.be/OAoRsMSXfiQ?si=CkPqadOs2HqPcIM8",
        "https://youtu.be/Wq-JwHf4NWA?si=90MUShsIB-EYZ4vH",
        "https://youtu.be/Zz3okD69zOQ?si=8Gp756zRmYzHkx8P",
        "https://youtu.be/J6_KB4B6Ox8?si=Rb6NXyMVdKZypiqw",
        "https://youtu.be/_6mw1tM5MoQ?si=8NL0uQTJLr1VRED9",
        "https://youtu.be/QAW4z3WMf7E?si=uTxFAzdaWj6RFZNO"

    ]

    video_ids = get_video_id(youtube_urls)
    youtube_data = get_youtube_transcripts(video_ids)

    # TikTok URLs
    tiktok_urls = [
        "https://www.tiktok.com/%40melissareyns1/video/7455629613805456673",
        "https://www.tiktok.com/%40drscam2/video/7452477275141885217",
        "https://www.tiktok.com/%40travelwithchris22/video/7439472872080297248",
        "https://www.tiktok.com/%40travelvlogedits/video/7417029988039281952",
        "https://www.tiktok.com/%40arinashoco/video/7379680905146699028",
        "https://www.tiktok.com/%40teganh_/video/7348120227055455520",
        "https://www.tiktok.com/%40chloelawfordtravels/video/7332908232811236640",
        "https://vt.tiktok.com/ZSMebvM2R/",
        "https://vt.tiktok.com/ZSMebho3J/",
        "https://vt.tiktok.com/ZSMeb9Gc8/",
        "https://vt.tiktok.com/ZSMebBd9W/",
        "https://vt.tiktok.com/ZSMebff2S/",
        "https://vt.tiktok.com/ZSMebm8no/",
        "https://vt.tiktok.com/ZSMebPHVH/",
        "https://vt.tiktok.com/ZSMegdvhq/",
        "https://vt.tiktok.com/ZSMebGxx1/"


    ]

    tiktok_data = [get_tiktok_video_details(url) for url in tiktok_urls]

    # Facebook, Instagram, Reddit, TravelingTed, Godigit URLs
    social_media_urls = {
        "Facebook": [
            "https://www.facebook.com/iamkarlrock/posts/beware-of-these-tourist-scammers-in-sri-lanka/551853340097754/",
            "https://www.facebook.com/Daleroxxu/videos/avoid-this-scam-in-colombo-sri-lanka-/1341582819573619/",
            "https://www.facebook.com/groups/srilankaadvice/",
            "https://www.facebook.com/p/Tourist-scams-in-Sri-Lanka-100064228086446/?locale=cy_GB",
            "https://www.facebook.com/story.php/?id=100064228086446&story_fbid=599824652168466",
            "https://m.facebook.com/story.php?id=100064228086446&story_fbid=853577886793140",
            "https://www.facebook.com/permalink.php/?id=100064228086446&story_fbid=599820545502210",
            "https://m.facebook.com/story.php/?id=100064228086446&story_fbid=830182959132633"
        ],
        "Instagram": [
            "https://www.instagram.com/pulsetoday/p/C-cFEnNBvnp/",
            "https://www.instagram.com/dailymirrorlk/p/DBK5_5XtvtU/",
            "https://www.instagram.com/usembsl/p/Cjt9kESImhc/",
            "https://www.instagram.com/memoriesfromabroad_/reel/C7bXpRFsGkb/",
            "https://www.instagram.com/crossbodyandacarryon/reel/C7gyDTOOtNz/",
            "https://www.instagram.com/arugam_bay/reel/C9Af6qboDHZ/",
            "https://www.instagram.com/projecttravelworld/reel/DA7bgGOP07_/",
            "https://www.instagram.com/daily_ft/p/DBLGstoAb-D/",
            "https://www.instagram.com/culture_blender/reel/DBbaaArs7wq/",
            "https://www.instagram.com/culture_blender/reel/DFFx9pyCA4I/",
            "https://www.instagram.com/emily.parnell/reel/DFxsMm5TXIY/",
            "https://www.instagram.com/riskyorlucky/reel/DErDIBtIsRQ/",
            "https://www.instagram.com/travelgram.sl/reel/DEcwnpoPHGv/",
            "https://www.instagram.com/julie.b.christensen/reel/DB_lCC0ML6s/",
            "https://www.instagram.com/julie.b.christensen/reel/DB_lCC0ML6s/?hl=am-et&locale=ru",
            "https://www.instagram.com/projecttravelworld/reel/DA7bgGOP07_/",
            "https://www.instagram.com/the_hangry_tourist/reel/C_p7XAqzGIO/",
            "https://www.instagram.com/daleroxxu/reel/C97QcgbMcGc/",
            "https://www.instagram.com/memoriesfromabroad_/reel/C7bXpRFsGkb/",
            "https://www.instagram.com/haa.official/reel/C1ki_kSyVZp/",
            "https://www.instagram.com/travelgram.sl/reel/DEcwnpoPHGv/",
            "https://www.instagram.com/julie.b.christensen/reel/DB_lCC0ML6s/",


        ],
        "Reddit": [
            "https://www.reddit.com/r/srilanka/comments/166wtw8/popular_road_side_scams_in_sri_laka/",
            "https://www.reddit.com/r/srilanka/comments/1hfg6ry/matara_rest_house_double_pricing_system_for/"
        ],
        "TravelingTed": [
            "https://travelingted.com/2024/08/12/yala-national-park-the-worst-tour-and-sri-lanka-scams/",

        ],
        "Godigit": [
            "https://www.godigit.com/international-travel-insurance/tourist-scams/tourist-scams-in-sri-lanka",

        ],
        "Tripadvisor": [
            "https://www.tripadvisor.com/ShowTopic-g293961-i8983-k8326870-Discriminatory_foreigner_pricing_not_wise_long_term-Sri_Lanka.html",

        ],
        "repec": [
            "https://ideas.repec.org/a/ibn/ibrjnl/v13y2020i8p18.html"
        ],
        "outlooktraveller": [
            "https://www.outlooktraveller.com/editors-picks/how-to-prevent-common-tourist-scams-in-sri-lanka"
        ]
    }

    facebook_data = [get_facebook_post_details(url) for url in social_media_urls["Facebook"]]
    instagram_data = [get_instagram_post_details(url) for url in social_media_urls["Instagram"]]
    reddit_data = [get_reddit_post_details(url) for url in social_media_urls["Reddit"]]
    travelingted_data = [get_travelingted_content(url) for url in social_media_urls["TravelingTed"]]
    godigit_data = [get_godigit_content(url) for url in social_media_urls["Godigit"]]
    tripadvisor_data = [get_tripadvisor_content(url)for url in social_media_urls["Tripadvisor"]]
    repec_data = [get_repec_content(url)for url in social_media_urls["repec"]]
    outlooktraveller_data = [get_outlooktraveller_content(url)for url in social_media_urls["outlooktraveller"]]

    # Combine all data into one DataFrame
    all_data = {
        "YouTube": [{"Video_ID": video_id, **data} for video_id, data in youtube_data.items()],
        "TikTok": tiktok_data,
        "Facebook": facebook_data,
        "Instagram": instagram_data,
        "Reddit": reddit_data,
        "TravelingTed": travelingted_data,
        "Godigit": godigit_data,
        "Tripadvisor" : tripadvisor_data,
        "repec" : repec_data,
        "outlooktraveller": outlooktraveller_data ,
    }

    with pd.ExcelWriter("transcript_data.xlsx") as writer:
        for platform, data in all_data.items():
            df = pd.DataFrame(data)
            df.to_excel(writer, sheet_name=platform, index=False)

    print("Data saved to 'transcript_data.xlsx'")


Skipping unsupported URL: https://youtube.com/shorts/0jFrNow4FIM?si=NHEzAZTx03B_sxzG
Skipping unsupported URL: https://youtube.com/shorts/3dFfYUnztcM?si=d02yQZxd-dxG0Y2M
Skipping unsupported URL: https://youtube.com/shorts/bXmccB9Ga74?si=KDYI5d3Z-Tt6kJp5
Skipping unsupported URL: https://youtube.com/shorts/Iv7z_5f7CqQ?si=ooDaDmMAl2dBRQLe
Skipping unsupported URL: https://youtube.com/shorts/_6u5NotHGD4?si=X0fH1tbDt3tOrh6K
Skipping unsupported URL: https://youtube.com/shorts/eo4j_3EAAPU?si=vNW4BLQmgahUAZaL
Skipping unsupported URL: https://youtube.com/shorts/9rlte0bI18E?si=fFXXW9EoaDDYZ_RL
Skipping unsupported URL: https://youtube.com/shorts/fywpx8nihHo?si=pwzZ0cYeNd6_efZN


  script_tag = soup.find("script", text=lambda text: text and 'window._sharedData' in text)


Data saved to 'transcript_data.xlsx'
