In [1]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [2]:
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv. The YouTube API key is read
# from the environment with os.getenv('DEVELOPER_KEY') in a later cell, so the
# secret never has to be written into the notebook itself.
load_dotenv()

True

In [3]:
import os
import re
import googleapiclient.discovery

# Set up the YouTube Data API client.
api_service_name = "youtube"
api_version = "v3"

# The key is taken from the environment so it is never stored in the notebook.
DEVELOPER_KEY = os.getenv('DEVELOPER_KEY')
if not DEVELOPER_KEY:
    # Fail fast with an actionable message; otherwise a missing key would
    # presumably only surface later as an HTTP error on the first API request.
    raise RuntimeError(
        "DEVELOPER_KEY is not set. Add it to your .env file or environment "
        "before building the YouTube API client."
    )

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY
)

In [None]:
# Captures 11 characters from [0-9A-Za-z_-] that follow either "v=" (watch
# URLs) or "/" (youtu.be, /embed/, /shorts/ ...).  The negative lookahead
# rejects candidates that are merely the first 11 characters of a longer path
# segment, e.g. "/subscriptions" must not yield "subscriptio".
_VIDEO_ID_PATTERN = re.compile(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')


def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Args:
        url: URL string such as ``https://youtu.be/<id>`` or
            ``https://www.youtube.com/watch?v=<id>``. Surrounding whitespace
            is ignored.

    Returns:
        The video ID as a string, or None when ``url`` is not a string or no
        ID can be found in it.
    """
    if not isinstance(url, str):
        return None

    match = _VIDEO_ID_PATTERN.search(url.strip())
    return match.group(1) if match else None

In [None]:
def get_video_details(video_id):
    """Fetch the title and description of a YouTube video.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys 'title', 'description' and 'video_id', or None
        when ``video_id`` is empty/None or the API response contains no items.
    """
    # Nothing to look up; avoid sending a request without a usable id.
    if not video_id:
        return None

    # Only the snippet is read below, so it is the only part requested.
    response = youtube.videos().list(part="snippet", id=video_id).execute()
    items = response.get('items', [])
    if not items:
        return None

    snippet = items[0]['snippet']
    return {
        'title': snippet['title'],
        'description': snippet['description'],
        'video_id': video_id,
    }


In [None]:
!pip install youtube-transcript-api



In [None]:
from youtube_transcript_api import YouTubeTranscriptApi

def get_transcript(video_id):
    """Return the English transcript of a video as one space-joined string.

    Works with both generations of the youtube-transcript-api interface: the
    instance method ``fetch`` when the class provides it, otherwise the older
    static ``get_transcript`` helper.

    Args:
        video_id: ID of the YouTube video.

    Returns:
        The transcript text, or None when it could not be retrieved (the
        reason is printed). This is deliberately best-effort: any failure is
        reported and mapped to None rather than raised.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer interface: iterate the fetched transcript; each snippet
            # exposes its text as an attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=['en'])
            pieces = [snippet.text for snippet in fetched]
        else:
            # Older interface: static helper returning a list of dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
            pieces = [segment['text'] for segment in segments]
        return " ".join(pieces)

    except Exception as e:
        print(f"Could not retrieve transcript for video ID {video_id}: {e}")
        return None

In [None]:
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK resources used by filter_text: the stop-word lists and the
# WordNet data backing the lemmatizer.
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)

# Shared, module-level helpers reused on every filter_text call.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def filter_text(text):
    """Normalise raw text before it is passed to the sentiment classifier.

    Steps, in order: lowercase; remove HTML tags, URLs and bracketed
    annotations such as "[music]"; delete punctuation; replace remaining
    non-word characters with spaces; drop digits; collapse whitespace; remove
    English stop words and lemmatize the remaining tokens.

    NOTE(review): the markup-aware removals now actually take effect (see the
    comment below). If the classifier was fine-tuned on text produced by the
    previous ordering, apply the same preprocessing on the training side.

    Args:
        text: Text to clean.

    Returns:
        The cleaned, space-joined tokens. For non-string input a message is
        printed and an empty string is returned.
    """
    if not isinstance(text, str):
        print(f"Non-string value encountered: {text}")
        return ""  # Return an empty string if text is not a string

    text = text.lower()

    # These removals must run BEFORE punctuation is stripped: once "<", ">",
    # "[", "]" and "://" are gone, the tag and bracket patterns can no longer
    # match and the markup leaks into the tokens.
    # Tags must start with a letter so a stray "<" (e.g. "<3") is not treated
    # as the start of a tag.
    text = re.sub(r'</?[a-z][^<>]*>', ' ', text)
    text = re.sub(r'https?://\S+|www\.\S+', ' ', text)
    text = re.sub(r'\[.*?\]', ' ', text)

    # Punctuation is deleted (not replaced) so contractions stay one token,
    # e.g. "don't" -> "dont".  Tabs and newlines are intentionally NOT deleted
    # here: deleting them glued the last word of one line to the first word of
    # the next.  They are turned into single spaces by the collapse below.
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Remove special characters and numbers, then collapse whitespace.
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\d', '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    tokens = [
        lemmatizer.lemmatize(word)
        for word in text.split()
        if word not in stop_words
    ]
    return ' '.join(tokens)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive (Colab only). The model checkpoint
# loaded in a later cell is read from a path under /content/gdrive/MyDrive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Change the working directory to the project folder on the mounted Drive.
%cd /content/gdrive/MyDrive/NLP/Sentiment_analysis

/content/gdrive/MyDrive/NLP/Sentiment_analysis


In [None]:
# Switch the working directory back to /content. The checkpoint is referenced
# by absolute path in the next cell, so it does not depend on the cwd.
%cd /content

/content


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification

# Load the sequence-classification model weights from the checkpoint directory
# on the mounted Drive; predict_sentiment uses `model` and `tokenizer` below.
checkpoint_path = '/content/gdrive/MyDrive/NLP/Sentiment_analysis/results/checkpoint-75'
model = BertForSequenceClassification.from_pretrained(checkpoint_path)
# The tokenizer is loaded from the stock "bert-base-uncased" files, not from
# the checkpoint directory.
# NOTE(review): assumes the checkpoint was fine-tuned from bert-base-uncased
# with an unmodified vocabulary — confirm against the training notebook.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
import torch

def predict_sentiment(text):
    """Classify a single text as "negative", "neutral" or "positive".

    Uses the module-level `tokenizer` and `model`. Input is truncated by the
    tokenizer (truncation=True).

    Args:
        text: A single string to classify.

    Returns:
        One of "negative", "neutral", "positive".
    """
    # NOTE(review): assumes class ids 0/1/2 were assigned in this order during
    # fine-tuning — confirm against the training label mapping.
    labels = ["negative", "neutral", "positive"]

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only: disable gradient tracking so no autograd graph is built.
    with torch.no_grad():
        logits = model(**inputs).logits

    # softmax is monotonic, so argmax over the raw logits selects the same
    # class.  .item() yields a plain int for list indexing and, like the
    # previous tensor-as-index form, fails if more than one text was passed.
    predicted_class = torch.argmax(logits, dim=-1).item()
    return labels[predicted_class]

In [None]:
def sentiment_analysis_pipeline(url):
    """Predict the sentiment of a YouTube video from its URL.

    The title, description and (when available) the English transcript are
    joined, cleaned with filter_text and classified with predict_sentiment.

    Args:
        url: URL of the YouTube video.

    Returns:
        The predicted sentiment label.

    Raises:
        ValueError: If no video ID can be extracted from ``url`` or no video
            details are found for that ID.
    """
    video_id = get_video_id(url)
    if video_id is None:
        raise ValueError(f"Could not extract a YouTube video ID from: {url!r}")

    details = get_video_details(video_id)
    if details is None:
        raise ValueError(f"No video found for ID {video_id}")

    # May be None (captions disabled, no English track, ...); in that case the
    # prediction falls back to title + description only.
    transcript = get_transcript(video_id)

    # Join with spaces so the last word of one field is not fused with the
    # first word of the next; empty/missing fields are skipped.
    fields = (details['title'], details['description'], transcript)
    text = " ".join(field for field in fields if field)

    return predict_sentiment(filter_text(text))

In [None]:
# Ask for a video URL and print the predicted sentiment. The prompt ends with
# a separator so the typed URL is not glued to it, and surrounding whitespace
# from copy/paste is removed.
url = input('Enter the url: ').strip()
print(sentiment_analysis_pipeline(url))

Enter the urlhttps://youtu.be/4QTTDmqc_3E?si=32v5f7_eSk2-VYqk
neutral


In [None]:
!pip install gradio



In [None]:
import gradio as gr

# Expose the pipeline as a minimal web UI: one text input (the video URL) and
# one text output (the predicted sentiment label).
interface = gr.Interface(
    fn=sentiment_analysis_pipeline,
    inputs="text",
    outputs="text",
    title="YouTube Video Sentiment Analysis",
    description="Enter the URL of a YouTube video to predict its sentiment.",
)

# When run in Colab this enables sharing by default and serves the app on a
# temporary public gradio.live URL (see the cell output); pass share=False to
# launch() to turn that off, or debug=True to surface errors in the notebook.
interface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0b197707b82e958294.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


