# YouTube Trending Video Analytics

This project analyzes YouTube trending-video data across different regions using Python. It covers data cleaning, exploratory data analysis, sentiment analysis, and visualization.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
# Optional: For interactive visuals
# import plotly.express as px

In [None]:
# Read the trending-videos CSV into `df` and preview its first rows.
# Change the path below if your copy of the file lives elsewhere.
df = pd.read_csv(filepath_or_buffer='USvideos.csv')
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'USvideos.csv'

In [None]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple.
df.shape

In [None]:
# Number of missing (null/NaN) values in each column.
df.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row across all columns.
df.duplicated().sum()

In [None]:
# Convert the date columns to datetimes, then drop incomplete rows.
#
# `trending_date` needs an explicit format: the Kaggle trending-video CSVs
# store it as YY.DD.MM (e.g. '17.14.11'), which pandas cannot infer. Without
# `format=`, values are coerced to NaT (or day/month/year are swapped) and the
# dropna() below then silently discards those rows.
# NOTE(review): format taken from the public Kaggle dataset -- confirm it
# matches your copy of the file.
df['trending_date'] = pd.to_datetime(
    df['trending_date'], format='%y.%d.%m', errors='coerce'
)
# `publish_time` is left to pandas' own parser; unparseable values become NaT.
df['publish_time'] = pd.to_datetime(df['publish_time'], errors='coerce')
# Removes every row with a missing value in ANY column, not only the two
# date columns converted above.
df.dropna(inplace=True)
df.info()

In [None]:
# Descriptive summary statistics for the DataFrame.
df.describe()

In [None]:
# Column labels available for the analysis below.
df.columns

In [None]:
# Ten rows with the highest view counts, shown with their identifying columns only.
most_viewed = df.sort_values(by='views', ascending=False).head(10)
most_viewed.loc[:, ['title', 'channel_title', 'views']]

In [None]:
# Highest view count recorded for each distinct `tags` string; show the top ten.
grouped = (
    df.groupby('tags')['views']
    .max()
    .reset_index()
)
grouped.sort_values(by='views', ascending=False).head(10)

In [None]:
# Histogram (50 bins) of view counts with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='views', bins=50, kde=True)
plt.gca().set(
    title='View Count Distribution',
    xlabel='Views',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Sentiment analysis on video titles
def get_sentiment(text):
    """Return the TextBlob polarity score of *text*, coerced to ``str`` first."""
    blob = TextBlob(str(text))
    return blob.sentiment.polarity
# Score every title and preview the result next to the original text.
df['title_sentiment'] = df['title'].map(get_sentiment)
df.loc[:, ['title', 'title_sentiment']].head()

In [None]:
# Histogram (30 bins) of title polarity scores with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='title_sentiment', bins=30, kde=True, color='green')
plt.gca().set(
    title='Sentiment Polarity of Video Titles',
    xlabel='Polarity',
    ylabel='Frequency',
)
plt.show()

In [None]:
def tag_sentiments(tag):
    """Return the TextBlob polarity score of *tag*, coerced to ``str`` first."""
    tag_blob = TextBlob(str(tag))
    return tag_blob.sentiment.polarity
# Score every `tags` value and preview the result next to the original text.
df['sentiment_tags'] = df['tags'].map(tag_sentiments)
df.loc[:, ['tags', 'sentiment_tags']].head()

In [None]:
# Write the current DataFrame to disk in CSV format, omitting the index column.
# NOTE(review): the output name ends in "_GB" although the data was loaded from
# 'USvideos.csv', and it carries no '.csv' extension -- confirm this is intended.
df.to_csv('YouTube_Trending_Analytics_GB',index=False)

In [None]:
# Relation between tags and views: total views summed over all rows sharing
# the same `tags` string, keeping the 20 largest totals.
trending = (
    df.groupby('tags')['views']
    .sum()
    .sort_values(ascending=False)
    .head(20)
)
trending
