In [None]:
import os
import sqlite3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Pull the watch history out of SQLite into a DataFrame.
#
# Each `activity` row is inner-joined to its video, its channel and the
# channel's category, so `df` ends up with the columns:
#   timestamp, title, length, channel_name, category_name
query = """
SELECT activity.timestamp, video.title, video.length, channel.name AS channel_name, category.name AS category_name
FROM activity
JOIN video ON activity.video_id = video.id
JOIN channel ON activity.channel_id = channel.id
JOIN category ON channel.category_id = category.id
"""

# The database path is taken from the environment. Fail with a readable
# message instead of the opaque TypeError sqlite3.connect(None) would raise.
db_file = os.getenv("SQLITE_DB_FILE")
if db_file is None:
    raise RuntimeError("SQLITE_DB_FILE environment variable is not set")

connection = sqlite3.connect(db_file)
try:
    # pandas drives the query through the connection itself, so no explicit
    # cursor is needed.
    df = pd.read_sql_query(query, connection)
finally:
    # Close the connection even when the query fails (e.g. missing table).
    connection.close()

# Only these categories are compared in the plot.
categories_to_keep = ["Science and Technology", "AI and Programming", "Tech Review"]

# Keep the rows whose category is in the list above and, on that independent
# copy (df itself is left untouched), add the video length converted from
# seconds to minutes.
df_new = (
    df.loc[df['category_name'].isin(categories_to_keep)]
    .assign(length_min=lambda kept: kept['length'] / 60)
)

# Violin plot of video length (minutes) per category.

# Apply the dark theme before creating the figure so the figure picks it up.
plt.style.use('dark_background')

fig, ax = plt.subplots(figsize=(12, 5))

# One violin per category; inner='stick' marks each individual video inside
# the violin. The target axes are passed explicitly rather than relying on
# matplotlib's implicit "current axes".
# NOTE(review): no `hue` is given, and the effect of split=True without one
# may differ between seaborn versions - confirm the intended look.
sns.violinplot(
    data=df_new,
    x='category_name',
    y='length_min',
    split=True,
    inner='stick',
    ax=ax,
)

# Restrict the visible y-axis range to 0-240 minutes. This only limits the
# view; longer videos are still part of the data passed to violinplot.
ax.set_ylim(0, 240)

ax.set_xlabel('Video Category', fontsize=12)
ax.set_ylabel('Video Length (minutes)', fontsize=12)
ax.set_facecolor('black')
plt.show()