# In [None]:
# Running the NeuroTube Nexus analysis with synthetic data and showing outputs.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build the synthetic channel table (same logic as the notebook).
# NOTE: every draw below advances the shared generator, so the calls must stay
# in this exact order -- reordering them would produce a different dataset.
rng = np.random.default_rng(123)
n = 600


def _percent_scores(mean, spread):
    """Draw n normal samples, round to 1 decimal and clamp to the 0-100 range."""
    return np.clip(np.round(rng.normal(mean, spread, size=n), 1), 0, 100)


def _lognormal_counts(log_mean, log_spread):
    """Draw n log-normally distributed values and round them to whole integers."""
    return np.round(np.exp(rng.normal(log_mean, log_spread, size=n))).astype(int)


channel_ids = range(1, n + 1)
df = pd.DataFrame({
    'Channel_Name': [f'Channel_{i}' for i in channel_ids],
    'Youtuber_Name': [f'Creator_{i}' for i in channel_ids],
    'Total_Videos': rng.integers(20, 2000, size=n),
    # Raw lengths may be negative or tiny here; they are clipped right after construction.
    'Avg_Video_Length_min': np.round(rng.normal(12, 6, size=n), 1),
    'Total_Subscribers': _lognormal_counts(8, 2),
    'Members_Count': _lognormal_counts(3, 1.5),
    'AI_Generated_Content_pct': _percent_scores(15, 20),
    'Neural_Interface_Compatibility': rng.choice(
        ['Low', 'Medium', 'High'], size=n, p=[0.6, 0.3, 0.1]
    ),
    'Metaverse_Integration_Level': rng.choice(
        ['None', 'Low', 'Medium', 'High'], size=n, p=[0.4, 0.3, 0.2, 0.1]
    ),
    'Quantum_Computing_Topics': rng.choice([0, 1], size=n, p=[0.85, 0.15]),
    'Holographic_Content_Rating': _percent_scores(40, 20),
    'Engagement_Score': _percent_scores(50, 15),
    'Content_Value_Index': _percent_scores(55, 12),
})

# Keep average video length within 1-120 minutes.
df['Avg_Video_Length_min'] = df['Avg_Video_Length_min'].clip(lower=1, upper=120)

# Engagement relative to audience size; the +1 keeps the denominator >= 1 so
# channels with very few subscribers cannot blow up the ratio.
subs_in_thousands = df['Total_Subscribers'] / 1000
df['Engagement_per_1k_subs'] = (df['Engagement_Score'] / (subs_in_thousands + 1)).round(2)

# Preview the leading rows of the generated table.
print("Sample rows (first 8):")
preview_rows = df.iloc[:8]
display(preview_rows)

# Question 1: Top creators by Engagement Score and Content Value Index
# Columns shown in both leaderboards.
leaderboard_cols = [
    'Channel_Name',
    'Youtuber_Name',
    'Total_Subscribers',
    'Engagement_Score',
    'Content_Value_Index',
]


def _top_ten_by(metric):
    """Return the ten rows with the highest `metric`, limited to the leaderboard columns."""
    ranked = df.sort_values(by=metric, ascending=False)
    return ranked.head(10)[leaderboard_cols]


top_engagement = _top_ten_by('Engagement_Score')
top_value = _top_ten_by('Content_Value_Index')

print("\nTop 10 by Engagement Score:")
display(top_engagement)

print("\nTop 10 by Content Value Index:")
display(top_value)

# Horizontal bar chart of the ten most engaging channels.
# Rows are reversed because barh draws the first entry at the bottom, so the
# highest score ends up on top.
chart_rows = top_engagement.iloc[::-1]
fig, ax = plt.subplots(figsize=(10, 4))
ax.barh(chart_rows['Channel_Name'], chart_rows['Engagement_Score'])
ax.set_xlabel('Engagement Score')
ax.set_title('Top 10 Channels by Engagement Score')
fig.tight_layout()
plt.show()

# Question 2: Influence of AI, Metaverse, Neural
# Metrics summarised (by median) for every grouping below.
impact_metrics = ['Total_Subscribers', 'Engagement_Score', 'Content_Value_Index']
# Ordinal levels in ascending order; groupby alone would list them alphabetically
# (High, Low, Medium, None), which hides any trend across levels.
metaverse_levels = ['None', 'Low', 'Medium', 'High']
neural_levels = ['Low', 'Medium', 'High']

# Right-closed bins: (-0.1, 0] holds channels with exactly 0% AI content,
# followed by (0, 10], (10, 30], (30, 60] and (60, 100].
df['AI_bucket'] = pd.cut(
    df['AI_Generated_Content_pct'],
    bins=[-0.1, 0, 10, 30, 60, 100],
    labels=['0', '1-10', '11-30', '31-60', '61+'],
)
# AI_bucket is categorical, and the implicit default of `observed` differs
# between pandas versions (and triggers a FutureWarning on some). Passing it
# explicitly keeps every bucket in the table, empty ones included.
ai_group = df.groupby('AI_bucket', observed=False)[impact_metrics].median().round(2)
print("\nBy AI content bucket (median):")
display(ai_group)

# reindex puts the rows in ordinal order; a level absent from the data would
# appear as a NaN row rather than silently disappearing.
metro_group = (
    df.groupby('Metaverse_Integration_Level')[impact_metrics]
    .median()
    .round(2)
    .reindex(metaverse_levels)
)
print("\nBy Metaverse Integration Level (median):")
display(metro_group)

neural_group = (
    df.groupby('Neural_Interface_Compatibility')[impact_metrics]
    .median()
    .round(2)
    .reindex(neural_levels)
)
print("\nBy Neural Interface Compatibility (median):")
display(neural_group)

# Question 3: Video length correlation
video_length = df['Avg_Video_Length_min']

# Scatter of engagement against average video length.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(video_length, df['Engagement_Score'], alpha=0.5)
ax.set_xlabel('Avg Video Length (min)')
ax.set_ylabel('Engagement Score')
ax.set_title('Engagement vs Avg Video Length')
fig.tight_layout()
plt.show()

# Series.corr with default arguments (Pearson) against each outcome metric.
corr_len_eng = video_length.corr(df['Engagement_Score'])
corr_len_val = video_length.corr(df['Content_Value_Index'])
print(f"\nCorrelation (length, engagement): {corr_len_eng:.3f}")
print(f"Correlation (length, content value): {corr_len_val:.3f}")

# Question 4: Quantum and Holographic
# Median metrics for channels without (0) and with (1) quantum-computing topics.
quantum_metrics = ['Engagement_Score', 'Content_Value_Index', 'Total_Subscribers']
quant_group = df.groupby('Quantum_Computing_Topics')[quantum_metrics].median().round(2)
print("\nBy Quantum topic coverage (median):")
display(quant_group)

# Correlation of the holographic rating with each outcome metric.
holo_rating = df['Holographic_Content_Rating']
corr_holo_eng = holo_rating.corr(df['Engagement_Score'])
corr_holo_val = holo_rating.corr(df['Content_Value_Index'])
print(f"\nCorrelation (holographic rating, engagement): {corr_holo_eng:.3f}")
print(f"Correlation (holographic rating, value): {corr_holo_val:.3f}")

# Scatter of engagement against holographic rating.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(holo_rating, df['Engagement_Score'], alpha=0.5)
ax.set_xlabel('Holographic Content Rating')
ax.set_ylabel('Engagement Score')
ax.set_title('Engagement vs Holographic Rating')
fig.tight_layout()
plt.show()

# Question 5: Innovation score and best balance channels
# Ordinal levels mapped onto a 0-1 scale.
met_map = {'None':0, 'Low':0.25, 'Medium':0.6, 'High':1.0}
neural_map = {'Low':0.2, 'Medium':0.6, 'High':1.0}

# Each term is a 0-1 component multiplied by its weight (weights sum to 1.0).
ai_term = df['AI_Generated_Content_pct'] / 100 * 0.3
holo_term = df['Holographic_Content_Rating'] / 100 * 0.3
value_term = df['Content_Value_Index'] / 100 * 0.2
metaverse_term = df['Metaverse_Integration_Level'].map(met_map) * 0.1
neural_term = df['Neural_Interface_Compatibility'].map(neural_map) * 0.1

# Summed left to right in this fixed order so the floating-point result is stable.
score = ai_term + holo_term + value_term + metaverse_term + neural_term
df['Innovation_Score'] = (score * 100).round(2)

# NOTE(review): this ranking is lexicographic -- Innovation_Score decides the
# order and Engagement_per_1k_subs only breaks exact ties.
ranking_keys = ['Innovation_Score', 'Engagement_per_1k_subs']
balance_cols = [
    'Channel_Name',
    'Innovation_Score',
    'Engagement_per_1k_subs',
    'Total_Subscribers',
    'Engagement_Score',
]
top_balance = df.sort_values(by=ranking_keys, ascending=False).head(12)[balance_cols]
print("\nTop channels balancing innovation and retention:")
display(top_balance)

# Headline findings, printed as a bulleted text summary.
insights = [
    f"Median engagement score overall: {df['Engagement_Score'].median():.1f}",
    f"Median content value index overall: {df['Content_Value_Index'].median():.1f}",
    f"Correlation between avg length and engagement: {corr_len_eng:.3f} (near zero implies weak/no linear relation)",
    f"Holographic rating correlation with engagement: {corr_holo_eng:.3f}",
]

print("\nKey insights:")
for entry in insights:
    print("-", entry)
