In [1]:
# Import required libraries
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
import numpy as np
import os

# Set up paths
PROJECT_ROOT = os.getcwd()
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
NOTEBOOKS_DIR = os.path.join(PROJECT_ROOT, "notebooks")
OUTPUTS_DIR = os.path.join(PROJECT_ROOT, "outputs")

# Create output directories
os.makedirs(OUTPUTS_DIR, exist_ok=True)

In [2]:
# 1. Temporal Analysis
df["Release Month"] = df["Created time"].dt.month_name()
monthly_stats = (
    df.groupby("Release Month")
    .agg({"Song Title": "count", "Song URL": "nunique"})
    .rename(columns={"Song Title": "Total Songs", "Song URL": "Unique Releases"})
)

# 2. Tag Analysis
tag_counts = df["Keys"].str.split(", ").explode().value_counts()


# 3. Sentiment Analysis
def analyze_sentiment(text):
    positive_words = ["magic", "enchanting", "joyful", "empowerment", "serenity"]
    negative_words = ["melancholy", "haunting", "gritty", "feral", "imperfection"]
    return sum(1 for word in positive_words if word in text.lower()) - sum(
        1 for word in negative_words if word in text.lower()
    )


df["Sentiment Score"] = df["Analysis"].apply(analyze_sentiment)

NameError: name 'df' is not defined

In [None]:
# 1. Release Timeline
fig1 = px.line(
    df.groupby(df["Created time"].dt.date).size().reset_index(name="count"),
    x="Created time",
    y="count",
    title="Daily Song Releases Over Time",
)
fig1.show()

# 2. Tag Distribution
fig2 = px.bar(
    tag_counts.reset_index(),
    x="index",
    y="Keys",
    title="Tag Usage Distribution",
    labels={"index": "Tag", "Keys": "Count"},
)
fig2.show()

# 3. Sentiment Analysis
fig3 = px.box(df, y="Sentiment Score", title="Sentiment Distribution Across Tracks")
fig3.show()

In [None]:
# 1. Predictive Modeling
df["Date Ordinal"] = df["Created time"].apply(lambda x: x.toordinal())
model = LinearRegression()
model.fit(df[["Date Ordinal"]], df["Sentiment Score"])

# Predict future sentiment
future_date = pd.to_datetime("2025-12-01").toordinal()
predicted_sentiment = model.predict([[future_date]])
print(f"Predicted sentiment for Dec 2025: {predicted_sentiment[0]:.2f}")

# 2. Correlation Analysis
correlation_matrix = df.corr(numeric_only=True)
px.imshow(correlation_matrix, text_auto=True, title="Feature Correlation Matrix").show()

In [None]:
# Save visualizations
fig1.write_html(os.path.join(OUTPUTS_DIR, "release_timeline.html"))
fig2.write_html(os.path.join(OUTPUTS_DIR, "tag_distribution.html"))
fig3.write_html(os.path.join(OUTPUTS_DIR, "sentiment_analysis.html"))

# Save processed data
df.to_csv(os.path.join(OUTPUTS_DIR, "processed_songs.csv"), index=False)

# Save analytics report
with open(os.path.join(OUTPUTS_DIR, "analytics_report.md"), "w") as f:
    f.write(
        f"""
    # Suno Music Analytics Report
    ## Generated: {pd.Timestamp.now()}
    
    ### Basic Statistics:
    - Total Tracks: {len(df)}
    - Date Range: {df['Created time'].min().date()} to {df['Created time'].max().date()}
    - Most Common Tag: {tag_counts.index[0]} ({tag_counts.iloc[0]} tracks)
    
    ### Advanced Insights:
    - Predicted Sentiment for 2025-12-01: {predicted_sentiment[0]:.2f}
    - Monthly Release Frequency: {monthly_stats['Total Songs'].mean():.1f} tracks/month
    - Positive/Negative Ratio: {len(df[df['Sentiment Score'] > 0])/len(df):.2%}
    """
    )

In [None]:
# Schedule daily reports (Linux/Mac)
# Add this to your crontab:
# 0 3 * * * /path/to/suno-analytics/start_analytics.sh >> /path/to/logs/analytics.log 2>&1

# Or for Windows Task Scheduler:
# Create a task to run the script daily at 3 AM