In [None]:
!pip install --upgrade --force-reinstall python-docx --quiet


In [None]:
from docx import Document
from docx.shared import Inches

import os

# The four platform sections share one layout (heading, intro paragraph, table
# placeholder, analysis placeholder), so they are described as data and emitted
# by a single loop rather than four copy-pasted stanzas.
# Each entry: (placeholder key, section heading, introductory paragraph).
platform_sections = [
    (
        "spotify",
        'Spotify Engagement and Metrics',
        "Spotify remains the leading music streaming platform for rap artists. The following table summarizes key performance metrics such as monthly listeners, follower growth, playlist reach, and conversion rates.",
    ),
    (
        "youtube",
        'YouTube Metrics and Visibility',
        "YouTube continues to be a vital platform for rap music video distribution and subscriber growth. Key statistics like daily and monthly video views, subscriber count, and rank provide insights into visibility trends.",
    ),
    (
        "instagram",
        'Instagram Followers and Engagement',
        "Instagram plays a critical role in shaping the social image and brand value of rap artists. We examine follower counts, engagement rates, and growth dynamics to understand the platform's contribution.",
    ),
    (
        "tiktok",
        'TikTok Virality and Influence',
        "TikTok has emerged as a game-changer for artist discovery and song promotion. This section highlights the follower base, like counts, and other performance data for rap content on TikTok.",
    ),
]

# Create a new Word document
doc = Document()

# Title
doc.add_heading('An Analytical Overview of the Rapping Industry (2025 Edition)', level=0)

# Introduction
doc.add_heading('Introduction', level=1)
doc.add_paragraph(
    "The rapping industry has undergone significant transformation in recent years due to the widespread influence of digital platforms. "
    "This report presents a comprehensive analysis of four major platforms—Spotify, YouTube, Instagram, and TikTok—and their impact on rap artists' reach, engagement, and performance metrics. "
    "By synthesizing publicly available and proprietary data, we aim to provide a strategic snapshot of the industry's digital health."
)

# Platform sections (Spotify, YouTube, Instagram, TikTok)
for platform_key, section_heading, section_intro in platform_sections:
    doc.add_heading(section_heading, level=1)
    doc.add_paragraph(section_intro)
    # Each placeholder sits alone in its own paragraph: one marks where the
    # platform's table goes, the other where the LLM analysis goes.
    doc.add_paragraph("{{table_" + platform_key + "_stats}}")
    doc.add_paragraph("{{analysis_" + platform_key + "}}")

# Conclusion
doc.add_heading('Conclusion', level=1)
doc.add_paragraph(
    "The four platforms studied—Spotify, YouTube, Instagram, and TikTok—each offer unique engagement opportunities for rap artists. "
    "Understanding these metrics provides a competitive edge in developing marketing strategies and anticipating future trends in the industry. "
    "The following placeholders are dynamically generated to insert up-to-date analysis from LLM models and visual summaries based on real-time data."
)


output_path = "/content/sample_data/Rap_Industry_Analysis_Report_Template.docx"
# Create the target directory if it is missing so the save does not fail on a
# path that does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
doc.save(output_path)

output_path


In [None]:
import pandas as pd
import os


# Sample per-platform statistics. Each key is the CSV file name to write; each
# value is a list of rows whose first row is the header.
tables = {
    "spotify.csv": [
        ["Artist", "Followers (M)", "Popularity", "New Album (2025)"],
        ["Lil Nas X", 12.3, 89, "Montero Reloaded"],
        ["Doja Cat", 15.1, 91, "Planet Her: Deluxe"],
        ["Jack Harlow", 9.7, 86, "White Men Rap Too"]
    ],
    "youtube.csv": [
        ["Artist", "Subscribers (M)", "Popularity", "New Album (2025)"],
        ["Lil Nas X", 10.2, 88, "Montero Reloaded"],
        ["Doja Cat", 14.0, 90, "Planet Her: Deluxe"],
        ["Jack Harlow", 8.5, 84, "White Men Rap Too"]
    ],
    "instagram.csv": [
        ["Artist", "Followers (M)", "Popularity", "New Album (2025)"],
        ["Lil Nas X", 13.5, 87, "Montero Reloaded"],
        ["Doja Cat", 18.2, 93, "Planet Her: Deluxe"],
        ["Jack Harlow", 11.1, 85, "White Men Rap Too"]
    ],
    "tiktok.csv": [
        ["Artist", "Followers (M)", "Popularity", "New Album (2025)"],
        ["Lil Nas X", 16.0, 90, "Montero Reloaded"],
        ["Doja Cat", 19.4, 94, "Planet Her: Deluxe"],
        ["Jack Harlow", 12.8, 88, "White Men Rap Too"]
    ]
}


output_dir = "/content/sample_data"
# Create the directory if it is missing so to_csv() has somewhere to write.
os.makedirs(output_dir, exist_ok=True)

# Write every table to its own CSV (header row -> column names, no index
# column) and collect the written paths in insertion order.
csv_paths = []
for filename, data in tables.items():
    df = pd.DataFrame(data[1:], columns=data[0])
    path = os.path.join(output_dir, filename)
    df.to_csv(path, index=False)
    csv_paths.append(path)

csv_paths


In [None]:
from docx import Document
from docx.shared import Pt
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
import pandas as pd
from openai import OpenAI

import os

# API credentials and model selection for the DeepSeek endpoint.
# The key is read from the DEEPSEEK_API_KEY environment variable so a real
# secret never has to be pasted into (and saved with) the notebook; the
# original "YOUR-KEY" placeholder remains the fallback.
DS_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "YOUR-KEY")
MODEL = "deepseek-chat"
client = OpenAI(api_key=DS_API_KEY, base_url="https://api.deepseek.com")

# Template to read and filled report to write.
doc_path = "/content/sample_data/Rap_Industry_Analysis_Report_Template.docx"
save_path = "/content/sample_data/Rap_Industry_Analysis_Report_Filled.docx"

# Table placeholder text -> CSV file that supplies the table's contents.
# NOTE: this rebinds `csv_paths`, which the CSV-writing cell above leaves as a
# list of paths, to a dict keyed by placeholder.
csv_paths = {
    "{{table_spotify_stats}}": "/content/sample_data/spotify.csv",
    "{{table_youtube_stats}}": "/content/sample_data/youtube.csv",
    "{{table_instagram_stats}}": "/content/sample_data/instagram.csv",
    "{{table_tiktok_stats}}": "/content/sample_data/tiktok.csv"
}


# Analysis placeholder text -> platform name; the name is used both in the LLM
# prompt and to look up the matching "{{table_<platform>_stats}}" CSV.
analysis_placeholders = {
    "{{analysis_spotify}}": "spotify",
    "{{analysis_youtube}}": "youtube",
    "{{analysis_instagram}}": "instagram",
    "{{analysis_tiktok}}": "tiktok"
}

def insert_page_break(paragraph):
    """Append a page break to ``paragraph``.

    A fresh run is added to the paragraph and a ``w:br`` element whose
    ``w:type`` attribute is ``page`` is attached to that run's XML element.

    Args:
        paragraph: Object exposing ``add_run()``; the returned run must expose
            its XML element as ``_r``.
    """
    page_break = OxmlElement('w:br')
    page_break.set(qn('w:type'), 'page')
    paragraph.add_run()._r.append(page_break)

def add_bold_borders(table):
    """Attach an explicit ``w:tblBorders`` definition to ``table``.

    One border element is created for each of the outer edges and the inner
    horizontal/vertical rules; every one is a 'single' line with size '12',
    spacing '0' and colour '000000'. The finished ``w:tblBorders`` element is
    appended to the table's ``tblPr`` properties element.

    Args:
        table: Table object whose ``_element.tblPr`` is the properties element
            to extend.
    """
    properties = table._element.tblPr

    # Attribute/value pairs shared by every border element.
    border_attributes = (
        ('w:val', 'single'),
        ('w:sz', '12'),
        ('w:space', '0'),
        ('w:color', '000000'),
    )

    borders = OxmlElement('w:tblBorders')
    for edge in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        edge_element = OxmlElement(f'w:{edge}')
        for attribute, value in border_attributes:
            edge_element.set(qn(attribute), value)
        borders.append(edge_element)

    properties.append(borders)

# --- Generate LLM Analysis ---
def generate_analysis(df, platform):
    """Ask the chat model for a one-paragraph analysis of a platform's table.

    Args:
        df: DataFrame with the platform's statistics; it is rendered with
            ``DataFrame.to_string(index=False)`` and embedded in the prompt.
        platform: Platform name interpolated into the prompt (e.g. "spotify").

    Returns:
        The model's reply with leading and trailing whitespace stripped.

    Raises:
        ValueError: If the response has no choices or the first choice carries
            no text content.
    """
    table_text = df.to_string(index=False)
    messages = [
        {
            "role": "system",
            "content": "You are a professional music data analyst. Analyze the table below."
        },
        {
            "role": "user",
            "content": f"""Analyze the following data for {platform}.
             Focus on artist comparisons, trends, and engagement insights.
             Put everything in one paragraph, limit to 1000 words for each table.
             No markdown, go straight forward to analysis, do not use opening words.

Table:
{table_text}
"""
        }
    ]
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages
    )

    # The message content is optional in the SDK's response type and the
    # choices list may be empty; fail with a clear message instead of an
    # opaque AttributeError/IndexError.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if content is None:
        raise ValueError(f"No analysis text returned for platform {platform!r}")
    return content.strip()

# Open the template; its placeholder paragraphs are resolved in place below.
doc = Document(doc_path)

# Replace placeholders with tables and analysis.
# Every paragraph is matched on its stripped text: table placeholders are the
# keys of csv_paths, analysis placeholders the keys of analysis_placeholders.
for para in doc.paragraphs:
    text = para.text.strip()

    if text in csv_paths:
        df = pd.read_csv(csv_paths[text])
        table = doc.add_table(rows=1, cols=len(df.columns))
        table.style = 'Table Grid'

        # Header row: one cell per CSV column name.
        for header_cell, column_name in zip(table.rows[0].cells, df.columns):
            header_cell.text = str(column_name)

        # One new table row per CSV record, every value rendered with str().
        for _, record in df.iterrows():
            for body_cell, value in zip(table.add_row().cells, record):
                body_cell.text = str(value)

        add_bold_borders(table)

        # Put the table's XML element where the placeholder paragraph was.
        placeholder_element = para._element
        placeholder_element.getparent().replace(placeholder_element, table._element)
        continue

    if text in analysis_placeholders:
        platform = analysis_placeholders[text]
        # The analysis is generated from the same CSV that feeds the
        # platform's table placeholder.
        df = pd.read_csv(csv_paths[f"{{{{table_{platform}_stats}}}}"])
        analysis = generate_analysis(df, platform)
        para.text = analysis
        insert_page_break(para)


doc.save(save_path)
save_path



