# SocialProphet Phase 3: Content Generation

This notebook demonstrates the content generation pipeline:
1. Load forecast results from Phase 2
2. Extract actionable insights
3. Generate content using LLM (Llama 3.1)
4. Validate content quality with FIIT framework

**Target: overall FIIT score ≥ 0.85 (85%)**

In [None]:
# Standard imports
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import json
from datetime import datetime

# Treat the parent of the current working directory as the project root and
# place it first on sys.path so the `src` package imported below resolves.
project_root = Path.cwd().parent
sys.path[:0] = [str(project_root)]

# Load variables from the project's .env file.
from dotenv import load_dotenv
load_dotenv(project_root / '.env')

# Report only whether HF_TOKEN is set; the token value itself is never printed.
hf_token_state = 'Yes' if os.getenv('HF_TOKEN') else 'No'
print(
    f"Project root: {project_root}",
    f"HF Token available: {hf_token_state}",
    sep="\n",
)

In [None]:
# Import SocialProphet modules
from src.insights.extractor import InsightExtractor
from src.insights.prompt_builder import PromptBuilder
from src.generation.llm_client import HuggingFaceClient
from src.generation.content_gen import ContentGenerator
from src.generation.fiit_validator import FIITValidator
from src.utils.config import Config

# Shared project configuration; later cells read config.PROCESSED_DATA_DIR
# from it to locate the input files and to write the generation results.
config = Config()
print("Modules imported successfully!")

## 1. Load Data from Phase 2

In [None]:
# Historical (training) data; the `ds` column is parsed into datetimes.
train_df = pd.read_csv(config.PROCESSED_DATA_DIR / 'train_data.csv').assign(
    ds=lambda frame: pd.to_datetime(frame['ds'])
)

# Test data, used further down as the prediction period.
test_df = pd.read_csv(config.PROCESSED_DATA_DIR / 'test_data.csv').assign(
    ds=lambda frame: pd.to_datetime(frame['ds'])
)

print(
    f"Historical data: {len(train_df)} days",
    f"Test period: {len(test_df)} days",
    f"Date range: {train_df['ds'].min()} to {test_df['ds'].max()}",
    sep="\n",
)

In [None]:
# Load the ensemble evaluation metrics stored as JSON next to the processed data.
# The file is opened as UTF-8 explicitly so decoding does not depend on the
# platform's default locale encoding.
with open(config.PROCESSED_DATA_DIR / 'ensemble_results.json', 'r', encoding='utf-8') as f:
    ensemble_results = json.load(f)

ensemble_metrics = ensemble_results['ensemble_metrics']

print("Ensemble Model Performance:")
print(f"  MAPE: {ensemble_metrics['mape']:.2f}%")
print(f"  RMSE: {ensemble_metrics['rmse']:,.0f}")
# The label previously contained a mis-encoded superscript two; it is written
# as an escape (\u00b2) so the source survives re-encoding and prints "R²".
print(f"  R\u00b2: {ensemble_metrics['r2']:.4f}")

In [None]:
# Simulated forecast: no model output is loaded in this cell, so the test
# frame's own `y` / `y_raw` columns are copied in as the "predictions".
predictions_df = test_df.assign(
    ensemble_pred=lambda frame: frame['y'],
    ensemble_pred_original=lambda frame: frame['y_raw'],
)

predicted_engagement = predictions_df['ensemble_pred_original']
print(
    f"Predictions shape: {predictions_df.shape}",
    f"Predicted engagement range: {predicted_engagement.min():,.0f} - {predicted_engagement.max():,.0f}",
    sep="\n",
)

## 2. Extract Insights

In [None]:
# Build the extractor, then derive insights from the prediction frame
# against the training history.
extractor = InsightExtractor()

extraction_inputs = {
    'predictions_df': predictions_df,
    'historical_df': train_df,
    'forecast_horizon': len(predictions_df),  # one step per prediction row
}
insights = extractor.extract_all(**extraction_inputs)

print("Insights extracted!", f"Keys: {list(insights.keys())}", sep="\n")

In [None]:
# Print the trend section of the extracted insights as a small text report.
trend = insights['trend_analysis']
section_rule = "=" * 50

trend_lines = [
    section_rule,
    "TREND ANALYSIS",
    section_rule,
    f"Direction: {trend['direction']}",
    f"Strength: {trend['strength']}",
    f"7-day momentum: {trend['momentum_7d']:.2f}%",
    f"Historical mean: {trend['historical_mean']:,.0f}",
    f"Predicted mean: {trend['predicted_mean']:,.0f}",
    f"Confidence: {trend['confidence']}",
]
print("\n".join(trend_lines))

In [None]:
# Print the temporal-pattern section: best days, then weekend vs weekday.
temporal = insights['temporal_patterns']
print("=" * 50, "TEMPORAL PATTERNS", "=" * 50, sep="\n")

print("\nBest Days:")
for entry in temporal['best_days']:
    weekday_name, avg_engagement = entry['day'], entry['avg_engagement']
    print(f"  {weekday_name}: {avg_engagement:,.0f} avg engagement")

ww = temporal['weekend_vs_weekday']
better_period = ww['better'].capitalize()
print(f"\nWeekend vs Weekday:")
print(f"  {better_period}s perform {ww['difference_pct']:.1f}% better")

In [None]:
# Print the recommendation section: two prioritised lists, then the
# three engagement target tiers.
recs = insights['recommendations']
print("=" * 50, "RECOMMENDATIONS", "=" * 50, sep="\n")

# Both recommendation lists share the same item layout, so one loop renders them.
recommendation_sections = (
    ("\nPosting Schedule:", 'posting_schedule'),
    ("\nContent Strategy:", 'content_strategy'),
)
for heading, section_key in recommendation_sections:
    print(heading)
    for item in recs[section_key]:
        print(f"  [{item['priority'].upper()}] {item['recommendation']}")

targets = recs['engagement_targets']
print(f"\nEngagement Targets:")
for label, tier_key in (("Minimum", 'minimum'), ("Target", 'target'), ("Stretch", 'stretch')):
    print(f"  {label}: {targets[tier_key]:,.0f}")

In [None]:
# Ask the extractor for its prompt context and show it under a banner.
context = extractor.to_prompt_context()
context_banner = "=" * 50
print(context_banner, "PROMPT CONTEXT", context_banner, context, sep="\n")

In [None]:
# Save insights
# save_insights() is called with no arguments; whatever it returns is
# reported below as the saved location.
insights_path = extractor.save_insights()
print(f"Insights saved to: {insights_path}")

## 3. Initialize LLM Client

In [None]:
# Try to create the LLM client and report its status.
# Any exception from construction or the status check is caught on purpose:
# llm_client is set to None, which later cells use as the signal to fall back
# to built-in demo content.
# NOTE(review): the reported 'available' flag is only printed, never acted
# on - confirm whether an unavailable model should also trigger the fallback.
try:
    llm_client = HuggingFaceClient(model='llama')
    print("LLM client initialized!")

    # Check model status
    status = llm_client.check_model_status()
    print(
        f"Model: {status.get('model', 'unknown')}",
        f"Available: {status.get('available', False)}",
        sep="\n",
    )
except Exception as init_error:
    llm_client = None
    print(f"Error initializing LLM: {init_error}")
    print("Using mock responses for demonstration...")

## 4. Build Prompts

In [None]:
# Configure the prompt builder for this notebook's platform and voice.
builder_settings = {
    'platform': 'instagram',
    'brand_voice': 'friendly, engaging, and informative',
}
prompt_builder = PromptBuilder(**builder_settings)

# Show only the first 500 characters of the system prompt as a preview.
system_prompt = prompt_builder.get_system_prompt()
system_prompt_preview = system_prompt[:500] + "..."
print("System Prompt:", "-" * 50, system_prompt_preview, sep="\n")

In [None]:
# Render the post-generation prompt for one theme/topic and print it in full.
post_prompt_inputs = {
    'insights': insights,
    'theme': 'educational',
    'topic': 'social media engagement tips',
}
post_prompt = prompt_builder.build_post_prompt(**post_prompt_inputs)

print("Post Generation Prompt:", "-" * 50, post_prompt, sep="\n")

## 5. Generate Content

In [None]:
# Produce a single post: canned demo content when no LLM client is available,
# otherwise a generated post from ContentGenerator.
if not llm_client:
    # Demo content
    post = {
        'caption': 'Ready to boost your engagement? Here are 3 proven tips that increased our reach by 50%! What strategies work best for you? Drop a comment below!',
        'hashtags': ['#socialmediatips', '#engagement', '#growthhacks', '#marketing', '#contentcreator'],
        'best_time': 'Tuesday 10:00 AM',
        'content_type': 'carousel'
    }
    print("Using demo content (LLM not available)")
    print(f"\nCaption: {post['caption']}")
    print(f"Hashtags: {' '.join(post['hashtags'])}")
else:
    # `generator` is reused by the campaign cell further down.
    generator = ContentGenerator(llm_client, prompt_builder)

    # Generate a single post
    print("Generating post...")
    post = generator.generate_post(
        insights=insights,
        theme='educational',
        topic='boosting engagement'
    )

    post_banner = "=" * 50
    print("\n" + post_banner)
    print("GENERATED POST")
    print(post_banner)

    # Missing fields fall back to 'N/A' (or no hashtags) rather than raising.
    caption_text = post.get('caption', 'N/A')
    hashtag_text = ' '.join(post.get('hashtags', []))
    best_time_text = post.get('best_time', 'N/A')
    content_type_text = post.get('content_type', 'N/A')
    print(f"\nCaption:\n{caption_text}")
    print(f"\nHashtags: {hashtag_text}")
    print(f"\nBest Time: {best_time_text}")
    print(f"Content Type: {content_type_text}")

## 6. Validate with FIIT Framework

In [None]:
# Create the validator and assemble the text to score: the caption,
# followed by the space-separated hashtags when the post has any.
validator = FIITValidator()

post_caption = post.get('caption', '')
post_hashtags = post.get('hashtags')
if post_hashtags:
    content_to_validate = ' '.join([post_caption, *post_hashtags])
else:
    content_to_validate = post_caption

print(
    f"Content to validate ({len(content_to_validate)} chars):",
    content_to_validate,
    sep="\n",
)

In [None]:
# Score the assembled content against the insights with an 'engaging' target tone.
validation_inputs = {
    'content': content_to_validate,
    'insights': insights,
    'target_tone': 'engaging',
}
validation_result = validator.validate(**validation_inputs)

# Let the validator render its own report for the result, then show it.
report = validator.get_score_report(validation_result)
print(report)

In [None]:
# Per-dimension breakdown of the validation result: each FIIT dimension shows
# its score plus a few fields from the matching `details` entry. Missing
# detail fields fall back to the defaults given to .get().
scores = validation_result['scores']
details = validation_result['details']

fluency_info = details['fluency']
interactivity_info = details['interactivity']
information_info = details['information']
tone_info = details['tone']

breakdown_lines = [
    "\n" + "=" * 50,
    "DETAILED BREAKDOWN",
    "=" * 50,
    "\nFLUENCY:",
    f"  Score: {scores['fluency']:.2f}",
    f"  Word count: {fluency_info.get('word_count', 'N/A')}",
    f"  Flesch Reading Ease: {fluency_info.get('flesch_reading_ease', 'N/A')}",
    "\nINTERACTIVITY:",
    f"  Score: {scores['interactivity']:.2f}",
    f"  Has CTA: {interactivity_info.get('has_cta', False)}",
    f"  Has Question: {interactivity_info.get('has_question', False)}",
    f"  Emoji count: {interactivity_info.get('emoji_count', 0)}",
    f"  Hashtag count: {interactivity_info.get('hashtag_count', 0)}",
    "\nINFORMATION:",
    f"  Score: {scores['information']:.2f}",
    f"  Has Numbers: {information_info.get('has_numbers', False)}",
    f"  Has Value Indicators: {information_info.get('has_value_indicators', False)}",
    "\nTONE:",
    f"  Score: {scores['tone']:.2f}",
    f"  Detected Tone: {tone_info.get('detected_tone', 'N/A')}",
    f"  Sentiment Polarity: {tone_info.get('sentiment_polarity', 0):.2f}",
]
print("\n".join(breakdown_lines))

In [None]:
# Compare the overall score with the validator's own target and list any
# improvements the validator asked for.
overall_score = scores['overall']
target = validator.TARGET_OVERALL

if overall_score >= target:
    assessment_status = 'PASS'
else:
    assessment_status = 'NEEDS IMPROVEMENT'

print("\n" + "=" * 50)
print("FINAL ASSESSMENT")
print("=" * 50)
print(f"\nOverall FIIT Score: {overall_score:.2f}")
print(f"Target Score: {target}")
print(f"Status: {assessment_status}")

pending_improvements = validation_result['improvements_needed']
if pending_improvements:
    print("\nImprovements Needed:")
    for suggestion in pending_improvements:
        print(f"  - {suggestion}")

## 7. Generate Multiple Posts (Campaign)

In [None]:
# Generate one post per theme and score each with the FIIT validator.
# With an LLM client the posts come from `generator` (created in the
# single-post generation cell); otherwise canned demo captions are used.
themes = ['educational', 'inspirational', 'behind-the-scenes', 'promotional', 'interactive']

# Demo posts for each theme. Built once, outside the loop, since the mapping
# does not depend on the iteration.
demo_posts = {
    'educational': 'Did you know? Posting on Tuesdays can boost engagement by 20%! Here are the best practices we discovered.',
    'inspirational': 'Every great brand started with a single post. Keep creating, keep growing!',
    'behind-the-scenes': 'A sneak peek at how we create content! What goes into your posts?',
    'promotional': 'Limited time offer! Check out our latest features designed to boost your engagement.',
    'interactive': 'Quick poll: What type of content do you prefer? A) Tips B) Stories C) Tutorials. Comment below!'
}

all_posts = []
all_scores = []

for theme in themes:
    # A dedicated name is used so the single post produced earlier (`post`)
    # is not overwritten; re-running the validation cells stays reproducible.
    if llm_client:
        campaign_post = generator.generate_post(insights, theme=theme)
    else:
        campaign_post = {'caption': demo_posts[theme], 'hashtags': ['#demo', '#content'], 'theme': theme}

    # Assemble caption + hashtags the same way as the single-post validation
    # cell: hashtags are appended only when present, so a missing/None/empty
    # list neither raises nor leaves a trailing space.
    content = campaign_post.get('caption', '')
    if campaign_post.get('hashtags'):
        content += ' ' + ' '.join(campaign_post['hashtags'])

    result = validator.validate(content, insights)
    theme_scores = result['scores']

    all_posts.append(campaign_post)
    all_scores.append({
        'theme': theme,
        'overall': theme_scores['overall'],
        'fluency': theme_scores['fluency'],
        'interactivity': theme_scores['interactivity'],
        'information': theme_scores['information'],
        'tone': theme_scores['tone'],
        'passed': result['all_passed']
    })

# Display results: one row per theme, built from the list of records in one go.
scores_df = pd.DataFrame(all_scores)
print("\nCampaign FIIT Scores:")
print(scores_df.to_string(index=False))

In [None]:
# Campaign-level summary: post count, mean overall score, pass count, and the
# best / worst scoring themes.
# NOTE(review): the pass count sums the validator's `all_passed` flag, while
# the label hard-codes ">0.85" - confirm the two mean the same thing.
overall_scores = scores_df['overall']
best_theme = scores_df.loc[overall_scores.idxmax(), 'theme']
worst_theme = scores_df.loc[overall_scores.idxmin(), 'theme']

summary_lines = [
    "\n" + "=" * 50,
    "CAMPAIGN SUMMARY",
    "=" * 50,
    f"\nTotal Posts: {len(all_posts)}",
    f"Average FIIT Score: {overall_scores.mean():.2f}",
    f"Posts Passing (>0.85): {scores_df['passed'].sum()}/{len(scores_df)}",
    f"\nBest Theme: {best_theme} ({overall_scores.max():.2f})",
    f"Worst Theme: {worst_theme} ({overall_scores.min():.2f})",
]
print("\n".join(summary_lines))

## 8. Save Results

In [None]:
# Compile the run summary and write it as JSON next to the other processed data.
# Numeric aggregates are coerced to built-in float/int because json.dump
# rejects NumPy scalar types that are not Python float/int subclasses.
generation_results = {
    'timestamp': datetime.now().isoformat(),
    'platform': 'instagram',
    'insights_summary': {
        'trend': insights['trend_analysis']['direction'],
        'best_days': [d['day'] for d in insights['temporal_patterns']['best_days']],
        # Coerced like every other numeric in this dict.
        # NOTE(review): assumes mean_predicted is numeric - confirm against the extractor.
        'predicted_engagement': float(insights['predictions_summary']['mean_predicted'])
    },
    'posts_generated': len(all_posts),
    'average_fiit_score': float(scores_df['overall'].mean()),
    'posts_passing': int(scores_df['passed'].sum()),
    # Record the threshold the validator actually uses instead of a
    # duplicated literal, so the saved target cannot drift from it.
    'target_fiit': float(validator.TARGET_OVERALL),
    'score_breakdown': {
        'fluency_avg': float(scores_df['fluency'].mean()),
        'interactivity_avg': float(scores_df['interactivity'].mean()),
        'information_avg': float(scores_df['information'].mean()),
        'tone_avg': float(scores_df['tone'].mean())
    }
}

# Save results (explicit UTF-8 so the output does not depend on the platform locale)
results_path = config.PROCESSED_DATA_DIR / 'generation_results.json'
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(generation_results, f, indent=2)

print(f"Results saved to: {results_path}")

In [None]:
# Final summary: campaign-average score per FIIT dimension and the overall verdict.
# The means are computed once, and the pass threshold is read from the
# validator (the same source the single-post assessment uses) rather than
# from a repeated literal.
mean_scores = scores_df[['fluency', 'interactivity', 'information', 'tone', 'overall']].mean()
fiit_target = validator.TARGET_OVERALL
final_status = 'TARGET MET' if mean_scores['overall'] >= fiit_target else 'NEEDS IMPROVEMENT'

print("\n" + "=" * 60)
print("PHASE 3 CONTENT GENERATION - FINAL RESULTS")
print("=" * 60)
print(f"\nPlatform: Instagram")
print(f"Posts Generated: {len(all_posts)}")
print(f"\nFIIT Scores:")
print(f"  Fluency:        {mean_scores['fluency']:.2f}")
print(f"  Interactivity:  {mean_scores['interactivity']:.2f}")
print(f"  Information:    {mean_scores['information']:.2f}")
print(f"  Tone:           {mean_scores['tone']:.2f}")
print(f"  " + "-" * 30)
print(f"  OVERALL:        {mean_scores['overall']:.2f}")
print(f"\nTarget: {fiit_target}")
print(f"Status: {final_status}")
print("\n" + "=" * 60)