In [3]:
# Cell: Fix JSON serialization issue
import json
import pandas as pd

def fix_json_serialization(obj):
    """Recursively convert pandas objects into JSON-serializable values.

    Args:
        obj: Any value. ``pd.Timestamp``, ``pd.NaT``, ``pd.Series``,
            ``pd.DataFrame``, ``dict`` and ``list`` are converted; anything
            else is returned unchanged.

    Returns:
        ``None`` for ``pd.NaT``; a ``'YYYY-MM-DD'`` string for a Timestamp
        (the time-of-day part is dropped); a plain dict for a
        Series/DataFrame; a new dict/list with converted contents for
        dict/list input; otherwise ``obj`` itself.
    """
    # NaT is not an instance of pd.Timestamp, so it needs its own check.
    if obj is pd.NaT:
        return None
    if isinstance(obj, pd.Timestamp):
        return obj.strftime('%Y-%m-%d')
    if isinstance(obj, (pd.Series, pd.DataFrame)):
        # to_dict() can still contain Timestamps, both as values and as
        # index/column keys, so run the result back through the dict branch.
        return fix_json_serialization(obj.to_dict())
    if isinstance(obj, dict):
        # json.dumps rejects Timestamp keys, so format them like the values.
        return {
            (key.strftime('%Y-%m-%d') if isinstance(key, pd.Timestamp) else key):
                fix_json_serialization(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [fix_json_serialization(item) for item in obj]
    return obj

# Monkey patch the method temporarily
_current_export = gemini.analytics.export_insights_for_gemini

# If this cell is re-run, the attribute already holds our wrapper; recover the
# real method from it so wrappers do not pile up on top of each other.
original_method = getattr(_current_export, '_unpatched_export', _current_export)


def fixed_export_insights(*args, **kwargs):
    """Call the original export method and make its result JSON-serializable.

    Positional and keyword arguments are forwarded unchanged, so the patched
    attribute can be called exactly like the method it replaces.
    """
    result = original_method(*args, **kwargs)
    return fix_json_serialization(result)


# Remember what was wrapped so a later re-run (or a manual undo) can find it.
fixed_export_insights._unpatched_export = original_method
gemini.analytics.export_insights_for_gemini = fixed_export_insights

print("✅ JSON serialization fix applied!")


✅ JSON serialization fix applied!


In [6]:
# Cell 1: Fix all serialization issues
# (the notebook's actual setup and imports begin at `import sys` further down)
import pandas as pd
import numpy as np
import json
from datetime import datetime

# Override JSON encoder to handle pandas objects
class PandasJSONEncoder(json.JSONEncoder):
    """JSON encoder that understands common pandas and NumPy values.

    Conversions performed by :meth:`default`:

    * ``pd.NaT``                    -> ``null``
    * ``pd.Timestamp``              -> ``'YYYY-MM-DD'`` string (time dropped)
    * ``pd.Series`` / ``DataFrame`` -> ``to_dict()`` with Timestamp keys
      formatted as ``'YYYY-MM-DD'``
    * ``np.ndarray``                -> nested list
    * ``np.bool_`` / ``np.integer`` / ``np.floating`` -> bool / int / float
    """

    @staticmethod
    def _stringify_keys(mapping):
        """Return a copy of ``mapping`` with Timestamp keys formatted as dates.

        json.dumps raises TypeError on Timestamp keys, which appear whenever a
        Series/DataFrame has a datetime index. Nested dicts are handled too.
        Keys that share a calendar date collapse into one entry.
        """
        cleaned = {}
        for key, value in mapping.items():
            if isinstance(key, pd.Timestamp):
                key = key.strftime('%Y-%m-%d')
            if isinstance(value, dict):
                value = PandasJSONEncoder._stringify_keys(value)
            cleaned[key] = value
        return cleaned

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Raises:
            TypeError: via the base class, for any type not listed above.
        """
        # NaT is not a pd.Timestamp instance, so check it explicitly.
        if obj is pd.NaT:
            return None
        if isinstance(obj, pd.Timestamp):
            return obj.strftime('%Y-%m-%d')
        if isinstance(obj, (pd.Series, pd.DataFrame)):
            return self._stringify_keys(obj.to_dict())
        if isinstance(obj, np.ndarray):
            # float(array) only works for single-element arrays.
            return obj.tolist()
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            # Keep integers as integers instead of emitting e.g. 5.0.
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

# Patch json.dumps to use our encoder
# If this cell is re-run, json.dumps is already our wrapper; recover the real
# function from it so wrappers do not nest deeper on every execution.
original_dumps = getattr(json.dumps, '_unpatched_dumps', json.dumps)


def patched_dumps(obj, **kwargs):
    """Serialize ``obj`` with PandasJSONEncoder unless the caller chose an encoder.

    All keyword arguments are forwarded to the real ``json.dumps``. An explicit
    ``cls`` is respected: other libraries call ``json.dumps`` with their own
    encoder class, and overriding it would break their serialization.
    """
    if kwargs.get('cls') is None:
        kwargs['cls'] = PandasJSONEncoder
    return original_dumps(obj, **kwargs)


# Remember what was wrapped so a re-run (or a manual undo) can find it.
patched_dumps._unpatched_dumps = original_dumps
json.dumps = patched_dumps

print("✅ JSON serialization patches applied!")

# Cell 8: Bulletproof Export
print("💾 Exporting data for Power BI dashboard...")

import os
import pandas as pd

# Target directory, relative to the current working directory
data_path = 'data'

# Build the summary tables before attempting any file I/O. If one of these
# fails there is nothing valid to fall back to, so the error should surface
# instead of the fallback writing stale or undefined tables.
yearly_summary = pharma_df.groupby('Year')[drug_cols].agg(['sum', 'mean']).round(2)
quarterly_summary = pharma_df.groupby(['Year', 'Quarter'])[drug_cols].sum().reset_index()
monthly_trends = pharma_df.groupby('Month')[drug_cols].mean().reset_index()

# (table, filename, write_index)
# yearly_summary keeps 'Year' as its index, so the index must be written or
# the year labels are lost. The other tables hold their keys as columns.
files_to_save = [
    (yearly_summary, 'yearly_summary.csv', True),
    (quarterly_summary, 'quarterly_summary.csv', False),
    (monthly_trends, 'monthly_trends.csv', False),
    (pharma_df, 'pharma_sales_data.csv', False),
]

try:
    # Directory creation is inside the try so a failure here also falls back.
    os.makedirs(data_path, exist_ok=True)

    for data, filename, write_index in files_to_save:
        filepath = os.path.join(data_path, filename)
        data.to_csv(filepath, index=write_index)
        print(f"✅ Saved: {filepath}")

    print(f"\n📁 All files saved to: {os.path.abspath(data_path)}")

except OSError as e:
    print(f"❌ Export error: {e}")
    print("Saving to current directory instead...")

    # Fallback: same files and same index handling, written next to the notebook
    for data, filename, write_index in files_to_save:
        data.to_csv(filename, index=write_index)
    print("✅ Files saved to current directory")


import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import custom modules
from scripts.data_processor import PharmaDataGenerator
from scripts.analytics_dashboard import PharmaAnalytics
from scripts.gemini_insights import GeminiInsights

# Set display options
pd.set_option('display.max_columns', None)

# The seaborn style sheets were renamed to 'seaborn-v0_8*' in matplotlib 3.6;
# older releases only know the un-suffixed name, so pick whichever exists.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)

print("✅ All imports successful!")

# Cell 2: Generate Pharma Sales Dataset
print("🔄 Generating pharma sales dataset...")

generator = PharmaDataGenerator()

# 36 periods starting 2022-01-01 (presumably monthly -- confirm against
# PharmaDataGenerator), then enriched with the generator's market metrics.
raw_sales = generator.create_pharma_dataset(start_date='2022-01-01', periods=36)
pharma_df = generator.add_market_metrics(raw_sales)

# Persist the enriched dataset through the generator's own writer
generator.save_dataset(pharma_df, 'pharma_sales_data.csv')

# Quick look at what was produced
print("📊 Sample of generated data:")
print(pharma_df.head())
print(f"\nDataset shape: {pharma_df.shape}")
first_date, last_date = pharma_df['Date'].min(), pharma_df['Date'].max()
print(f"Date range: {first_date} to {last_date}")

# Cell 3: Basic Data Analysis
print("📈 Basic Data Analysis")

# Columns holding the per-drug sales figures
drug_cols = ['Lipitor', 'Humira', 'Keytruda', 'Revlimid']
drug_sales = pharma_df[drug_cols]

# Descriptive statistics for each drug column
summary_stats = drug_sales.describe()
print("\n📊 Summary Statistics:")
print(summary_stats.round(2))

# Column totals, largest first
total_sales = drug_sales.sum().sort_values(ascending=False)
print("\n💰 Total Sales by Drug:")
for drug_name, drug_total in total_sales.items():
    print(f"{drug_name}: ${drug_total:,}")

# Each drug's total as a percentage of the combined total
grand_total = total_sales.sum()
market_shares = (total_sales / grand_total * 100).round(2)
print("\n📈 Market Share:")
for drug_name, share_pct in market_shares.items():
    print(f"{drug_name}: {share_pct}%")

# Cell 4: Visualizations
# Builds three charts through PharmaAnalytics and displays each one right
# after it is created. The figure objects stay available as notebook globals
# (trend_fig, market_share_fig, quarterly_fig).
# Initialize analytics
# NOTE(review): PharmaAnalytics() is constructed without arguments, so it does
# not receive pharma_df from this notebook -- presumably it loads the saved
# dataset itself; confirm in scripts/analytics_dashboard.
analytics = PharmaAnalytics()

print("📊 Creating visualizations...")

# 1. Sales trend chart
trend_fig = analytics.create_sales_trend_chart()
trend_fig.show()

# 2. Market share chart
market_share_fig = analytics.create_market_share_chart()
market_share_fig.show()

# 3. Quarterly performance
quarterly_fig = analytics.create_quarterly_performance()
quarterly_fig.show()

# Cell 5: Anomaly Detection
print("🔍 Detecting sales anomalies...")

anomalies = analytics.detect_anomalies(z_threshold=2.0)

# The loop below treats the result as {drug: [record, ...]}, where each record
# is indexable by 'Date' and by the drug name. A mapping whose lists are all
# empty is still truthy, so filter out empty entries first; otherwise the
# "detected" header would be printed with nothing under it.
flagged = {
    drug: anomaly_list
    for drug, anomaly_list in (anomalies or {}).items()
    if len(anomaly_list) > 0
}

if flagged:
    print("⚠️ Anomalies detected:")
    for drug, anomaly_list in flagged.items():
        print(f"\n{drug}:")
        for anomaly in anomaly_list:
            date = pd.to_datetime(anomaly['Date']).strftime('%B %Y')
            sales = anomaly[drug]
            print(f"  - {date}: ${sales:,} (unusual sales volume)")
else:
    print("✅ No significant anomalies detected")

# Cell 6: Gemini AI Insights Generation
# Requests an executive summary and forecasting insights from GeminiInsights
# and prints both, separated by an 80-character rule. Results are kept in the
# notebook globals executive_summary and forecast_insights.
print("🤖 Generating AI-powered insights...")

# Initialize Gemini (make sure to set your API key)
# NOTE(review): how the API key is supplied is not visible here -- presumably
# read from the environment or a config inside GeminiInsights; confirm.
gemini = GeminiInsights()

# Generate executive summary
print("📋 Executive Summary:")
executive_summary = gemini.generate_executive_summary()
print(executive_summary)

# Visual separator between the two sections
print("\n" + "="*80 + "\n")

# Generate forecasting insights
print("🔮 Forecasting Insights:")
forecast_insights = gemini.generate_forecast_insights()
print(forecast_insights)

# Cell 7: Interactive Q&A with Gemini
print("❓ Interactive Business Q&A")

# Questions sent to the model, asked in this order
business_questions = [
    "What strategies should we implement to increase market share for our lowest-performing drug?",
    "Which quarters show the strongest seasonal trends and how can we capitalize on them?",
    "What are the key risk factors that could impact our pharmaceutical portfolio in the next year?",
    "How should we allocate our marketing budget across the four drugs based on performance data?",
]

# Rule printed after every answer
divider = "-" * 80

for number, prompt in enumerate(business_questions, start=1):
    print(f"\n🔶 Question {number}: {prompt}")
    reply = gemini.answer_business_question(prompt)
    print(f"🤖 Answer: {reply}")
    print(divider)

# Cell 8: Export Data for Power BI Dashboard
print("💾 Exporting data for Power BI dashboard...")

import os

# Create additional summary tables for Power BI
yearly_summary = pharma_df.groupby('Year')[drug_cols].agg(['sum', 'mean']).round(2)
quarterly_summary = pharma_df.groupby(['Year', 'Quarter'])[drug_cols].sum().reset_index()
monthly_trends = pharma_df.groupby('Month')[drug_cols].mean().reset_index()

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('../data', exist_ok=True)

# Save summary files
# yearly_summary keeps 'Year' as its index, so the index is written.
yearly_summary.to_csv('../data/yearly_summary.csv')
# These two went through reset_index(): their keys are ordinary columns and
# the remaining positional index would only add a meaningless extra column.
quarterly_summary.to_csv('../data/quarterly_summary.csv', index=False)
monthly_trends.to_csv('../data/monthly_trends.csv', index=False)

print("✅ Export complete! Files saved:")
print("  - yearly_summary.csv")
print("  - quarterly_summary.csv")
print("  - monthly_trends.csv")

# Cell 9: Project Summary & Next Steps
# The closing report is kept as data and printed one entry per line; entries
# that start with "\n" open a new section with a blank line above it.
summary_report = [
    "🎯 PROJECT SUMMARY",
    "=" * 50,
    "📊 DASHBOARD COMPONENTS CREATED:",
    "✅ Realistic pharma sales dataset (36 months)",
    "✅ Interactive sales trend visualizations",
    "✅ Market share analysis",
    "✅ Quarterly performance comparison",
    "✅ Automated anomaly detection",
    "✅ AI-powered executive insights",
    "✅ Natural language Q&A capability",
    "✅ Power BI ready data exports",
    "\n🎪 FOR BEGHOU CONSULTING INTERVIEW:",
    "• Emphasize real-world pharma industry focus",
    "• Highlight AI integration for business insights",
    "• Demonstrate end-to-end analytics workflow",
    "• Show ability to translate data into actionable recommendations",
    "\n📋 NEXT STEPS:",
    "1. Import data into Power BI for dashboard creation",
    "2. Set up automated reporting workflows",
    "3. Customize Gemini prompts for specific client needs",
    "4. Deploy as web application using Streamlit (optional)",
    "\n🚀 Project ready for presentation!",
]

for report_line in summary_report:
    print(report_line)
