In [1]:
# Autonomous Industry Intelligence Report Generator
# Google Colab Implementation (Fixed Version with Visualizations)

!pip install requests beautifulsoup4 pandas matplotlib python-docx
!pip install transformers sentencepiece
!pip install google-search-results

from IPython.display import clear_output
clear_output()
print("Libraries installed!")

# Import additional libraries for visualizations
import io
import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
from docx import Document
from docx.shared import Inches

## 1. Simplified Query Processing Module
class QueryProcessor:
    """Turn a free-text report request into a structured query dict."""

    # Industries recognised directly by substring match on the lower-cased query.
    INDUSTRY_KEYWORDS = ("electric vehicle", "renewable energy", "healthcare",
                         "artificial intelligence", "financial", "retail")

    def parse_query(self, query):
        """Extract the industry and requested focus areas from `query`.

        Args:
            query: Natural-language request, e.g.
                "Generate a report for the electric vehicle market".

        Returns:
            dict with keys "industry" (str), "focus_areas" (list of str),
            "geography" (str) and "timeframe" (str). The last two are always
            returned empty; nothing in this method fills them.
        """
        # Simple keyword extraction (more reliable than small NLP model)
        query_lower = query.lower()

        parsed = {
            "industry": "",
            "focus_areas": [],
            "geography": "",
            "timeframe": ""
        }

        # Extract industry: first known keyword wins, otherwise fall back to
        # the text between "for" and "market".
        for kw in self.INDUSTRY_KEYWORDS:
            if kw in query_lower:
                parsed["industry"] = kw
                break
        else:
            parsed["industry"] = self._fallback_industry(query)

        # Extract focus areas
        if "key players" in query_lower:
            parsed["focus_areas"].append("competitor analysis")
        if "trends" in query_lower:
            parsed["focus_areas"].append("market trends")

        return parsed

    @staticmethod
    def _fallback_industry(query):
        """Return the text after the last word "for" and before the word "market(s)".

        Whole-word, case-insensitive matching is used so that words merely
        containing those letters ("performance", "supermarket", "marketing")
        do not cut the industry name in half.
        """
        after_for = re.split(r"\bfor\b", query, flags=re.IGNORECASE)[-1]
        before_market = re.split(r"\bmarkets?\b", after_for, flags=re.IGNORECASE)[0]
        return before_market.strip()

## 2. Research Engine (Using Free APIs and Web Scraping)
import requests
from bs4 import BeautifulSoup
import json
from serpapi import GoogleSearch
import time

class ResearchEngine:
    """Collects raw market data from NewsAPI, SerpAPI and light page scraping.

    Every fetch method is best-effort: on failure it prints a short notice and
    returns an empty result so the rest of the pipeline can still run.
    """

    REQUEST_TIMEOUT = 10  # seconds, applied to every direct HTTP request

    def __init__(self, news_api_key=None, serpapi_key=None):
        """Store API credentials.

        Keys are resolved in this order: explicit argument, environment
        variable (NEWSAPI_KEY / SERPAPI_KEY), then the original placeholder
        string so that calling ResearchEngine() with no setup still works.
        """
        self.news_api_key = news_api_key or os.environ.get("NEWSAPI_KEY", "YOUR_NEWSAPI_KEY")  # Get free key from newsapi.org
        self.serpapi_key = serpapi_key or os.environ.get("SERPAPI_KEY", "YOUR_SERPAPI_KEY")   # Free tier available

    def get_news_articles(self, query, num_results=5):
        """Return up to `num_results` article dicts from NewsAPI, or [] on failure."""
        try:
            response = requests.get(
                "https://newsapi.org/v2/everything",
                # Let requests URL-encode the query (spaces, '&', ...).
                params={"q": query},
                # Send the key as a header so it never appears in a URL that
                # could be echoed in an error message.
                headers={"X-Api-Key": self.news_api_key},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json().get('articles', [])[:num_results]
        except Exception as exc:
            print(f"News lookup failed for '{query}': {exc}")
            return []

    def google_search(self, query, num_results=3):
        """Return up to `num_results` organic results via SerpAPI, or [] on failure."""
        try:
            params = {
                "q": query,
                "api_key": self.serpapi_key,
                "num": num_results
            }
            search = GoogleSearch(params)
            return search.get_dict().get('organic_results', [])[:num_results]
        except Exception as exc:
            # Only the exception type is printed: the request carries the API
            # key as a URL parameter, so the full message might expose it.
            print(f"Search failed for '{query}': {type(exc).__name__}")
            return []

    def scrape_website(self, url):
        """Return the text of the first ten <p> elements at `url`, or "" on failure."""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            # Do not treat 4xx/5xx error pages as article content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            return ' '.join([p.get_text().strip() for p in soup.find_all('p')][:10])
        except Exception as exc:
            print(f"Scraping failed for {url}: {exc}")
            return ""

    def collect_data(self, parsed_query):
        """Gather news, search results and competitor results for an industry.

        Args:
            parsed_query: dict with at least an 'industry' key.

        Returns:
            dict with keys "news_articles", "search_results" and
            "competitor_data". The first two search results that carry a
            'link' additionally get a 'scraped_content' entry.
        """
        industry = parsed_query['industry']

        print(f"Researching {industry} market...")

        # Collect data from multiple sources
        data = {
            "news_articles": self.get_news_articles(f"{industry} market"),
            "search_results": self.google_search(f"{industry} market trends 2023"),
            "competitor_data": self.google_search(f"top companies in {industry}"),
        }

        # Add scraped content from top results
        for result in data['search_results'][:2]:
            if 'link' in result:
                print(f"Scraping {result['link']}")
                result['scraped_content'] = self.scrape_website(result['link'])
                time.sleep(2)  # Be polite to servers

        return data

## 3. Analysis Engine
class AnalysisEngine:
    """Turns collected research data into trends, competitor profiles and SWOTs.

    The trend list, SWOT entries, market size and growth rate are fixed
    placeholder values; only the competitor profiles depend on the input.
    """

    def analyze_trends(self, text_data):
        """Return the first three entries of a fixed trend list.

        `text_data` is accepted but not inspected.
        """
        candidate_trends = (
            "Growing adoption of new technologies",
            "Increasing competition in the market",
            "Regulatory changes impacting the industry",
            "Shift toward sustainable solutions",
            "Emerging markets showing rapid growth",
        )
        top_three = list(candidate_trends[:3])
        return top_three

    def analyze_competitors(self, competitor_data):
        """Build a profile dict for each of the first three search results.

        The company name is the part of the result title before the first
        ' - '; missing fields fall back to fixed default strings.
        """
        def to_profile(result):
            raw_title = result.get('title', 'Unknown Company')
            return {
                'name': raw_title.split(' - ')[0],
                'description': result.get('snippet', 'No description available'),
                'link': result.get('link', '#'),
            }

        return [to_profile(result) for result in competitor_data[:3]]

    def generate_swot(self, company_data):
        """Return a fresh placeholder SWOT dict; `company_data` is not inspected."""
        swot = {}
        swot["strengths"] = ["Strong brand", "Innovative products"]
        swot["weaknesses"] = ["High costs", "Limited distribution"]
        swot["opportunities"] = ["Market expansion", "New technologies"]
        swot["threats"] = ["Competition", "Economic downturn"]
        return swot

    def process(self, research_data):
        """Run the full analysis over the dict produced by the research step.

        Returns a dict with "trends", "competitors" (each carrying a 'swot'
        entry), "market_size" and "growth_rate".
        """
        news_items = research_data.get('news_articles', [])
        rival_results = research_data.get('competitor_data', [])

        trends = self.analyze_trends(news_items)
        competitors = self.analyze_competitors(rival_results)

        # Attach an individual SWOT dict to every competitor profile.
        for profile in competitors:
            profile['swot'] = self.generate_swot(profile)

        results = {"trends": trends, "competitors": competitors}
        results["market_size"] = "Estimated $50 billion"  # Placeholder
        results["growth_rate"] = "15% CAGR"  # Placeholder
        return results

## 4. Report Generator (Modified with Visualizations)
class ReportGenerator:
    """Builds the Word report (python-docx) with embedded matplotlib charts.

    The market sizes and competitor shares plotted here are illustrative
    placeholders, not researched figures.
    """

    def __init__(self):
        # Initialize sample data for visualizations
        self.years = [2023, 2024, 2025, 2026, 2027]
        self.market_sizes = [50, 57.5, 66.125, 76.04375, 87.4503125]  # Assuming 15% CAGR from $50B
        self.competitors = []
        self.market_shares = []

    def _year_span(self):
        """Return the plotted period as 'first-last', e.g. '2023-2027'."""
        return f"{self.years[0]}-{self.years[-1]}"

    @staticmethod
    def _figure_to_png(fig):
        """Render `fig` to an in-memory PNG, close it, and return the rewound buffer."""
        buf = io.BytesIO()
        try:
            fig.savefig(buf, format='png', bbox_inches='tight')
        finally:
            # Always release the figure, even if rendering failed.
            plt.close(fig)
        buf.seek(0)
        return buf

    @staticmethod
    def _pie_chart_inputs(competitors):
        """Return parallel (labels, shares) lists for the market-share pie.

        Placeholder shares exist for at most three companies, so any further
        competitors are left out of the chart; a pie needs exactly one label
        per wedge.
        """
        shares = [30, 25, 20] if len(competitors) >= 3 else [40, 30, 20][:len(competitors)]  # Placeholder shares
        labels = [comp['name'] for comp in competitors[:len(shares)]]
        return labels, shares

    def generate_market_size_chart(self, industry):
        """Generate a bar chart for market size over time.

        Returns:
            io.BytesIO positioned at the start of the PNG data.
        """
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(self.years, self.market_sizes, color='skyblue')
        ax.set_title(f'{industry.title()} Market Size ({self._year_span()})', fontsize=14)
        ax.set_xlabel('Year', fontsize=12)
        ax.set_ylabel('Market Size ($B)', fontsize=12)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        return self._figure_to_png(fig)

    def generate_competitor_pie_chart(self, competitors, industry):
        """Generate a pie chart for competitor market share.

        Args:
            competitors: list of dicts that each have a 'name' key. Only the
                first three are charted.
            industry: industry name used in the chart title.

        Returns:
            io.BytesIO positioned at the start of the PNG data.

        Side effects:
            Stores the charted labels and shares on `self.competitors` and
            `self.market_shares`.
        """
        self.competitors, self.market_shares = self._pie_chart_inputs(competitors)
        fig, ax = plt.subplots(figsize=(6, 6))
        ax.pie(self.market_shares, labels=self.competitors, autopct='%1.1f%%', startangle=140, colors=['#ff9999','#66b3ff','#99ff99'])
        ax.set_title(f'Market Share in {industry.title()} Industry', fontsize=14)
        return self._figure_to_png(fig)

    def generate_growth_trend_chart(self, industry):
        """Generate a line chart for market growth trend.

        Returns:
            io.BytesIO positioned at the start of the PNG data.
        """
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.plot(self.years, self.market_sizes, marker='o', color='green', linewidth=2)
        ax.set_title(f'{industry.title()} Market Growth Trend ({self._year_span()})', fontsize=14)
        ax.set_xlabel('Year', fontsize=12)
        ax.set_ylabel('Market Size ($B)', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)
        return self._figure_to_png(fig)

    def generate_report(self, analysis_results, query):
        """Assemble the complete report document.

        Args:
            analysis_results: dict that may contain 'industry', 'market_size',
                'growth_rate', 'trends' and 'competitors' (each competitor
                needs 'name' and 'description'; 'swot' is optional).
            query: the original user request, shown on the title page.

        Returns:
            The populated python-docx document (not yet saved).
        """
        doc = Document()
        chart_industry = analysis_results.get('industry', 'Industry')
        competitors = analysis_results.get('competitors', [])
        year_span = self._year_span()

        # Title Page
        doc.add_heading('Industry Intelligence Report', 0)
        doc.add_paragraph(f"Prepared for: {query}")
        doc.add_paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}")
        doc.add_page_break()

        # Executive Summary
        doc.add_heading('Executive Summary', 1)
        doc.add_paragraph(
            f"This report provides an analysis of the {analysis_results.get('industry', 'specified')} market, "
            "covering key trends, competitive landscape, and strategic recommendations based on "
            "recent market data and analysis."
        )

        # Market Overview
        doc.add_heading('Market Overview', 1)
        doc.add_paragraph(
            f"The global {analysis_results.get('industry', '')} market is currently estimated at "
            f"{analysis_results.get('market_size', '')} with a projected growth rate of "
            f"{analysis_results.get('growth_rate', '')} over the next five years."
        )
        # Add Market Size Chart
        market_size_chart = self.generate_market_size_chart(chart_industry)
        doc.add_picture(market_size_chart, width=Inches(5.5))
        doc.add_paragraph(f'Figure 1: Projected Market Size ({year_span})', style='Caption')

        # Add Growth Trend Chart
        growth_trend_chart = self.generate_growth_trend_chart(chart_industry)
        doc.add_picture(growth_trend_chart, width=Inches(5.5))
        doc.add_paragraph(f'Figure 2: Market Growth Trend ({year_span})', style='Caption')

        # Key Trends
        doc.add_heading('Key Market Trends', 1)
        for trend in analysis_results.get('trends', []):
            doc.add_paragraph(trend, style='List Bullet')

        # Competitor Analysis
        doc.add_heading('Competitor Analysis', 1)
        if competitors:
            # Add Competitor Pie Chart
            competitor_pie_chart = self.generate_competitor_pie_chart(competitors, chart_industry)
            doc.add_picture(competitor_pie_chart, width=Inches(4.5))
            doc.add_paragraph('Figure 3: Competitor Market Share', style='Caption')
        else:
            # An empty pie with a caption would look like a rendering error.
            doc.add_paragraph("No competitor data was available for this report.")

        for competitor in competitors:
            doc.add_heading(competitor['name'], 2)
            doc.add_paragraph(competitor['description'])

            doc.add_heading('SWOT Analysis', 3)
            swot = competitor.get('swot', {})
            for category, items in swot.items():
                doc.add_paragraph(category.title() + ":")
                for item in items:
                    doc.add_paragraph(item, style='List Bullet')

        # Recommendations
        doc.add_heading('Strategic Recommendations', 1)
        recommendations = [
            "Invest in emerging technologies to stay competitive",
            "Explore partnerships with key industry players",
            "Monitor regulatory changes in target markets",
            "Develop differentiated offerings for niche segments"
        ]
        for rec in recommendations:
            doc.add_paragraph(rec, style='List Bullet')

        return doc

## 5. Main System Integration
def _safe_filename_stem(text, default="industry"):
    """Turn free text into a filename-safe stem.

    Runs of whitespace and of characters that are illegal in file names on
    common platforms are replaced by a single underscore; an empty result
    falls back to `default`.
    """
    stem = re.sub(r'[\\/:*?"<>|\s]+', '_', text)[:80].strip('_.')
    return stem or default


def generate_intelligence_report(query):
    """Run the whole pipeline for `query` and save the report as a .docx file.

    Steps: parse the query, collect research data, analyse it, build the
    document and write it to the current working directory.

    Args:
        query: natural-language description of the requested report.

    Returns:
        The file name of the saved report, "<industry>_report.docx", where
        the industry text has been made safe for use in a file name.
    """
    print("Processing query...")
    processor = QueryProcessor()
    parsed_query = processor.parse_query(query)
    print(f"Parsed query: {parsed_query}")

    print("\nResearching data...")
    researcher = ResearchEngine()
    research_data = researcher.collect_data(parsed_query)

    print("\nAnalyzing data...")
    analyzer = AnalysisEngine()
    analysis_results = analyzer.process(research_data)
    analysis_results['industry'] = parsed_query['industry']  # Add industry to results

    print("\nGenerating report...")
    reporter = ReportGenerator()
    report = reporter.generate_report(analysis_results, query)

    # Save report. The industry can be arbitrary user text (e.g. "AI/ML"),
    # so it must be sanitised before it becomes part of a path.
    report_filename = f"{_safe_filename_stem(parsed_query['industry'])}_report.docx"
    report.save(report_filename)
    print(f"\nReport generated: {report_filename}")

    return report_filename

## Example Usage - With Error Handling
try:
    # The download helper only exists inside Google Colab. Resolve it before
    # running the pipeline so the fallback branch below can rely on the name.
    from google.colab import files
except ImportError:
    files = None


def _offer_download(path):
    """Start a browser download in Colab; elsewhere just report where the file is."""
    if files is not None:
        files.download(path)
    else:
        print(f"Saved locally: {path}")


query = "Generate a strategy intelligence report for the electric vehicle market and its key players"
try:
    report_file = generate_intelligence_report(query)

    # Download the report
    _offer_download(report_file)
except Exception as e:
    print(f"An error occurred: {str(e)}")
    print("Generating sample report instead...")

    # Create a sample report if the main process fails
    from docx import Document
    doc = Document()
    doc.add_heading('Sample Industry Report', 0)
    doc.add_paragraph("This is a sample report generated because the main process encountered an error.")
    doc.add_paragraph(f"Original query was: {query}")
    sample_file = "sample_report.docx"
    doc.save(sample_file)
    _offer_download(sample_file)

Libraries installed!
Processing query...
Parsed query: {'industry': 'electric vehicle', 'focus_areas': ['competitor analysis'], 'geography': '', 'timeframe': ''}

Researching data...
Researching electric vehicle market...

Analyzing data...

Generating report...

Report generated: electric_vehicle_report.docx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>