In [1]:
!pip install PyPDF2


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [2]:
# Pitch Analysis Model using Gemini API

# Import necessary libraries
import os
import re
import pandas as pd
import numpy as np
import google.generativeai as genai
import PyPDF2
from IPython.display import display, Markdown
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Configure the Gemini client.
# The key is read from the GEMINI_API_KEY environment variable so that a real
# key never has to be saved inside the notebook. The placeholder is only a
# fallback: replace it (or set the variable) before making any API call.
api_key = os.environ.get("GEMINI_API_KEY", "your-gemini-api-key")
genai.configure(api_key=api_key)

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined with newlines, or ``None`` when the file could
        not be opened or parsed (the error is printed).
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Guard against a page yielding None (no extractable text): one
            # such page must not discard the text of the whole document.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
        # Join with a newline so the last word of one page does not fuse with
        # the first word of the next.
        return "\n".join(page_texts)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None

# Function to preprocess extracted text
def preprocess_text(text):
    """Clean the text extracted from a pitch deck.

    Decorative glyphs (bullets, icons, box-drawing characters, ...) are
    removed, while characters that carry meaning in a pitch deck are kept:
    word characters, basic punctuation, brackets, currency signs, ``%``,
    ``&``, ``/``, ``+``, ``@``, ``#`` and apostrophes. Runs of whitespace are
    then collapsed to a single space.

    Args:
        text: Raw extracted text, or ``None`` when extraction failed.

    Returns:
        The cleaned text; an empty string for ``None`` or empty input.
    """
    if not text:
        return ""

    # Remove symbols first: deleting a glyph that sits between two spaces would
    # otherwise leave a double space behind after whitespace was collapsed.
    text = re.sub(r"[^\w\s.,?!:;()\[\]{}\-$\u20ac\u00a3\u00a5%&/+@#']", '', text)

    # Collapse all whitespace runs (including newlines) into single spaces
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

# Function to identify key sections in the pitch deck
def identify_sections(text):
    """Use Gemini to split the pitch deck text into its key sections.

    Args:
        text: Preprocessed pitch deck text. Only the first 15000 characters
            are sent, to stay within token limits.

    Returns:
        A dict with exactly the keys "Problem", "Solution", "Market",
        "Business_Model", "Financials" and "Team". Each value is the text the
        model extracted for that section, or "Not found" when the section is
        missing or the model response could not be parsed.
    """
    section_names = ("Problem", "Solution", "Market", "Business_Model", "Financials", "Team")

    model = genai.GenerativeModel('gemini-1.5-flash')

    # Truncate outside the f-string: a "#" comment placed inside the template
    # is not a comment, it becomes part of the prompt sent to the model.
    deck_excerpt = (text or "")[:15000]

    prompt = f"""
    Analyze the following pitch deck text and identify the key sections: Problem, Solution, Market, Business Model, Financials, and Team.
    For each section, extract the relevant text. If a section is missing, indicate it as "Not found".

    Pitch Deck Text:
    {deck_excerpt}

    Return the results in JSON format:
    {{
        "Problem": "extracted text or 'Not found'",
        "Solution": "extracted text or 'Not found'",
        "Market": "extracted text or 'Not found'",
        "Business_Model": "extracted text or 'Not found'",
        "Financials": "extracted text or 'Not found'",
        "Team": "extracted text or 'Not found'"
    }}
    """

    response = model.generate_content(prompt)
    try:
        # Extract the JSON object from the response (it may be wrapped in
        # prose or a Markdown code fence)
        match = re.search(r'{.*}', response.text, re.DOTALL)
        if match is None:
            raise ValueError("no JSON object found in the model response")
        parsed = json.loads(match.group(0))
        if not isinstance(parsed, dict):
            raise ValueError("the model response is not a JSON object")
    except Exception as e:
        print(f"Error parsing response: {e}")
        # Reading the response text may itself be what failed, so do not let
        # the diagnostic print raise a second time.
        try:
            print(f"Response: {response.text}")
        except Exception:
            print("Response: <unavailable>")
        return {name: "Not found" for name in section_names}

    # Normalise to the expected schema: downstream code indexes by these exact
    # keys and slices the values as strings.
    sections = {}
    for name in section_names:
        value = parsed.get(name)
        if value is None:
            # Tolerate "Business Model" spelled with a space instead of "_"
            value = parsed.get(name.replace("_", " "))
        if value is None:
            value = ""
        elif not isinstance(value, str):
            value = json.dumps(value, ensure_ascii=False)
        value = value.strip()
        sections[name] = value if value else "Not found"
    return sections

# Function to evaluate each section
def evaluate_section(section, section_text):
    """Use Gemini to evaluate the quality of one pitch deck section.

    Args:
        section: Section name (e.g. "Problem", "Business_Model"); selects the
            evaluation criteria, with a generic set for unknown names.
        section_text: Text of the section, or "Not found" when it is missing.
            Only the first 5000 characters are sent to the model.

    Returns:
        A dict with "score" (number clamped to 0-100), "feedback" (str),
        "strengths" (list of str) and "weaknesses" (list of str). A missing
        section scores 0 without calling the model; an unparseable response
        yields the neutral fallback score 50.
    """
    # Short-circuit before creating the model: no API call is needed here.
    if section_text == "Not found":
        return {
            "score": 0,
            "feedback": f"The {section} section is missing from the pitch deck.",
            "strengths": [],
            "weaknesses": [f"Missing {section} section"]
        }

    model = genai.GenerativeModel('gemini-1.5-flash')

    # Section-specific evaluation criteria
    criteria_map = {
        "Problem": "clarity of problem statement, market pain points identified, evidence of problem existence",
        "Solution": "clarity of solution, uniqueness, feasibility, how it solves the stated problem",
        "Market": "market size (TAM, SAM, SOM), growth potential, target audience, competitive landscape",
        "Business_Model": "revenue streams, pricing strategy, go-to-market approach, customer acquisition",
        "Financials": "revenue projections, cost structure, fundraising needs, unit economics, break-even analysis",
        "Team": "team qualifications, relevant experience, roles coverage, advisors, previous successes"
    }

    # str() keeps the slice safe if the caller hands over a non-string value
    section_excerpt = str(section_text)[:5000]

    prompt = f"""
    Evaluate the quality of the {section} section from a startup pitch deck.

    Section Text:
    {section_excerpt}

    Criteria to evaluate:
    {criteria_map.get(section, "quality, completeness, clarity, and persuasiveness")}

    Please rate this section on a scale of 0-100 and provide specific feedback on strengths and weaknesses.

    Return the results in JSON format:
    {{
        "score": 0-100,
        "feedback": "detailed evaluation of the section",
        "strengths": ["strength1", "strength2", ...],
        "weaknesses": ["weakness1", "weakness2", ...]
    }}
    """

    response = model.generate_content(prompt)
    try:
        # Extract the JSON object from the response
        match = re.search(r'{.*}', response.text, re.DOTALL)
        if match is None:
            raise ValueError("no JSON object found in the model response")
        evaluation = json.loads(match.group(0))
        if not isinstance(evaluation, dict):
            raise ValueError("the model response is not a JSON object")
    except Exception as e:
        print(f"Error parsing response: {e}")
        return {
            "score": 50,  # Default middle score
            "feedback": "Unable to evaluate this section properly.",
            "strengths": [],
            "weaknesses": ["Evaluation failed"]
        }

    # The model's output is not schema-checked, so normalise it here: scoring
    # multiplies the score by a float, and the report iterates over the lists.
    try:
        score = float(evaluation.get("score"))
    except (TypeError, ValueError):
        score = 50.0
    if score != score:  # NaN compares unequal to itself
        score = 50.0
    score = max(0.0, min(100.0, score))
    evaluation["score"] = int(score) if score.is_integer() else score

    feedback = evaluation.get("feedback")
    if feedback is None:
        feedback = ""
    evaluation["feedback"] = feedback if isinstance(feedback, str) else str(feedback)

    for key in ("strengths", "weaknesses"):
        items = evaluation.get(key)
        if items is None:
            items = []
        elif not isinstance(items, (list, tuple)):
            items = [items]  # a single string instead of a list
        evaluation[key] = [str(item) for item in items]

    return evaluation

# Function to generate overall pitch score
def generate_pitch_score(sections_eval, weights=None):
    """Calculate the overall pitch score as a weighted sum of section scores.

    Args:
        sections_eval: Mapping of section name to an evaluation dict holding
            a "score" entry (0-100).
        weights: Optional mapping of section name to weight. Defaults to the
            built-in weights for the six standard sections, which sum to 1.

    Returns:
        The weighted score rounded to one decimal. Sections absent from
        ``sections_eval`` or carrying a non-numeric score contribute 0;
        sections without a weight are ignored.
    """
    if weights is None:
        # Default weights for each section (pass ``weights`` to adjust)
        weights = {
            "Problem": 0.15,
            "Solution": 0.20,
            "Market": 0.15,
            "Business_Model": 0.20,
            "Financials": 0.15,
            "Team": 0.15
        }

    weighted_score = 0.0
    # Iterate over the weights rather than the evaluations so that an
    # unexpected section name cannot raise KeyError.
    for section, weight in weights.items():
        evaluation = sections_eval.get(section)
        if not isinstance(evaluation, dict):
            continue
        try:
            score = float(evaluation.get("score", 0))
        except (TypeError, ValueError):
            score = 0.0
        weighted_score += score * weight

    return round(weighted_score, 1)

# Function to generate overall strengths and weaknesses
def generate_overall_feedback(sections_eval):
    """Use Gemini to summarise the section evaluations into overall feedback.

    Args:
        sections_eval: Mapping of section name to an evaluation dict with
            "score", "strengths" and "weaknesses" entries; missing entries are
            treated as 0 / empty lists.

    Returns:
        A dict with "overall_feedback" (str) and the lists "key_strengths",
        "key_weaknesses" and "improvement_suggestions". A fixed fallback dict
        is returned when the model response cannot be parsed.
    """
    def _as_list(value):
        # Coerce a model-provided value into a list of strings
        if value is None:
            return []
        if not isinstance(value, (list, tuple)):
            value = [value]
        return [str(item) for item in value]

    model = genai.GenerativeModel('gemini-1.5-flash')

    # Prepare input for the model; .get() keeps a partially filled evaluation
    # from raising KeyError.
    feedback_input = {section: {
        "score": eval_data.get("score", 0),
        "strengths": eval_data.get("strengths", []),
        "weaknesses": eval_data.get("weaknesses", [])
    } for section, eval_data in sections_eval.items()}

    prompt = f"""
    Based on the following section-by-section evaluation of a startup pitch deck,
    provide overall feedback highlighting the main strengths and weaknesses.

    Evaluation Data:
    {json.dumps(feedback_input, indent=2)}

    Return the results in JSON format:
    {{
        "overall_feedback": "comprehensive analysis of the pitch deck",
        "key_strengths": ["strength1", "strength2", "strength3"],
        "key_weaknesses": ["weakness1", "weakness2", "weakness3"],
        "improvement_suggestions": ["suggestion1", "suggestion2", "suggestion3"]
    }}
    """

    response = model.generate_content(prompt)
    try:
        # Extract the JSON object from the response
        match = re.search(r'{.*}', response.text, re.DOTALL)
        if match is None:
            raise ValueError("no JSON object found in the model response")
        overall_feedback = json.loads(match.group(0))
        if not isinstance(overall_feedback, dict):
            raise ValueError("the model response is not a JSON object")
    except Exception as e:
        print(f"Error parsing response: {e}")
        return {
            "overall_feedback": "Unable to generate comprehensive feedback.",
            "key_strengths": [],
            "key_weaknesses": ["Feedback generation failed"],
            "improvement_suggestions": ["Please review individual section evaluations"]
        }

    # Guarantee the keys the report generator indexes, whatever the model sent
    summary = overall_feedback.get("overall_feedback")
    if summary is None:
        summary = ""
    overall_feedback["overall_feedback"] = summary if isinstance(summary, str) else str(summary)
    for key in ("key_strengths", "key_weaknesses", "improvement_suggestions"):
        overall_feedback[key] = _as_list(overall_feedback.get(key))

    return overall_feedback

# Main function to analyze a pitch deck
def analyze_pitch_deck(pdf_path):
    """Run the full analysis pipeline on one pitch deck PDF.

    The steps are: extract text, preprocess it, identify the key sections,
    evaluate each section, compute the weighted score and generate overall
    feedback. Progress is printed along the way.

    Args:
        pdf_path: Path of the pitch deck PDF.

    Returns:
        A dict with "pitch_score", "sections_evaluation" (section name to
        evaluation dict) and "overall_feedback".
    """
    print(f"Analyzing pitch deck: {pdf_path}")

    # Step 1: Extract text from PDF
    print("Extracting text from PDF...")
    raw_text = extract_text_from_pdf(pdf_path)

    # Step 2: Preprocess text
    print("Preprocessing text...")
    processed_text = preprocess_text(raw_text)

    # Step 3: Identify sections
    print("Identifying key sections...")
    if processed_text:
        sections = identify_sections(processed_text)
    else:
        # Extraction failed or the PDF has no text layer. Asking the model to
        # find sections in an empty prompt wastes a call and invites invented
        # content, so mark every section as missing instead.
        print("Warning: no text could be extracted from the PDF; "
              "all sections will be reported as missing.")
        sections = {
            name: "Not found"
            for name in ("Problem", "Solution", "Market",
                         "Business_Model", "Financials", "Team")
        }

    # Step 4: Evaluate each section
    print("Evaluating sections...")
    sections_eval = {}
    for section, text in sections.items():
        print(f"  Evaluating {section} section...")
        sections_eval[section] = evaluate_section(section, text)

    # Step 5: Generate pitch score
    print("Generating overall pitch score...")
    pitch_score = generate_pitch_score(sections_eval)

    # Step 6: Generate overall feedback
    print("Generating overall feedback...")
    overall_feedback = generate_overall_feedback(sections_eval)

    # Step 7: Prepare results
    return {
        "pitch_score": pitch_score,
        "sections_evaluation": sections_eval,
        "overall_feedback": overall_feedback
    }

# Function to visualize the pitch score
def visualize_pitch_score(results, title="Pitch Deck Analysis", output_dir="output"):
    """Render the overall score gauge and the per-section bar chart to a PNG.

    Args:
        results: Analysis results holding "pitch_score" (0-100) and
            "sections_evaluation" (section name to a dict with a "score").
        title: Heading drawn above both panels.
        output_dir: Directory that receives "report.png"; created if missing.

    Returns:
        Path of the saved image.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    sections_eval = results["sections_evaluation"]

    # Create a figure with two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
    fig.suptitle(title, fontsize=24)

    # Plot 1: Overall pitch score gauge
    pitch_score = results["pitch_score"]

    # Create a gauge-like visualization
    ax1.set_xlim(-1, 1)
    ax1.set_ylim(-1, 1)
    # Without an equal aspect ratio the ring is stretched into an ellipse,
    # because the subplot itself is not square.
    ax1.set_aspect('equal')
    ax1.add_patch(plt.Circle((0, 0), 0.8, color='lightgray', fill=True))
    ax1.add_patch(plt.Circle((0, 0), 0.6, color='white', fill=True))

    # Calculate the angle based on the score (a full circle equals 100)
    score_angle = (pitch_score / 100) * 360

    # Determine color based on score
    if pitch_score >= 75:
        score_color = 'green'
    elif pitch_score >= 50:
        score_color = 'orange'
    else:
        score_color = 'red'

    # Add score arc
    arc_angles = np.linspace(0, score_angle * np.pi / 180, 100)
    arc_x = 0.7 * np.cos(arc_angles)
    arc_y = 0.7 * np.sin(arc_angles)
    ax1.plot(arc_x, arc_y, color=score_color, linewidth=15)

    # Add score text
    ax1.text(0, 0, f"{pitch_score}", fontsize=48, ha='center', va='center', fontweight='bold')
    ax1.text(0, -0.3, 'Overall Score', fontsize=18, ha='center', va='center')

    # Remove axes
    ax1.axis('off')
    ax1.set_title("Pitch Score", fontsize=20, pad=20)

    # Plot 2: Section-wise scores
    section_names = list(sections_eval.keys())
    scores = [sections_eval[section]["score"] for section in section_names]

    # Clean up section names for display
    section_names = [section.replace("_", " ") for section in section_names]

    # Create colors based on scores (same thresholds as the gauge)
    colors = ['green' if score >= 75 else 'orange' if score >= 50 else 'red' for score in scores]

    # Create horizontal bar chart
    bars = ax2.barh(section_names, scores, color=colors)
    ax2.set_xlim(0, 100)
    ax2.set_xlabel('Score', fontsize=14)
    ax2.set_title('Section Scores', fontsize=20)

    # Add score values at the end of each bar
    for bar, score in zip(bars, scores):
        ax2.text(bar.get_width() + 2, bar.get_y() + bar.get_height()/2,
                  f'{score}', va='center', fontsize=12)

    # Leave room at the top for the figure-level title
    fig.tight_layout(rect=(0, 0, 1, 0.95))

    # Save through the figure object rather than pyplot's "current figure"
    viz_path = os.path.join(output_dir, "report.png")
    fig.savefig(viz_path)
    plt.close(fig)

    print(f"Visualization saved to {viz_path}")

    return viz_path

# Function to create a detailed report
def create_report(results, pdf_path, output_dir="output"):
    """Write a Markdown report of the pitch analysis.

    Args:
        results: Analysis results holding "pitch_score", "sections_evaluation"
            and "overall_feedback". Missing feedback or evaluation entries are
            rendered as empty instead of raising KeyError.
        pdf_path: Path of the analysed PDF; only its file name is shown.
        output_dir: Directory that receives "report.md"; created if missing.

    Returns:
        Path of the saved report.
    """
    def _bullets(items):
        # Render a list (or a single value) as Markdown bullet lines
        if items is None:
            return []
        if not isinstance(items, (list, tuple)):
            items = [items]
        return [f"- {item}" for item in items]

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    sections_eval = results["sections_evaluation"]
    overall_feedback = results.get("overall_feedback") or {}
    pitch_score = results["pitch_score"]

    # Determine overall rating text
    if pitch_score >= 85:
        rating_text = "Excellent"
    elif pitch_score >= 70:
        rating_text = "Good"
    elif pitch_score >= 50:
        rating_text = "Average"
    else:
        rating_text = "Needs Improvement"

    # Collect the report line by line and join once at the end
    lines = [
        "# Pitch Deck Analysis Report",
        "",
        "## Executive Summary",
        "",
        f"**Pitch Deck:** {os.path.basename(pdf_path)}",
        # Blank line: consecutive lines would be merged into one paragraph
        # when the Markdown is rendered.
        "",
        f"**Overall Score:** {pitch_score}/100 ({rating_text})",
        "",
        f"{overall_feedback.get('overall_feedback', '')}",
        "",
        "## Key Strengths",
    ]
    lines += _bullets(overall_feedback.get("key_strengths"))

    lines += ["", "## Key Weaknesses"]
    lines += _bullets(overall_feedback.get("key_weaknesses"))

    lines += ["", "## Improvement Suggestions"]
    lines += _bullets(overall_feedback.get("improvement_suggestions"))

    lines += ["", "## Section-by-Section Analysis"]

    for section, eval_data in sections_eval.items():
        section_display = section.replace("_", " ")
        lines += [
            "",
            f"### {section_display} (Score: {eval_data.get('score', 0)}/100)",
            "",
            f"{eval_data.get('feedback', '')}",
            "",
            "**Strengths:**",
        ]
        lines += _bullets(eval_data.get("strengths"))
        lines += ["", "**Weaknesses:**"]
        lines += _bullets(eval_data.get("weaknesses"))

    report = "\n".join(lines) + "\n"

    # Save the report
    report_path = os.path.join(output_dir, "report.md")
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"Report saved to {report_path}")

    return report_path
# Main function
def main():
    """Interactively analyse one pitch deck and save its report and chart.

    Prompts for the PDF path and for the output folder (a blank answer falls
    back to "output"), runs the analysis, then writes the visualization and
    the Markdown report into that folder. Returns ``None``; an invalid PDF
    path prints an error and stops before any analysis.
    """
    # Get input file path from user; pasted paths often carry stray spaces
    # or surrounding quotes.
    pdf_path = input("Enter the path to the pitch deck PDF file: ").strip().strip('"\'')

    # A directory also "exists", so check specifically for a file
    if not os.path.isfile(pdf_path):
        print(f"Error: The file '{pdf_path}' does not exist or is not a file.")
        return

    # Get output directory from user
    output_dir = input("Enter the folder path where you want to save the report and visualization: ").strip().strip('"\'')
    if not output_dir:
        # os.makedirs("") raises, so use the same default as the helpers
        output_dir = "output"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # analyze_pitch_deck() announces the file being analysed itself
    print(f"\nResults will be saved in: {output_dir}")

    # Analyze the pitch deck
    results = analyze_pitch_deck(pdf_path)

    # Create visualization
    viz_path = visualize_pitch_score(results, output_dir=output_dir)

    # Create report
    report_path = create_report(results, pdf_path, output_dir=output_dir)

    print("\nAnalysis completed successfully!")
    print(f"Results saved in '{output_dir}' directory")
    print(f"- Report: {os.path.basename(report_path)}")
    print(f"- Visualization: {os.path.basename(viz_path)}")



# Running the analysis

**I am using NVCPitchDeckTemplate.pdf, a pitch deck I downloaded from Google. I am also attaching this file in case you want to run the notebook yourself.**

Using **NVCPitchDeckTemplate.pdf** and saving the report in the **pitch1** folder

In [3]:
# Run the main function: it prompts for the PDF path and the output folder,
# then saves the report and the visualization into that folder.
if __name__ == "__main__":
    main()


Enter the path to the pitch deck PDF file: /content/NVCPitchDeckTemplate.pdf
Enter the folder path where you want to save the report and visualization: pitch1

Analyzing pitch deck: /content/NVCPitchDeckTemplate.pdf
Results will be saved in: pitch1
Analyzing pitch deck: /content/NVCPitchDeckTemplate.pdf
Extracting text from PDF...
Preprocessing text...
Identifying key sections...
Evaluating sections...
  Evaluating Problem section...
  Evaluating Solution section...
  Evaluating Market section...
  Evaluating Business_Model section...
  Evaluating Financials section...
  Evaluating Team section...
Generating overall pitch score...
Generating overall feedback...
Visualization saved to pitch1/report.png
Report saved to pitch1/report.md

Analysis completed successfully!
Results saved in 'pitch1' directory
- Report: report.md
- Visualization: report.png


## Using **uber-pitch-deck.pdf** as a sample and storing the result in **pitch2**

In [4]:
# Run the main function: it prompts for the PDF path and the output folder,
# then saves the report and the visualization into that folder.
if __name__ == "__main__":
    main()

Enter the path to the pitch deck PDF file: /content/uber-pitch-deck.pdf
Enter the folder path where you want to save the report and visualization: pitch2

Analyzing pitch deck: /content/uber-pitch-deck.pdf
Results will be saved in: pitch2
Analyzing pitch deck: /content/uber-pitch-deck.pdf
Extracting text from PDF...
Preprocessing text...
Identifying key sections...
Evaluating sections...
  Evaluating Problem section...
  Evaluating Solution section...
  Evaluating Market section...
  Evaluating Business_Model section...
  Evaluating Financials section...
  Evaluating Team section...
Generating overall pitch score...
Generating overall feedback...
Visualization saved to pitch2/report.png
Report saved to pitch2/report.md

Analysis completed successfully!
Results saved in 'pitch2' directory
- Report: report.md
- Visualization: report.png


# Using **VisionAI.pdf** and storing the result in **pitch3**

In [5]:
# Run the main function: it prompts for the PDF path and the output folder,
# then saves the report and the visualization into that folder.
if __name__ == "__main__":
    main()

Enter the path to the pitch deck PDF file: /content/VisionAI.pdf
Enter the folder path where you want to save the report and visualization: pitch3

Analyzing pitch deck: /content/VisionAI.pdf
Results will be saved in: pitch3
Analyzing pitch deck: /content/VisionAI.pdf
Extracting text from PDF...
Preprocessing text...
Identifying key sections...
Evaluating sections...
  Evaluating Problem section...
  Evaluating Solution section...
  Evaluating Market section...
  Evaluating Business_Model section...
  Evaluating Financials section...
  Evaluating Team section...
Generating overall pitch score...
Generating overall feedback...
Visualization saved to pitch3/report.png
Report saved to pitch3/report.md

Analysis completed successfully!
Results saved in 'pitch3' directory
- Report: report.md
- Visualization: report.png


# Using **health pitch.pdf** and storing the result in **pitch4**

In [6]:
# Run the main function: it prompts for the PDF path and the output folder,
# then saves the report and the visualization into that folder.
if __name__ == "__main__":
    main()

Enter the path to the pitch deck PDF file: /content/health pitch.pdf
Enter the folder path where you want to save the report and visualization: pitch4

Analyzing pitch deck: /content/health pitch.pdf
Results will be saved in: pitch4
Analyzing pitch deck: /content/health pitch.pdf
Extracting text from PDF...
Preprocessing text...
Identifying key sections...
Evaluating sections...
  Evaluating Problem section...
  Evaluating Solution section...
  Evaluating Market section...
  Evaluating Business_Model section...
  Evaluating Financials section...
  Evaluating Team section...
Generating overall pitch score...
Generating overall feedback...
Visualization saved to pitch4/report.png
Report saved to pitch4/report.md

Analysis completed successfully!
Results saved in 'pitch4' directory
- Report: report.md
- Visualization: report.png


# Using **ubercab.pdf** and storing the result in **pitch5**

In [7]:
# Run the main function: it prompts for the PDF path and the output folder,
# then saves the report and the visualization into that folder.
if __name__ == "__main__":
    main()

Enter the path to the pitch deck PDF file: /content/ubercab.pdf
Enter the folder path where you want to save the report and visualization: pitch5

Analyzing pitch deck: /content/ubercab.pdf
Results will be saved in: pitch5
Analyzing pitch deck: /content/ubercab.pdf
Extracting text from PDF...
Preprocessing text...
Identifying key sections...
Evaluating sections...
  Evaluating Problem section...
  Evaluating Solution section...
  Evaluating Market section...
  Evaluating Business_Model section...
  Evaluating Financials section...
  Evaluating Team section...
Generating overall pitch score...
Generating overall feedback...
Visualization saved to pitch5/report.png
Report saved to pitch5/report.md

Analysis completed successfully!
Results saved in 'pitch5' directory
- Report: report.md
- Visualization: report.png
