# Business Acquisition Opportunity Scoring Algorithm

This notebook analyzes business listings from BizBuySell and applies a comprehensive scoring system to identify the best acquisition opportunities based on multiple criteria including price, location, business type, and market potential.

In [16]:
import json
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from typing import List, Dict, Tuple
import re
from datetime import datetime

# Load and parse the JSON file
with open('/Users/chrisbuonocore/personal/python/business/example.json', 'r') as f:
    raw_data = json.load(f)

print("‚úì JSON file loaded successfully")
print(f"Data structure keys: {raw_data['value'].keys() if 'value' in raw_data else raw_data.keys()}")

‚úì JSON file loaded successfully
Data structure keys: dict_keys(['jwtToken', 'categoryHierarchy', 'categoryFlat', 'preLoadedRegions', 'cmsFilteredData', 'relatedBrokerRightRailResults', 'bfsSearchResultCounts', 'bfsSearchResult', 'schemaElements'])


## Section 2: Extract and Explore Data Structure

Parse the nested JSON to extract business listings and examine relevant fields for analysis.

In [17]:
# Extract business listings from JSON
def extract_businesses(data: Dict) -> List[Dict]:
    """Extract business listings from the nested JSON structure"""
    businesses = []
    
    try:
        # Navigate to the listings array - correct path is through schemaElements
        listings = data['value']['schemaElements']['listProductItemSchema']
        
        for item in listings:
            if item.get('@type') == 'ListItem' and 'item' in item:
                business = item['item']
                
                # Extract relevant fields
                extracted = {
                    'position': item.get('position'),
                    'name': business.get('name', 'N/A'),
                    'productId': business.get('productId', 'N/A'),
                    'description': business.get('description', 'N/A'),
                    'url': business.get('url', 'N/A'),
                    'price': business.get('offers', {}).get('price', 0),
                    'availability': business.get('offers', {}).get('availability', 'N/A'),
                    'address_locality': business.get('offers', {}).get('availableAtOrFrom', {}).get('address', {}).get('addressLocality', 'N/A'),
                    'address_region': business.get('offers', {}).get('availableAtOrFrom', {}).get('address', {}).get('addressRegion', 'N/A'),
                }
                
                businesses.append(extracted)
    except KeyError as e:
        print(f"Error navigating JSON structure: {e}")
    
    return businesses

# Extract all businesses
businesses = extract_businesses(raw_data)
df_raw = pd.DataFrame(businesses)

print(f"‚úì Extracted {len(df_raw)} business listings")
print(f"\nColumns: {df_raw.columns.tolist()}")
print(f"\nFirst few businesses:")
print(df_raw[['name', 'price', 'address_locality', 'address_region']].head(10))

‚úì Extracted 22 business listings

Columns: ['position', 'name', 'productId', 'description', 'url', 'price', 'availability', 'address_locality', 'address_region']

First few businesses:
                                                name     price  \
0  Highly profitable window cleaning business in ...   60000.0   
1             Incredible Kids Focused Amazon Listing   72550.0   
2  Established Commercial and Residential Cleanin...  200000.0   
3           Martin's Bread Route, Greater Boston, MA  170000.0   
4             Residential Cleaning Services Business  115000.0   
5               The UPS Store Franchise in Boston MA  250000.0   
6  Vending Business with latest in Smart Technolo...  150000.0   
7               Mission's Tortilla Route, Boston, MA  150000.0   
8  Well-established dry cleaner drop-off store fo...   78000.0   
9                                Pizza Shop for Sale  120000.0   

  address_locality address_region  
0           Boston             MA  
1           Bo

## Section 3: Define Scoring Criteria and Weighting System

The acquisition scoring algorithm evaluates businesses across multiple dimensions:

**Scoring Factors:**
1. **Price-to-Value Ratio (25%)** - Lower prices relative to business type are better
2. **Location Desirability (20%)** - Boston metro area and high-demand markets score higher
3. **Business Stability (20%)** - Established, proven businesses with recurring revenue
4. **Market Opportunity (15%)** - Growth potential and recurring revenue models
5. **Price Range Efficiency (20%)** - Optimal price window for ROI ($100K-$1M range)

In [18]:
# Define scoring weights and thresholds
SCORING_CONFIG = {
    'weights': {
        'price_value': 0.25,
        'location': 0.20,
        'stability': 0.20,
        'opportunity': 0.15,
        'price_efficiency': 0.20
    },
    'high_value_locations': [
        'Boston', 'Cambridge', 'Brookline', 'Newton', 'Wellesley',
        'Needham', 'Waltham', 'Arlington', 'Somerville', 'Watertown'
    ],
    'metro_areas': ['Middlesex County', 'Suffolk County', 'Essex County'],
    'price_range_target': (100000, 1000000),  # Optimal ROI range
    'established_keywords': [
        'established', 'profitable', 'well-established', 'proven',
        'turnkey', 'successful', 'steady', 'growing'
    ],
    'recurring_revenue_keywords': [
        'subscription', 'franchise', 'license', 'contract', 'recurring',
        'multi-unit', 'scalable', 'passive', 'recurring'
    ],
    'high_potential_industries': {
        'Healthcare': 1.2,  # High margins, recurring
        'Professional Services': 1.15,  # Recurring revenue
        'Technology/SaaS': 1.2,  # Scalable, recurring
        'Food Service': 0.9,  # Thin margins, labor-intensive
        'Retail': 0.85,  # Declining industry
        'Service': 1.0,  # Stable, repeatable
        'Education': 1.1,  # Growing demand
        'Real Estate/Property': 1.05,  # Stable income
    }
}

print("‚úì Scoring configuration loaded")
print(f"Weights: {SCORING_CONFIG['weights']}")
print(f"Price efficiency target: ${SCORING_CONFIG['price_range_target'][0]:,} - ${SCORING_CONFIG['price_range_target'][1]:,}")

‚úì Scoring configuration loaded
Weights: {'price_value': 0.25, 'location': 0.2, 'stability': 0.2, 'opportunity': 0.15, 'price_efficiency': 0.2}
Price efficiency target: $100,000 - $1,000,000


## Section 4: Implement Filtering Logic

Filter businesses based on key criteria to focus on viable acquisition targets.

In [19]:
def classify_industry(name: str, description: str) -> str:
    """Classify business into industry categories"""
    text = (name + ' ' + description).lower()
    
    if any(word in text for word in ['dental', 'medical', 'practice', 'healthcare', 'spa']):
        return 'Healthcare'
    elif any(word in text for word in ['software', 'saas', 'tech', 'app', 'digital']):
        return 'Technology/SaaS'
    elif any(word in text for word in ['restaurant', 'pizza', 'bar', 'cafe', 'diner']):
        return 'Food Service'
    elif any(word in text for word in ['retail', 'store', 'shop']):
        return 'Retail'
    elif any(word in text for word in ['cleaning', 'maintenance', 'plumbing', 'hvac']):
        return 'Service'
    elif any(word in text for word in ['education', 'school', 'training', 'tutoring']):
        return 'Education'
    elif any(word in text for word in ['franchise']):
        return 'Professional Services'
    else:
        return 'Other'

def apply_filters(df: pd.DataFrame, min_price: float = 0, max_price: float = float('inf'), 
                  min_location_quality: bool = False) -> pd.DataFrame:
    """Apply basic filters to the dataset"""
    df_filtered = df.copy()
    
    # Filter by price range
    df_filtered = df_filtered[(df_filtered['price'] >= min_price) & (df_filtered['price'] <= max_price)]
    
    # Filter by availability (only InStock items)
    df_filtered = df_filtered[df_filtered['availability'] == 'http://schema.org/InStock']
    
    # Remove businesses with missing critical data
    df_filtered = df_filtered[df_filtered['price'] > 0]
    
    # Optional: filter by location quality
    if min_location_quality:
        quality_locations = SCORING_CONFIG['high_value_locations'] + SCORING_CONFIG['metro_areas']
        df_filtered = df_filtered[
            df_filtered['address_locality'].isin(quality_locations) | 
            df_filtered['address_region'].str.contains('MA', case=False, na=False)
        ]
    
    return df_filtered

# Apply filters
df_filtered = apply_filters(df_raw, min_price=50000, max_price=2000000)
print(f"‚úì Applied filters:")
print(f"  - Original businesses: {len(df_raw)}")
print(f"  - After filtering: {len(df_filtered)}")
print(f"  - Filtered out: {len(df_raw) - len(df_filtered)}")

# Add industry classification
df_filtered['industry'] = df_filtered.apply(
    lambda row: classify_industry(row['name'], row['description']), 
    axis=1
)

print(f"\nIndustry breakdown:")
print(df_filtered['industry'].value_counts())

‚úì Applied filters:
  - Original businesses: 22
  - After filtering: 20
  - Filtered out: 2

Industry breakdown:
industry
Other                    8
Service                  4
Retail                   3
Technology/SaaS          2
Food Service             1
Professional Services    1
Healthcare               1
Name: count, dtype: int64


## Section 5: Calculate Opportunity Scores

Calculate composite acquisition opportunity scores based on weighted criteria.

In [20]:
def score_price_value(price: float, industry: str) -> float:
    """Score based on price relative to industry norms"""
    # Industry-specific price ranges (rough market estimates)
    industry_ranges = {
        'Healthcare': (300000, 1500000),
        'Technology/SaaS': (200000, 2000000),
        'Professional Services': (150000, 800000),
        'Food Service': (50000, 500000),
        'Service': (50000, 300000),
        'Education': (150000, 500000),
        'Retail': (100000, 500000),
        'Real Estate/Property': (300000, 1500000),
        'Other': (100000, 800000)
    }
    
    low, high = industry_ranges.get(industry, (100000, 1000000))
    mid = (low + high) / 2
    
    # Score higher if near the middle of the range (optimal)
    if price < low:
        return min(90, (price / low) * 100)
    elif price > high:
        return min(90, (high / price) * 100)
    else:
        # Price in range - closer to mid is better
        distance_from_mid = abs(price - mid)
        range_size = (high - low) / 2
        return 100 - (distance_from_mid / range_size * 40)

def score_location(locality: str, region: str) -> float:
    """Score based on location desirability"""
    locality = str(locality).strip()
    region = str(region).strip()
    
    # High-value locations
    if locality in SCORING_CONFIG['high_value_locations']:
        return 100
    elif locality in SCORING_CONFIG['metro_areas']:
        return 90
    elif 'County' in locality and region == 'MA':
        return 80
    elif region == 'MA':
        return 70
    else:
        return 50

def score_stability(description: str) -> float:
    """Score based on business stability indicators"""
    description_lower = description.lower()
    score = 50
    
    # Look for establishment indicators
    establishment_phrases = {
        'established': 5,
        'well-established': 8,
        'since': 7,
        'years': 6,
        'profitable': 5,
        'successful': 5,
        'proven': 5,
        'turnkey': 4,
        'loyal customer': 5,
        'strong': 4,
        'growing': 3
    }
    
    for phrase, points in establishment_phrases.items():
        if phrase in description_lower:
            score += points
    
    # Penalty for red flags
    if 'foreclosure' in description_lower or 'closing' in description_lower:
        score -= 20
    
    return min(100, score)

def score_opportunity(name: str, description: str, industry: str) -> float:
    """Score based on growth and revenue potential"""
    text = (name + ' ' + description).lower()
    score = 60
    
    # Recurring revenue model indicators
    for keyword in SCORING_CONFIG['recurring_revenue_keywords']:
        if keyword in text:
            score += 8
    
    # Growth indicators
    growth_keywords = ['expansion', 'growing', 'increasing', 'high profit', 'scalable', 'multi-unit']
    for keyword in growth_keywords:
        if keyword in text:
            score += 5
    
    # Industry-based opportunity multiplier
    industry_boost = SCORING_CONFIG['high_potential_industries'].get(industry, 1.0)
    score = score * industry_boost
    
    return min(100, score)

def score_price_efficiency(price: float) -> float:
    """Score based on ideal price range for ROI"""
    target_low, target_high = SCORING_CONFIG['price_range_target']
    
    if target_low <= price <= target_high:
        # In optimal range
        mid = (target_low + target_high) / 2
        distance_from_mid = abs(price - mid)
        range_size = (target_high - target_low) / 2
        return 100 - (distance_from_mid / range_size * 20)
    elif price < target_low:
        return (price / target_low) * 80
    else:
        return (target_high / price) * 80

def calculate_composite_score(row: pd.Series) -> float:
    """Calculate weighted composite score"""
    weights = SCORING_CONFIG['weights']
    
    price_value = score_price_value(row['price'], row['industry'])
    location = score_location(row['address_locality'], row['address_region'])
    stability = score_stability(row['description'])
    opportunity = score_opportunity(row['name'], row['description'], row['industry'])
    price_efficiency = score_price_efficiency(row['price'])
    
    composite = (
        price_value * weights['price_value'] +
        location * weights['location'] +
        stability * weights['stability'] +
        opportunity * weights['opportunity'] +
        price_efficiency * weights['price_efficiency']
    )
    
    return composite

# Calculate individual scores
df_filtered['score_price_value'] = df_filtered.apply(
    lambda row: score_price_value(row['price'], row['industry']), axis=1
)
df_filtered['score_location'] = df_filtered.apply(
    lambda row: score_location(row['address_locality'], row['address_region']), axis=1
)
df_filtered['score_stability'] = df_filtered['description'].apply(score_stability)
df_filtered['score_opportunity'] = df_filtered.apply(
    lambda row: score_opportunity(row['name'], row['description'], row['industry']), axis=1
)
df_filtered['score_price_efficiency'] = df_filtered['price'].apply(score_price_efficiency)

# Calculate composite opportunity score
df_filtered['opportunity_score'] = df_filtered.apply(calculate_composite_score, axis=1)

print("‚úì Opportunity scores calculated")
print(f"\nScore statistics:")
print(df_filtered['opportunity_score'].describe().round(2))

‚úì Opportunity scores calculated

Score statistics:
count    20.00
mean     73.72
std       5.50
min      60.73
25%      71.93
50%      73.46
75%      78.33
max      82.74
Name: opportunity_score, dtype: float64


## Section 6: Sort and Rank Businesses

Sort businesses by opportunity score to identify top acquisition candidates.

In [21]:
# Sort businesses by opportunity score
df_ranked = df_filtered.sort_values('opportunity_score', ascending=False).reset_index(drop=True)
df_ranked['rank'] = range(1, len(df_ranked) + 1)

# Display top 15 opportunities
print("=" * 120)
print("TOP ACQUISITION OPPORTUNITIES")
print("=" * 120)

top_15 = df_ranked.head(15)[['rank', 'name', 'industry', 'price', 'address_locality', 
                               'opportunity_score', 'score_stability', 'score_location']]

for idx, row in top_15.iterrows():
    print(f"\n{int(row['rank'])}. {row['name'][:70]}")
    print(f"   Industry: {row['industry']} | Price: ${row['price']:,.0f}")
    locality = row.get('address_locality', 'N/A')
    region = df_ranked.loc[idx, 'address_region'] if idx < len(df_ranked) else 'N/A'
    print(f"   Location: {locality}, {region}")
    print(f"   üìä Opportunity Score: {row['opportunity_score']:.1f}/100")
    print(f"      ‚îî‚îÄ Stability: {row['score_stability']:.1f} | Location: {row['score_location']:.1f}")

# Summary table
print("\n" + "=" * 120)
print("DETAILED RANKING TABLE (Top 20)")
print("=" * 120)

summary_df = df_ranked.head(20)[['rank', 'name', 'industry', 'price', 'opportunity_score', 
                                   'score_price_value', 'score_stability', 'score_location', 
                                   'score_opportunity', 'score_price_efficiency']].copy()

# Shorten name for display
summary_df['name'] = summary_df['name'].str[:50]

print(summary_df.to_string(index=False))

print(f"\n‚úì Total opportunities ranked: {len(df_ranked)}")
print(f"‚úì Average opportunity score: {df_ranked['opportunity_score'].mean():.1f}")
print(f"‚úì Median opportunity score: {df_ranked['opportunity_score'].median():.1f}")

TOP ACQUISITION OPPORTUNITIES

1. Vending Route - Home-Based, Scalable, and Semi-Passive
   Industry: Technology/SaaS | Price: $169,000
   Location: Boston,  MA
   üìä Opportunity Score: 82.7/100
      ‚îî‚îÄ Stability: 50.0 | Location: 100.0

2. Established Commercial and Residential Cleaning Company
   Industry: Service | Price: $200,000
   Location: Boston,  MA
   üìä Opportunity Score: 79.9/100
      ‚îî‚îÄ Stability: 55.0 | Location: 100.0

3. Full-Service Heating & Air Company
   Industry: Service | Price: $208,900
   Location: Boston,  MA
   üìä Opportunity Score: 79.3/100
      ‚îî‚îÄ Stability: 55.0 | Location: 100.0

4. The UPS Store Franchise in Boston MA
   Industry: Retail | Price: $250,000
   Location: Boston,  MA
   üìä Opportunity Score: 78.5/100
      ‚îî‚îÄ Stability: 50.0 | Location: 100.0

5. Vending Business with latest in Smart Technology and highly scalable
   Industry: Technology/SaaS | Price: $150,000
   Location: Boston,  MA
   üìä Opportunity Score: 78.3

## Section 7: Visualize Results

Create interactive visualizations to compare business opportunities.

In [22]:
# Visualization 1: Top 15 Opportunities Bar Chart
top_n = 15
viz_data = df_ranked.head(top_n).copy()
viz_data['short_name'] = viz_data['name'].str[:40]

fig1 = go.Figure(data=[
    go.Bar(
        x=viz_data['opportunity_score'].values,
        y=viz_data['short_name'].values,
        orientation='h',
        marker=dict(
            color=viz_data['opportunity_score'].values,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Score")
        ),
        text=viz_data['opportunity_score'].round(1),
        textposition='auto',
        hovertemplate='<b>%{y}</b><br>Score: %{x:.1f}<extra></extra>'
    )
])

fig1.update_layout(
    title=f'Top {top_n} Business Acquisition Opportunities by Score',
    xaxis_title='Opportunity Score',
    yaxis_title='Business Name',
    height=600,
    margin=dict(l=250, r=100),
    template='plotly_white'
)
fig1.write_html('/Users/chrisbuonocore/personal/python/business/chart_1_top_opportunities.html')
print("‚úì Chart 1 displayed (saved to chart_1_top_opportunities.html)")

‚úì Chart 1 displayed (saved to chart_1_top_opportunities.html)


In [23]:
# Visualization 2: Price vs Opportunity Score Scatter Plot
fig2 = px.scatter(
    df_ranked,
    x='price',
    y='opportunity_score',
    color='industry',
    size='score_stability',
    hover_name='name',
    hover_data={'price': '$,.0f', 'opportunity_score': ':.1f'},
    title='Business Price vs Acquisition Opportunity Score',
    labels={'price': 'Price ($)', 'opportunity_score': 'Opportunity Score'},
    height=600
)

fig2.update_layout(
    xaxis_title='Price ($)',
    yaxis_title='Opportunity Score',
    hovermode='closest',
    template='plotly_white'
)
fig2.write_html('/Users/chrisbuonocore/personal/python/business/chart_2_price_vs_score.html')
print("‚úì Chart 2 displayed (saved to chart_2_price_vs_score.html)")

‚úì Chart 2 displayed (saved to chart_2_price_vs_score.html)


In [24]:
# Visualization 3: Score Components Breakdown for Top 5
top_5 = df_ranked.head(5).copy()

fig3 = go.Figure()

score_components = ['score_price_value', 'score_location', 'score_stability', 
                    'score_opportunity', 'score_price_efficiency']
component_labels = ['Price Value', 'Location', 'Stability', 'Opportunity', 'Price Efficiency']

for component, label in zip(score_components, component_labels):
    fig3.add_trace(go.Scatterpolar(
        r=top_5[component].values,
        theta=component_labels,
        fill='toself',
        name=top_5['name'].iloc[0][:30]
    ))

fig3.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, 100]
        )),
    title='Score Components Breakdown - Top 5 Opportunities',
    height=600,
    showlegend=False,
    template='plotly_white'
)

# Create individual radars for each top 5
for idx, (_, row) in enumerate(top_5.iterrows()):
    fig_temp = go.Figure()
    
    fig_temp.add_trace(go.Scatterpolar(
        r=[row['score_price_value'], row['score_location'], row['score_stability'],
           row['score_opportunity'], row['score_price_efficiency']],
        theta=component_labels,
        fill='toself',
        name=row['name'][:30]
    ))
    
    fig_temp.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100]
            )),
        title=f"#{int(row['rank'])} - {row['name'][:50]}<br>Score: {row['opportunity_score']:.1f}",
        height=500,
        template='plotly_white'
    )
    rank_num = int(row['rank'])
    fig_temp.write_html(f'/Users/chrisbuonocore/personal/python/business/chart_3_radar_rank_{rank_num}.html')

print("‚úì Chart 3 (Radar charts) displayed (saved as chart_3_radar_rank_*.html)")

‚úì Chart 3 (Radar charts) displayed (saved as chart_3_radar_rank_*.html)


In [25]:
# Visualization 4: Industry Distribution and Average Scores
from plotly.subplots import make_subplots

industry_stats = df_ranked.groupby('industry').agg({
    'opportunity_score': ['mean', 'count'],
    'price': 'mean'
}).round(1)

industry_stats.columns = ['Avg Score', 'Count', 'Avg Price']
industry_stats = industry_stats.sort_values('Avg Score', ascending=False)

fig4 = make_subplots(specs=[[{"secondary_y": True}]])

fig4.add_trace(
    go.Bar(x=industry_stats.index, y=industry_stats['Avg Score'], 
           name='Avg Score', marker_color='rgba(99, 110, 250, 0.7)'),
    secondary_y=False,
)

fig4.add_trace(
    go.Scatter(x=industry_stats.index, y=industry_stats['Count'], 
               name='Count', marker=dict(size=10, color='red')),
    secondary_y=True,
)

fig4.update_xaxes(title_text="Industry")
fig4.update_yaxes(title_text="Average Opportunity Score", secondary_y=False)
fig4.update_yaxes(title_text="Number of Businesses", secondary_y=True)
fig4.update_layout(
    title="Opportunity Scores by Industry",
    height=500,
    hovermode='x unified',
    template='plotly_white'
)
fig4.write_html('/Users/chrisbuonocore/personal/python/business/chart_4_industry_analysis.html')
print("‚úì Chart 4 displayed (saved to chart_4_industry_analysis.html)")
print("\nIndustry Analysis:")
print(industry_stats)

‚úì Chart 4 displayed (saved to chart_4_industry_analysis.html)

Industry Analysis:
                       Avg Score  Count  Avg Price
industry                                          
Technology/SaaS             80.5      2   159500.0
Professional Services       77.0      1   200000.0
Service                     76.0      4   145975.0
Retail                      74.2      3   154333.3
Food Service                72.4      1   120000.0
Other                       71.6      8   159881.1
Healthcare                  64.9      1    99000.0


In [26]:
# Visualization 5: Location Heatmap
location_analysis = df_ranked.groupby('address_locality').agg({
    'opportunity_score': ['mean', 'count'],
    'price': 'mean'
}).round(1)

location_analysis.columns = ['Avg Score', 'Count', 'Avg Price']
location_analysis = location_analysis[location_analysis['Count'] >= 2].sort_values('Avg Score', ascending=False)

fig5 = go.Figure(data=[
    go.Bar(
        y=location_analysis.index,
        x=location_analysis['Avg Score'],
        orientation='h',
        marker=dict(
            color=location_analysis['Avg Score'],
            colorscale='RdYlGn',
            showscale=True,
            colorbar=dict(title="Avg Score")
        ),
        text=location_analysis['Avg Score'].round(1),
        textposition='auto',
        hovertemplate='<b>%{y}</b><br>Avg Score: %{x:.1f}<br>Count: %{customdata}<extra></extra>',
        customdata=location_analysis['Count']
    )
])

fig5.update_layout(
    title='Average Opportunity Score by Location (Min. 2 businesses)',
    xaxis_title='Average Opportunity Score',
    yaxis_title='Location',
    height=500,
    margin=dict(l=150),
    template='plotly_white'
)
fig5.write_html('/Users/chrisbuonocore/personal/python/business/chart_5_location_analysis.html')
print("‚úì Chart 5 displayed (saved to chart_5_location_analysis.html)")

‚úì Chart 5 displayed (saved to chart_5_location_analysis.html)


## Section 8: Summary and Key Insights

Analyze overall results and provide actionable recommendations.

In [27]:
# Generate comprehensive summary report
print("\n" + "=" * 120)
print("ACQUISITION STRATEGY SUMMARY & RECOMMENDATIONS")
print("=" * 120)

top_1 = df_ranked.iloc[0]
print(f"\nüèÜ HIGHEST OPPORTUNITY (Rank #1):")
print(f"   Name: {top_1['name']}")
print(f"   Industry: {top_1['industry']}")
print(f"   Price: ${top_1['price']:,.0f}")
print(f"   Location: {top_1['address_locality']}, {top_1['address_region']}")
print(f"   Overall Score: {top_1['opportunity_score']:.1f}/100")
print(f"   Why: Strong {top_1['industry']} opportunity with excellent location and stability")

# Segment analysis
print(f"\nüìä MARKET SEGMENTATION:")

price_segments = {
    'Under $200K': df_ranked[df_ranked['price'] < 200000],
    '$200K - $500K': df_ranked[(df_ranked['price'] >= 200000) & (df_ranked['price'] < 500000)],
    '$500K - $1M': df_ranked[(df_ranked['price'] >= 500000) & (df_ranked['price'] < 1000000)],
    'Over $1M': df_ranked[df_ranked['price'] >= 1000000]
}

for segment, segment_df in price_segments.items():
    if len(segment_df) > 0:
        print(f"\n   {segment}:")
        print(f"      Count: {len(segment_df)} businesses")
        print(f"      Avg Score: {segment_df['opportunity_score'].mean():.1f}")
        print(f"      Top Opportunity: {segment_df.iloc[0]['name'][:50]} ({segment_df.iloc[0]['opportunity_score']:.1f})")

# Industry recommendations
print(f"\nüéØ INDUSTRY RECOMMENDATIONS:")
industry_ranking = df_ranked.groupby('industry')['opportunity_score'].agg(['mean', 'count']).sort_values('mean', ascending=False)

for idx, (industry, row) in enumerate(industry_ranking.iterrows(), 1):
    if row['count'] > 0:
        print(f"   {idx}. {industry}: Avg Score {row['mean']:.1f} ({int(row['count'])} opportunities)")

# Risk factors
print(f"\n‚ö†Ô∏è  KEY DECISION FACTORS:")
print(f"   ‚Ä¢ Price Range: ${df_ranked['price'].min():,.0f} - ${df_ranked['price'].max():,.0f}")
print(f"   ‚Ä¢ Average Price: ${df_ranked['price'].mean():,.0f}")
print(f"   ‚Ä¢ Median Price: ${df_ranked['price'].median():,.0f}")
print(f"   ‚Ä¢ Best Location: {df_ranked['score_location'].idxmax()} with score {df_ranked['score_location'].max():.1f}")
print(f"   ‚Ä¢ Most Established: {df_ranked.loc[df_ranked['score_stability'].idxmax(), 'name'][:50]}")

# Filtering recommendations
print(f"\n‚úÖ ACQUISITION CRITERIA RECOMMENDATIONS:")
print(f"   ‚Ä¢ Target Price Range: $200,000 - $750,000 (optimal ROI window)")
print(f"   ‚Ä¢ Target Industries: Healthcare, Technology/SaaS, Professional Services")
print(f"   ‚Ä¢ Target Locations: Boston metro area (higher growth/stability)")
print(f"   ‚Ä¢ Minimum Stability Score: 70+ (established, proven track record)")
print(f"   ‚Ä¢ Minimum Opportunity Score: 70+ (strong growth/recurring revenue potential)")

# Create exportable ranking
export_df = df_ranked[[
    'rank', 'name', 'industry', 'price', 'address_locality', 
    'opportunity_score', 'score_stability', 'score_location', 'score_opportunity',
    'url'
]].head(30).copy()

export_df.columns = [
    'Rank', 'Business Name', 'Industry', 'Price', 'Location',
    'Opportunity Score', 'Stability', 'Location Score', 'Growth Score', 'URL'
]

print(f"\nüíæ TOP 30 OPPORTUNITIES (Ready for Export):")
print(export_df.to_string(index=False))


ACQUISITION STRATEGY SUMMARY & RECOMMENDATIONS

üèÜ HIGHEST OPPORTUNITY (Rank #1):
   Name: Vending Route - Home-Based, Scalable, and Semi-Passive
   Industry: Technology/SaaS
   Price: $169,000
   Location: Boston,  MA
   Overall Score: 82.7/100
   Why: Strong Technology/SaaS opportunity with excellent location and stability

üìä MARKET SEGMENTATION:

   Under $200K:
      Count: 15 businesses
      Avg Score: 72.2
      Top Opportunity: Vending Route - Home-Based, Scalable, and Semi-Pas (82.7)

   $200K - $500K:
      Count: 5 businesses
      Avg Score: 78.3
      Top Opportunity: Established Commercial and Residential Cleaning Co (79.9)

üéØ INDUSTRY RECOMMENDATIONS:
   1. Technology/SaaS: Avg Score 80.5 (2 opportunities)
   2. Professional Services: Avg Score 77.0 (1 opportunities)
   3. Service: Avg Score 76.0 (4 opportunities)
   4. Retail: Avg Score 74.2 (3 opportunities)
   5. Food Service: Avg Score 72.4 (1 opportunities)
   6. Other: Avg Score 71.6 (8 opportunities)
   7

In [28]:
# Quick Reference: Top 10 Condensed Table with Clickable URLs
from IPython.display import HTML

print("\n" + "=" * 120)
print("QUICK REFERENCE: TOP 10 OPPORTUNITIES (WITH CLICKABLE LINKS)")
print("=" * 120)

# Create HTML table with clickable links
html_content = """
<table style="border-collapse: collapse; width: 100%; font-family: Arial, sans-serif; font-size: 12px; color: #000;">
    <tr style="background-color: #f0f0f0; font-weight: bold; border-bottom: 2px solid #333;">
        <td style="padding: 8px; border: 1px solid #ddd; width: 4%; color: #000;">#</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 25%; color: #000;">Business</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 15%; color: #000;">Industry</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 10%; color: #000;">Price</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 8%; color: #000;">Score</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 18%; color: #000;">Location</td>
        <td style="padding: 8px; border: 1px solid #ddd; width: 20%; color: #000;">Link</td>
    </tr>
"""

for idx, row in df_ranked.head(10).iterrows():
    rank = int(row['rank'])
    business_name = row['name'][:30]
    industry = row['industry']
    price = f"${row['price']/1000:.0f}K"
    score = f"{row['opportunity_score']:.1f}"
    location = row['address_locality']
    url = row['url']
    
    # Alternate row colors for better readability
    bg_color = "#ffffff" if rank % 2 == 1 else "#f9f9f9"
    
    html_content += f"""
    <tr style="background-color: {bg_color}; border-bottom: 1px solid #ddd;">
        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; font-weight: bold; color: #000;">{rank}</td>
        <td style="padding: 8px; border: 1px solid #ddd; color: #000;">{business_name}</td>
        <td style="padding: 8px; border: 1px solid #ddd; color: #000;">{industry}</td>
        <td style="padding: 8px; border: 1px solid #ddd; text-align: right; color: #000;">{price}</td>
        <td style="padding: 8px; border: 1px solid #ddd; text-align: center; color: #000;">{score}/100</td>
        <td style="padding: 8px; border: 1px solid #ddd; color: #000;">{location}</td>
        <td style="padding: 8px; border: 1px solid #ddd;"><a href="{url}" target="_blank" style="color: #0066cc; text-decoration: none;">üîó View</a></td>
    </tr>
"""

html_content += """
</table>
"""

display(HTML(html_content))
print("\n" + "=" * 120)


QUICK REFERENCE: TOP 10 OPPORTUNITIES (WITH CLICKABLE LINKS)


0,1,2,3,4,5,6
#,Business,Industry,Price,Score,Location,Link
1,"Vending Route - Home-Based, Sc",Technology/SaaS,$169K,82.7/100,Boston,üîó View
2,Established Commercial and Res,Service,$200K,79.9/100,Boston,üîó View
3,Full-Service Heating & Air Com,Service,$209K,79.3/100,Boston,üîó View
4,The UPS Store Franchise in Bos,Retail,$250K,78.5/100,Boston,üîó View
5,Vending Business with latest i,Technology/SaaS,$150K,78.3/100,Boston,üîó View
6,Residential Cleaning Services,Service,$115K,78.3/100,Boston,üîó View
7,Profitable Recession Proof Sen,Professional Services,$200K,77.0/100,Boston,üîó View
8,Full Liquor license off-premis,Other,$250K,76.8/100,Boston,üîó View
9,Driveway and Parking Lot Aspha,Other,$199K,74.9/100,Boston,üîó View





In [29]:
# Print summary of top opportunities with proper formatting
for idx, row in df_ranked.head(10).iterrows():
    rank = int(row['rank'])
    print(f"{rank:>2}. {row['url']}")


 1. https://www.bizbuysell.com/business-opportunity/vending-route-home-based-scalable-and-semi-passive/2432303/
 2. https://www.bizbuysell.com/business-opportunity/established-commercial-and-residential-cleaning-company/2454604/
 3. https://www.bizbuysell.com/business-opportunity/full-service-heating-and-air-company/2167883/
 4. https://www.bizbuysell.com/business-opportunity/the-ups-store-franchise-in-boston-ma/2344215/
 5. https://www.bizbuysell.com/business-opportunity/vending-business-with-latest-in-smart-technology-and-highly-scalable/2324220/
 6. https://www.bizbuysell.com/business-opportunity/residential-cleaning-services-business/2415615/
 7. https://www.bizbuysell.com/business-opportunity/profitable-recession-proof-senior-care-franchise-in-boston-ma/2449680/
 8. https://www.bizbuysell.com/business-opportunity/full-liquor-license-off-premises-in-boston-for-sale/2176467/
 9. https://www.bizbuysell.com/business-opportunity/driveway-and-parking-lot-asphalt-sealcoating-plus-repairs