In [None]:
# Google Map API
# Installs the `googlemaps` client package into the notebook's environment.
# NOTE(review): the code visible in this notebook talks to the Places API
# through `requests` and never imports `googlemaps` — confirm this install is
# still required before keeping it.
!pip install googlemaps


Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40715 sha256=5ff2dc55ae225349c3d4499c53ced69393c65394916bbacc313c06142f9dea4a
  Stored in directory: /root/.cache/pip/wheels/f1/09/77/3cc2f5659cbc62341b30f806aca2b25e6a26c351daa5b1f49a
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0


In [None]:
import html
import json
import os
import re
from difflib import SequenceMatcher

import pandas as pd
import requests

# Central configuration for the lead-scoring run.
#   input_file / output_file : CSV read at the start and written at the end.
#   api_key                  : sent as the X-Goog-Api-Key header on every request.
#   target_cuisines          : substrings looked for in Google's primaryType.
#   matching_threshold       : minimum SequenceMatcher ratio for a name match.
#   column_mapping           : logical field name -> column name in the input CSV.
#   weights                  : contribution of each 0/1 flag to the final score.
CONFIG = {
    'input_file': 'LEADS.csv',
    'output_file': 'LEADS_with_PlacesAPI_WeightedScore.csv',
    # Read the key from the environment so a real credential never has to be
    # typed into (and saved with) the notebook. The original placeholder is
    # kept as the fallback so existing edit-in-place workflows still work.
    'api_key': os.environ.get('GOOGLE_MAPS_API_KEY', 'Your Google API Key'),
    'target_cuisines': ['pizza', 'italian'],
    'matching_threshold': 0.65,
    'column_mapping': {
        'restaurant_name': 'restaurant_name',
        'address': 'address',
        'address_line_2': 'address_line_2',
        'city': 'city',
        'state': 'State',
        'postal_code': 'postal_code',
        'county': 'county'
    },
    'weights': {
        'status': 0.30,
        'cuisine_match': 0.15,
        'address_match': 0.30,
        'name_match': 0.15,
        'website': 0.10
    }
}

def clean_text(value):
    """Decode HTML character references in a text value.

    The previous implementation handled only three entities and turned
    ``&#x27;`` (an apostrophe) into a backtick, which distorts both the search
    query and the later name-similarity comparison. ``html.unescape`` decodes
    every named and numeric reference correctly.

    Args:
        value: Any cell value. Only ``str`` instances are transformed.

    Returns:
        The decoded string, or ``value`` unchanged when it is not a string
        (for example NaN or a number).
    """
    if isinstance(value, str):
        return html.unescape(value)
    return value

def normalize_postal_code(postal_code):
    """Return a 5-character, zero-padded ZIP code string.

    Handles the shapes a postal code takes after a CSV round-trip:
    ZIP+4 strings (``'12345-6789'``), integers that lost their leading zeros
    (``501``) and floats (``2138.0``), which appear when the column contains
    blanks and is type-inferred as float.

    Args:
        postal_code: Raw cell value (str, int, float, or missing).

    Returns:
        The first five characters of the zero-padded code, or ``''`` when the
        value is missing or blank.
    """
    if pd.isna(postal_code):
        # Previously str(nan) leaked through as the bogus code '00nan'.
        return ''
    code = str(postal_code).strip().split('-')[0].strip()
    # Drop a float artefact such as '2138.0' before padding; otherwise the
    # padded-and-truncated result would be '2138.' instead of '02138'.
    code = re.sub(r'\.0*$', '', code)
    if not code:
        return ''
    return code.zfill(5)[:5]

def refine_address_with_unit(address):
    """Lower-case an address and strip unit designators so addresses compare equal.

    Removes fragments such as ``suite 100``, ``apt 4b`` or ``#12`` and then
    normalises comma and whitespace separators.

    Args:
        address: Address text, or a missing value (None / NaN).

    Returns:
        The normalised address, or ``""`` when ``address`` is missing.
    """
    if pd.isna(address):
        return ""
    # str() guards against non-string cells (e.g. a purely numeric address).
    address = str(address).lower().strip()

    address = re.sub(r'\b(suite|unit|apt|apartment|floor|ste|#)\s*\d+[a-zA-Z]*', '', address)
    address = re.sub(r'#\d+[a-zA-Z]*', '', address)

    # Collapse any run of commas (with optional whitespace between them) into a
    # single ', '. Removing a unit leaves ', ,' behind; normalising each comma
    # separately and then collapsing adjacent commas never merged that gap.
    address = re.sub(r'\s*,(?:\s*,)*\s*', ', ', address)
    address = re.sub(r'\s+', ' ', address)
    return address.strip(', ')

def broad_name_match(name1, name2, threshold=CONFIG['matching_threshold']):
    """Report whether two names are similar enough to count as the same business.

    Both names are lower-cased and stripped (missing values become an empty
    string) and compared with ``SequenceMatcher``.

    Args:
        name1: First name, possibly missing.
        name2: Second name, possibly missing.
        threshold: Minimum similarity ratio that counts as a match.

    Returns:
        True when the similarity ratio is at least ``threshold``.
    """
    def _prepare(raw):
        # Missing values compare as the empty string.
        if pd.notna(raw):
            return str(raw).lower().strip()
        return ''

    left = _prepare(name1)
    right = _prepare(name2)
    similarity = SequenceMatcher(None, left, right).ratio()
    return similarity >= threshold

def cuisine_match(primary_type):
    """Return 1 when the place type mentions a target cuisine, otherwise 0.

    Args:
        primary_type: Place type text; any non-string value is treated as
            an empty string.

    Returns:
        1 if any entry of ``CONFIG['target_cuisines']`` is a substring of the
        lower-cased type, else 0.
    """
    if not isinstance(primary_type, str):
        primary_type = ''
    lowered_type = primary_type.lower()
    for cuisine in CONFIG['target_cuisines']:
        if cuisine in lowered_type:
            return 1
    return 0

def call_places_api(textQuery, timeout=10):
    """Run a Places API text search and return the matching places.

    The call stays best-effort: any failure yields an empty list so one bad
    lead cannot abort a long batch. Failures are now printed, so an invalid
    key or exhausted quota no longer looks like "no match" on every row.

    Args:
        textQuery: Free-text query sent as the ``textQuery`` request field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The list under the response's ``places`` key, or ``[]`` when the
        request fails, the status is not 200, or the body is not a JSON object.
    """
    try:
        response = requests.post(
            "https://places.googleapis.com/v1/places:searchText",
            json={"textQuery": textQuery},
            headers={
                "Content-Type": "application/json",
                "X-Goog-Api-Key": CONFIG['api_key'],
                "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.primaryType,places.businessStatus,places.websiteUri"
            },
            timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"Places API request failed for {textQuery!r}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Places API returned HTTP {response.status_code} for {textQuery!r}")
        return []

    try:
        payload = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        print(f"Places API returned a non-JSON body for {textQuery!r}")
        return []

    return payload.get('places', []) if isinstance(payload, dict) else []

# Load the leads and prepare the text fields used for searching and matching.
# The postal-code column is read as text: letting pandas infer a numeric dtype
# drops leading zeros and, when blanks are present, turns codes into floats.
leads_df = pd.read_csv(
    CONFIG['input_file'],
    encoding='latin1',
    dtype={CONFIG['column_mapping']['postal_code']: str}
)
leads_df[CONFIG['column_mapping']['restaurant_name']] = leads_df[CONFIG['column_mapping']['restaurant_name']].apply(clean_text)
# Skip missing codes instead of normalising the text 'nan'; they become ''.
leads_df[CONFIG['column_mapping']['postal_code']] = (
    leads_df[CONFIG['column_mapping']['postal_code']]
    .map(normalize_postal_code, na_action='ignore')
    .fillna('')
)

# Lower-cased "address, city, state zip, usa" string, later compared with the
# formatted address returned by the API.
leads_df['full_address'] = leads_df[CONFIG['column_mapping']['address']].fillna('') + ', ' + \
                           leads_df[CONFIG['column_mapping']['city']].fillna('') + ', ' + \
                           leads_df[CONFIG['column_mapping']['state']].fillna('') + ' ' + \
                           leads_df[CONFIG['column_mapping']['postal_code']].fillna('') + ', USA'
leads_df['full_address'] = leads_df['full_address'].str.replace(', ,', ',', regex=False).str.strip(', ').str.lower()

# Create the output columns up front: Google detail columns start empty (None)
# and the 0/1 scoring flags start at 0.
column_defaults = {
    'google_display_name': None,
    'google_formatted_address': None,
    'google_primary_type': None,
    'google_business_status': None,
    'google_website_uri': None,
    'address_match': 0,
    'name_match': 0,
    'cuisine_match': 0,
    'status': 0,
    'website': 0,
}
for col, default_value in column_defaults.items():
    leads_df[col] = default_value

# Match every lead against Google Places and fill in the Google detail columns
# and the 0/1 scoring flags. Two searches are made per lead:
#   1. address only     -> the set of normalised addresses Google knows there;
#   2. name + address   -> candidates, kept only if their address is in that set.
# The surviving candidate whose display name is most similar to the lead wins.
# All flags were initialised to 0 when the columns were created, so leads that
# are skipped below simply keep those zeros.
columns = CONFIG['column_mapping']

for index, row in leads_df.iterrows():
    # Build the queries from the non-missing parts only; formatting a NaN cell
    # into the string would send the literal text "nan" to the search.
    address_parts = []
    for field in ('address', 'city', 'state', 'postal_code'):
        cell = row[columns[field]]
        if pd.notna(cell) and str(cell).strip():
            address_parts.append(str(cell).strip())
    address_query = ' '.join(address_parts)
    if not address_query:
        # Nothing to search for; avoid two API calls that cannot succeed.
        continue

    raw_name = row[columns['restaurant_name']]
    lead_name = str(raw_name).strip() if pd.notna(raw_name) else ''
    full_query = f"{lead_name} {address_query}".strip()

    address_matches = {
        refine_address_with_unit(place['formattedAddress'])
        for place in call_places_api(address_query)
        if 'formattedAddress' in place
    }
    if not address_matches:
        continue

    # Hoisted: the lead name does not change between candidates.
    lead_name_lower = lead_name.lower()
    best_score = 0
    matched_place = None
    for place in call_places_api(full_query):
        candidate_address = refine_address_with_unit(place.get('formattedAddress', ''))
        if candidate_address not in address_matches:
            continue
        candidate_name = place.get('displayName', {}).get('text', '')
        score = SequenceMatcher(None, lead_name_lower, candidate_name.lower()).ratio()
        # Strict '>' keeps the first of equally similar candidates, and a
        # candidate with zero name similarity is never selected.
        if score > best_score:
            best_score = score
            matched_place = place

    if not matched_place:
        continue

    display_name = matched_place.get('displayName', {}).get('text', '')
    formatted_address = refine_address_with_unit(matched_place.get('formattedAddress', ''))
    primary_type = matched_place.get('primaryType', '')
    business_status = matched_place.get('businessStatus', '')
    website_uri = matched_place.get('websiteUri', '')

    leads_df.at[index, 'google_display_name'] = display_name
    leads_df.at[index, 'google_formatted_address'] = formatted_address
    leads_df.at[index, 'google_primary_type'] = primary_type
    leads_df.at[index, 'google_business_status'] = business_status
    leads_df.at[index, 'google_website_uri'] = website_uri

    # A missing status or a permanent closure scores 0; anything else scores 1.
    leads_df.at[index, 'status'] = 0 if business_status in (None, '', 'CLOSED_PERMANENTLY') else 1

    # Normalise BOTH sides the same way. Comparing a unit-stripped Google
    # address with the raw lead address could never match a lead whose address
    # contains a suite or unit number.
    lead_address = refine_address_with_unit(row['full_address'])
    leads_df.at[index, 'address_match'] = 1 if formatted_address == lead_address else 0

    leads_df.at[index, 'name_match'] = 1 if broad_name_match(raw_name, display_name) else 0
    leads_df.at[index, 'cuisine_match'] = cuisine_match(primary_type)
    leads_df.at[index, 'website'] = 1 if website_uri else 0

# Combine the 0/1 flags into a weighted score on a 0-100 scale, then write the
# enriched leads to the configured output file.
score_columns = ['status', 'cuisine_match', 'address_match', 'name_match', 'website']
weighted_terms = [
    leads_df[column] * CONFIG['weights'][column]
    for column in score_columns
]
# Add the terms left to right, in the listed order.
weighted_total = weighted_terms[0]
for term in weighted_terms[1:]:
    weighted_total = weighted_total + term
leads_df['lead_score'] = weighted_total * 100

leads_df.to_csv(CONFIG['output_file'], index=False)
print(f"✅ Process complete! Results saved to: {CONFIG['output_file']}")

✅ Process complete! Results saved to: LEADS_with_PlacesAPI_NewWeightedScore.csv
