In [None]:
# Install the Google Maps API client library
!pip install googlemaps

In [None]:
import html
import re
from difflib import SequenceMatcher

import pandas as pd
import requests
from fuzzywuzzy import fuzz

# Settings for the whole run: file locations, API credentials, matching and
# scoring parameters.
CONFIG = {
    # CSV of leads that is read at the start of the run.
    'input_file': 'Leads.csv',
    # CSV that receives the enriched, scored leads.
    'output_file': 'LEADS_with_PlacesAPI_WeightedScore.csv',
    # Sent as the X-Goog-Api-Key header on every Places API request.
    'api_key': 'API Key',
    # Substrings looked for in a place's primary type to flag a cuisine match.
    'target_cuisines': [
        'restaurant',
        'bar',
        'cafe',
        'shop',
        'bakery',
        'breakfast',
        'deli',
        'catering',
        'food',
        'meal',
        'diner',
    ],
    # Default minimum SequenceMatcher ratio used by broad_name_match.
    'matching_threshold': 0.65,
    # Logical field name -> column name in the input CSV (identity by default).
    'column_mapping': {
        field: field
        for field in (
            'restaurant_name',
            'address',
            'address_line_2',
            'city',
            'state',
            'postal_code',
            'county',
            'country',
        )
    },
    # Weight of each 0/1 signal in the final lead score.
    'weights': dict(
        status=0.30,
        cuisine_match=0.10,
        address_match=0.30,
        name_match=0.20,
        website=0.10,
    ),
}

def clean_text(value):
    """Decode HTML character references in a text value.

    Strings such as ``"Joe&#x27;s Bar &amp; Grill"`` become
    ``"Joe's Bar & Grill"``. All named and numeric HTML references are
    decoded, so ``&#x27;`` yields a real apostrophe, which keeps the name
    comparable with the display name returned by the Places API.

    Args:
        value: Any cell value; only ``str`` instances are modified.

    Returns:
        The decoded string, or ``value`` unchanged when it is not a string
        (for example NaN for an empty cell).
    """
    if isinstance(value, str):
        return html.unescape(value)
    return value

def normalize_postal_code(postal_code):
    """Return a 5-character postal code string, or '' when it is missing.

    Handles the shapes a CSV column can take after ``pd.read_csv``:
    strings (``"02134-1234"``), integers (``2134``) and floats
    (``2134.0``, which appear when the column contains empty cells).

    Args:
        postal_code: Raw cell value.

    Returns:
        The part before any ``-`` suffix, left-padded with zeros to five
        characters and truncated to five; ``""`` for NaN/None/blank input.
    """
    if pd.isna(postal_code):
        return ""
    text = str(postal_code).strip()
    # A float-typed column renders 2134 as "2134.0"; drop the artefact
    # before padding so the leading zero is restored correctly.
    text = re.sub(r'\.0+$', '', text)
    text = text.split('-')[0].strip()
    if not text:
        return ""
    return text.zfill(5)[:5]

def refine_address_with_unit(address):
    """Lower-case an address and strip numbered unit designators from it.

    Removes fragments such as ``suite 100``, ``apt 4b``, ``floor 2`` or
    ``#12`` and then tidies the separators, so that the segment left empty
    by a removed unit does not survive as ``", ,"``.

    Args:
        address: Address text; NaN/None is treated as missing.

    Returns:
        The cleaned, lower-cased address, or ``""`` when it is missing.
    """
    if pd.isna(address):
        return ""
    text = str(address).lower().strip()
    text = re.sub(r'\b(suite|unit|apt|apartment|floor|ste|#)\s*\d+[a-zA-Z]*', '', text)
    # "\b#" only matches when "#" directly follows a word character, so
    # a "#12" preceded by a space needs its own pass.
    text = re.sub(r'#\d+[a-zA-Z]*', '', text)
    # Collapse any run of commas (with optional whitespace between them)
    # into a single ", " separator.
    text = re.sub(r'\s*(?:,\s*)+', ', ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip(', ')

def normalize_address(address):
    """Lower-case an address and abbreviate street types and directions.

    Only whole words are abbreviated, so names that merely contain a
    keyword (``weston``, ``courtland``) are left intact. Compound
    directions are listed explicitly so ``northeast`` still becomes ``ne``.

    Args:
        address: Address text; NaN/None is treated as missing.

    Returns:
        The normalised address with commas removed, or ``""`` when missing.
    """
    if pd.isna(address):
        return ""
    replacements = {
        "northeast": "ne", "northwest": "nw", "southeast": "se", "southwest": "sw",
        "street": "st", "road": "rd", "avenue": "ave", "drive": "dr",
        "lane": "ln", "court": "ct", "north": "n", "south": "s",
        "east": "e", "west": "w"
    }
    word_pattern = r'\b(' + '|'.join(replacements) + r')\b'
    text = re.sub(
        word_pattern,
        lambda match: replacements[match.group(1)],
        str(address).lower(),
    )
    return text.replace(",", "").strip()

# Lower-cased full state name -> lower-cased two-letter postal abbreviation.
US_STATE_ABBR = {
    'alabama': 'al', 'alaska': 'ak', 'arizona': 'az', 'arkansas': 'ar', 'california': 'ca', 'colorado': 'co',
    'connecticut': 'ct', 'delaware': 'de', 'district of columbia': 'dc', 'florida': 'fl', 'georgia': 'ga',
    'hawaii': 'hi', 'idaho': 'id',
    'illinois': 'il', 'indiana': 'in', 'iowa': 'ia', 'kansas': 'ks', 'kentucky': 'ky', 'louisiana': 'la',
    'maine': 'me', 'maryland': 'md', 'massachusetts': 'ma', 'michigan': 'mi', 'minnesota': 'mn',
    'mississippi': 'ms', 'missouri': 'mo', 'montana': 'mt', 'nebraska': 'ne', 'nevada': 'nv',
    'new hampshire': 'nh', 'new jersey': 'nj', 'new mexico': 'nm', 'new york': 'ny', 'north carolina': 'nc',
    'north dakota': 'nd', 'ohio': 'oh', 'oklahoma': 'ok', 'oregon': 'or', 'pennsylvania': 'pa',
    'rhode island': 'ri', 'south carolina': 'sc', 'south dakota': 'sd', 'tennessee': 'tn', 'texas': 'tx',
    'utah': 'ut', 'vermont': 'vt', 'virginia': 'va', 'washington': 'wa', 'west virginia': 'wv',
    'wisconsin': 'wi', 'wyoming': 'wy'
}


def normalize_state(state):
    """Return the lower-cased two-letter abbreviation for a US state.

    Args:
        state: Full state name or abbreviation, in any case. Non-string
            values (NaN for an empty CSV cell, None) are treated as missing.

    Returns:
        The abbreviation when the name is in ``US_STATE_ABBR``; otherwise
        the stripped, lower-cased input; ``''`` when the value is missing.
    """
    if not isinstance(state, str):
        return ''
    state = state.strip().lower()
    return US_STATE_ABBR.get(state, state)

def extract_city_state_zip(formatted_address):
    """Split a comma-separated formatted address into city, state and ZIP.

    The address is read from the right: the second-to-last segment is
    taken as ``"<state> <zip>"`` and the third-to-last as the city, e.g.
    ``"123 Main St, Austin, TX 78701, USA"``.

    Args:
        formatted_address: Address string; anything else is treated as
            unparseable.

    Returns:
        A ``(city, state, zip_code)`` tuple of lower-cased strings. All
        three are ``''`` when the input is not a string or has fewer than
        two comma-separated segments; individual items are ``''`` when the
        corresponding piece is absent.
    """
    if not isinstance(formatted_address, str):
        return '', '', ''
    parts = formatted_address.lower().split(',')
    if len(parts) < 2:
        return '', '', ''
    city = parts[-3].strip() if len(parts) >= 3 else ''
    state_zip = parts[-2].strip().split()
    state = state_zip[0] if len(state_zip) >= 1 else ''
    zip_code = state_zip[1] if len(state_zip) >= 2 else ''
    return city, state, zip_code

def broad_name_match(name1, name2, threshold=CONFIG['matching_threshold']):
    """Tell whether two names are similar enough to count as a match.

    Both names are lower-cased and stripped (missing values become '')
    and compared with ``SequenceMatcher``.

    Args:
        name1: First name.
        name2: Second name.
        threshold: Minimum similarity ratio for a match; the default is
            read from ``CONFIG['matching_threshold']`` when the function
            is defined.

    Returns:
        True when the similarity ratio is at least ``threshold``.
    """
    prepared = [
        str(name).lower().strip() if pd.notna(name) else ''
        for name in (name1, name2)
    ]
    similarity = SequenceMatcher(None, prepared[0], prepared[1]).ratio()
    return similarity >= threshold

def cuisine_match(primary_type):
    """Return 1 when a place type contains any target cuisine keyword.

    Args:
        primary_type: Place type string; non-string values are treated
            as an empty string.

    Returns:
        1 if any entry of ``CONFIG['target_cuisines']`` is a substring of
        the lower-cased type, otherwise 0.
    """
    if not isinstance(primary_type, str):
        primary_type = ''
    lowered_type = primary_type.lower()
    for keyword in CONFIG['target_cuisines']:
        if keyword in lowered_type:
            return 1
    return 0

def call_places_api(textQuery):
    """Run a Places API text search and return the matching places.

    Any failure (network error, timeout, non-200 status, non-JSON body)
    yields an empty list, so a single bad request does not abort a run
    over many leads.

    Args:
        textQuery: Free-text search string sent as ``textQuery``.

    Returns:
        The list under the response's ``places`` key, or ``[]`` when the
        request fails or no places are returned.
    """
    try:
        response = requests.post(
            "https://places.googleapis.com/v1/places:searchText",
            json={"textQuery": textQuery},
            headers={
                "Content-Type": "application/json",
                "X-Goog-Api-Key": CONFIG['api_key'],
                "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.primaryType,places.businessStatus,places.websiteUri"
            },
            # Without a timeout requests waits indefinitely on a stalled
            # connection; optional 'request_timeout' config key, 10 s default.
            timeout=CONFIG.get('request_timeout', 10),
        )
    except requests.RequestException as error:
        print(f"Places API request failed for {textQuery!r}: {error}")
        return []

    if response.status_code != 200:
        return []
    try:
        return response.json().get('places', [])
    except ValueError:
        # Body was not valid JSON.
        return []

# Load the leads and clean the columns used for querying and matching.
leads_df = pd.read_csv(CONFIG['input_file'], encoding='latin1')

name_col = CONFIG['column_mapping']['restaurant_name']
postal_col = CONFIG['column_mapping']['postal_code']
leads_df[name_col] = leads_df[name_col].apply(clean_text)
leads_df[postal_col] = leads_df[postal_col].apply(normalize_postal_code)

# Build "<address>, <city>, <state> <postal>, <country>", with missing
# pieces replaced by empty strings.
address_pieces = {
    field: leads_df[CONFIG['column_mapping'][field]].fillna('')
    for field in ('address', 'city', 'state', 'postal_code', 'country')
}
combined_address = (
    address_pieces['address'] + ', '
    + address_pieces['city'] + ', '
    + address_pieces['state'] + ' '
    + address_pieces['postal_code'] + ', '
    + address_pieces['country']
)
# Drop the empty segment left by a missing piece, trim stray separators
# at both ends, and lower-case the result.
leads_df['full_address'] = (
    combined_address
    .str.replace(', ,', ',', regex=False)
    .str.strip(', ')
    .str.lower()
)

# Result columns: Google fields start empty, 0/1 signal columns start at 0.
google_columns = ['google_display_name', 'google_formatted_address', 'google_primary_type',
                  'google_business_status', 'google_website_uri']
signal_columns = ['address_match', 'name_match', 'cuisine_match', 'status', 'website']
for col in google_columns:
    leads_df[col] = None
for col in signal_columns:
    leads_df[col] = 0

def _lead_value(row, field):
    """Return a lead's value for a logical field as a stripped string.

    The column is resolved through CONFIG['column_mapping'];
    missing values (NaN/None) become ''.
    """
    value = row[CONFIG['column_mapping'][field]]
    return '' if pd.isna(value) else str(value).strip()


# For every lead: query the Places API, keep the result whose display name
# is closest to the lead's name, store its details and set the 0/1 signals.
for index, row in leads_df.iterrows():
    lead_name = _lead_value(row, 'restaurant_name')
    street = _lead_value(row, 'address')
    city = _lead_value(row, 'city')
    state = _lead_value(row, 'state')
    postal_code = _lead_value(row, 'postal_code')
    country = _lead_value(row, 'country')
    has_street_address = bool(street)

    if has_street_address:
        query_parts = [lead_name, street, city, state, postal_code, country]
    else:
        query_parts = [lead_name, city, state, postal_code]
    # Skip empty pieces so missing fields are not sent as the text "nan".
    full_query = ' '.join(part for part in query_parts if part)
    if not full_query:
        continue

    # Pick the candidate with the highest name similarity to the lead.
    lead_name_lower = lead_name.lower()
    matched_place = None
    best_score = 0
    for place in call_places_api(full_query):
        display_name = place.get('displayName', {}).get('text', '')
        if not display_name:
            continue

        score = SequenceMatcher(None, lead_name_lower, display_name.lower()).ratio()

        if score > best_score:
            best_score = score
            matched_place = place

    if not matched_place:
        continue

    display_name = matched_place.get('displayName', {}).get('text', '')
    formatted_address = matched_place.get('formattedAddress', '')
    primary_type = matched_place.get('primaryType', '')
    business_status = matched_place.get('businessStatus', '')
    website_uri = matched_place.get('websiteUri', '')

    leads_df.at[index, 'google_display_name'] = display_name
    leads_df.at[index, 'google_formatted_address'] = refine_address_with_unit(formatted_address)
    leads_df.at[index, 'google_primary_type'] = primary_type
    leads_df.at[index, 'google_business_status'] = business_status
    leads_df.at[index, 'google_website_uri'] = website_uri

    # Any reported status other than permanently closed counts as active.
    if business_status and business_status != 'CLOSED_PERMANENTLY':
        leads_df.at[index, 'status'] = 1

    # Name match: the two names share at least one whitespace-separated word.
    name_keywords = set(lead_name_lower.split())
    display_keywords = set(display_name.lower().split())
    if name_keywords & display_keywords:
        leads_df.at[index, 'name_match'] = 1

    leads_df.at[index, 'cuisine_match'] = cuisine_match(primary_type)
    if website_uri:
        leads_df.at[index, 'website'] = 1

    if has_street_address:
        # Full fuzzy comparison of the normalised addresses.
        lead_addr = normalize_address(row['full_address'])
        google_addr = normalize_address(refine_address_with_unit(formatted_address))
        leads_df.at[index, 'address_match'] = 1 if fuzz.token_set_ratio(lead_addr, google_addr) >= 80 else 0
    else:
        # No street to compare: require the same state and the same
        # first three postal-code digits.
        _, google_state, google_zip = extract_city_state_zip(formatted_address)
        lead_state_abbr = normalize_state(state)
        google_state_abbr = normalize_state(google_state)

        # A lead without a postal code never matches on the prefix.
        zip_prefix_match = bool(postal_code) and postal_code[:3] == google_zip[:3]
        state_match = lead_state_abbr == google_state_abbr

        if state_match and zip_prefix_match:
            leads_df.at[index, 'address_match'] = 1

# Lead score: weighted sum of the five 0/1 signals, scaled to 0-100.
score_weights = CONFIG['weights']
weighted_signals = sum(
    leads_df[signal] * score_weights[signal]
    for signal in ('status', 'cuisine_match', 'address_match', 'name_match', 'website')
)
leads_df['lead_score'] = weighted_signals * 100

# Write the enriched leads to the configured output file.
output_path = CONFIG['output_file']
leads_df.to_csv(output_path, index=False)
print(f"✅ Process complete! Results saved to: {output_path}")
