# Texas Trampoline Parks Data Collection & Analysis

This notebook allows you to:
1. Run Google Places API calls to collect park data
2. Inspect and analyze the results
3. Filter out unwanted entries
4. Export clean data to CSV
5. Generate the final TypeScript file

In [1]:
import json
import os
import re
import time
from typing import List, Dict, Any

import pandas as pd
import requests
from dotenv import load_dotenv

# Load environment variables from the local dotenv file so the credential is
# never written into the notebook source.
load_dotenv('.env.local')
API_KEY = os.getenv('GOOGLE_PLACES_API_KEY')

# Fail fast: every later cell that calls the Places API needs this key.
if not API_KEY:
    raise ValueError("Please set GOOGLE_PLACES_API_KEY in .env.local")

# Confirm the key is present without echoing any part of it: cell outputs are
# stored inside the notebook file and travel with it when it is shared or
# committed.
print("✅ API Key loaded")

✅ API Key loaded: AIzaSyCyjZ...


In [2]:
# Optimized search terms based on data analysis
# Removed low-performing terms, added missing brands, focused on high-yield searches
#
# The final list is assembled from phrase groups; the order of SEARCH_TERMS is
# the order in which the queries are issued.

# Phrases searched statewide as "<phrase> Texas"
_STATEWIDE_PHRASES = (
    # High-performing generic terms (100% keep rate)
    'trampoline park',
    'trampoline center',
    'trampoline gym',
    'indoor trampoline',
    # Proven brand searches (100% keep rate)
    'urban air',
    'altitude trampoline',
    'sky zone',
    'ground control trampoline',
    'airtopia adventure park',
    'jumping world',
    # Missing brands found in analysis
    'ninja kidz',
    'cosmic air',
    'big air trampoline',
    'xtreme jump',
    'bolder adventure',
    'ijump trampoline',
    'hijinx trampoline',
    # Additional brand searches
    'defy trampoline',
    'rockin jump',
    'launch trampoline',
    'flight deck trampoline',
    'rush air sports',
)

# Major metro area searches (keep the working ones); used verbatim
_METRO_QUERIES = (
    'urban air Austin Texas',
    'urban air Houston',
    'urban air Dallas',
    'urban air San Antonio',
    'sky zone Houston',
    'sky zone Dallas',
    'altitude trampoline Houston',
    'altitude trampoline San Antonio',
)

# Medium cities searched as "trampoline park <city> Texas" for better coverage
_MEDIUM_CITIES = (
    'Beaumont',
    'Brownsville',
    'Corpus Christi',
    'Amarillo',
    'Lubbock',
    'Waco',
    'Killeen',
    'Lufkin',  # For Hijinx specifically
    'Tyler',
    'Longview',
)

# Alternative search phrases, also searched as "<phrase> Texas"
_ALTERNATIVE_PHRASES = (
    'bounce park',
    'family entertainment trampoline',
    'adventure trampoline',
)

SEARCH_TERMS = (
    [f'{phrase} Texas' for phrase in _STATEWIDE_PHRASES]
    + list(_METRO_QUERIES)
    + [f'trampoline park {city} Texas' for city in _MEDIUM_CITIES]
    + [f'{phrase} Texas' for phrase in _ALTERNATIVE_PHRASES]
)

# REMOVED these low-performing terms:
# 'pump it up Texas' - 0% keep rate (bounce houses for parties)
# 'indoor adventure park Texas' - 0% keep rate
# 'amusement center texas' - 10% keep rate
# Most hyper-specific city searches that returned 0 results

term_count = len(SEARCH_TERMS)
print(f"📝 Optimized to {term_count} search terms")
print("✅ Expected improvement: Higher keep rate, better brand coverage")
print("🎯 Should find missing parks like Hijinx in Lufkin")

📝 Optimized to 43 search terms
✅ Expected improvement: Higher keep rate, better brand coverage
🎯 Should find missing parks like Hijinx in Lufkin


In [3]:
def search_places(query: str, timeout: float = 30.0) -> List[Dict]:
    """Search for places using Google Places API Text Search.

    Sends one POST to the ``places:searchText`` endpoint, restricted to a
    fixed latitude/longitude rectangle and capped at 20 results. Only that
    single response is read; no further pages are requested.

    Args:
        query: Free-text query sent as ``textQuery``.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the whole collection loop.

    Returns:
        The ``places`` list from the JSON response. An empty list is returned
        when the response has no ``places`` key or when the request fails;
        failures are printed rather than raised so one bad query does not
        abort a batch.
    """
    url = 'https://places.googleapis.com/v1/places:searchText'

    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        # Only the fields listed here are returned by the API.
        'X-Goog-FieldMask': 'places.id,places.displayName,places.formattedAddress,places.location,places.rating,places.userRatingCount,places.businessStatus,places.types'
    }

    body = {
        'textQuery': query,
        # NOTE(review): looks like a bounding box around Texas; a rectangle
        # necessarily covers some area outside the state as well.
        'locationRestriction': {
            'rectangle': {
                'low': {
                    'latitude': 25.8371,
                    'longitude': -106.6456
                },
                'high': {
                    'latitude': 36.5007,
                    'longitude': -93.5080
                }
            }
        },
        'maxResultCount': 20
    }

    try:
        response = requests.post(url, headers=headers, json=body, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        return data.get('places', [])
    # Network/HTTP problems and undecodable bodies are expected failure modes
    # and stay best-effort. Anything else (e.g. a NameError because the API
    # key cell was not run) is a programming error and should surface.
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Error searching '{query}': {e}")
        return []

In [4]:
def get_place_details(place_id: str, timeout: float = 30.0) -> Dict:
    """Get detailed information for a specific place.

    Issues one GET against ``/v1/places/{place_id}`` and returns the decoded
    JSON body.

    Args:
        place_id: Place identifier, inserted into the request URL as-is.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller's loop.

    Returns:
        The decoded JSON response, or an empty dict when the request fails.
        Failures are printed rather than raised so callers can skip the
        place and continue.
    """
    url = f'https://places.googleapis.com/v1/places/{place_id}'

    headers = {
        'X-Goog-Api-Key': API_KEY,
        # Only the fields listed here are returned by the API.
        'X-Goog-FieldMask': 'id,displayName,formattedAddress,location,nationalPhoneNumber,websiteUri,regularOpeningHours,rating,userRatingCount,photos,types'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # Network/HTTP problems and undecodable bodies stay best-effort; other
    # exceptions indicate a programming error and are allowed to propagate.
    except (requests.RequestException, ValueError) as e:
        print(f"❌ Error getting details for {place_id}: {e}")
        return {}

In [5]:
# Collect all places
# Runs every query in SEARCH_TERMS and keeps one simplified record per place id.
print("🔍 Starting comprehensive search...")

all_places = []
seen_place_ids = set()
total_queries = len(SEARCH_TERMS)

for position, query in enumerate(SEARCH_TERMS, start=1):
    print(f"  {position:2d}/{total_queries}: {query}")

    for place in search_places(query):
        place_id = place.get('id')
        address = place.get('formattedAddress', '')

        # Filter for Texas locations and avoid duplicates: skip the result
        # unless the address mentions TX, it has an id, and the id is new.
        if 'TX' not in address or not place_id or place_id in seen_place_ids:
            continue
        seen_place_ids.add(place_id)

        # Convert to our format; search_term records the query that first
        # surfaced this place.
        location = place.get('location', {})
        all_places.append({
            'place_id': place_id,
            'name': place.get('displayName', {}).get('text', ''),
            'address': address,
            'lat': location.get('latitude', 0),
            'lng': location.get('longitude', 0),
            'rating': place.get('rating'),
            'review_count': place.get('userRatingCount'),
            'business_status': place.get('businessStatus'),
            'types': ', '.join(place.get('types', [])),
            'search_term': query,
        })

    # Rate limiting: short pause between consecutive queries
    time.sleep(0.1)

print(f"\n✅ Found {len(all_places)} unique places in Texas")

🔍 Starting comprehensive search...
   1/43: trampoline park Texas
   2/43: trampoline center Texas
   3/43: trampoline gym Texas
   4/43: indoor trampoline Texas
   5/43: urban air Texas
   6/43: altitude trampoline Texas
   7/43: sky zone Texas
   8/43: ground control trampoline Texas
   9/43: airtopia adventure park Texas
  10/43: jumping world Texas
  11/43: ninja kidz Texas
  12/43: cosmic air Texas
  13/43: big air trampoline Texas
  14/43: xtreme jump Texas
  15/43: bolder adventure Texas
  16/43: ijump trampoline Texas
  17/43: hijinx trampoline Texas
  18/43: defy trampoline Texas
  19/43: rockin jump Texas
  20/43: launch trampoline Texas
  21/43: flight deck trampoline Texas
  22/43: rush air sports Texas
  23/43: urban air Austin Texas
  24/43: urban air Houston
  25/43: urban air Dallas
  26/43: urban air San Antonio
  27/43: sky zone Houston
  28/43: sky zone Dallas
  29/43: altitude trampoline Houston
  30/43: altitude trampoline San Antonio
  31/43: trampoline park Beaum

In [6]:
# Convert to DataFrame for analysis
df = pd.DataFrame(all_places)

print("📊 Initial Results Summary:")
print(f"Total places: {len(df)}")

# City = the address component sitting directly before ", TX"
print("\nTop 10 cities:")
df['city'] = df['address'].str.extract(r', ([^,]+), TX')
city_counts = df['city'].value_counts()
print(city_counts.head(10))

# Count rows whose comma-joined type string contains each token
print("\nBusiness types:")
type_labels = {
    'amusement_park': "amusement parks",
    'gym': "gyms",
    'establishment': "establishments",
}
for type_token, label in type_labels.items():
    print(df['types'].str.contains(type_token).sum(), label)

# Display first few rows (last expression, so the notebook renders the table)
print("\n📋 First 5 results:")
df.head()

📊 Initial Results Summary:
Total places: 120

Top 10 cities:
Houston        13
Fort Worth      5
San Antonio     5
El Paso         4
McAllen         4
Arlington       4
Austin          3
Longview        3
Webster         3
Beaumont        2
Name: city, dtype: int64

Business types:
71 amusement parks
7 gyms
120 establishments

📋 First 5 results:


Unnamed: 0,place_id,name,address,lat,lng,rating,review_count,business_status,types,search_term,city
0,ChIJ1WOAkGQ_NoYRLsd0UKwg3ZU,Air U Trampoline Park (Purchased by Gym U),"4300 US-259, Longview, TX 75605, USA",32.570952,-94.733641,4.4,548.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",trampoline park Texas,Longview
1,ChIJo14qAZnLSYYRlYxvMMddm88,iJump Tyler Trampoline Park,"2029 Capital Drive, Tyler, TX 75701, USA",32.297545,-95.328111,4.5,985.0,OPERATIONAL,"playground, summer_camp_organizer, amusement_c...",trampoline park Texas,Tyler
2,ChIJHSII4d3NSYYRl6568PJp5BM,Urban Air Trampoline and Adventure Park,"8958 S Broadway Ave, Tyler, TX 75703, USA",32.248335,-95.3028,4.5,1108.0,OPERATIONAL,"amusement_park, adventure_sports_center, playg...",trampoline park Texas,Tyler
3,ChIJdZfXSXWdQIYR_GA98m0J_IQ,Altitude Trampoline Park,"20810 Gulf Fwy, Webster, TX 77598, USA",29.523679,-95.129265,4.9,1558.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",trampoline park Texas,Webster
4,ChIJLVMNLL7XTYYRcIR5HbiL5Uc,Ninja Kidz Trampoline Park,"8800 N Tarrant Pkwy suite #200, North Richland...",32.902634,-97.197044,5.0,17902.0,OPERATIONAL,"amusement_park, adventure_sports_center, playg...",trampoline park Texas,North Richland Hills


In [7]:
# Show some filtering options
# Each section prints a candidate list for manual review; nothing is removed
# from df here.
print("🔍 Analysis for filtering:")

# Look for potential non-trampoline parks
print("\nPotential bounce house rentals (to filter out):")
rental_mask = df['name'].str.contains('rental|party rental|bounce house rental', case=False, na=False)
bounce_rentals = df.loc[rental_mask]
print(bounce_rentals[['name', 'city']].to_string(index=False))

# Places typed as gyms whose name does not mention a known brand or "trampoline"
print("\nGyms that might not be trampoline parks:")
typed_as_gym = df['types'].str.contains('gym', case=False, na=False)
trampoline_like_name = df['name'].str.contains('urban air|sky zone|altitude|trampoline', case=False, na=False)
gyms = df.loc[typed_as_gym & ~trampoline_like_name]
print(gyms[['name', 'city']].to_string(index=False))

# Rated places scoring under 3.5
print("\nLow-rated places (might need review):")
rating = df['rating']
low_rated = df.loc[rating.lt(3.5) & rating.notna()]
print(low_rated[['name', 'city', 'rating', 'review_count']].to_string(index=False))

🔍 Analysis for filtering:

Potential bounce house rentals (to filter out):
Empty DataFrame
Columns: [name, city]
Index: []

Gyms that might not be trampoline parks:
                  name      city
Zero Gravity Jump Zone  Kingwood
       GJump Texarkana Texarkana

Low-rated places (might need review):
                                   name    city  rating  review_count
Urban Air Trampoline and Adventure Park Bedford     1.6         212.0
            Xtreme Jump Trampoline Park McAllen     3.3          12.0


In [8]:
# Export to CSV for manual review
csv_filename = 'texas_parks_raw_data.csv'

# Re-running this cell (for example via "Run All") must not wipe out a manual
# review. If a previous export already carries a 'keep' column, copy those
# decisions onto the fresh rows, matched by place_id, before overwriting the
# file. Places that were not in the previous export get an empty 'keep' value
# and still need to be reviewed.
export_df = df
if os.path.exists(csv_filename) and 'place_id' in df.columns:
    try:
        previous_df = pd.read_csv(csv_filename)
    except pd.errors.EmptyDataError:
        # A zero-byte file holds no decisions to preserve.
        previous_df = pd.DataFrame()

    if {'place_id', 'keep'}.issubset(previous_df.columns):
        keep_by_place_id = (
            previous_df
            .drop_duplicates(subset='place_id')
            .set_index('place_id')['keep']
        )
        # assign() returns a new frame, so df itself is left unchanged.
        export_df = df.assign(keep=df['place_id'].map(keep_by_place_id))
        preserved = int(export_df['keep'].notna().sum())
        print(f"♻️ Preserved {preserved} existing 'keep' decisions from {csv_filename}")

export_df.to_csv(csv_filename, index=False)
print(f"💾 Raw data exported to {csv_filename}")
print("\n📝 Next steps:")
print(f"1. Open {csv_filename} in Excel/Google Sheets")
print("2. Review and mark parks to keep/remove")
print("3. Add a 'keep' column (True/False)")
print("4. Save and run the next cell to get detailed data")

# Show the data in the notebook too
print("\n📊 Full dataset:")
df

💾 Raw data exported to texas_parks_raw_data.csv

📝 Next steps:
1. Open texas_parks_raw_data.csv in Excel/Google Sheets
2. Review and mark parks to keep/remove
3. Add a 'keep' column (True/False)
4. Save and run the next cell to get detailed data

📊 Full dataset:


Unnamed: 0,place_id,name,address,lat,lng,rating,review_count,business_status,types,search_term,city
0,ChIJ1WOAkGQ_NoYRLsd0UKwg3ZU,Air U Trampoline Park (Purchased by Gym U),"4300 US-259, Longview, TX 75605, USA",32.570952,-94.733641,4.4,548.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",trampoline park Texas,Longview
1,ChIJo14qAZnLSYYRlYxvMMddm88,iJump Tyler Trampoline Park,"2029 Capital Drive, Tyler, TX 75701, USA",32.297545,-95.328111,4.5,985.0,OPERATIONAL,"playground, summer_camp_organizer, amusement_c...",trampoline park Texas,Tyler
2,ChIJHSII4d3NSYYRl6568PJp5BM,Urban Air Trampoline and Adventure Park,"8958 S Broadway Ave, Tyler, TX 75703, USA",32.248335,-95.302800,4.5,1108.0,OPERATIONAL,"amusement_park, adventure_sports_center, playg...",trampoline park Texas,Tyler
3,ChIJdZfXSXWdQIYR_GA98m0J_IQ,Altitude Trampoline Park,"20810 Gulf Fwy, Webster, TX 77598, USA",29.523679,-95.129265,4.9,1558.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",trampoline park Texas,Webster
4,ChIJLVMNLL7XTYYRcIR5HbiL5Uc,Ninja Kidz Trampoline Park,"8800 N Tarrant Pkwy suite #200, North Richland...",32.902634,-97.197044,5.0,17902.0,OPERATIONAL,"amusement_park, adventure_sports_center, playg...",trampoline park Texas,North Richland Hills
...,...,...,...,...,...,...,...,...,...,...,...
115,ChIJoZhzTAHXQIYRrrWeNe06zPk,Bounce Bounce,"7955 Barker Cypress Rd Suite 100, Cypress, TX ...",29.892728,-95.684539,4.2,1765.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",bounce park Texas,Cypress
116,ChIJN5OPSWjvQIYR0T6uFNqIjEA,Bounce Bounce,"9710 Hwy 6, Missouri City, TX 77459, USA",29.534563,-95.521919,4.2,487.0,OPERATIONAL,"amusement_center, point_of_interest, establish...",bounce park Texas,Missouri City
117,ChIJv3a6i5gyW4YRNhT2fPHJ8X8,Inflatable Wonderland,"11200 Lakeline Mall Dr, Cedar Park, TX 78613, USA",30.469341,-97.807601,4.7,1138.0,OPERATIONAL,"video_arcade, playground, point_of_interest, e...",bounce park Texas,Cedar Park
118,ChIJvYA0RyDRQIYReBXlBSO5DxQ,FUNBOX Bounce & Party Center,"15540 FM 529, Houston, TX 77095, USA",29.880162,-95.642081,4.8,262.0,OPERATIONAL,"amusement_park, point_of_interest, establishment",bounce park Texas,Houston


In [10]:
# After manual review, load the filtered CSV and get detailed data
# Run this cell after you've reviewed and saved the CSV with a 'keep' column

# Metro label -> lower-cased member cities. Any city not listed here is
# labelled "<city> Area".
_METRO_AREA_CITIES = {
    'Dallas-Fort Worth Metroplex': ['dallas', 'plano', 'frisco', 'mckinney', 'allen', 'richardson', 'garland', 'mesquite', 'carrollton', 'fort worth', 'arlington', 'irving', 'grand prairie', 'euless', 'bedford', 'hurst'],
    'Greater Houston Area': ['houston', 'sugar land', 'the woodlands', 'pearland', 'pasadena', 'baytown', 'league city', 'missouri city', 'katy', 'spring'],
    'Austin Metro': ['austin', 'round rock', 'cedar park', 'pflugerville', 'georgetown', 'leander', 'bee cave', 'lakeway', 'west lake hills'],
    'San Antonio Metro': ['san antonio', 'new braunfels', 'schertz', 'universal city', 'converse'],
}
_CITY_TO_METRO = {
    member: metro
    for metro, members in _METRO_AREA_CITIES.items()
    for member in members
}

# State component of a formatted address: "TX", optionally followed by a
# ZIP or ZIP+4 code.
_STATE_ZIP_RE = re.compile(r'^TX(?:\s+(\d{5}(?:-\d{4})?))?$')


def _split_address(address):
    """Split a formatted address into ``(street, city, zip_code)``.

    The "TX <zip>" component is located first and the city is taken as the
    component just before it, so a street that itself contains commas
    (e.g. "123 Main St, Suite 100") is kept whole instead of shifting every
    later field by one position.
    """
    if not isinstance(address, str) or not address.strip():
        # Missing address (e.g. NaN read back from the CSV)
        return '', 'Unknown', ''

    parts = [part.strip() for part in address.split(',')]

    # Scan from the end; index 0 is excluded so a city component always exists.
    for idx in range(len(parts) - 1, 0, -1):
        match = _STATE_ZIP_RE.match(parts[idx])
        if match:
            street = ', '.join(parts[:idx - 1])
            return street, parts[idx - 1], match.group(1) or ''

    # No recognisable state component: fall back to positional parsing.
    street = parts[0]
    city = parts[1] if len(parts) > 1 else 'Unknown'
    state_zip = parts[2] if len(parts) > 2 else 'TX'
    zip_code = state_zip.split(' ')[1] if ' ' in state_zip else ''
    return street, city, zip_code


def _metro_area_for(city):
    """Return the metro label for ``city`` (case-insensitive lookup)."""
    return _CITY_TO_METRO.get(city.lower(), f'{city} Area')


def _slugify(text):
    """Lower-case ``text`` and reduce it to ``a-z0-9`` words joined by single hyphens."""
    # re.sub is required here: str.replace treats its argument as a literal
    # string, so a regex pattern passed to it never matches anything.
    cleaned = re.sub(r'[^a-z0-9\s-]', '', str(text).lower())
    return re.sub(r'[\s-]+', '-', cleaned).strip('-')


# Only the CSV read is guarded, so a FileNotFoundError raised later (for
# example while writing the detailed CSV) is not misreported as a missing
# review file.
try:
    # Load the reviewed data
    reviewed_df = pd.read_csv(csv_filename)
except FileNotFoundError:
    print(f"❌ Could not find {csv_filename}. Please run the previous cell first.")
else:
    if 'keep' not in reviewed_df.columns:
        print("❌ Please add a 'keep' column to the CSV with True/False values")
    else:
        # Filter to only parks marked as 'keep'
        parks_to_keep = reviewed_df[reviewed_df['keep'] == True]
        print(f"📋 Getting detailed data for {len(parks_to_keep)} approved parks...")

        detailed_parks = []
        slug_counts = {}  # base slug -> number of parks that produced it so far

        for i, (_, park) in enumerate(parks_to_keep.iterrows(), 1):
            print(f"  {i:2d}/{len(parks_to_keep)}: {park['name']}")

            # Get detailed information
            details = get_place_details(park['place_id'])

            # An empty dict means the lookup failed (already reported by
            # get_place_details); that park is skipped.
            if details:
                street, city, zip_code = _split_address(park['address'])
                metro_area = _metro_area_for(city)

                # Same-name parks in one city would otherwise share a slug;
                # the second and later ones get a numeric suffix.
                base_slug = _slugify(f"{park['name']}-{city}")
                slug_counts[base_slug] = slug_counts.get(base_slug, 0) + 1
                occurrence = slug_counts[base_slug]
                slug = base_slug if occurrence == 1 else f"{base_slug}-{occurrence}"

                # Create park object
                detailed_parks.append({
                    'id': park['place_id'],
                    'name': park['name'],
                    'description': f"Trampoline park located in {city}, Texas.",
                    'street': street,
                    'city': city,
                    'state': 'Texas',
                    'zipCode': zip_code,
                    'metroArea': metro_area,
                    'lat': park['lat'],
                    'lng': park['lng'],
                    'phone': details.get('nationalPhoneNumber', ''),
                    'website': details.get('websiteUri', ''),
                    'rating': park['rating'],
                    'reviewCount': park['review_count'],
                    'slug': slug,
                    'hours': {},  # Would need to parse regularOpeningHours
                    'photos': len(details.get('photos', [])),
                    'lastUpdated': pd.Timestamp.now().strftime('%Y-%m-%d')
                })

            # Rate limiting
            time.sleep(0.15)

        # Create detailed DataFrame
        detailed_df = pd.DataFrame(detailed_parks)

        if detailed_df.empty:
            # With no rows there are no columns either, so the summary below
            # would fail and an empty export would be unreadable downstream.
            print("\n⚠️ No detailed data collected; nothing was exported")
        else:
            # Export detailed data
            detailed_csv = 'texas_parks_detailed.csv'
            detailed_df.to_csv(detailed_csv, index=False)

            print(f"\n✅ Collected detailed data for {len(detailed_parks)} parks")
            print(f"💾 Saved to {detailed_csv}")

            # Show metro distribution
            print("\n📊 Distribution by metro:")
            print(detailed_df['metroArea'].value_counts())

            # A bare expression inside try/else is never rendered by the
            # notebook, so the preview is printed explicitly.
            print("\n📋 First 5 parks:")
            print(detailed_df[['name', 'city', 'metroArea', 'slug']].head().to_string(index=False))

📋 Getting detailed data for 89 approved parks...
   1/89: Air U Trampoline Park (Purchased by Gym U)
   2/89: iJump Tyler Trampoline Park
   3/89: Urban Air Trampoline and Adventure Park
   4/89: Altitude Trampoline Park
   5/89: Cosmic Air Adventure Park & Arcade
   6/89: Ninja Kidz Trampoline Park
   7/89: Big Air Trampoline & Adventure Park
   8/89: Cosmic Air Adventure Park & Arcade
   9/89: Altitude Trampoline Park
  10/89: Altitude Trampoline Park Cedar Hill
  11/89: Cosmic Air Adventure Park & Arcade
  12/89: Xtreme Jump Adventure Park
  13/89: Jumping World
  14/89: Urban Air Trampoline and Adventure Park
  15/89: Bolder Adventure Park
  16/89: Altitude Trampoline Park Spring-Klein
  17/89: Altitude Trampoline Park
  18/89: House of Air Trampoline & Ninja Park
  19/89: Urban Air Trampoline and Adventure Park
  20/89: Urban Air Trampoline and Adventure Park
  21/89: Urban Air Trampoline and Adventure Park
  22/89: Urban Air Trampoline and Adventure Park
  23/89: Urban Air Trampo

In [11]:
# Generate the final TypeScript file
# Run this after you're satisfied with the detailed data


def text_or_empty(value) -> str:
    """Return ``value`` as text, mapping missing values (NaN/None) to ``''``.

    Whole-number floats such as ``75701.0`` are rendered without the decimal
    part, so a ZIP code that was parsed as a number still reads ``'75701'``.
    """
    if pd.isna(value):
        return ''
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def build_park_record(park) -> dict:
    """Build the nested, JSON-serialisable park object for one CSV row.

    Args:
        park: Mapping-like row (e.g. a ``pd.Series``) exposing the columns of
            the detailed CSV.

    Returns:
        A dict of plain Python values. Optional text fields that are missing
        become ``''``; a missing rating or review count becomes ``None``.
    """
    has_rating = pd.notna(park['rating'])
    has_review_count = pd.notna(park['reviewCount'])
    return {
        'id': park['id'],
        'name': park['name'],
        'description': park['description'],
        'address': {
            # Empty cells are read back from CSV as NaN; emit '' instead so
            # the generated file never contains a bare NaN for a text field.
            'street': text_or_empty(park['street']),
            'city': text_or_empty(park['city']),
            'state': park['state'],
            'zipCode': text_or_empty(park['zipCode']),
            'metroArea': park['metroArea'],
            'coordinates': {
                'lat': float(park['lat']),
                'lng': float(park['lng'])
            }
        },
        'contact': {
            'phone': text_or_empty(park['phone']),
            'website': text_or_empty(park['website'])
        },
        'hours': {},
        'amenities': [],
        'pricing': {},
        'images': [],
        'rating': float(park['rating']) if has_rating else None,
        'reviewCount': int(park['reviewCount']) if has_review_count else None,
        'features': [],
        'ageGroups': ['kids', 'teens', 'adults'],
        'slug': park['slug'],
        'lastUpdated': park['lastUpdated']
    }


def render_typescript(parks) -> str:
    """Render the TypeScript module source that exports ``parks``."""
    return (
        "import { TrampolinePark } from '@/types/park';\n"
        "\n"
        f"export const texasParks: TrampolinePark[] = {json.dumps(parks, indent=2)};\n"
    )


# Only the CSV read is guarded: a failure while writing the output file must
# not be reported as a missing input file.
try:
    # Read ZIP and phone as text so they are not coerced to numbers.
    detailed_df = pd.read_csv(
        'texas_parks_detailed.csv',
        dtype={'zipCode': str, 'phone': str},
    )
except FileNotFoundError:
    print("❌ Could not find texas_parks_detailed.csv. Please run the previous cell first.")
else:
    print(f"🏗️ Generating TypeScript file for {len(detailed_df)} parks...")

    # Convert to TypeScript format
    parks_js = [build_park_record(park) for _, park in detailed_df.iterrows()]

    # Generate TypeScript content
    ts_content = render_typescript(parks_js)

    # Save TypeScript file, creating the target directory if it is missing
    ts_filename = 'src/data/texas-parks.ts'
    os.makedirs(os.path.dirname(ts_filename), exist_ok=True)
    with open(ts_filename, 'w', encoding='utf-8') as f:
        f.write(ts_content)

    print(f"✅ Generated {ts_filename}")
    print("📊 Final stats:")
    print(f"  Total parks: {len(detailed_df)}")
    print(f"  Metro areas: {detailed_df['metroArea'].nunique()}")
    print(f"  Cities: {detailed_df['city'].nunique()}")

🏗️ Generating TypeScript file for 89 parks...
✅ Generated src/data/texas-parks.ts
📊 Final stats:
  Total parks: 89
  Metro areas: 27
  Cities: 50
