# Video Game Purchase Generator

In [3]:
!pip install pandas
!pip install elasticsearch

Collecting pandas
  Downloading pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl (12.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl (21.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.2/21.2 MB[0m [31m62.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m508.0/508.0 kB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.6/346.6 kB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully inst

In [4]:
import random
import pandas as pd
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch, helpers

# Catalogue of the 30 distinct titles a purchase can refer to
game_titles_list = [
    "dragon_quest", "cyber_racer", "galaxy_battle",
    "pixel_hero", "dungeon_crawler", "mystic_quest",
    "retro_runner", "space_invaderz", "alien_attack",
    "cyber_ninja", "knights_valor", "shadow_strike",
    "blaze_of_glory", "thunder_force", "phantom_fighter",
    "arcade_master", "pixel_warrior", "galaxy_defender",
    "retro_blaster", "legend_of_shadows", "mystic_warriors",
    "blade_runner", "cosmic_adventure", "star_fighter",
    "quest_of_legends", "dungeon_master", "alien_invaders",
    "battle_arena", "sky_heroes", "cyber_force",
]

# Value pools that the random record fields are sampled from
genres = [
    "action",
    "adventure",
    "rpg",
    "puzzle",
    "shooter",
    "platformer",
    "strategy",
]
platforms = [
    "arcade",
    "nes",
    "sega_genesis",
    "pc",
    "atari",
    "snes",
    "commodore_64",
]
developers = [
    "pixel_forge_studios",
    "retro_wave_entertainment",
    "high_score_games",
    "8_bit_games_inc",
]
publishers = [
    "8_bit_games_inc",
    "high_score_productions",
    "arcade_legends",
    "synth_wave_records",
]
country_codes = ["us", "jp", "de", "fr", "gb", "it", "es", "ca", "au", "ru"]
devices = ["console", "pc", "mobile"]
payment_methods = ["credit_card", "check", "cash"]

# Country code -> continent lookup, expanded from a per-continent grouping
country_to_continent = {
    code: continent
    for continent, codes in (
        ("north_america", ("us", "ca")),
        ("europe", ("gb", "fr", "de", "it", "es")),
        ("asia", ("jp",)),
        ("australia", ("au",)),
        ("europe_asia", ("ru",)),
    )
    for code in codes
}

# Name of the index that receives the generated purchase documents
index_name = "retro_arcade_games"

# Client for the node at http://localhost:9200 (adjust host, port and scheme as needed)
es = Elasticsearch(hosts=[{"host": "localhost", "port": 9200, "scheme": "http"}])

# Create the target index up front when it is not there yet
if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name)

# Function to generate data and bulk index it into Elasticsearch
def _build_purchase_action(record_id):
    """Build one bulk action describing a randomly generated game purchase.

    Args:
        record_id: Value stored as the document ``_id``.

    Returns:
        dict: An action with ``_index``, ``_id`` and ``_source`` keys, in the
        shape passed to ``helpers.bulk``.
    """
    # The day is capped at 28 so that every year/month combination is a valid date.
    purchase_year = random.randint(1980, 1999)
    purchase_date = datetime(purchase_year, random.randint(1, 12), random.randint(1, 28))
    # Reviews are written 0-60 days after the purchase, so a purchase made
    # late in 1999 can carry a review date in early 2000.
    review_date = purchase_date + timedelta(days=random.randint(0, 60))

    # A base price is drawn first, then the recorded price is jittered by up to 10 around it.
    base_price = random.uniform(19.99, 59.99)
    country_code = random.choice(country_codes)

    # Draw the genre once so the "genre" field and the description agree.
    genre = random.choice(genres)
    activity = random.choice(['fight', 'solve_puzzles', 'race', 'explore_dungeons', 'save_the_world'])

    return {
        "_index": index_name,
        "_id": record_id,
        "_source": {
            "game_title": random.choice(game_titles_list),
            "genre": genre,
            "platform": random.choice(platforms),
            # A game cannot be bought before it exists: cap the release year at the purchase year.
            "release_year": random.randint(1980, purchase_year),
            "developer_name": random.choice(developers),
            "publisher_name": random.choice(publishers),
            "game_description": f"A thrilling {genre} game where you {activity}.",
            "purchase_date": purchase_date.strftime('%Y-%m-%d'),
            "price_at_purchase": round(random.uniform(base_price - 10, base_price + 10), 2),
            "user_rating": round(random.uniform(1.0, 5.0), 1),
            "review_date": review_date.strftime('%Y-%m-%d'),
            "game_keywords": random.choice(["8_bit", "multiplayer", "side_scroller", "fantasy", "sci_fi", "adventure"]),
            "legacy_score": random.randint(1, 10),
            "country_code": country_code,
            "continent": country_to_continent[country_code],
            "purchase_device": random.choice(devices),
            "payment_method": random.choice(payment_methods),
            "discount_applied": random.choice([True, False]),
        },
    }


def generate_and_index_data(num_records=100000, chunk_size=10000):
    """Generate random purchase records and bulk index them into Elasticsearch.

    Documents are written to ``index_name`` through the module-level ``es``
    client with ids ``1..num_records``, so running the function again reuses
    the same ids.

    Args:
        num_records: Number of documents to generate.
        chunk_size: Number of actions buffered in memory before each
            ``helpers.bulk`` call. Must be at least 1.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    actions = []
    for i in range(num_records):
        actions.append(_build_purchase_action(i + 1))

        # Flush a full batch so the buffer never holds more than chunk_size actions.
        if len(actions) >= chunk_size:
            helpers.bulk(es, actions)
            actions = []

    # Send whatever is left over after the last full batch.
    if actions:
        helpers.bulk(es, actions)

# Generate and index 5,000 records into Elasticsearch
generate_and_index_data(num_records=5000)

# Report completion once the indexing call has returned
print("Data successfully indexed into Elasticsearch.")

TypeError: NodeConfig.__init__() missing 1 required positional argument: 'scheme'