# Video Game purchase generator

# In [2]:
import random
import pandas as pd
from datetime import datetime, timedelta

# Catalogue of 30 distinct game titles that purchases are drawn from.
game_titles_list = [
    "DragonQuest", "CyberRacer", "Galaxy Battle",
    "Pixel Hero", "Dungeon Crawler", "Mystic Quest",
    "RetroRunner", "Space InvaderZ", "Alien Attack",
    "CyberNinja", "Knight's Valor", "Shadow Strike",
    "Blaze of Glory", "Thunder Force", "Phantom Fighter",
    "Arcade Master", "Pixel Warrior", "Galaxy Defender",
    "Retro Blaster", "Legend of Shadows", "Mystic Warriors",
    "Blade Runner", "Cosmic Adventure", "Star Fighter",
    "Quest of Legends", "Dungeon Master", "Alien Invaders",
    "Battle Arena", "Sky Heroes", "Cyber Force",
]

# Categorical value pools; each record picks one entry from each pool.
genres = [
    "Action",
    "Adventure",
    "RPG",
    "Puzzle",
    "Shooter",
    "Platformer",
    "Strategy",
]
platforms = [
    "Arcade",
    "NES",
    "Sega Genesis",
    "PC",
    "Atari",
    "SNES",
    "Commodore 64",
]
developers = [
    "PixelForge Studios",
    "RetroWave Entertainment",
    "HighScore Games",
    "8-Bit Games Inc.",
]
publishers = [
    "8-Bit Games Inc.",
    "HighScore Productions",
    "Arcade Legends",
    "SynthWave Records",
]
country_codes = [
    "US", "JP", "DE", "FR", "GB",
    "IT", "ES", "CA", "AU", "RU",
]
devices = [
    "Console",
    "PC",
    "Mobile",
]
payment_methods = [
    "Credit Card",
    "Check",
    "Cash",
]

# Continent label for every code listed in `country_codes`.
country_to_continent = {
    "US": "North America",
    "CA": "North America",
    "GB": "Europe",
    "FR": "Europe",
    "DE": "Europe",
    "IT": "Europe",
    "ES": "Europe",
    "JP": "Asia",
    "AU": "Australia",
    "RU": "Europe/Asia",
}

# Generate the dataset
def generate_dataset(num_records=1000000, *, seed=None):
    """Build a DataFrame of synthetic retro video game purchase records.

    Every record is sampled independently from the module-level lookup
    tables (``game_titles_list``, ``genres``, ``platforms``, ``developers``,
    ``publishers``, ``country_codes``, ``devices``, ``payment_methods``) and
    ``country_to_continent``.

    Args:
        num_records: Number of rows to generate. ``PurchaseID`` runs from 1
            to ``num_records``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives all sampling so the categorical and numeric columns are
            reproducible and the global ``random`` state is left untouched.
            Dates are still measured back from the current time. When
            ``None`` (the default) the module-level ``random`` functions
            are used, as before.

    Returns:
        A ``pandas.DataFrame`` with one row per purchase and the columns
        PurchaseID, GameTitle, Genre, Platform, ReleaseYear, DeveloperName,
        PublisherName, GameDescription, PurchaseDate, PriceAtPurchase,
        UserRating, ReviewDate, GameKeywords, LegacyScore, CountryCode,
        Continent, PurchaseDevice, PaymentMethod and DiscountApplied.
    """
    # A seeded private generator gives reproducibility without disturbing
    # other users of the global `random` state; unseeded calls keep using
    # the module-level functions so an external random.seed() still works.
    rng = random.Random(seed) if seed is not None else random

    # Loop invariants: take one reference instant so every record is measured
    # against the same "now", and build the small choice lists only once.
    now = datetime.now()
    max_purchase_age_days = 365 * 10
    actions = ['fight', 'solve puzzles', 'race', 'explore dungeons', 'save the world']
    keywords = ["8-bit", "Multiplayer", "Side-Scroller", "Fantasy", "Sci-Fi", "Adventure"]

    data = []
    for i in range(num_records):
        # Pick the genre once so the Genre column and the description agree.
        genre = rng.choice(genres)

        purchase_date = now - timedelta(days=rng.randint(0, max_purchase_age_days))
        # A review follows the purchase by up to 60 days, but cannot be dated
        # later than the moment the dataset is generated.
        review_date = min(purchase_date + timedelta(days=rng.randint(0, 60)), now)

        # Base price in [19.99, 59.99]; the recorded price jitters +/- 10
        # around it, so it falls in [9.99, 69.99].
        base_price = rng.uniform(19.99, 59.99)

        country_code = rng.choice(country_codes)

        data.append({
            "PurchaseID": i + 1,
            "GameTitle": rng.choice(game_titles_list),
            "Genre": genre,
            "Platform": rng.choice(platforms),
            "ReleaseYear": rng.randint(1980, 1999),
            "DeveloperName": rng.choice(developers),
            "PublisherName": rng.choice(publishers),
            "GameDescription": f"A thrilling {genre} game where you {rng.choice(actions)}.",
            "PurchaseDate": purchase_date.strftime('%Y-%m-%d'),
            "PriceAtPurchase": round(rng.uniform(base_price - 10, base_price + 10), 2),
            "UserRating": round(rng.uniform(1.0, 5.0), 1),
            "ReviewDate": review_date.strftime('%Y-%m-%d'),
            "GameKeywords": rng.choice(keywords),
            "LegacyScore": rng.randint(1, 10),
            "CountryCode": country_code,
            "Continent": country_to_continent[country_code],
            "PurchaseDevice": rng.choice(devices),
            "PaymentMethod": rng.choice(payment_methods),
            "DiscountApplied": rng.choice([True, False]),
        })

    return pd.DataFrame(data)

# Build the purchase table: 500,000 synthetic rows.
df = generate_dataset(500000)

# Show the first few rows. A bare `df.head()` is only rendered by a notebook
# when it is the last expression of the cell (and never by a plain script),
# so print the preview explicitly.
print(df.head())

# Save the dataset to a CSV file, leaving out the DataFrame index column.
df.to_csv('retro_arcade_purchases_dataset.csv', index=False)