In [13]:
import csv
import random
from datetime import datetime

# Home worlds a reviewer can come from; one is drawn at random for every
# generated row's "home_planet" field.
planets = [
    "Zeta Reticuli",
    "Proxima Centauri b",
    "Kepler-186f",
    "TRAPPIST-1e",
    "Gliese 581g",
    "HD 40307g",
    "PSR B1257+12 A",
    "TOI-700d",
    "LHS 1140b",
    "K2-18b",
    "Teegarden's Star b",
    "Ross 128 b",
    "Wolf 359c",
    "Tau Ceti e",
    "Epsilon Eridani b",
    "55 Cancri e",
    "HD 209458 b",
    "Upsilon Andromedae b",
    "Beta Pictoris b",
    "Fomalhaut b",
]

# Three-letter prefixes for usernames; generate_username() appends a
# three-digit number to one of these.
name_patterns = [
    "Xyl", "Zor", "Kra", "Vex", "Nyx",
    "Quz", "Bex", "Dax", "Jix", "Wox",
    "Yth", "Pyx", "Mox", "Lux", "Hex",
    "Ryx", "Tux", "Fax", "Cox", "Gax",
]

# Reasons for visiting Earth. generate_review() draws one per review and uses
# it only to pick the family of opening sentences (vacation / research /
# cultural); purposes not explicitly mapped there fall back to "cultural".
visit_purposes = [
    "vacation",
    "research mission",
    "diplomatic visit",
    "cultural exchange",
    "food tourism",
    "academic study",
    "entertainment tour",
    "spiritual journey",
    "medical treatment",
    "business venture",
    "family visit",
    "sports event",
    "art exploration",
    "music festival",
    "historical research",
    "genetic sampling",
    "botanical study",
    "archaeological dig",
    "meditation retreat",
    "gravitational adjustment therapy",
    "technological benchmarking",
    "climate research",
    "xenobiology workshop",
    "interspecies conference",
    "temporal observation",
    "consciousness expansion",
    "aquatic exploration",
    "atmospheric analysis",
    "quantum field study",
    "diplomatic summit",
    "family vacation",
    "honeymoon trip",
    "group tour",
    "school excursion",
    "reunion gathering",
    "partner retreat",
    "offspring education tour",
    "multi-family expedition",
    "ancestral pilgrimage",
    "friendship bonding",
    "spawn graduation celebration",
    "mating ritual ceremony",
    "clan gathering",
    "hive collective outing",
    "binary star partnership ceremony",
]

# Things a visitor can do on Earth. Every entry is a lowercase-initial gerund
# phrase so it can be dropped into a sentence template; generate_review()
# samples one to three distinct entries per review.
activities = [
    # Solo / general-purpose activities
    "surfing in the Pacific",
    "hiking in the Himalayas",
    "shopping in Tokyo",
    "attending a rock concert",
    "trying street food in Bangkok",
    "visiting ancient pyramids",
    "watching aurora borealis",
    "exploring rainforests",
    "attending human sporting events",
    "participating in local festivals",
    "studying human languages",
    "collecting mineral samples",
    "observing marine life",
    "experiencing different seasons",
    "tasting various cuisines",
    "investigating paranormal phenomena",
    "analyzing human technology",
    "documenting wildlife behavior",
    "exploring underground caves",
    "attending academic conferences",
    "visiting theme parks",
    "studying weather patterns",
    "conducting social experiments",
    "learning human dance forms",
    "visiting art museums",
    "floating in the Dead Sea",
    "meditating in ancient temples",
    "participating in human rituals",
    "sampling global beverages",
    "studying quantum interference patterns",
    "analyzing human emotions",
    "collecting atmospheric samples",
    "testing gravity variations",
    "documenting plant specimens",
    "observing celestial phenomena",
    "exploring urban environments",
    "studying human sleep patterns",
    "recording acoustic signatures",
    "mapping geological formations",
    "testing electromagnetic fields",
    "observing social interactions",
    "studying human art creation",
    "analyzing transportation systems",
    "documenting architectural styles",
    "exploring digital networks",
    "studying human medicine",
    "observing educational systems",
    "analyzing economic structures",
    "testing communication systems",
    "exploring virtual realities",
    "studying human genetics",
    # Family / group / collective activities
    "teaching our offspring about Earth customs",
    "introducing our spawn to human children",
    "sharing Earth delicacies with our hive collective",
    "celebrating our bonding ceremony at sacred Earth sites",
    "recording family memories at Earth landmarks",
    "experiencing Earth's entertainment with our cluster group",
    "coordinating our multiple appendages in human dance classes",
    "adapting our morphic fields to Earth's magnetic poles",
    "synchronizing our hivemind at spiritual locations",
    "testing Earth's compatibility for future family migrations",
    "comparing Earth's gravity to our home world with the younglings",
    "sampling Earth's atmosphere with our respiratory collective",
    "participating in human group activities with our pod",
    "organizing interspecies playdates for our offspring",
    "conducting family-wide biological adaptation exercises",
    "engaging in multi-being meditation sessions",
    "exploring Earth's night life with fellow cosmic tourists",
    "attending Earth ceremonies with our life-partner",
    "documenting Earth fashion trends for our social cluster",
    "experiencing Earth's seasonal changes with our pod members",
    "hosting interplanetary culture exchange sessions",
    "studying human family structures with our sociology pod",
    "participating in Earth sports with our physical training cluster",
    "collecting souvenirs for our extended hive",
    "adapting our bioluminescence to Earth's day/night cycle",
    "comparing Earth relationships to our quantum entanglements",
    "organizing group teleportation coordinates",
    "calibrating our universal translators for local dialects",
    "sharing consciousness streams at Earth's power spots",
    "testing Earth's recreational facilities for multi-limbed beings",
]

def generate_username():
    """Return a random username: a prefix from ``name_patterns`` plus a number in 100-999."""
    # Arguments are evaluated left to right, so the prefix is drawn before the number.
    return "{}{}".format(random.choice(name_patterns), random.randint(100, 999))

# Companion phrases appended to an opening sentence when the reviewer did not
# travel alone.
_GROUP_TYPES = (
    "with my life-partner", "with our spawn", "with my quantum-bonded mate",
    "with our offspring cluster", "with my hive collective", "with our extended pod",
    "with my binary star companion", "with our genetic cluster", "with my telepathic twin",
    "with our ancestral group", "with my synchronized collective", "with our family unit",
    "with my resonance-matched partner", "with our breeding pod", "with my cosmic soulmate",
)

# Chance that a review mentions travel companions (40%).
_GROUP_TRAVEL_PROBABILITY = 0.4

# Hard upper bound on the length of a returned review, in characters.
_MAX_REVIEW_LENGTH = 5000

# Visit purposes that select the "vacation" / "research" openings; every other
# purpose uses the "cultural" openings.
_VACATION_PURPOSES = frozenset({"vacation", "food tourism", "entertainment tour", "sports event"})
_RESEARCH_PURPOSES = frozenset({"research mission", "academic study", "historical research"})

# Opening-sentence templates per visit category. "{group}" is filled with
# " <companion phrase>" (leading space included) or with "" for solo travel,
# so no stray spaces are left behind when there are no companions.
_OPENINGS = {
    "vacation": (
        "Spent my galactic vacation{group} exploring Earth.",
        "Finally used my antimatter credits for an Earth getaway{group}.",
        "Booked a luxury Earth package through IntergalacticTrips™{group}.",
        "Decided to try Earth for my annual leave{group}.",
        "Took a spontaneous hyperspace jump to Earth{group}.",
        "Used my accumulated light-year points for an Earth adventure{group}.",
        "Treated myself to a gravity-plus vacation{group}.",
        "Splurged on a premium Earth experience package{group}.",
        "Chose Earth for our special quantum alignment celebration{group}.",
        "Marked our pod's centennial by visiting Earth{group}.",
        "Selected Earth for our cross-dimensional bonding trip{group}.",
        "Surprised my spawn with an educational Earth journey{group}.",
        "Celebrated our molecular fusion anniversary on Earth{group}.",
        "Brought our entire hive collective to experience Earth{group}.",
        "Coordinated a multi-being exploration of Earth{group}.",
    ),
    "research": (
        "Conducted a fascinating study of Earth's peculiarities.",
        "My research expedition to Earth revealed unexpected findings.",
        "Gathered important data during my Earth assignment.",
        "Spent a lunar cycle analyzing Earth's characteristics.",
        "Completed a quantum-approved study of Earth's phenomena.",
        "Led a multi-dimensional research team on Earth.",
        "Performed detailed analysis of Earth's unique properties.",
        "Conducted field research in various Earth biomes.",
        "Our research pod conducted fascinating studies{group}.",
        "Led a multi-dimensional investigation team{group}.",
        "Collected data for our collective consciousness{group}.",
        "Initiated a cross-species research project{group}.",
        "Coordinated interplanetary studies{group}.",
        "Established a temporary research base{group}.",
        "Conducted joint species analysis{group}.",
        "Synchronized our hivemind for Earth observation{group}.",
    ),
    "cultural": (
        "Participated in Earth's unique cultural practices.",
        "Immersed myself in Earth's diverse societies.",
        "Experienced Earth's various customs firsthand.",
        "Engaged with multiple Earth communities.",
        "Studied Earth's fascinating social structures.",
        "Underwent cultural integration training on Earth.",
        "Explored Earth's multifaceted civilization patterns.",
        "Documented Earth's diverse societal norms.",
        "Organized cultural exchange sessions{group}.",
        "Facilitated interspecies understanding{group}.",
        "Participated in human customs{group}.",
        "Shared our traditions with Earth locals{group}.",
        "Studied Earth's social structures{group}.",
        "Documented Earth's cultural evolution{group}.",
        "Experienced Earth's diversity{group}.",
        "Connected with Earth's various societies{group}.",
    ),
}

# Reaction templates per sentiment. "{activity}" receives the activity phrase
# verbatim, so proper nouns such as "Pacific" or "Earth's" keep their capitals.
# Terminal punctuation is added later unless the template already ends in one.
_REACTIONS = {
    "positive": (
        "Absolutely loved {activity}",
        "Had an incredible time {activity}",
        "Couldn't get enough of {activity}",
        "{activity} exceeded all expectations",
        "Was blown away by the experience of {activity}",
        "Thoroughly enjoyed every moment of {activity}",
        "Found pure joy while {activity}",
        "Had a transcendent experience {activity}",
        "Discovered new dimensions while {activity}",
        "Felt completely energized by {activity}",
        "Thoroughly enjoyed {activity}",
        "Was thrilled {activity}",
        "Had a blast {activity}",
        "Was delighted {activity}",
        "Was ecstatic about {activity}",
        "Was enchanted by {activity}",
        "Was fascinated by {activity}",
        "{activity} amazed me",
        "Was impressed with {activity}",
        "Found {activity} quite exhilarating",
        "{activity} was mesmerizing",
        "{activity} was top-notch",
        "{activity} is unmatched",
    ),
    "negative": (
        "Was disappointed by {activity}",
        "Wouldn't recommend {activity}",
        "Had a terrible experience with {activity}",
        "{activity} was a waste of antimatter fuel",
        "Regret choosing {activity}",
        "Found {activity} completely underwhelming",
        "Wasted precious time {activity}",
        "Experienced severe distress while {activity}",
        "Could barely tolerate {activity}",
        "Would rather visit a black hole than {activity} again",
        "{activity} is unbearable",
        "How do humans tolerate {activity}?",
        "{activity} is illogical",
        "I was uncomfortable while {activity}",
        "{activity} was exhausting",
        "{activity} is simply inefficient",
        "{activity} was disappointing",
        "Absolutely hated {activity}",
        "Had a terrible time {activity}",
        "Thoroughly disliked {activity}",
        "Had a miserable time {activity}",
        "{activity} let me down",
        "{activity} was boring",
        "Unimpressed with {activity}",
        "{activity} is irritating",
        "{activity} was underwhelming",
        "{activity} was dissatisfactory",
    ),
    "neutral": (
        "{activity} was exactly as described in the Galactic Guide",
        "Had a standard experience with {activity}",
        "{activity} met basic expectations",
        "Found {activity} to be generally acceptable",
        "Nothing particularly notable about {activity}",
        "Observed typical patterns while {activity}",
        "Recorded average readings during {activity}",
        "Maintained neutral engagement while {activity}",
        "Experienced expected parameters during {activity}",
        "Neither impressed nor disappointed by {activity}",
        "Still processing my thoughts on {activity}",
        "{activity} was slightly inconvenient, but I was able to adapt",
        "{activity} was primitive but surprisingly effective",
        "{activity} was both fascinating and perplexing",
        "{activity} was neither pleasant nor unpleasant",
        "Felt indifferent about {activity}",
        "Participated in {activity}",
        "Engaged in {activity}",
        "Experienced {activity}",
        "Tried out {activity}",
        "Practiced {activity}",
        "Was occupied with {activity}",
    ),
}

# Aspects of Earth that can be rated. Each rating tuple is ordered from worst
# to best: indices 0-1 are negative, 2-3 neutral, 4-5 positive.
_EARTH_ASPECTS = (
    ("gravity", ("unbearably heavy", "crushingly dense", "manageable", "perfectly balanced", "pleasantly weighted", "optimally calibrated")),
    ("atmosphere", ("toxic", "polluted", "breathable", "surprisingly clean", "refreshing", "invigorating")),
    ("humans", ("primitive", "underdeveloped", "moderately advanced", "fascinating", "remarkably adaptable", "intriguingly complex")),
    ("weather", ("chaotic", "unpredictable", "stable", "pleasant", "well-regulated", "beautifully varied")),
    ("food", ("toxic", "inedible", "digestible", "interesting", "delicious", "exceptional")),
    ("transportation", ("primitive", "inefficient", "functional", "adequate", "innovative", "impressive")),
    ("prices", ("extortionate", "expensive", "reasonable", "fair", "economical", "excellent value")),
    ("accommodation", ("hazardous", "uncomfortable", "acceptable", "decent", "luxurious", "exceptional")),
    ("technology", ("primitive", "outdated", "developing", "promising", "fascinating", "surprisingly advanced")),
    ("biodiversity", ("concerning", "limited", "adequate", "interesting", "remarkable", "extraordinary")),
    ("entertainment", ("boring", "simplistic", "satisfactory", "engaging", "captivating", "outstanding")),
    ("social systems", ("chaotic", "disorganized", "functional", "structured", "sophisticated", "admirably complex")),
    ("family-friendliness", ("hazardous for spawn", "unsuitable for offspring", "adequate for younglings", "safe for progeny", "perfect for spawn", "ideal for offspring development")),
    ("group facilities", ("poorly adapted", "limited", "functional", "accommodating", "well-designed", "excellently equipped")),
    ("species adaptation", ("impossible", "challenging", "manageable", "comfortable", "highly successful", "perfect")),
    ("local acceptance", ("hostile", "unwelcoming", "tolerant", "friendly", "welcoming", "exceptionally inclusive")),
    ("telepathic conditions", ("scrambled", "interference-heavy", "stable", "clear", "crystal-clear", "perfectly resonant")),
    ("morphic fields", ("disruptive", "unstable", "balanced", "harmonious", "strengthening", "optimally aligned")),
    ("collective spaces", ("cramped", "restricted", "adequate", "spacious", "expansive", "perfectly proportioned")),
    ("quantum alignment", ("misaligned", "disturbed", "stable", "harmonized", "enhanced", "perfectly synchronized")),
    ("spawn education", ("inappropriate", "limited", "satisfactory", "enriching", "exceptional", "revolutionary")),
    ("hive compatibility", ("incompatible", "challenging", "workable", "suitable", "excellent", "perfect resonance")),
    ("partner amenities", ("non-existent", "basic", "adequate", "thoughtful", "outstanding", "perfectly tailored")),
    ("group activities", ("unavailable", "restricted", "sufficient", "engaging", "abundant", "exceptionally diverse")),
)

# Aspects that are grammatically plural and therefore take "were", not "was".
_PLURAL_ASPECTS = frozenset({
    "humans", "prices", "social systems", "group facilities",
    "telepathic conditions", "morphic fields", "collective spaces",
    "partner amenities", "group activities",
})

# Inclusive index range into an aspect's rating tuple for each sentiment.
_RATING_RANGE = {
    "negative": (0, 1),
    "neutral": (2, 3),
    "positive": (4, 5),
}

# Closing sentences per sentiment.
_CONCLUSIONS = {
    "positive": (
        "Will definitely return for another visit!",
        "Highly recommend to all beings with compatible biology.",
        "Worth every antimatter credit spent.",
        "Already planning my next trip!",
        "A must-visit planet in the galaxy.",
        "Can't wait to bring my spawn here!",
        "Exceeded my multi-dimensional expectations!",
        "Better than the brochure quantum projections!",
        "Five tentacles up!",
        "A perfect destination for any sentient being!",
        "Our spawn can't stop talking about returning!",
        "Our collective consciousness highly approves!",
        "Perfect for quantum-bonded couples!",
        "Our entire pod felt welcomed and appreciated!",
        "Ideal destination for family units of all dimensions!",
        "Our hivemind unanimously endorses this experience!",
        "Worth bringing your entire genetic cluster!",
        "Perfectly suited for multi-being exploration!",
        "Our offspring gained invaluable cross-species experience!",
        "Exceptional for strengthening pair-bonds!",
    ),
    "negative": (
        "Save your credits for Venus instead.",
        "Not worth the quantum tunneling fees.",
        "Will stick to Mars next time.",
        "Wouldn't recommend to my worst enemy.",
        "Request immediate teleport if stationed here.",
        "Would rather vacation in a supernova.",
        "Violated multiple universal constants.",
        "My exoskeleton still shivers at the memory.",
        "Considering voluntary memory erasure.",
        "Filed a formal complaint with the Galactic Tourism Board.",
        "Our spawn were thoroughly disappointed.",
        "Had to recalibrate our entire collective consciousness.",
        "Not recommended for quantum-sensitive beings.",
        "Our pod's worst group experience yet.",
        "Terrible for multi-dimensional families.",
        "Our hivemind collectively rejected the experience.",
        "Caused severe disruption to our pair-bond harmony.",
        "Unsuitable for any form of group travel.",
        "Required emergency telepathic shielding.",
        "Complete waste of collective resources.",
    ),
    "neutral": (
        "Might return if in the star system.",
        "An acceptable destination for standard missions.",
        "Neither better nor worse than expected.",
        "Adequate for basic xenotourism needs.",
        "Serves its purpose as a Class C destination.",
        "Meets minimum interplanetary standards.",
        "Functional for its classification level.",
        "Requires additional long-term observation.",
        "Within expected parameters for a developing world.",
        "Suitable for brief investigative stops.",
        "Acceptable for brief family visits.",
        "Manageable for small collective units.",
        "Standard experience for paired beings.",
        "Adequate for basic group functions.",
        "Suitable for short pod excursions.",
        "Met minimum hivemind requirements.",
        "Functional for paired exploration.",
        "Basic amenities for group travel.",
        "Sufficient for collective observation.",
        "Standard accommodation for multiple beings.",
    ),
}


def _finish_sentence(text):
    """Return *text* with its first character upper-cased and a terminal '.', '!' or '?'."""
    text = text.strip()
    if not text:
        return ""
    text = text[0].upper() + text[1:]
    if text[-1] not in ".!?":
        text += "."
    return text


def generate_review(sentiment, obvious=True):
    """Compose one synthetic alien tourist review of Earth.

    The review is built from, in order: an opening sentence chosen by visit
    category, one reaction sentence for each of 1-3 randomly sampled
    activities, 1-2 sentences rating an aspect of Earth, and a closing
    sentence. Reactions, ratings and the closing all follow ``sentiment``.

    Args:
        sentiment: ``"positive"``, ``"neutral"`` or ``"negative"``.
        obvious: Accepted for interface compatibility; it currently has no
            effect on the generated text.

    Returns:
        The review as a single string of space-separated sentences, at most
        5000 characters long.

    Raises:
        KeyError: If ``sentiment`` is not one of the three supported values.
    """
    # Resolve all sentiment-dependent tables first so an unsupported
    # sentiment fails immediately with KeyError.
    reaction_templates = _REACTIONS[sentiment]
    rating_low, rating_high = _RATING_RANGE[sentiment]
    closing_options = _CONCLUSIONS[sentiment]

    visit_type = random.choice(visit_purposes)
    chosen_activities = random.sample(activities, random.randint(1, 3))

    # The companion phrase carries its own leading space so templates read
    # correctly whether or not it is present.
    group = ""
    if random.random() < _GROUP_TRAVEL_PROBABILITY:
        group = " " + random.choice(_GROUP_TYPES)

    if visit_type in _VACATION_PURPOSES:
        category = "vacation"
    elif visit_type in _RESEARCH_PURPOSES:
        category = "research"
    else:
        category = "cultural"

    sentences = [random.choice(_OPENINGS[category]).format(group=group)]

    # Sample distinct templates so the same phrasing is never repeated
    # within a single review.
    chosen_templates = random.sample(reaction_templates, len(chosen_activities))
    for template, activity in zip(chosen_templates, chosen_activities):
        sentences.append(template.format(activity=activity))

    # Rate one or two different aspects of Earth.
    for aspect, ratings in random.sample(_EARTH_ASPECTS, random.randint(1, 2)):
        verb = "were" if aspect in _PLURAL_ASPECTS else "was"
        rating = ratings[random.randint(rating_low, rating_high)]
        sentences.append(f"The {aspect} {verb} {rating}.")

    sentences.append(random.choice(closing_options))

    # Join whole sentences only, stopping before the length limit is exceeded.
    review = ""
    for sentence in map(_finish_sentence, sentences):
        candidate = f"{review} {sentence}" if review else sentence
        if len(candidate) > _MAX_REVIEW_LENGTH:
            break
        review = candidate
    return review

# Build the dataset: 5000 synthetic rows, each labelled with a randomly drawn sentiment.
reviews = []
sentiments = ["positive", "neutral", "negative"]
weights = [0.4, 0.3, 0.3]  # 40% positive, 30% neutral, 30% negative

for review_id in range(1, 5001):
    # random.choices returns a one-element list here; unpack its single label.
    (sentiment,) = random.choices(sentiments, weights=weights, k=1)
    obvious = random.random() < 0.7  # 70% obvious, 30% subtle

    # Keyword arguments are evaluated in order, so the random draws happen as
    # username -> home planet -> review text.
    reviews.append(
        dict(
            ID=review_id,
            username=generate_username(),
            home_planet=random.choice(planets),
            review=generate_review(sentiment, obvious),
            label=sentiment,
        )
    )

# Persist every row to CSV, header first.
with open('PlanetAdvisor_reviews.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=["ID", "username", "home_planet", "review", "label"])
    writer.writeheader()
    for row in reviews:
        writer.writerow(row)

print("Dataset generated successfully!")

Dataset generated successfully!
