In [1]:
import pandas as pd
import random
import hashlib
import time
from datetime import datetime, timedelta

In [2]:
# Lookup lists the record generator samples from with random.choice.
cities = [
    'Bogotá',
    'Medellín',
    'Cali',
    'Bucaramanga',
    'Barranquilla',
]
products = [
    'Dell Laptop',
    'HP Laptop',
    'Lenovo Laptop',
    'PlayStation',
    'Xbox',
    'Nintendo Switch',
]
# NOTE(review): 'COMPLETED' is listed twice, so a uniform random.choice picks it
# twice as often as any other status — presumably deliberate; confirm.
purchase_statuses = [
    'COMPLETED',
    'FAILED_CHECKOUT',
    'FAILED_API_RESPONSE',
    'INSUFFICIENT_FUNDS',
    'USER_ERROR',
    'FRAUD',
    'COMPLETED',
]
sources = [
    'Facebook',
    'Instagram',
    'Organic',
    'Twitter',
    'Influencer_1',
    'Influencer_2',
    'Influencer_3',
    'Influencer_4',
]
payment_methods = [
    'Credit Card',
    'PSE',
    'Cash',
    'Nequi',
    'Daviplata',
]

In [5]:
# Candidate (latitude, longitude) points for each city; one point is picked
# at random per generated record.
city_coords = {
    'Bogotá': [
        (4.6516, -74.1263),
        (4.6620, -74.1347),
        (4.6476, -74.1019),
    ],
    'Medellín': [
        (6.1633, -75.6053),
        (6.1778, -75.5914),
        (6.1981, -75.5734),
    ],
    'Cali': [
        (3.4288, -76.5375),
        (3.4149, -76.5404),
        (3.4164, -76.5475),
    ],
    # Only two points are defined for this city.
    'Bucaramanga': [
        (7.0999, -73.1073),
        (7.0724, -73.1053),
    ],
    'Barranquilla': [
        (11.0142, -74.8275),
        (11.0040, -74.8355),
        (10.9906, -74.7888),
    ],
}

In [3]:
# Function to determine payment method based on the source
def get_payment_method(source, purchase_statuses, online_payments, in_store_payments):
    """Pick a payment method, purchase status and order type for a traffic source.

    Args:
        source: Name of the acquisition source. The exact value 'Organic'
            is treated as an in-store purchase; anything else is online.
        purchase_statuses: Sequence of statuses to draw from for online orders.
        online_payments: Sequence of payment methods for online orders.
        in_store_payments: Sequence of payment methods for in-store orders.

    Returns:
        tuple: ``(payment, status, order_type)``. In-store orders always have
        status 'COMPLETED' and order type 'IN_STORE'; online orders get a
        randomly drawn status and order type 'ONLINE'.
    """
    if source != 'Organic':
        # Online path: draw the payment first, then the status.
        chosen_payment = random.choice(online_payments)
        chosen_status = random.choice(purchase_statuses)
        return chosen_payment, chosen_status, 'ONLINE'

    # In-store path: only the payment method is random.
    return random.choice(in_store_payments), 'COMPLETED', 'IN_STORE'

In [4]:
# Function to get coordinates based on the city
def get_coordinates(city, city_coords):
    """Return one randomly chosen coordinate entry for ``city``.

    Args:
        city: Key to look up in ``city_coords``.
        city_coords: Mapping from city name to a sequence of coordinate entries.

    Returns:
        One element of ``city_coords[city]``, chosen with ``random.choice``.

    Raises:
        KeyError: If ``city`` is not a key of ``city_coords``.
    """
    candidate_points = city_coords[city]
    return random.choice(candidate_points)

In [6]:
# Function to generate purchase data
def generate_purchase_data(num_records, delay_range=(0.1, 0.5)):
    """Generate a list of synthetic purchase records.

    Each record is built from random draws over the module-level
    ``products``, ``cities``, ``sources``, ``purchase_statuses``,
    ``payment_methods`` and ``city_coords`` collections.

    Args:
        num_records: Number of records to generate. Zero or a negative
            value yields an empty list.
        delay_range: ``(low, high)`` bounds, in seconds, of the random pause
            taken after each record to simulate processing time. Defaults
            to ``(0.1, 0.5)``. Pass ``None`` to skip the pause entirely,
            which makes large batches generate without the per-record wait.

    Returns:
        list[dict]: One dict per record with the keys ``purchase_ID``,
        ``Product_name``, ``Pricing``, ``Commission``, ``Revenue``,
        ``Payment_Method``, ``Status``, ``Order_Type``, ``City``,
        ``Latitude``, ``Longitude``, ``Source``, ``Brand``, ``Category``
        and ``Created_at``. Pricing, commission, revenue and the
        coordinates are stored as strings.
    """
    data = []
    for record_index in range(num_records):
        # Only the day is randomised (0-30 days back); the time of day is
        # whatever the clock reads when this record is generated.
        days_back = random.randint(0, 30)
        date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d %H:%M:%S")
        product = random.choice(products)
        pricing = round(random.uniform(200, 2000), 2)
        commission = round(random.uniform(10, 200), 2)
        city = random.choice(cities)
        source = random.choice(sources)
        # The same payment list is passed for both online and in-store orders.
        payment, status, order_type = get_payment_method(source, purchase_statuses, payment_methods, payment_methods)
        coords = get_coordinates(city, city_coords)

        # Short identifier: first 10 hex characters of a SHA-256 over the
        # record's index and main fields.
        id_seed = f"{record_index} {product} {pricing} {commission} {date} {source} {status}"
        purchase_id = hashlib.sha256(id_seed.encode('utf-8')).hexdigest()[:10]

        purchase = {
            'purchase_ID': purchase_id,
            'Product_name': product,
            'Pricing': str(pricing),
            'Commission': str(commission),
            # NOTE(review): revenue is computed as pricing * commission;
            # confirm this is the intended formula.
            'Revenue': str(round(pricing * commission, 2)),
            'Payment_Method': payment,
            'Status': status,
            'Order_Type': order_type,
            'City': city,
            'Latitude': str(coords[0]),
            'Longitude': str(coords[1]),
            'Source': source,
            'Brand': 'Placeholder Brand',  # Replace with real brand logic if needed
            'Category': 'Placeholder Category',  # Replace with real category logic if needed
            'Created_at': date
        }
        data.append(purchase)

        # Simulated processing time; skipped when the caller disables it.
        if delay_range is not None:
            time.sleep(random.uniform(*delay_range))

    return data

In [7]:
# Build the sample dataset: 100 synthetic purchase records.
# The generator pauses 0.1-0.5 s after each record, so this cell takes a while.
purchase_data = generate_purchase_data(num_records=100)

In [8]:
# Load the list of record dicts into a DataFrame (one row per purchase)
df_purchases = pd.DataFrame(data=purchase_data)

In [9]:
# Display the first few records. A bare trailing expression lets the notebook
# render the DataFrame with its rich table display instead of wrapped plain text.
df_purchases.head()

  purchase_ID     Product_name  Pricing Commission    Revenue Payment_Method  \
0  234827fbeb      Dell Laptop  1307.85      57.19   74795.94          Nequi   
1  7840009738      Dell Laptop  1627.49     173.74  282760.11           Cash   
2  4d40558e26  Nintendo Switch   377.38     167.88   63354.55    Credit Card   
3  1fc4c5b6d6      Dell Laptop   469.19     185.79   87170.81      Daviplata   
4  b2c48859e8    Lenovo Laptop  1891.72     115.37  218247.74            PSE   

               Status Order_Type          City Latitude Longitude  \
0          USER_ERROR     ONLINE          Cali   3.4288  -76.5375   
1  INSUFFICIENT_FUNDS     ONLINE      Medellín   6.1778  -75.5914   
2     FAILED_CHECKOUT     ONLINE      Medellín   6.1981  -75.5734   
3     FAILED_CHECKOUT     ONLINE   Bucaramanga   7.0724  -73.1053   
4          USER_ERROR     ONLINE  Barranquilla   11.004  -74.8355   

         Source              Brand              Category           Created_at  
0  Influencer_3  Placeho

In [10]:
# Write the generated purchases to a CSV file, omitting the DataFrame index column
df_purchases.to_csv(
    path_or_buf='purchase_data.csv',
    index=False,
)