In [10]:
import pandas as pd
import numpy as np

# Number of synthetic customers (one row per customer)
num_samples = 1000000

# Seed NumPy's legacy global generator so every draw below is repeatable.
# The draws must stay in this order: reordering them changes the generated data.
np.random.seed(42)

# Feature columns. randint's upper bound is exclusive, so ages fall in 18-69,
# transaction counts in 1-19 and days since last purchase in 1-364.
age = np.random.randint(low=18, high=70, size=num_samples)
num_transactions = np.random.randint(low=1, high=20, size=num_samples)
avg_transaction_value = np.random.uniform(low=20, high=200, size=num_samples)
last_purchase_days_ago = np.random.randint(low=1, high=365, size=num_samples)

# Target variable: a logistic model in which the cross-sell probability rises
# with age and with the average transaction value (both coefficients are positive).
cross_sell_logit = age * 0.03 + avg_transaction_value * 0.005 - 2
probability_cross_sell = 1 / (1 + np.exp(-cross_sell_logit))
# One Bernoulli draw per customer using that customer's own probability
cross_sell = np.random.binomial(n=1, p=probability_cross_sell, size=num_samples)

# Assemble the table; the dict order fixes the column order of the CSV
table_columns = {
    'Age': age,
    'Number_of_Transactions': num_transactions,
    'Average_Transaction_Value': avg_transaction_value,
    'Last_Purchase_Days_Ago': last_purchase_days_ago,
    'Cross_Sell': cross_sell,
}
data = pd.DataFrame(table_columns)

# Persist the dataset without the positional index column
data.to_csv('synthetic_cross_sell_dataset.csv', index=False)

# Preview the first five rows as the cell output
data.head()

Unnamed: 0,Age,Number_of_Transactions,Average_Transaction_Value,Last_Purchase_Days_Ago,Cross_Sell
0,56,19,164.776013,44,1
1,69,8,69.355371,304,1
2,46,2,135.001792,95,1
3,32,1,32.386847,96,0
4,60,6,62.238723,61,0


In [11]:
import pandas as pd
import numpy as np

# Number of synthetic customers (one row per customer)
num_samples = 1500000

# Seed NumPy's legacy global generator so every draw below is repeatable.
# The draws must stay in this order: reordering them changes the generated data.
np.random.seed(42)

# Feature columns. randint's upper bound is exclusive, so ages fall in 18-69,
# transaction counts in 1-19 and days since last purchase in 1-364.
age = np.random.randint(low=18, high=70, size=num_samples)
num_transactions = np.random.randint(low=1, high=20, size=num_samples)
avg_transaction_value = np.random.uniform(low=20, high=200, size=num_samples)
last_purchase_days_ago = np.random.randint(low=1, high=365, size=num_samples)

# Target variable: a logistic model in which the cross-sell probability rises
# with age and with the average transaction value (both coefficients are positive).
cross_sell_logit = age * 0.03 + avg_transaction_value * 0.005 - 2
probability_cross_sell = 1 / (1 + np.exp(-cross_sell_logit))
# One Bernoulli draw per customer using that customer's own probability
cross_sell = np.random.binomial(n=1, p=probability_cross_sell, size=num_samples)

# Assemble the table; the dict order fixes the column order of the CSV
table_columns = {
    'Age': age,
    'Number_of_Transactions': num_transactions,
    'Average_Transaction_Value': avg_transaction_value,
    'Last_Purchase_Days_Ago': last_purchase_days_ago,
    'Cross_Sell': cross_sell,
}
data = pd.DataFrame(table_columns)

# Persist the dataset without the positional index column
data.to_csv('synthetic_cross_sell_dataset_new.csv', index=False)

# Preview the first five rows as the cell output
data.head()

Unnamed: 0,Age,Number_of_Transactions,Average_Transaction_Value,Last_Purchase_Days_Ago,Cross_Sell
0,56,2,53.287768,113,1
1,69,5,40.244894,16,0
2,46,16,46.883453,261,1
3,32,19,169.65468,180,0
4,60,12,89.755654,48,0


In [15]:
import pandas as pd
import numpy as np
import uuid

# Number of synthetic customers (one row per customer)
num_samples = 5000

# Seed NumPy's legacy global generator so the NumPy draws below are repeatable.
# The draws must stay in this order: reordering them changes the generated data.
np.random.seed(42)

# Feature columns. randint's upper bound is exclusive, so ages fall in 18-69,
# transaction counts in 1-19 and days since last purchase in 1-364.
age = np.random.randint(low=18, high=70, size=num_samples)
gender = np.random.choice(['Male', 'Female'], size=num_samples)
num_transactions = np.random.randint(low=1, high=20, size=num_samples)
avg_transaction_value = np.random.uniform(low=20, high=200, size=num_samples)
last_purchase_days_ago = np.random.randint(low=1, high=365, size=num_samples)
product_categories = np.random.choice(
    ['Electronics', 'Clothing', 'Books', 'Home', 'Beauty'],
    size=num_samples,
)
total_spending = np.random.uniform(low=50, high=2000, size=num_samples)

# One random (version 4) UUID string per customer.
# uuid4 does not draw from NumPy's generator, so these IDs differ between runs
# even though the NumPy seed is fixed.
customer_ids = [str(uuid.uuid4()) for _ in range(num_samples)]

# Target variable: a logistic model in which the cross-sell probability rises
# with age, is higher for female customers, and rises with total spending.
is_female = gender == 'Female'  # boolean array; counts as 0/1 in the sum below
cross_sell_logit = age * 0.03 + is_female * 0.8 + total_spending * 0.001 - 2
probability_cross_sell = 1 / (1 + np.exp(-cross_sell_logit))
# One Bernoulli draw per customer using that customer's own probability
cross_sell = np.random.binomial(n=1, p=probability_cross_sell, size=num_samples)

# Assemble the table; the dict order fixes the column order of the CSV
table_columns = {
    'CustomerID': customer_ids,
    'Age': age,
    'Gender': gender,
    'Number_of_Transactions': num_transactions,
    'Average_Transaction_Value': avg_transaction_value,
    'Last_Purchase_Days_Ago': last_purchase_days_ago,
    'Product_Category': product_categories,
    'Total_Spending': total_spending,
    'Cross_Sell': cross_sell,
}
data = pd.DataFrame(table_columns)

# Persist the dataset without the positional index column
data.to_csv('complex_synthetic_cross_sell_dataset_with_customerID.csv', index=False)

# Preview the first five rows as the cell output
data.head()

Unnamed: 0,CustomerID,Age,Gender,Number_of_Transactions,Average_Transaction_Value,Last_Purchase_Days_Ago,Product_Category,Total_Spending,Cross_Sell
0,4a17db2f-695a-4101-bace-5c7156774bbe,56,Male,17,29.404205,200,Home,813.210552,1
1,09ab7872-7c12-4ef9-9895-1e20eca88ad6,69,Female,15,37.197897,35,Books,76.460661,1
2,9eb309fa-b7d9-4c43-a2ae-d8439b6d19d3,46,Female,6,138.596,2,Books,547.106804,1
3,5a443c5f-08b6-496d-9463-037e13dfe076,32,Male,16,136.480027,289,Clothing,1834.08433,1
4,178a502d-6d52-477f-a74b-039611e4e238,60,Male,17,25.949395,332,Beauty,1390.707408,1
