In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global (legacy) RNG so every np.random.* draw below is
# repeatable on a fresh Restart & Run All.
SEED = 42
np.random.seed(SEED)


In [2]:
# Row count of the synthetic dataset; every generated column has this length.
n = 1_000


In [3]:
# Company column: each row is assigned to one of the two retailers
# (np.random.choice samples uniformly when no probabilities are given).
retailers = ['Walmart', 'Target']
company = np.random.choice(retailers, size=n)


In [4]:
# Store attributes.
# NOTE(review): the three columns are drawn independently per row, so the same
# Store_ID can appear with different Region / Location_Type values.

# Integer IDs in 1000-1999 (randint's upper bound is exclusive).
store_id = np.random.randint(low=1000, high=2000, size=n)

location_types = ['Urban', 'Suburban', 'Rural']
location = np.random.choice(location_types, size=n)

regions = ['North', 'South', 'East', 'West']
region = np.random.choice(regions, size=n)


In [5]:
# Customer demographics (all randint upper bounds are exclusive).

# Ages 18-69.
age = np.random.randint(low=18, high=70, size=n)

genders = ['Male', 'Female']
gender = np.random.choice(genders, size=n)

# Income as a whole number in 20000-119999.
income = np.random.randint(low=20000, high=120000, size=n)

# Household size of 1-5 people.
family_size = np.random.randint(low=1, high=6, size=n)


In [6]:
# Transaction size and pricing.

# 1-19 units per row (upper bound exclusive).
units_sold = np.random.randint(low=1, high=20, size=n)

# Continuous unit price drawn uniformly from [5, 500).
price_per_unit = np.random.uniform(low=5, high=500, size=n)

# Revenue for the row: quantity times unit price.
sales = price_per_unit * units_sold


In [7]:
# Cost and profitability.

# Unit cost is a random 50-90% share of the unit price, so the resulting
# margins land between roughly 10% and 50%.
cost_ratio = np.random.uniform(low=0.5, high=0.9, size=n)
cost_per_unit = price_per_unit * cost_ratio

total_cost = units_sold * cost_per_unit

profit = sales - total_cost

# Profit as a percentage of revenue.
profit_margin = 100 * (profit / sales)


In [8]:
# Product category for each row, picked from five departments.
categories = ['Electronics', 'Clothing', 'Groceries', 'Furniture', 'Toys']
category = np.random.choice(categories, size=n)


In [9]:
# One date per row: n consecutive calendar days beginning 2023-01-01.
date = pd.date_range('2023-01-01', periods=n, freq='D')


In [10]:
# Assemble the generated arrays into a single table.
# The dict's insertion order determines the column order of the frame.
column_data = {
    # Store attributes
    'Company': company,
    'Store_ID': store_id,
    'Region': region,
    'Location_Type': location,

    # Customer demographics
    'Customer_Age': age,
    'Customer_Gender': gender,
    'Customer_Income': income,
    'Family_Size': family_size,

    # Product
    'Product_Category': category,

    # Sales and profitability
    'Units_Sold': units_sold,
    'Price_Per_Unit': price_per_unit,
    'Sales': sales,
    'Total_Cost': total_cost,
    'Profit': profit,
    'Profit_Margin (%)': profit_margin,

    # Transaction date
    'Date': date,
}
data = pd.DataFrame(column_data)

# Preview the first five rows.
data.head()


Unnamed: 0,Company,Store_ID,Region,Location_Type,Customer_Age,Customer_Gender,Customer_Income,Family_Size,Product_Category,Units_Sold,Price_Per_Unit,Sales,Total_Cost,Profit,Profit_Margin (%),Date
0,Walmart,1501,North,Suburban,40,Male,58759,1,Clothing,13,237.167345,3083.175487,1602.200988,1480.974499,48.034064,2023-01-01
1,Target,1958,East,Rural,20,Male,96067,2,Toys,18,464.634989,8363.429798,4820.888815,3542.540983,42.357514,2023-01-02
2,Walmart,1144,North,Suburban,21,Female,45307,1,Groceries,13,322.337621,4190.389077,3420.218373,770.170704,18.379456,2023-01-03
3,Walmart,1200,East,Rural,42,Male,116371,1,Electronics,17,159.268194,2707.559294,1845.42783,862.131464,31.841647,2023-01-04
4,Walmart,1928,South,Urban,64,Male,31064,3,Electronics,5,265.032541,1325.162705,822.481973,502.680731,37.93351,2023-01-05


In [11]:
# Print a schema summary of the frame: column names, non-null counts and dtypes.
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Company            1000 non-null   object        
 1   Store_ID           1000 non-null   int64         
 2   Region             1000 non-null   object        
 3   Location_Type      1000 non-null   object        
 4   Customer_Age       1000 non-null   int64         
 5   Customer_Gender    1000 non-null   object        
 6   Customer_Income    1000 non-null   int64         
 7   Family_Size        1000 non-null   int64         
 8   Product_Category   1000 non-null   object        
 9   Units_Sold         1000 non-null   int64         
 10  Price_Per_Unit     1000 non-null   float64       
 11  Sales              1000 non-null   float64       
 12  Total_Cost         1000 non-null   float64       
 13  Profit             1000 non-null   float64       
 14  Profit_Ma

In [12]:
# Descriptive statistics (count, mean, min, quartiles, max, std) per column.
summary_stats = data.describe()
summary_stats


Unnamed: 0,Store_ID,Customer_Age,Customer_Income,Family_Size,Units_Sold,Price_Per_Unit,Sales,Total_Cost,Profit,Profit_Margin (%),Date
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
mean,1495.289,43.981,70442.626,3.044,10.054,251.330548,2551.513439,1762.679369,788.83407,30.449418,2024-05-14 12:00:00
min,1000.0,18.0,20203.0,1.0,1.0,6.939596,13.399719,9.576015,3.823704,10.031708,2023-01-01 00:00:00
25%,1228.0,31.0,45555.75,2.0,5.0,123.909723,744.204263,541.589344,187.309102,20.748119,2023-09-07 18:00:00
50%,1501.0,44.0,69766.5,3.0,10.0,252.3979,1920.476613,1300.930039,533.699415,30.715597,2024-05-14 12:00:00
75%,1766.0,57.0,95489.75,4.0,15.0,378.86208,3811.575348,2595.635669,1138.322192,40.240887,2025-01-19 06:00:00
max,1997.0,69.0,119555.0,5.0,19.0,499.733037,9494.927705,8087.63773,4629.821543,49.99036,2025-09-26 00:00:00
std,298.877549,14.803709,28522.858964,1.398578,5.50994,144.288408,2206.229942,1563.619753,795.229517,11.375209,


In [13]:
# Write the dataset to a CSV in the working directory, omitting the row index.
output_path = 'walmart_target_sales_dataset.csv'
data.to_csv(output_path, index=False)

# Only reached if the write above did not raise.
print("Dataset saved successfully!")


Dataset saved successfully!
