#### Importing required libraries

In [7]:
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

#### Using Faker to generate sample customer data

In [5]:
# Shared Faker instance used by the data-generation cells below
fake = Faker()

# Pin Faker's and NumPy's random generators to the same fixed seed so the
# generated data is reproducible
Faker.seed(42)
np.random.seed(42)

#### Generating sample data for 500 customers

In [6]:
# Number of customers to generate; the per-customer loops run over 1..num_rows
num_rows: int = 500

#### Customer Demographics

In [8]:
# Sheet 1: Customer Demographics
# One record per customer id (1..num_rows). Faker supplies the free-text
# fields; NumPy draws the age and the categorical fields.
gender_options = ['Male', 'Female', 'Non-Binary']
education_options = ['High School', 'Bachelor\'s Degree', 'Master\'s Degree', 'PhD']
income_options = ['Low', 'Medium', 'High']

customer_data = [
    dict(
        customer_id=customer_id,
        name=fake.name(),
        age=np.random.randint(18, 65),
        gender=np.random.choice(gender_options),
        email=fake.email(),
        city=fake.city(),
        education=np.random.choice(education_options),
        occupation=fake.job(),
        income_level=np.random.choice(income_options),
        address=fake.address(),
    )
    for customer_id in range(1, num_rows + 1)
]

df_customer = pd.DataFrame(customer_data)

In [9]:
# Show the customer demographics frame as this cell's output
df_customer



#### Customer Transactions

In [10]:
# Sheet 2: Transaction History
# Forget ids handed out by earlier runs of this cell, so that re-running it
# does not keep shrinking the pool of still-unused transaction ids.
fake.unique.clear()

transaction_data = []
for i in range(1, num_rows + 1):
    # Each customer has 1-10 transactions. randint's upper bound is exclusive,
    # so 11 is required to make 10 reachable.
    for _ in range(np.random.randint(1, 11)):
        transaction_data.append({
            'transaction_id': fake.unique.random_number(digits=5),
            'customer_id': i,
            'transaction_type': np.random.choice(['Online', 'In-store']),
            'category': np.random.choice(['Electronics', 'Fitness', 'Books', 'Fashion', 'Groceries']),
            'amount': np.random.randint(10, 1000),
            'purchase_mode': np.random.choice(['Credit Card', 'Debit Card', 'PayPal', 'Cash']),
            'purchase_date': fake.date_between(start_date='-1y', end_date='today')
        })

df_transaction = pd.DataFrame(transaction_data)

In [11]:
# Show the transaction history frame as this cell's output
df_transaction



#### Customer Social Media Sentiments and Intent

In [None]:
# Sheet 3: Social Media Sentiments and Intent
# One post per customer id (1..num_rows), with a sentiment score drawn
# uniformly from [0, 1) and rounded to two decimals.
platform_options = ['Twitter', 'Facebook', 'Instagram', 'LinkedIn']
intent_options = ['Purchase Intent', 'Engagement', 'Brand Awareness']

social_media_data = [
    {
        'customer_id': customer_id,
        'platform': np.random.choice(platform_options),
        'post_text': fake.sentence(),
        # TODO(review): a 'timestamp' field is disabled here; the original line was
        #'timestamp': fake.datetime_between(start_date='-1y', end_date='today'),
        # confirm the intended Faker provider before enabling it.
        'sentiment_score': round(np.random.uniform(0, 1), 2),
        'intent': np.random.choice(intent_options),
    }
    for customer_id in range(1, num_rows + 1)
]

df_social_media = pd.DataFrame(social_media_data)

In [13]:
# Show the social media frame as this cell's output
df_social_media



In [None]:











# Sheet 4: Organizational Info
# One organisation record per customer id (1..num_rows). Fields are filled in
# the same order as the output columns.
industry_options = ['Technology', 'Healthcare', 'Education', 'Retail', 'Energy']

org_data = []
for customer_id in range(1, num_rows + 1):
    org_record = {'customer_id': customer_id}
    org_record['organization_name'] = fake.company()
    org_record['industry'] = np.random.choice(industry_options)
    org_record['revenue'] = np.random.randint(100000, 10000000)
    org_record['no_of_employees'] = np.random.randint(10, 1000)
    org_record['customer_role'] = fake.job()
    org_data.append(org_record)

df_org = pd.DataFrame(org_data)

# Sheet 5: Customer Preferences
# One preference record per customer id (1..num_rows). The price range is a
# "low-high" string built from two independent integer draws.
preference_categories = ['Electronics', 'Books', 'Fitness', 'Fashion', 'Groceries']

preference_data = [
    {
        'customer_id': customer_id,
        'preference_category': np.random.choice(preference_categories),
        'preferred_brands': ', '.join(fake.words(nb=3)),
        'preferred_price_range': f"{np.random.randint(10, 500)}-{np.random.randint(500, 1000)}",
    }
    for customer_id in range(1, num_rows + 1)
]

df_preference = pd.DataFrame(preference_data)

# Save to CSV
# Create the output folder first: to_csv does not create missing parent
# directories, so without this the export fails when SourceData/ is absent.
output_dir = Path('SourceData')
output_dir.mkdir(parents=True, exist_ok=True)

# File name -> frame; every sheet is written without the pandas index column.
csv_exports = {
    'customer_demographics.csv': df_customer,
    'transaction_history.csv': df_transaction,
    'social_media_sentiments.csv': df_social_media,
    'organizational_info.csv': df_org,
    'customer_preferences.csv': df_preference,
}
for csv_name, frame in csv_exports.items():
    frame.to_csv(output_dir / csv_name, index=False)