In [None]:
pip install faker


In [1]:
import pandas as pd
import random

# Sampling bounds per occupation. Each tuple is a (low, high) pair for the
# corresponding field; insertion order of the occupations is significant
# because the keys are later turned into a list and sampled from.
occupations = {
    "Student": {
        "income": (0, 5000),
        "age_range": (18, 25),
        "credit_limit": (1000, 5000),
    },
    "Private Sector Employee": {
        "income": (30000, 100000),
        "age_range": (22, 60),
        "credit_limit": (5000, 20000),
    },
    "Government Employee": {
        "income": (25000, 80000),
        "age_range": (25, 60),
        "credit_limit": (5000, 20000),
    },
    "Business Owner": {
        "income": (50000, 200000),
        "age_range": (30, 65),
        "credit_limit": (10000, 50000),
    },
    "Executive": {
        "income": (80000, 250000),
        "age_range": (35, 65),
        "credit_limit": (20000, 75000),
    },
}

# Pools of categorical values
card_types = [
    "Silver",
    "Gold",
    "Platinum",
    "Diamond",
]
genders = [
    "Male",
    "Female",
]
marital_statuses = [
    "Single",
    "Married",
    "Divorced",
    "Widowed",
]

# Define function to create a realistic customer profile
def generate_customer_profile(customer_id, rng=None):
    """Build one synthetic credit-card customer record.

    The occupation is drawn first and selects the (low, high) bounds used for
    age, income and credit limit from the module-level ``occupations`` table.
    The credit score is drawn independently and then drives late payments,
    credit usage and (together with the credit limit) the card type.

    Args:
        customer_id: Value stored unchanged under the ``"CustomerID"`` key.
        rng: Optional source of randomness exposing ``choice``, ``randint``,
            ``uniform`` and ``random`` (e.g. a ``random.Random`` instance).
            Defaults to the global ``random`` module, which preserves the
            behaviour of calls that pass only ``customer_id``.

    Returns:
        dict: One customer record with the keys ``CustomerID``, ``Age``,
        ``Gender``, ``MaritalStatus``, ``Occupation``, ``IncomeLevel``,
        ``CreditLimit``, ``CreditScore``, ``CardType``, ``YearsWithBank``,
        ``NumberOfCreditCards``, ``AverageMonthlySpending``, ``LatePayments``,
        ``CreditCardUsage``, ``MobileBankingUsage`` and
        ``CustomerSatisfactionRating``.
    """
    # Fall back to the shared module-level generator so existing callers and
    # random.seed() keep working exactly as before.
    if rng is None:
        rng = random

    # The order of the random draws below is deliberately fixed: a given
    # generator state always produces the same profile.
    occupation = rng.choice(list(occupations.keys()))
    gender = rng.choice(genders)
    marital_status = rng.choice(marital_statuses)

    # Assign attributes based on the chosen occupation
    bounds = occupations[occupation]
    age = rng.randint(*bounds["age_range"])
    income_level = rng.randint(*bounds["income"])
    credit_limit = rng.randint(*bounds["credit_limit"])
    credit_score = rng.randint(300, 850)  # realistic credit score range

    # Higher scores get fewer late payments and lower utilisation.
    if credit_score > 700:
        late_payments = rng.randint(0, 1)
        credit_usage = round(rng.uniform(0.1, 0.5), 2)
    elif credit_score >= 600:  # 600..700 inclusive; >700 was handled above
        late_payments = rng.randint(1, 3)
        credit_usage = round(rng.uniform(0.3, 0.7), 2)
    else:
        late_payments = rng.randint(3, 10)
        credit_usage = round(rng.uniform(0.5, 0.9), 2)

    # Assign card type based on credit score and limit (best tier first)
    if credit_score >= 750 and credit_limit > 30000:
        card_type = "Diamond"
    elif credit_score >= 650 and credit_limit > 20000:
        card_type = "Platinum"
    elif credit_score >= 600:
        card_type = "Gold"
    else:
        card_type = "Silver"

    # Customers aged 25 or younger get a shorter tenure and fewer cards.
    is_older_customer = age > 25
    years_with_bank = rng.randint(1, 20) if is_older_customer else rng.randint(1, 5)
    num_credit_cards = rng.randint(1, 5) if is_older_customer else rng.randint(1, 2)
    # Spending is the utilised share of the limit, truncated to whole units.
    avg_monthly_spending = int(credit_limit * credit_usage)
    mobile_banking_usage = "Yes" if rng.random() > 0.3 else "No"
    satisfaction_rating = rng.randint(1, 5)

    return {
        "CustomerID": customer_id,
        "Age": age,
        "Gender": gender,
        "MaritalStatus": marital_status,
        "Occupation": occupation,
        "IncomeLevel": income_level,
        "CreditLimit": credit_limit,
        "CreditScore": credit_score,
        "CardType": card_type,
        "YearsWithBank": years_with_bank,
        "NumberOfCreditCards": num_credit_cards,
        "AverageMonthlySpending": avg_monthly_spending,
        "LatePayments": late_payments,
        "CreditCardUsage": credit_usage,
        "MobileBankingUsage": mobile_banking_usage,
        "CustomerSatisfactionRating": satisfaction_rating
    }

# Generate realistic dataset
# Seed the global generator first so that re-running the notebook from a fresh
# kernel reproduces the same customers and therefore the same CSV.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

num_customers = 1200
# IDs are "CUST" followed by the 1-based index, zero-padded to four digits.
customer_data = [
    generate_customer_profile(f"CUST{i:04d}")
    for i in range(1, num_customers + 1)
]

# Create DataFrame and save to CSV (written to the current working directory)
df = pd.DataFrame(customer_data)
df.to_csv('realistic_credit_card_customers.csv', index=False)

# Display the first few rows; a bare last expression gets the notebook's rich
# table rendering instead of the wrapped plain-text output of print().
df.head()



  CustomerID  Age  Gender MaritalStatus               Occupation  IncomeLevel  \
0   CUST0001   44    Male       Widowed  Private Sector Employee        54278   
1   CUST0002   40  Female        Single      Government Employee        61471   
2   CUST0003   43    Male        Single  Private Sector Employee        80036   
3   CUST0004   21    Male      Divorced                  Student         2431   
4   CUST0005   41  Female       Widowed      Government Employee        51298   

   CreditLimit  CreditScore CardType  YearsWithBank  NumberOfCreditCards  \
0        11254          645     Gold              6                    2   
1         5371          505   Silver             10                    2   
2         5708          341   Silver              2                    2   
3         4642          721     Gold              2                    2   
4         7360          520   Silver              9                    1   

   AverageMonthlySpending  LatePayments  CreditCardUsage

In [None]:
synthetic_credit_card_customers.csv