In [38]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [39]:
# Seed both random number generators (stdlib `random` and NumPy's legacy
# global generator) with the same value so every run yields the same dataset
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [40]:
# Size of the synthetic customer base (one generated record per customer)
num_customers = 1_000


In [41]:
# Build the demographic attributes: one ID, age, gender and city per customer
gender_options = ['Male', 'Female']
city_options = [
    'Kuala Lumpur',
    'Kuantan',
    'Penang',
    'Johor Bahru',
    'Kuching',
    'Kota Kinabalu',
]

customer_ids = range(1, num_customers + 1)  # sequential IDs 1..num_customers
ages = np.random.randint(low=18, high=70, size=num_customers)  # upper bound is exclusive: ages 18-69
# Genders are drawn before cities so the stdlib `random` stream is consumed
# in a fixed order (keeps the seeded output stable)
genders = random.choices(gender_options, k=num_customers)
cities = random.choices(city_options, k=num_customers)

In [42]:
# Assign every customer one of the three plans, drawn uniformly with replacement
plan_options = ['Plan A', 'Plan B', 'Plan C']
subscription_plans = random.choices(plan_options, k=num_customers)

In [43]:
# Simulate 12 months of usage per customer.
# Every array has one row per customer and one column per month; the upper
# bound passed to randint is exclusive.
monthly_shape = (num_customers, 12)
data_usage = np.random.randint(1, 100, size=monthly_shape)     # data usage in GB, 1-99
call_minutes = np.random.randint(50, 1000, size=monthly_shape)  # call minutes, 50-999
sms_usage = np.random.randint(0, 500, size=monthly_shape)      # SMS count, 0-499

In [44]:
# Simulate 12 months of billing amounts per customer (values 50-199; the upper bound is exclusive)
billing_amounts = np.random.randint(50, 200, size=(num_customers, 12))

# One payment date per customer per month: the 1st of that month in 2022 plus
# a random offset of 0-30 days (inclusive), stored directly as a 'YYYY-MM-DD'
# string. The offset can push a payment into the following calendar month.
payment_dates = [
    [
        (datetime(2022, month, 1) + timedelta(days=random.randint(0, 30))).strftime('%Y-%m-%d')
        for month in range(1, 13)
    ]
    for _ in range(num_customers)
]

In [45]:
# Draw an independent churn probability for every customer, uniform between 0.05 and 0.30
churn_probabilities = np.random.uniform(low=0.05, high=0.3, size=num_customers)

In [46]:
# Derive a churn label from gaps between consecutive payments, gated by a per-customer probability
def calculate_churn(payment_dates, churn_probabilities):
    """Label each customer as churned ('Yes') or retained ('No').

    A customer is a churn candidate when any two consecutive payment dates
    are more than 30 days apart. A candidate is labelled 'Yes' only if a
    fresh ``random.random()`` draw is less than or equal to that customer's
    churn probability; everyone else is labelled 'No'.

    Parameters:
        payment_dates: one sequence of 'YYYY-MM-DD' strings per customer.
        churn_probabilities: one probability per customer, paired with
            ``payment_dates`` by position (pairing stops at the shorter input).

    Returns:
        A list of 'Yes'/'No' strings, one per paired customer.
    """
    date_format = '%Y-%m-%d'
    labels = []
    for customer_dates, probability in zip(payment_dates, churn_probabilities):
        has_long_gap = False
        # Walk consecutive pairs and stop at the first gap longer than 30 days
        for position in range(len(customer_dates) - 1):
            gap = (
                datetime.strptime(customer_dates[position + 1], date_format)
                - datetime.strptime(customer_dates[position], date_format)
            )
            if gap.days > 30:
                has_long_gap = True
                break
        # The random draw happens only for candidates, so non-candidates do
        # not consume values from the seeded random stream
        churned = has_long_gap and random.random() <= probability
        labels.append('Yes' if churned else 'No')
    return labels

# Label every customer 'Yes'/'No' from the generated payment dates and per-customer probabilities
churn = calculate_churn(payment_dates, churn_probabilities)

In [47]:
# Assemble every generated column into one wide table: static customer
# attributes first, then five columns per month (suffix 1-12), then the label
data = {
    'Customer ID': customer_ids,
    'Age': ages,
    'Gender': genders,
    'City': cities,
    'Subscription Plan': subscription_plans,
}

for month_idx in range(12):
    month_no = month_idx + 1  # column suffixes are 1-based
    data.update({
        f'Data Usage (GB) {month_no}': data_usage[:, month_idx],
        f'Call Minutes {month_no}': call_minutes[:, month_idx],
        f'SMS Usage {month_no}': sms_usage[:, month_idx],
        f'Billing Amount (MYR) {month_no}': billing_amounts[:, month_idx],
        # payment_dates is a list of per-customer lists, so pick this month's entry from each
        f'Payment Date {month_no}': [customer_dates[month_idx] for customer_dates in payment_dates],
    })

data['Churn'] = churn

df = pd.DataFrame(data)

In [48]:
# Write the dataset to CSV without the DataFrame index column.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable output directory so the notebook runs on other machines.
output_csv_path = r'C:\Users\user\Desktop\ISP Portfolio\CustomerDemo_UsagePatternAnalysis\malaysianet_customer_data.csv'
df.to_csv(output_csv_path, index=False)

In [49]:
# Print a plain-text preview of the first five rows of the dataset
preview = df.head(n=5)
print(preview)

   Customer ID  Age  Gender           City Subscription Plan  \
0            1   56  Female   Kuala Lumpur            Plan A   
1            2   69    Male        Kuching            Plan B   
2            3   46    Male    Johor Bahru            Plan A   
3            4   32    Male  Kota Kinabalu            Plan A   
4            5   60  Female         Penang            Plan A   

   Data Usage (GB) 1  Call Minutes 1  SMS Usage 1  Billing Amount (MYR) 1  \
0                 99             620          299                     125   
1                 25             949           15                      89   
2                 40             270          375                     175   
3                 70             844          460                      81   
4                 94             701          334                     185   

  Payment Date 1  ...  Call Minutes 11  SMS Usage 11  Billing Amount (MYR) 11  \
0     2022-01-28  ...              470           443                   