In [27]:
import pandas as pd
import numpy as np
from faker import Faker
import random

fake = Faker()


In [28]:
num_customers = 20000
num_phones = 300


In [29]:
phone_names = ["Galaxy S10", "iPhone 12", "Pixel 5", "OnePlus 8", "Moto G", "Nokia 3310", "Sony Xperia", "LG Wing"]
colors = ["Black", "White", "Blue", "Red", "Green", "Yellow", "Pink", "Purple"]
camera_types = ["12MP", "24MP", "48MP", "108MP"]
manufacturers = ["Samsung", "Apple", "Google", "OnePlus", "Motorola", "Nokia", "Sony", "LG"]
boolean_options = [1, 0]
memory_options = [0, 28, 64, 128, 256, 512]


phones = []

for _ in range(num_phones):
    item_id = random.randint(1000, 9999999999)
    item_name = random.choice(phone_names)
    item_release_year = random.randint(2015, 2024)
    camera = random.choice(camera_types)
    height = round(random.uniform(5.0, 7.0), 2)
    width = round(random.uniform(2.5, 3.5), 2)
    weight = round(random.uniform(120, 240), 2)
    num_sim_cards = random.randint(1, 2)
    gps = random.choice(boolean_options)
    touchscreen = random.choice(boolean_options)
    color = random.choice(colors)
    memory = random.choice(memory_options)
    manufacturer = random.choice(manufacturers)
    
    phones.append([
        item_id, item_name, item_release_year, camera, height, width, weight, num_sim_cards, gps, touchscreen, color, memory, manufacturer
    ])


phone_columns = ["item_id", "item_name", "item_release_year", "camera", "height", "width", "weight", "num_sim_cards", "gps", "touchscreen", "color", "memory", "manufacturer"]
phone_df = pd.DataFrame(phones, columns=phone_columns)

In [30]:
contracts = []

for customer_id in range(1000, 1000 + num_customers):
    num_contracts = random.randint(1, 5)
    last_contract_end_date = None
    has_ended_contract = random.choice([True, False])
    
    for _ in range(num_contracts):
        customer_contract_id = random.randint(1000000000, 999999999999)
        item_id = random.choice(phone_df['item_id'].values)
        
        if last_contract_end_date:
            contract_start_date = last_contract_end_date
        else:
            contract_start_date = fake.date_between(start_date='-5y', end_date='today')
        
        contract_duration = random.randint(6, 24)
        contract_end_date = contract_start_date + pd.DateOffset(months=contract_duration)
        
        contracts.append([
            customer_id, customer_contract_id, item_id, contract_start_date, contract_end_date
        ])
        
        last_contract_end_date = contract_end_date

    if has_ended_contract and num_contracts > 1:
        contracts[-1][4] = fake.date_between(start_date='-3y', end_date='-1y')


contract_columns = ["customer_id", "customer_contract_id", "item_id", "contract_start_date", "contract_end_date"]
contract_df = pd.DataFrame(contracts, columns=contract_columns)


In [31]:
final_df = pd.merge(contract_df, phone_df, on='item_id')
final_df.to_csv("customer_phone_contracts.csv", index=False)

print("Done")

Done


In [32]:
final_df.head(20)

Unnamed: 0,customer_id,customer_contract_id,item_id,contract_start_date,contract_end_date,item_name,item_release_year,camera,height,width,weight,num_sim_cards,gps,touchscreen,color,memory,manufacturer
0,1000,251383585289,4076520694,2021-06-17,2023-01-17 00:00:00,OnePlus 8,2016,12MP,6.13,2.59,232.33,1,0,0,Green,28,Apple
1,1000,200566756995,6629149610,2023-01-17 00:00:00,2023-10-17 00:00:00,Pixel 5,2019,48MP,5.09,2.62,154.42,1,0,0,Yellow,28,LG
2,1001,915685571463,4835553033,2023-12-27,2025-07-27 00:00:00,LG Wing,2015,108MP,6.11,2.61,181.83,1,0,1,Green,256,Sony
3,1002,360675557700,4502002557,2019-09-20,2020-04-20 00:00:00,Nokia 3310,2016,108MP,5.31,3.11,152.71,1,0,1,Black,64,Apple
4,1002,360750023746,2199826881,2020-04-20 00:00:00,2022-02-20 00:00:00,Sony Xperia,2022,108MP,6.0,2.84,149.81,2,1,1,Purple,0,LG
5,1002,742550012972,6302907565,2022-02-20 00:00:00,2022-08-20 00:00:00,Nokia 3310,2023,24MP,5.97,2.96,208.45,1,0,0,Purple,128,Apple
6,1002,785166958782,2868573030,2022-08-20 00:00:00,2024-07-20 00:00:00,LG Wing,2023,12MP,5.51,2.82,121.86,1,1,1,Black,256,LG
7,1003,567750039514,9523390994,2021-08-01,2022-10-01 00:00:00,Pixel 5,2023,24MP,6.64,3.05,238.09,1,1,1,Yellow,256,Motorola
8,1004,292316480039,293345243,2021-01-10,2022-10-10 00:00:00,Moto G,2015,108MP,6.16,2.91,139.43,2,0,1,Red,28,Apple
9,1004,966893543371,9880830377,2022-10-10 00:00:00,2023-08-10 00:00:00,LG Wing,2023,24MP,5.56,3.4,189.43,2,0,0,Yellow,64,Google


In [33]:
filtered_df = final_df[final_df['customer_id'] == 1004]
print(filtered_df)

    customer_id  customer_contract_id     item_id  contract_start_date  \
8          1004          292316480039   293345243           2021-01-10   
9          1004          966893543371  9880830377  2022-10-10 00:00:00   
10         1004          296880614817  1332791787  2023-08-10 00:00:00   
11         1004          500428862391  7220512301  2025-08-10 00:00:00   

      contract_end_date   item_name  item_release_year camera  height  width  \
8   2022-10-10 00:00:00      Moto G               2015  108MP    6.16   2.91   
9   2023-08-10 00:00:00     LG Wing               2023   24MP    5.56   3.40   
10  2025-08-10 00:00:00  Nokia 3310               2015   12MP    5.18   2.70   
11  2027-04-10 00:00:00   OnePlus 8               2018   24MP    6.90   3.12   

    weight  num_sim_cards  gps  touchscreen   color  memory manufacturer  
8   139.43              2    0            1     Red      28        Apple  
9   189.43              2    0            0  Yellow      64       Google  
10 