# Generate data (Colab)
Run this cell to create `customers.csv` and `transactions.csv` in `/content`; then download the files or upload them to BigQuery.

In [ ]:
!pip install --quiet faker pandas numpy
from faker import Faker
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Fix the seeds of Faker's shared RNG and NumPy's global RNG so that every
# np.random draw below produces the same values on each run.
Faker.seed(42)
np.random.seed(42)

# ---------------------------------------------------------------------------
# Synthetic customers
# ---------------------------------------------------------------------------
n = 1000            # number of customers to generate
target_rows = 1000  # exact number of rows the transactions table must have

cust_ids = [f"C{str(i).zfill(5)}" for i in range(1, n + 1)]
names = [f"Customer {i}" for i in range(1, n + 1)]
# One signup per consecutive calendar day starting 2020-01-01, as ISO strings.
signup_dates = pd.date_range('2020-01-01', periods=n).strftime('%Y-%m-%d').tolist()
risk_segments = np.random.choice(['Low', 'Medium', 'High'], size=n, p=[0.5, 0.3, 0.2])
customers = pd.DataFrame({
    'customer_id': cust_ids,
    'name': names,
    'signup_date': signup_dates,
    'risk_segment': risk_segments,
})


def resize_transactions(frame, target, seed=42):
    """Return *frame* trimmed or padded to exactly *target* rows.

    A frame with at least *target* rows is down-sampled without replacement
    and re-indexed from 0. A shorter frame is padded with rows drawn (with
    replacement) from *frame* itself; every padded row receives a new
    ``transaction_id`` that continues after the highest id already present,
    so the id column stays unique instead of repeating the sampled rows' ids.

    Args:
        frame: Transactions table with a ``transaction_id`` column whose
            values are ``"T"`` followed by digits.
        target: Number of rows the returned frame must contain.
        seed: ``random_state`` passed to ``DataFrame.sample``.

    Returns:
        A new DataFrame with exactly *target* rows.
    """
    if len(frame) >= target:
        return frame.sample(n=target, random_state=seed).reset_index(drop=True)

    shortfall = target - len(frame)
    extra = frame.sample(n=shortfall, replace=True, random_state=seed).copy()
    # Re-issue ids for the padded rows; copying them verbatim would duplicate
    # the transaction_id of whichever rows were sampled.
    last_id = int(frame['transaction_id'].str[1:].astype(int).max())
    extra['transaction_id'] = [
        f"T{str(last_id + k).zfill(7)}" for k in range(1, shortfall + 1)
    ]
    return pd.concat([frame, extra], ignore_index=True)


# ---------------------------------------------------------------------------
# Synthetic transactions
# ---------------------------------------------------------------------------
# NOTE: the order of np.random calls is kept stable (poisson per customer,
# then two exponential draws per transaction) so seeded output does not drift.
rows = []
for c, signup in zip(cust_ids, signup_dates):
    # Poisson(2) transactions per customer, but never fewer than one.
    m = max(1, np.random.poisson(2))
    start = datetime.fromisoformat(signup)
    for _ in range(m):
        # Transaction happens a whole number of days on or after signup.
        tx_date = start + timedelta(days=int(np.random.exponential(scale=200)))
        # Exponential amount shifted by 10, rounded to 2 decimals.
        amount = round(np.random.exponential(scale=100) + 10, 2)
        rows.append({
            'transaction_id': f"T{str(len(rows) + 1).zfill(7)}",
            'customer_id': c,
            'transaction_date': tx_date.date().isoformat(),
            'amount': amount,
        })
transactions = pd.DataFrame(rows)

# Force the table to exactly target_rows rows. Down-sampling picks rows
# without regard to customer, so some customers may end up with no
# transactions in the final table.
transactions = resize_transactions(transactions, target_rows)
# Write each table to its CSV under /content, omitting the DataFrame index
# column, then report the two output paths.
for frame, path in (
    (customers, '/content/customers.csv'),
    (transactions, '/content/transactions.csv'),
):
    frame.to_csv(path, index=False)
print('files created: /content/customers.csv and /content/transactions.csv')