In [None]:
!pip install -q google-cloud-bigquery
!gcloud auth application-default login


: 

In [2]:
from google.cloud import bigquery

# Initialize BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Define the tables schema

tables_schema = {
    "udata_sales_history": [
        bigquery.SchemaField("sale_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("product_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("product_name", "STRING"),
        bigquery.SchemaField("sale_date", "DATE", mode="REQUIRED"),
        bigquery.SchemaField("quantity_sold", "INTEGER", mode="REQUIRED"),
        bigquery.SchemaField("sale_price", "FLOAT"),
        bigquery.SchemaField("customer_id", "STRING"),
        bigquery.SchemaField("region", "STRING"),
    ],
    "udata_trend_data": [
        bigquery.SchemaField("trend_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("keyword", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("source", "STRING", mode="REQUIRED"),  # Google Trends, TikTok, etc.
        bigquery.SchemaField("trend_score", "FLOAT", mode="REQUIRED"),
        bigquery.SchemaField("date", "DATE", mode="REQUIRED"),
    ],
    "udata_new_products": [
        bigquery.SchemaField("product_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("product_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("category", "STRING"),
        bigquery.SchemaField("supplier_id", "STRING"),
        bigquery.SchemaField("price", "FLOAT"),
        bigquery.SchemaField("launch_date", "DATE"),
        bigquery.SchemaField("expected_trend_match", "FLOAT"),  # AI score matching trends
    ]
}

# Create tables in BigQuery
for table_name, schema in tables_schema.items():
    table_id = f"{PROJECT_ID}.{DATASET_NAME}.{table_name}"
    table = bigquery.Table(table_id, schema=schema)

    try:
        client.create_table(table)
        print(f"✅ Table `{table_name}` created successfully in `{DATASET_NAME}` dataset.")
    except Exception as e:
        print(f"⚠️ Error creating table `{table_name}`: {e}")


NameError: name 'DATASET_NAME' is not defined

In [3]:
!pip install faker

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Collecting tzdata (from faker)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: tzdata, faker
Successfully installed faker-37.1.0 tzdata-2025.2


In [None]:
from datetime import datetime, timedelta, date
from faker import Faker
from google.cloud import bigquery
import random
import uuid

# Initialize BigQuery client
client = bigquery.Client()

fake = Faker()
BASE_ROWS = 10000 
REPETITIONS = 10  

# 🚀 Function to Generate Sales Data
def generate_sales_data():
    base_data = [
        {
            "sale_id": str(uuid.uuid4()),
            "product_id": str(uuid.uuid4()),
            "product_name": fake.word().capitalize(),
            "sale_date": fake.date_between(start_date="-1y", end_date="today"),
            "quantity_sold": random.randint(1, 20),
            "sale_price": round(random.uniform(5, 100), 2),
            "customer_id": str(uuid.uuid4()),
            "region": fake.city(),
        }
        for _ in range(BASE_ROWS)
    ]
    return [
        {**row, "sale_id": str(uuid.uuid4()), "sale_date": (row["sale_date"] + timedelta(days=random.randint(-5, 5))).isoformat()}
        for _ in range(REPETITIONS) for row in base_data
    ]

# 🚀 Function to Generate Trend Data
def generate_trend_data():
    base_data = [
        {
            "trend_id": str(uuid.uuid4()),
            "keyword": fake.word(),
            "source": random.choice(["Google Trends", "TikTok", "Instagram", "Twitter"]),
            "trend_score": round(random.uniform(0, 100), 2),
            "date": fake.date_between(start_date="-1y", end_date="today"),
        }
        for _ in range(BASE_ROWS)
    ]
    return [
        {**row, "trend_id": str(uuid.uuid4()), "trend_score": row["trend_score"] + round(random.uniform(-5, 5), 2), "date": row["date"].isoformat()}
        for _ in range(REPETITIONS) for row in base_data
    ]

# 🚀 Function to Generate New Product Data
def generate_new_product_data():
    base_data = [
        {
            "product_id": str(uuid.uuid4()),
            "product_name": fake.word().capitalize(),
            "category": random.choice(["Electronics", "Clothing", "Home & Garden", "Toys", "Beauty"]),
            "supplier_id": str(uuid.uuid4()),
            "price": round(random.uniform(10, 500), 2),
            "launch_date": fake.date_between(start_date="-6m", end_date="today"),
            "expected_trend_match": round(random.uniform(0, 100), 2),
        }
        for _ in range(BASE_ROWS)
    ]
    return [
        {**row, "launch_date": row["launch_date"].isoformat(), "price": row["price"] + round(random.uniform(-5, 5), 2)}
        for _ in range(REPETITIONS) for row in base_data
    ]

# 🚀 Function to Insert Data into BigQuery
def insert_data(table_name, data):
    table_id = f"{PROJECT_ID}.{DATASET_NAME}.{table_name}"
    
    job_config = bigquery.LoadJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_APPEND)

    # Convert date objects to string format
    json_data = [
        {key: (value.isoformat() if isinstance(value, (datetime, timedelta, date)) else value) for key, value in row.items()}
        for row in data
    ]

    # Insert the data
    job = client.load_table_from_json(json_data, table_id, job_config=job_config)
    job.result()

    print(f"✅ Inserted {len(data)} rows into {table_name}")

# 🚀 Insert Mock Data into BigQuery
insert_data("udata_sales_history", generate_sales_data())
insert_data("udata_trend_data", generate_trend_data())
insert_data("udata_new_products", generate_new_product_data())

✅ Inserted 100000 rows into sales_history
✅ Inserted 100000 rows into trend_data
✅ Inserted 100000 rows into new_products
