In [2]:
import sys
import os

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

In [4]:
from utils.synthetic_realistic_data_gen import create_component_dataset, create_market_data, create_sample_dataset

print("=== TFT-LCD Synthetic Dataset Generator ===")
print("Generating comprehensive synthetic dataset...")
    
# Generate main production dataset
print("\n1. Generating main production dataset...")
main_df = create_sample_dataset()

# Generate component dataset
print("2. Generating component pricing dataset...")
component_df = create_component_dataset()

# Generate market data
print("3. Generating market indicators dataset...")
market_df = create_market_data()

print(f"\n=== Dataset Summary ===")
print(f"Main Dataset: {main_df.shape[0]} records, {main_df.shape[1]} columns")
print(f"Component Dataset: {component_df.shape[0]} records, {component_df.shape[1]} columns")
print(f"Market Dataset: {market_df.shape[0]} records, {market_df.shape[1]} columns")

print(f"\nDate Range: {main_df['Date'].min()} to {main_df['Date'].max()}")
print(f"Panel Sizes: {sorted(main_df['Panel_Size'].unique())}")
print(f"Plants: {sorted(main_df['Plant'].unique())}")
print(f"Market Segments: {sorted(main_df['Market_Segment'].unique())}")

# Show sample data
print(f"\n=== Sample Main Dataset (first 5 records) ===")
for i, row in main_df.head().iterrows():
    print(f"Record {i+1}:")
    print(f"  Date: {row['Date'].strftime('%Y-%m-%d')}")
    print(f"  Panel: {row['Panel_Size']}\" {row['Market_Segment']} at {row['Plant']}")
    print(f"  Demand: {row['Forecasted_Demand']:,} | Production: {row['Actual_Production']:,}")
    print(f"  Yield: {row['Production_Yield']:.1%} | Price: ${row['Unit_Selling_Price']:.2f}")
    print(f"  Revenue: ${row['Revenue']:,}")
    print()

# Key statistics
print(f"=== Key Statistics ===")
print(f"Average Weekly Demand: {main_df['Forecasted_Demand'].mean():,.0f} units")
print(f"Average Production Yield: {main_df['Production_Yield'].mean():.1%}")
print(f"Average Capacity Utilization: {main_df['Capacity_Utilization'].mean():.1%}")
print(f"Average Unit Price: ${main_df['Unit_Selling_Price'].mean():.2f}")
print(f"Average Unit Margin: ${main_df['Unit_Margin'].mean():.2f}")
print(f"Total Revenue (12 weeks): ${main_df['Revenue'].sum():,.0f}")

# Export capability (pseudo-code for actual file saving)
print(f"\n=== Export Instructions ===")
print("To save these datasets, use:")
print("main_df.to_csv('tft_lcd_main_data.csv', index=False)")
print("component_df.to_csv('tft_lcd_component_data.csv', index=False)")
print("market_df.to_csv('tft_lcd_market_data.csv', index=False)")

# Return datasets for further use
# return main_df, component_df, market_df

=== TFT-LCD Synthetic Dataset Generator ===
Generating comprehensive synthetic dataset...

1. Generating main production dataset...
2. Generating component pricing dataset...
3. Generating market indicators dataset...

=== Dataset Summary ===
Main Dataset: 432 records, 29 columns
Component Dataset: 72 records, 9 columns
Market Dataset: 12 records, 11 columns

Date Range: 2023-01-08 00:00:00 to 2023-03-26 00:00:00
Panel Sizes: [np.int64(32), np.int64(43), np.int64(55), np.int64(65)]
Plants: ['Plant_CN_Hefei', 'Plant_KR_Paju', 'Plant_TW_Taichung']
Market Segments: ['Laptop', 'Monitor', 'TV']

=== Sample Main Dataset (first 5 records) ===
Record 1:
  Date: 2023-01-08
  Panel: 32" TV at Plant_TW_Taichung
  Demand: 2,394 | Production: 2,974
  Yield: 93.9% | Price: $103.68
  Revenue: $239,941

Record 2:
  Date: 2023-01-08
  Panel: 32" Monitor at Plant_TW_Taichung
  Demand: 1,911 | Production: 2,108
  Yield: 96.6% | Price: $137.79
  Revenue: $265,474

Record 3:
  Date: 2023-01-08
  Panel: 32"

In [4]:
import sys
import os

parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

In [8]:
main_df.to_csv('tft_lcd_main_data.csv', index=False)
component_df.to_csv('tft_lcd_component_data.csv', index=False)
market_df.to_csv('tft_lcd_market_data.csv', index=False)

In [2]:
import pandas as pd
import numpy as np

np.random.seed(42)

dates = pd.date_range('2023-01-02', '2023-03-27', freq='W')
data = []

for date in dates[:4]:  # First 4 weeks for demo
    for size in [43, 55]:  # 2 popular sizes
        for plant in ['Plant_TW', 'Plant_CN']:  # 2 plants
            for segment in ['TV', 'Monitor']:  # 2 segments
                
                demand = 1000 + size * 5 + np.random.randint(100, 500)
                production = demand * 1.15
                yield_val = 0.9 + np.random.uniform(-0.05, 0.05)
                price = 100 + size * 2 + np.random.uniform(-10, 10)
                sales = demand * np.random.uniform(0.9, 1.1)
                
                data.append({
                    'Date': date,
                    'Panel_Size': size,
                    'Plant': plant,
                    'Market_Segment': segment,
                    'Demand': int(demand),
                    'Production': int(production),
                    'Yield': round(yield_val, 3),
                    'Sales': int(sales),
                    'Unit_Price': round(price, 2),
                    'Revenue': int(sales * price)
                })

df = pd.DataFrame(data)
print(f"Sample TFT-LCD Dataset: {df.shape}")
df
df.to_csv('tft_lcd_demand_data0.csv', index=False)

Sample TFT-LCD Dataset: (32, 10)


In [7]:
import pandas as pd

# Load main production data
main_df = pd.read_csv('tft_lcd_main_data.csv')
main_df['Date'] = pd.to_datetime(main_df['Date'])

# Load component data
component_df = pd.read_csv('tft_lcd_component_data.csv')
component_df['Date'] = pd.to_datetime(component_df['Date'])

# Load market data
market_df = pd.read_csv('tft_lcd_market_data.csv')
market_df['Date'] = pd.to_datetime(market_df['Date'])

In [4]:
from utils.tft_lcd_demand_gen import generate_tft_lcd_demand_data, generate_tft_lcd_plant_data
if __name__ == "__main__":
    print("Generating TFT-LCD Data Files...")
    print("=" * 60)
    
    # Generate demand data
    demand_df = generate_tft_lcd_demand_data()
    
    print("\n" + "=" * 60)
    
    # Generate plant data
    plant_df = generate_tft_lcd_plant_data()
    
    print("\n" + "=" * 60)
    print("Data generation complete!")
    print(f"Files created: tft_lcd_demand_data.csv, tft_lcd_plant_data.csv")


Generating TFT-LCD Data Files...
Generated TFT-LCD demand data with 21960 records
Date range: 2024-01-01 to 2024-12-31

Columns in dataset (28 total):
   1. Date
   2. Region
   3. Panel_Size
   4. Market_Segment
   5. Forecasted_Demand
   6. Actual_Demand
   7. Order_ID
   8. Customer_ID
   9. Product_Type
  10. Priority
  11. Lead_Time_Days
  12. Unit_Price
  13. Total_Value
  14. Week_Number
  15. Month
  16. Quarter
  17. Year
  18. Safety_Stock
  19. Reorder_Point
  20. Max_Inventory
  21. Production_Hours
  22. Demand_Variance
  23. Forecast_Accuracy
  24. Is_Peak_Season
  25. Day_of_Week
  26. Planning_Horizon
  27. Cumulative_Demand
  28. Cumulative_Revenue

Data structure preview:
          Date        Region Panel_Size      Market_Segment  \
34  2024-01-01  Asia Pacific      21.5"  Commercial Display   
35  2024-01-01  Asia Pacific        32"  Commercial Display   
36  2024-01-01  Asia Pacific        43"  Commercial Display   
37  2024-01-01  Asia Pacific        55"  Commerci