#### Import Necessary Libraries

In [1]:
import pandas as pd
import numpy as np

#### Define Categories and Region-Specific Ranges

In [2]:
# Regions covered by the dataset, in the order rows are generated.
regions = [
    'Amhara', 'Oromia', 'Tigray', 'Sidama', 'Afar',
    'South Ethiopia', 'Central Ethiopia', 'South West Ethiopia',
]

# Crop types generated for every region/year pair.
crops = ['Teff', 'Wheat', 'Maize', 'Barley', 'Sorghum']

# Calendar years 2000 through 2023 inclusive (the range end is exclusive).
years = [*range(2000, 2024)]

# Per-region (low, high) bounds used for uniform sampling:
#   rainfall   -> Rainfall_mm
#   yield_base -> base value behind Yield_ton_ha, before adjustments
#   fertilizer -> Fertilizer_Use_kg_ha
#   market     -> Market_Access_Index
# NOTE(review): the bounds are hard-coded here; their source is not cited in this notebook.
region_ranges = {
    'Afar': dict(rainfall=(200, 600), yield_base=(1.0, 2.0), fertilizer=(20, 80), market=(0.3, 0.6)),
    'Tigray': dict(rainfall=(400, 900), yield_base=(1.2, 2.5), fertilizer=(40, 100), market=(0.5, 0.8)),
    'Amhara': dict(rainfall=(800, 1400), yield_base=(1.5, 3.0), fertilizer=(50, 150), market=(0.7, 1.0)),
    'Oromia': dict(rainfall=(700, 1500), yield_base=(1.5, 3.2), fertilizer=(60, 160), market=(0.7, 1.0)),
    'Sidama': dict(rainfall=(1000, 1800), yield_base=(1.8, 3.2), fertilizer=(70, 170), market=(0.6, 0.9)),
    'South Ethiopia': dict(rainfall=(1200, 2000), yield_base=(2.0, 3.5), fertilizer=(80, 180), market=(0.6, 0.9)),
    'Central Ethiopia': dict(rainfall=(900, 1600), yield_base=(1.6, 3.0), fertilizer=(60, 160), market=(0.7, 1.0)),
    'South West Ethiopia': dict(rainfall=(1400, 2400), yield_base=(2.0, 3.5), fertilizer=(80, 200), market=(0.5, 0.8)),
}

# Per-crop multiplier applied to the sampled base yield
# (values below 1 scale the yield down, values above 1 scale it up).
crop_adjust = dict(
    Teff=0.9,
    Wheat=1.0,
    Maize=1.2,
    Barley=0.95,
    Sorghum=1.1,
)

#### Generate Synthetic Data with Realism

In [3]:
# Fixed seed so that "Restart Kernel -> Run All" regenerates the identical dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Bounds (t/ha) that every generated yield is clamped to.
YIELD_MIN, YIELD_MAX = 1.0, 3.5

data = []

# Build one row per (year, region, crop) combination.
for year in years:
    for region in regions:
        # Sampling bounds depend only on the region, so look them up once per region.
        params = region_ranges[region]

        for crop in crops:
            # Independent uniform draws inside the region's (low, high) bounds.
            rainfall_mm = round(rng.uniform(*params['rainfall']), 1)
            fertilizer_kg_ha = round(rng.uniform(*params['fertilizer']), 1)
            market_access = round(rng.uniform(*params['market']), 2)

            # Yield = sampled base * crop multiplier * rainfall factor * fertilizer factor.
            # The rainfall factor is 1.0 at 500 mm and changes by 0.0005 per mm;
            # the fertilizer factor adds 0.001 per kg/ha.
            base_yield = rng.uniform(*params['yield_base'])
            yield_adjust = crop_adjust[crop] * (1 + 0.0005 * (rainfall_mm - 500)) * (1 + 0.001 * fertilizer_kg_ha)

            # NOTE(review): at high rainfall the combined factor gets large (the rainfall
            # factor alone is 1.95 at 2400 mm), so rows from wet regions can saturate
            # at YIELD_MAX. Revisit the coefficients if more spread is wanted there.
            yield_ton_ha = round(max(YIELD_MIN, min(YIELD_MAX, base_yield * yield_adjust)), 2)

            data.append([year, region, crop, yield_ton_ha, rainfall_mm, fertilizer_kg_ha, market_access])

# Create DataFrame
df = pd.DataFrame(
    data,
    columns=['Year', 'Region', 'Crop_Type', 'Yield_ton_ha', 'Rainfall_mm', 'Fertilizer_Use_kg_ha', 'Market_Access_Index'],
)

# Sanity check: exactly one row per year/region/crop combination.
assert len(df) == len(years) * len(regions) * len(crops)

# Preview first 20 rows (to see variety across regions)
print(df.head(20))

    Year  Region Crop_Type  Yield_ton_ha  Rainfall_mm  Fertilizer_Use_kg_ha  \
0   2000  Amhara      Teff          2.76       1387.7                  94.0   
1   2000  Amhara     Wheat          3.50       1366.1                  50.6   
2   2000  Amhara     Maize          3.39       1292.3                  88.6   
3   2000  Amhara    Barley          3.50       1150.4                 122.2   
4   2000  Amhara   Sorghum          3.25        891.9                 144.1   
5   2000  Oromia      Teff          3.33        708.6                  93.0   
6   2000  Oromia     Wheat          3.07       1241.1                 102.4   
7   2000  Oromia     Maize          3.50       1114.6                 106.2   
8   2000  Oromia    Barley          2.24       1014.4                 123.9   
9   2000  Oromia   Sorghum          3.50       1495.6                 114.1   
10  2000  Tigray      Teff          1.77        736.1                  58.0   
11  2000  Tigray     Wheat          1.85        856.

#### Export for Google Sheets

In [4]:
# Write the generated frame to a CSV in the working directory; index=False
# leaves the DataFrame's row index out of the file.
df.to_csv(path_or_buf='ethiopia_agri_data.csv', index=False)

# Confirmation message for the notebook output.
print("Realistic data saved to 'ethiopia_agri_data.csv'.")

Realistic data saved to 'ethiopia_agri_data.csv'.
