#### Notebook to generate the dummy data and save it to CSV

In [19]:
from pathlib import Path

import numpy as np
import pandas as pd

##### First generate the 10 dummy features

In [20]:
# Generate the 10 features.
# All draws come from one seeded Generator so that Restart & Run All rebuilds
# exactly the same dataset; change SEED to obtain a different sample.
SEED = 42
N_SAMPLES = 1000  # number of rows in the dummy dataset
rng = np.random.default_rng(SEED)

dummy_data = {
    # Categorical features: each label is picked with equal probability.
    'soil_color': rng.choice(['dark brown', 'reddish', 'light brown'], N_SAMPLES),
    # Numeric features: uniform draws between the two given bounds.
    'soil_ph': rng.uniform(5.5, 7.5, N_SAMPLES),
    'soil_n': rng.uniform(10, 50, N_SAMPLES),
    'soil_p': rng.uniform(10, 50, N_SAMPLES),
    'temp': rng.uniform(25, 35, N_SAMPLES),
    'rainfall': rng.uniform(0, 300, N_SAMPLES),
    'forecast_temp': rng.uniform(25, 35, N_SAMPLES),
    'forecast_rainfall': rng.uniform(0, 300, N_SAMPLES),
    'crop_type': rng.choice(['wheat', 'corn', 'rice'], N_SAMPLES),
    'plant_health': rng.choice(['healthy', 'yellowing', 'wilting'], N_SAMPLES)
}

df = pd.DataFrame(dummy_data)
df.head()

Unnamed: 0,soil_color,soil_ph,soil_n,soil_p,temp,rainfall,forecast_temp,forecast_rainfall,crop_type,plant_health
0,reddish,7.114934,21.000919,22.407225,34.016585,66.09343,31.60117,165.967046,rice,yellowing
1,reddish,6.280302,46.855408,21.378979,29.602247,176.078441,28.006578,99.102472,corn,healthy
2,dark brown,6.019813,25.592463,49.182461,34.635394,3.698233,28.335786,168.564737,rice,wilting
3,dark brown,6.018704,20.317221,23.203302,28.79009,111.793016,34.433413,160.929579,rice,wilting
4,light brown,6.04469,31.03887,17.087985,25.593489,113.116169,28.823832,256.252688,rice,wilting


##### Then generate the target variable


In [21]:
def calculate_optimal_fertilizer(row):
    """Compute the synthetic target (fertilizer amount) for one row of features.

    Parameters
    ----------
    row : mapping
        Anything indexable by key that provides 'soil_n', 'soil_p',
        'soil_ph', 'temp', 'rainfall' and 'crop_type' (for example a
        DataFrame row passed in by ``df.apply(..., axis=1)``).

    Returns
    -------
    float
        A 100 kg/ha baseline, increased by the nitrogen, phosphorus and pH
        adjustments, then scaled by the weather and crop multipliers.

    Raises
    ------
    KeyError
        If 'crop_type' is not one of 'wheat', 'corn' or 'rice'.
    """
    # The further nitrogen and phosphorus sit below 50, the more is added
    # on top of the 100 kg/ha baseline.
    n_shortfall = 50 - row['soil_n']
    p_shortfall = 50 - row['soil_p']

    # A pH outside the 6-7 band adds a flat 10 on top.
    ph = row['soil_ph']
    if ph < 6 or ph > 7:
        ph_extra = 10
    else:
        ph_extra = 0

    subtotal = 100 + n_shortfall * 0.5 + p_shortfall * 0.3 + ph_extra

    # Temperature within 25-30 together with rainfall within 50-150
    # (bounds inclusive) reduces the amount by 10%.
    temperature = row['temp']
    if 25 <= temperature <= 30 and 50 <= row['rainfall'] <= 150:
        weather_factor = 0.9
    else:
        weather_factor = 1

    # Per-crop scaling applied last.
    crop_factors = {'wheat': 1.0, 'corn': 1.2, 'rice': 0.9}
    weather_scaled = subtotal * weather_factor

    return weather_scaled * crop_factors[row['crop_type']]

# Score every row with calculate_optimal_fertilizer, attach the result as the
# target column, and only then round all numeric columns to two decimals
# (so the target is computed from the unrounded feature values).
df = df.assign(
    optimal_fertilizer_amount=df.apply(calculate_optimal_fertilizer, axis=1)
).round(2)

df.head()

Unnamed: 0,soil_color,soil_ph,soil_n,soil_p,temp,rainfall,forecast_temp,forecast_rainfall,crop_type,plant_health,optimal_fertilizer_amount
0,reddish,7.11,21.0,22.41,34.02,66.09,31.6,165.97,rice,yellowing,119.5
1,reddish,6.28,46.86,21.38,29.6,176.08,28.01,99.1,corn,healthy,132.19
2,dark brown,6.02,25.59,49.18,34.64,3.7,28.34,168.56,rice,wilting,101.2
3,dark brown,6.02,20.32,23.2,28.79,111.79,34.43,160.93,rice,wilting,99.53
4,light brown,6.04,31.04,17.09,25.59,113.12,28.82,256.25,rice,wilting,96.68


#### Save dummy data to CSV

In [22]:
# Write the features and the target to CSV, leaving out the index column.
# The parent directory is created first so that a checkout without ../data
# does not make to_csv fail on a missing directory.
output_path = Path('../data/dummy_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)