## Data Generation

In this notebook, we generate a new dataset to train the Agriculture_Optimization model.
The new dataset comprises foods that can be grown locally in small home gardens and are used daily in the household.

In [1]:
#importing libraries
import pandas as pd 
import numpy as np

In [2]:
# Outline of data collected.
# Each crop maps a growing condition to its [low, high] bounds. N, P and K are
# sampled as integers; temperature, humidity, ph and rainfall as floats.
crop_data = {
    'plantain': {'N': [150, 200], 'P': [40, 60], 'K': [200, 300], 'temperature': [26, 30], 'humidity': [80, 90], 'ph': [5.5, 7.0], 'rainfall': [60, 100]},
    'tomato': {'N': [120, 150], 'P': [40, 60], 'K': [120, 150], 'temperature': [18, 32], 'humidity': [65, 85], 'ph': [6.0, 6.8], 'rainfall': [50, 60]},
    'pepper': {'N': [120, 150], 'P': [30, 50], 'K': [150, 200], 'temperature': [21, 27], 'humidity': [60, 70], 'ph': [6.0, 7.0], 'rainfall': [40, 80]},
    'ugwu': {'N': [100, 150], 'P': [30, 50], 'K': [150, 200], 'temperature': [21, 27], 'humidity': [60, 70], 'ph': [6.0, 7.0], 'rainfall': [40, 80]},
    'potato': {'N': [100, 150], 'P': [20, 30], 'K': [150, 200], 'temperature': [15, 21], 'humidity': [40, 60], 'ph': [4.5, 6.0], 'rainfall': [60, 100]},
    'corn': {'N': [120, 150], 'P': [60, 80], 'K': [150, 200], 'temperature': [21, 29], 'humidity': [60, 70], 'ph': [6.0, 7.0], 'rainfall': [60, 100]},
    'ginger': {'N': [60, 100], 'P': [20, 30], 'K': [40, 60], 'temperature': [30, 35], 'humidity': [50, 70], 'ph': [5.5, 6.5], 'rainfall': [59, 100]},
}

# Number of synthetic rows generated for every crop.
SAMPLES_PER_CROP = 100

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)


def generate_crop_samples(crop, condition, n_samples, rng):
    """Draw random rows for one crop from its range of growing conditions.

    Args:
        crop: Label written to the ``label`` column of every generated row.
        condition: Mapping with the keys ``N``, ``P``, ``K``, ``temperature``,
            ``humidity``, ``ph`` and ``rainfall``; each value is a
            ``[low, high]`` pair.
        n_samples: Number of rows to generate.
        rng: ``numpy.random.Generator`` used for all draws.

    Returns:
        A DataFrame with ``n_samples`` rows and the columns Nitrogen,
        Phosphorus, Potassium, temperature, humidity, ph, rainfall, label.
    """
    def draw_int(key):
        low, high = condition[key][0], condition[key][1]
        # endpoint=True makes the upper bound inclusive, so the documented
        # maximum of the range can actually occur (np.random.randint would
        # silently exclude it and fail outright when low == high).
        return rng.integers(low, high, size=n_samples, endpoint=True)

    def draw_float(key):
        low, high = condition[key][0], condition[key][1]
        return rng.uniform(low, high, size=n_samples)

    # Column order is kept identical to the previously generated dataset.
    return pd.DataFrame({
        'Nitrogen': draw_int('N'),
        'Phosphorus': draw_int('P'),
        'Potassium': draw_int('K'),
        'temperature': draw_float('temperature'),
        'humidity': draw_float('humidity'),
        'ph': draw_float('ph'),
        'rainfall': draw_float('rainfall'),
        'label': [crop] * n_samples,
    })


# Build one frame per crop and concatenate once, instead of growing the
# DataFrame inside the loop.
crop_frames = [
    generate_crop_samples(crop, condition, SAMPLES_PER_CROP, rng)
    for crop, condition in crop_data.items()
]
df = pd.concat(crop_frames, ignore_index=True)

df.shape

(700, 8)

In [3]:
# Read the previously collected dataset from disk, then stack the newly
# generated rows (df) on top of it under a fresh 0..n-1 index.
data = pd.read_csv('Agric_dataset.csv')

frames_to_merge = [df, data]
final_data = pd.concat(frames_to_merge, ignore_index=True)

In [4]:
# Display the (rows, columns) shape of the combined dataset as the cell output.
final_data.shape

(3500, 8)

In [5]:
# Write the combined dataset to a UTF-8 CSV file, leaving out the row index.
final_data.to_csv(
    'Updated_Agric_dataset.csv',
    encoding='utf-8',
    index=False,
)