In [1]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Directory (note the trailing slash) that holds the cleaned CSV. The same
# prefix is reused further down when the model and preprocessors are saved.
dataset_path = "drive/MyDrive/crop_yield/"
dataset = pd.read_csv(
    "{}cleaned_dataset.csv".format(dataset_path)
)

# Created unfitted; it is fitted on the numeric columns in a later cell.
scaler = MinMaxScaler()

In [3]:
# Preview the first two rows to inspect the raw column names.
dataset.head(2)

Unnamed: 0.1,Unnamed: 0,county,year,area (HA),production (MT),yield (MT/HA),crop
0,0,Baringo,2012,1110.0,906.0,0.82,sorghum
1,1,Bomet,2012,426.0,584.0,1.37,sorghum


In [4]:
# Drop the unit suffixes so the measurement columns get short names.
# Only the columns whose names actually change are listed: "county", "year"
# and "crop" keep their names without needing an identity entry.
dataset = dataset.rename(columns={
    "area (HA)": "area",
    "production (MT)": "production",
    "yield (MT/HA)": "yield",
})
dataset.head(2)

Unnamed: 0.1,Unnamed: 0,county,year,area,production,yield,crop
0,0,Baringo,2012,1110.0,906.0,0.82,sorghum
1,1,Bomet,2012,426.0,584.0,1.37,sorghum


In [5]:
# One encoder per categorical column; they are kept as separate objects
# because save_components pickles each of them to its own file.
crop_ne = LabelEncoder()
county_ne = LabelEncoder()

In [6]:
# Swap the string labels for the integer codes produced by the encoders.
# Caution: this cell is not idempotent — running it a second time would
# refit both encoders on the already-encoded integers.
dataset = dataset.assign(
    crop=crop_ne.fit_transform(dataset['crop']),
    county=county_ne.fit_transform(dataset['county']),
)

In [7]:
# Check that "county" and "crop" now hold integer codes.
dataset.head()

Unnamed: 0.1,Unnamed: 0,county,year,area,production,yield,crop
0,0,0,2012,1110.0,906.0,0.82,3
1,1,1,2012,426.0,584.0,1.37,3
2,2,2,2012,1034.0,1154.0,1.12,3
3,3,3,2012,7683.0,8627.0,1.12,3
4,4,5,2012,1187.0,2448.0,2.06,3


In [8]:
scaling_cols = ['year', 'area', 'production', 'yield']

# NOTE(review): the scaler is fitted on every row, before the train/test split
# made in a later cell, so test-set min/max values influence the training data.
#
# The scaled ndarray is assigned directly (positionally). Wrapping it in a new
# DataFrame first gives it a fresh 0..n-1 index and makes pandas align on index
# labels, which would misplace values or insert NaNs if `dataset` ever carried
# a non-default index (e.g. after dropping rows).
dataset[scaling_cols] = scaler.fit_transform(dataset[scaling_cols])

# Fill remaining gaps by interpolating down each column.
# NOTE(review): neighbouring rows may belong to different counties/crops —
# confirm that row-order interpolation is the intended imputation.
dataset = dataset.interpolate()
dataset.head()

Unnamed: 0.1,Unnamed: 0,county,year,area,production,yield,crop
0,0,0,0.0,0.014579,0.035588,0.046042,3
1,1,1,0.0,0.005595,0.02294,0.076923,3
2,2,2,0.0,0.013581,0.04533,0.062886,3
3,3,3,0.0,0.100913,0.338872,0.062886,3
4,4,5,0.0,0.015591,0.096158,0.115665,3


In [9]:
SEED = 42

# Inputs are the encoded county/crop plus the scaled year and area;
# targets are the scaled production and yield.
# 20% of the rows are held out, and the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset[['county', 'crop', 'year', 'area']],
    dataset[['production', 'yield']],
    test_size=0.2,
    random_state=SEED,
)

In [11]:
# Fully connected regressor: 4 inputs (county, crop, year, area) and
# 2 linear outputs (production, yield).
# Layers are created in the order 512 -> 128 -> 128 -> 64 -> 2.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(512, activation='relu', input_shape=(4,))]
    + [tf.keras.layers.Dense(units, activation='relu') for units in (128, 128, 64)]
    + [tf.keras.layers.Dense(2)]
)

# Optimise mean squared error; mean absolute error is reported alongside it.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae'],
)

In [12]:
# NOTE(review): the held-out test split is used as validation data here and is
# scored again by model.evaluate in the next cell, so no split stays unseen.
model.fit(X_train, y_train, epochs=500, validation_data=(X_test, y_test), verbose=2)

Epoch 1/500
23/23 - 2s - loss: 0.2493 - mae: 0.2361 - val_loss: 0.0104 - val_mae: 0.0521 - 2s/epoch - 102ms/step
Epoch 2/500
23/23 - 0s - loss: 0.0084 - mae: 0.0540 - val_loss: 0.0083 - val_mae: 0.0467 - 144ms/epoch - 6ms/step
Epoch 3/500
23/23 - 0s - loss: 0.0070 - mae: 0.0472 - val_loss: 0.0079 - val_mae: 0.0462 - 137ms/epoch - 6ms/step
Epoch 4/500
23/23 - 0s - loss: 0.0071 - mae: 0.0475 - val_loss: 0.0096 - val_mae: 0.0541 - 164ms/epoch - 7ms/step
Epoch 5/500
23/23 - 0s - loss: 0.0085 - mae: 0.0587 - val_loss: 0.0094 - val_mae: 0.0519 - 152ms/epoch - 7ms/step
Epoch 6/500
23/23 - 0s - loss: 0.0064 - mae: 0.0425 - val_loss: 0.0101 - val_mae: 0.0570 - 149ms/epoch - 6ms/step
Epoch 7/500
23/23 - 0s - loss: 0.0070 - mae: 0.0478 - val_loss: 0.0078 - val_mae: 0.0424 - 139ms/epoch - 6ms/step
Epoch 8/500
23/23 - 0s - loss: 0.0066 - mae: 0.0445 - val_loss: 0.0073 - val_mae: 0.0377 - 139ms/epoch - 6ms/step
Epoch 9/500
23/23 - 0s - loss: 0.0058 - mae: 0.0405 - val_loss: 0.0074 - val_mae: 0.0401 

<keras.callbacks.History at 0x7f2d20011910>

In [13]:
# Loss (MSE) and MAE on the test split; the targets are still min-max scaled,
# so both numbers are in scaled units rather than MT or MT/HA.
loss, mae = model.evaluate(X_test, y_test)



In [14]:
import pickle

In [19]:
def save_components(target_dir: str):
  """Persist the trained model and the fitted preprocessors under target_dir.

  Writes the Keras model to "<target_dir>model-v1/" and pickles the crop
  encoder, county encoder and scaler to "cropname_encoder.sav",
  "county_encoder.sav" and "scaler.sav" respectively.

  Args:
    target_dir: Directory prefix for every artifact. It is concatenated
      directly with the file names, so it must end with a path separator.
  """
  tf.keras.models.save_model(
    model,
    "{}model-v1/".format(target_dir),
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
    save_traces=True
  )

  artifacts = (
    ("cropname_encoder.sav", crop_ne),
    ("county_encoder.sav", county_ne),
    ("scaler.sav", scaler),
  )
  for filename, component in artifacts:
    # The context manager closes (and flushes) each file even if pickling
    # fails; the bare open(...) calls used before left the handles open.
    with open("{}{}".format(target_dir, filename), "wb") as handle:
      pickle.dump(component, handle)


In [20]:
# Write the model and preprocessors next to the dataset.
save_components(dataset_path)



In [39]:
# Smoke test on a hand-made row in model-input order: [county, crop, year, area].
# The model was trained on encoded/scaled inputs, so these are codes and scaled
# values rather than raw ones, and the outputs are scaled as well.
model.predict([[1, 2, 1, 1]])



array([[0.7330933, 0.0353546]], dtype=float32)

In [59]:
# Example query expressed in raw, human-readable values.
test = {
    "county": "Baringo",
    "crop": "wheat",
    "year": 2019,
    "area": 1
}

# Reload the saved artifacts from disk. Each file is opened in a `with` block
# so its handle is closed as soon as unpickling finishes.
# Security: pickle.load can execute arbitrary code — only load files that
# this notebook wrote itself.
with open("{}scaler.sav".format(dataset_path), "rb") as artifact:
    loaded_scaler = pickle.load(artifact)
with open("{}cropname_encoder.sav".format(dataset_path), "rb") as artifact:
    loaded_cropname_encoder = pickle.load(artifact)
with open("{}county_encoder.sav".format(dataset_path), "rb") as artifact:
    loaded_countyname_encoder = pickle.load(artifact)
loaded_model = tf.keras.models.load_model("{}model-v1/".format(dataset_path))

def make_prediction(query: dict):
  """Predict production and yield for one raw query.

  Args:
    query: Mapping with the keys "year", "area", "crop" and "county",
      holding raw (unscaled, unencoded) values.

  Returns:
    A (production, yield) tuple mapped back to the original units through
    the scaler's inverse transform.
  """
  # The scaler was fitted on [year, area, production, yield], so it needs a
  # four-wide row; the two target slots are padded with zeros and ignored.
  scaled_row = loaded_scaler.transform([[query["year"],
                                         query["area"],
                                         0, 0]])[0]
  scaled_year, scaled_area = scaled_row[0], scaled_row[1]

  crop_code = loaded_cropname_encoder.transform([query["crop"]])[0]
  county_code = loaded_countyname_encoder.transform([query["county"]])[0]

  # Feature order must match training: county, crop, year, area.
  features = np.array([[county_code, crop_code, scaled_year, scaled_area]],
                      dtype=np.float32)
  scaled_production, scaled_yield = loaded_model.predict(features)[0]

  # Same padding trick in reverse: zeros stand in for year and area.
  restored = loaded_scaler.inverse_transform(
      [[0, 0, scaled_production, scaled_yield]])[0]
  return (restored[2], restored[3])

In [60]:
# Run the reloaded pipeline on the example query; returns (production, yield).
make_prediction(test)





(23.129151429980997, 3.26003144711256)