## Import Libraries

In [4]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
#from tensorflow.keras.layers.experimental import preprocessing


## Data 

In [2]:
# Read the dataset into a pandas DataFrame and display summary statistics
# (count, mean, std, min, quartiles, max) of its numeric columns
data = pd.read_csv('final-data-edited-nozero.csv')
data.describe()
# NOTE: one-hot encoding of the categorical column(s) happens in the next cell, not here


Unnamed: 0,Average_avg-Temp,Average-Min Temp,Average-max-temp,avg-precipitation,avg-windSpeed,pH,Clay,Sand,Silt,Crop Yield,Hectare
count,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0,1827.0
mean,26.322562,21.778559,30.868208,151.651866,1.598867,5.647546,23.849206,58.855866,14.915161,1.060098,0.636918
std,0.897819,1.005999,1.107627,41.440256,0.282128,0.320397,3.56462,6.863483,4.617212,1.029985,0.449801
min,21.258682,15.489182,26.763396,35.333333,1.283292,4.833333,8.166667,32.5,4.5,0.012362,0.005778
25%,26.05944,21.47875,30.367958,115.333333,1.414682,5.416667,21.833333,54.333333,11.5,0.466581,0.413982
50%,26.410911,21.993594,30.661375,150.25,1.500635,5.666667,24.0,59.333333,14.666667,0.709389,0.545489
75%,26.731812,22.374299,31.271596,180.041667,1.655992,5.866667,26.166667,64.0,18.0,1.332915,0.669091
max,29.500161,23.64066,36.186615,314.125,2.814604,6.783333,40.333333,81.333333,33.0,5.487014,2.622656


### Prepare Data

In [3]:
# One-hot encode the categorical column(s) (the state variable).
# `dtype=int` makes the indicator columns plain 0/1 integers directly, so no
# follow-up True/False -> 1/0 replacement is needed. (The previous replacement
# was applied to an undefined name, `features`, and raised NameError.)
data = pd.get_dummies(data, dtype=int)
data.head(5)

Unnamed: 0,Average_avg-Temp,Average-Min Temp,Average-max-temp,avg-precipitation,avg-windSpeed,pH,Clay,Sand,Silt,Crop Yield,...,State_Kebbi,State_Kwara,State_Lagos,State_Ogun,State_Ondo,State_Osun,State_Oyo,State_Plateau,State_Rivers,State_Taraba
0,21.258682,15.489182,27.02874,107.791667,2.258682,5.833333,29.5,43.166667,25.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0
1,21.287979,15.765146,26.809448,136.270833,1.733292,5.4,37.833333,35.833333,25.0,2.389935,...,0,0,0,0,0,0,0,0,0,1
2,21.341573,16.420537,27.24726,103.395833,2.25851,6.2,27.5,44.5,29.333333,2.603023,...,0,0,0,0,0,0,0,1,0,0
3,21.474099,15.814495,27.124927,111.145833,2.237911,5.3,30.833333,43.666667,23.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0
4,21.523187,15.900604,27.139937,101.75,2.40501,5.85,30.0,48.166667,23.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0


In [4]:
# Drop exact duplicate rows; the de-duplicated frame is stored under a new
# name (`data2`) so the encoded frame `data` stays untouched
data2 = data.drop_duplicates()

Unnamed: 0,Average_avg-Temp,Average-Min Temp,Average-max-temp,avg-precipitation,avg-windSpeed,pH,Clay,Sand,Silt,Crop Yield,...,State_Kebbi,State_Kwara,State_Lagos,State_Ogun,State_Ondo,State_Osun,State_Oyo,State_Plateau,State_Rivers,State_Taraba
0,21.258682,15.489182,27.02874,107.791667,2.258682,5.833333,29.5,43.166667,25.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0
1,21.287979,15.765146,26.809448,136.270833,1.733292,5.4,37.833333,35.833333,25.0,2.389935,...,0,0,0,0,0,0,0,0,0,1
2,21.341573,16.420537,27.24726,103.395833,2.25851,6.2,27.5,44.5,29.333333,2.603023,...,0,0,0,0,0,0,0,1,0,0
3,21.474099,15.814495,27.124927,111.145833,2.237911,5.3,30.833333,43.666667,23.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0
4,21.523187,15.900604,27.139937,101.75,2.40501,5.85,30.0,48.166667,23.833333,2.603023,...,0,0,0,0,0,0,0,1,0,0


In [7]:
# Predictors that were not retained by the feature-selection step
dropped_columns = ['Average_avg-Temp', 'Average-max-temp', 'Clay']

# Remove them and cast every remaining column to float32
data2 = data2.drop(columns=dropped_columns).astype('float32')

# Target: crop yield as an (n_samples, 1) column vector
labels = np.array(data2['Crop Yield']).reshape(-1, 1)

# Features: every remaining column, converted to a plain NumPy array
data2 = np.array(data2.drop(columns='Crop Yield'))

### Split Data

In [None]:
# Hold out 30% of the samples, then split that hold-out in half to obtain the
# validation and test sets; both splits are shuffled with a fixed seed.
train_features, val_test_features, train_labels, val_test_labels = train_test_split(
    data2, labels, test_size=0.3, shuffle=True, random_state=0
)
X_val, X_test, Y_val, Y_test = train_test_split(
    val_test_features, val_test_labels, test_size=0.5, shuffle=True, random_state=0
)

# Report the shape of every split
for description, split_array in [
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_labels),
    ('Testing Features Shape:', X_test),
    ('Testing Labels Shape:', Y_test),
    ('Validation Features Shape:', X_val),
    ('Validation Label Shape:', Y_val),
]:
    print(description, split_array.shape)

### Model

##### DNN64

In [None]:
def create_model(data):
    """Build and compile the DNN64 regressor.

    Parameters
    ----------
    data : keras layer
        First layer of the Sequential stack. The call below passes a
        normalization layer that has already been adapted to the training
        features.

    Returns
    -------
    keras.Sequential
        `data` followed by three 64-unit ReLU layers and a single linear
        output unit, compiled with mean-absolute-error loss and Adam
        (learning rate 0.001).
    """
    model = keras.Sequential([
        data,
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)
    ])
    model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam(0.001))
    return model


# Feature-wise normalization layer, fitted on the training features only.
# It has to be created here: no earlier cell defines `normalizer`, so on a
# fresh kernel the original call below failed with NameError.
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))

model64 = create_model(normalizer)
history64 = model64.fit(train_features, train_labels, validation_data=(X_val, Y_val),
                        batch_size=100, verbose=1, epochs=60)

In [None]:
# Persist the trained 64-unit network; the "Testing" section reloads it from this path
MODEL_PATH = "./DNNmodel64"
model64.save(MODEL_PATH)

##### DNN16

In [None]:
def create_model(data, unit=64):
    """Build and compile a DNN regressor with a configurable last hidden layer.

    This cell used to redefine `create_model(data)` with a hard-coded 16-unit
    layer, silently replacing the 64-unit builder from the DNN64 cell. The
    width is now a parameter whose default (64) keeps `create_model(x)`
    consistent with that first definition.

    Parameters
    ----------
    data : keras layer
        First layer of the Sequential stack. The call below passes a
        normalization layer that has already been adapted to the training
        features.
    unit : int, default 64
        Number of units in the third hidden layer.

    Returns
    -------
    keras.Sequential
        `data`, two 64-unit ReLU layers, one `unit`-wide ReLU layer and a
        single linear output unit, compiled with mean-absolute-error loss and
        Adam (learning rate 0.001).
    """
    model = keras.Sequential([
        data,
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(unit, activation='relu'),
        layers.Dense(1)
    ])
    model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam(0.001))
    return model


# Feature-wise normalization layer, fitted on the training features only.
# Built here so the cell does not depend on a `normalizer` left over from
# another cell (none is defined before this point on a fresh kernel).
normalizer = layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))

model16 = create_model(normalizer, 16)
history16 = model16.fit(train_features, train_labels, validation_data=(X_val, Y_val),
                        batch_size=100, verbose=1, epochs=60)

In [None]:
# Persist the trained 16-unit network; the "Testing" section reloads it from this path
MODEL_PATH = "./DNNmodel16"
model16.save(MODEL_PATH)

In [None]:
# Loss (MAE) of both networks on the training, validation and test splits,
# printed split by split with the 64-unit model first each time
for split_X, split_y in ((train_features, train_labels), (X_val, Y_val), (X_test, Y_test)):
    for network in (model64, model16):
        print(network.evaluate(split_X, split_y))

#### Validate model performance through repeated random-split cross-validation


In [None]:
def create_model(data, unit=64):
    """Build and compile a DNN regressor with a configurable last hidden layer.

    Parameters
    ----------
    data : keras layer
        First layer of the Sequential stack (an adapted normalization layer
        in the loop below).
    unit : int, default 64
        Number of units in the third hidden layer. The default keeps earlier
        single-argument calls `create_model(x)` working after this cell runs.

    Returns
    -------
    keras.Sequential
        `data`, two 64-unit ReLU layers, one `unit`-wide ReLU layer and a
        single linear output unit, compiled with mean-absolute-error loss and
        Adam (learning rate 0.001).
    """
    model = keras.Sequential([
        data,
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(unit, activation='relu'),
        layers.Dense(1)
    ])
    model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam(0.001))
    return model


units = [64, 16]   # widths of the last hidden layer to compare
historys = []      # one Keras History per (repetition, unit), in loop order
score = []         # held-out MAE per (repetition, unit), in the same order

for i in range(0, 10):
    print('*********************************************************************************************')

    # Fresh 70/15/15 split for this repetition, seeded by the repetition index.
    # The splits get their own `cv_` names on purpose: reusing train_features /
    # X_val / X_test / Y_test would overwrite the seed-0 split that model64 and
    # model16 were trained on, and the later "Testing" cells would then score
    # those models on rows they had already seen during training.
    cv_train_X, cv_holdout_X, cv_train_y, cv_holdout_y = train_test_split(
        data2, labels, test_size=0.3, shuffle=True, random_state=i)
    cv_val_X, cv_test_X, cv_val_y, cv_test_y = train_test_split(
        cv_holdout_X, cv_holdout_y, shuffle=True, test_size=0.5, random_state=i)

    # Normalization statistics come from this repetition's training rows only.
    # `layers.Normalization` is used because the `preprocessing` module import
    # is commented out at the top of the notebook.
    cv_normalizer = layers.Normalization(axis=-1)
    cv_normalizer.adapt(np.array(cv_train_X))

    for unit in units:
        # Stop once validation loss has not improved for 7 epochs and roll
        # back to the best weights seen
        es = EarlyStopping(monitor='val_loss', mode='min', patience=7, restore_best_weights=True)

        model = create_model(cv_normalizer, unit)
        history = model.fit(cv_train_X, cv_train_y, validation_data=(cv_val_X, cv_val_y),
                            batch_size=100, verbose=1, epochs=60,
                            callbacks=[es])  # Keras expects a list of callbacks
        historys.append(history)

        # Score on the split that took no part in fitting or early stopping;
        # scoring on the training rows would not measure generalisation
        held_out_mae = model.evaluate(cv_test_X, cv_test_y, verbose=1)
        score.append(held_out_mae)
        print('&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&')

In [204]:
# Layer width for every entry of `score`: the cross-validation loop appends one
# score per unit inside each of its 10 repetitions, so the widths repeat in
# the same order.
layer = [unit for _ in range(0, 10) for unit in units]

L = pd.DataFrame(layer)
B = pd.DataFrame(score)

# Two-column table (width, score), grouped by width, written to disk
C = pd.concat([L, B], axis=1)
C.columns = ["layer", "score"]
C = C.sort_values('layer')
C.to_csv('scorebylayer.csv')

## Testing

#### Generalisation to unseen set

In [4]:
# Reload the two saved networks from the directories written by the model.save cells
model64 = tf.keras.models.load_model('DNNmodel64')
model16 = tf.keras.models.load_model('DNNmodel16')

In [26]:
# Tabular copy of the test features.
# NOTE(review): `Test` is not referenced by any later cell visible here — confirm it is still needed.
Test = pd.DataFrame(X_test)

In [32]:
# Test features as a 2-D (n_samples, n_features) array. The dimensions are
# taken from X_test itself: the previous hard-coded (245, 30) raised
# ValueError as soon as the data file or the split ratio changed.
tt = np.array(X_test).reshape(len(X_test), -1)

In [46]:
# Predictions of the 64-unit network for the test features. The row count is
# read from `tt` rather than hard-coded as 245 x 30, so the cell keeps working
# when the size of the test split changes.
ttpred64 = model64.predict(np.asarray(tt).reshape(len(tt), -1))



In [51]:
# Predictions of the 16-unit network for the test features. The row count is
# read from `tt` rather than hard-coded as 245 x 30, so the cell keeps working
# when the size of the test split changes.
ttpred16 = model16.predict(np.asarray(tt).reshape(len(tt), -1))



In [41]:
# Wrap the 64-unit model's predictions and the true test targets in DataFrames
# so they can be concatenated column-wise in the next cell
pred = pd.DataFrame(ttpred64)
yact = pd.DataFrame(Y_test)

In [43]:
# Predicted vs. actual values side by side, labelled and written to CSV
all64 = pd.concat([pred, yact], axis=1).set_axis(["y_pred", "y_actual"], axis=1)
all64.to_csv('predicted64&actual.csv')

In [52]:
# Wrap the 16-unit model's predictions and the true test targets in DataFrames
# (this rebinds `pred` and `yact` from the 64-unit cells above)
pred = pd.DataFrame(ttpred16)
yact = pd.DataFrame(Y_test)

In [53]:
# Predicted vs. actual values side by side, labelled and written to CSV
all16 = pd.concat([pred, yact], axis=1).set_axis(["y_pred", "y_actual"], axis=1)
all16.to_csv('predicted16&actual.csv')

#### Generalisation to Unforeseen set

###### Change precipitation

In [None]:
# Hand-built single-sample feature vector: 7 numeric values followed by a
# 23-slot one-hot block (30 values in total).
# NOTE(review): presumably ordered like the training columns (Average-Min Temp,
# avg-precipitation, avg-windSpeed, pH, Sand, Silt, Hectare, then the State_*
# indicators) — confirm against the feature matrix.
# The vector used to be bound to the name `tf`, which shadowed the
# `import tensorflow as tf` module and broke every later `tf.keras...` call.
sample_1 = np.array(
    [21.69209, 13.52083, 1.4988489, 5.4666667, 59.833332, 10.166667, 0.5454889]
    + [0.] * 11 + [1.] + [0.] * 11
)
# One row; the number of columns is inferred instead of hard-coded
model16.predict(sample_1.reshape(1, -1))

In [None]:
# Hand-built single-sample feature vector: 7 numeric values followed by a
# 23-slot one-hot block (30 values in total).
# NOTE(review): presumably ordered like the training columns (Average-Min Temp,
# avg-precipitation, avg-windSpeed, pH, Sand, Silt, Hectare, then the State_*
# indicators) — confirm against the feature matrix.
# The vector used to be bound to the name `tf`, which shadowed the
# `import tensorflow as tf` module and broke every later `tf.keras...` call.
sample_1 = np.array(
    [21.69209, 13.52083, 1.4988489, 5.4666667, 59.833332, 10.166667, 0.5454889]
    + [0.] * 11 + [1.] + [0.] * 11
)
# One row; the number of columns is inferred instead of hard-coded
model64.predict(sample_1.reshape(1, -1))

In [None]:
# Second hand-built sample: 7 numeric values, then a 23-slot one-hot block
# whose 21st slot is set (30 values in total).
# NOTE(review): presumably ordered like the training columns — confirm.
numeric_values = [16.685463, 9.125, 2.417177, 5.5666666, 35.5, 27.333334, 1.6867675]
one_hot_block = [0.] * 20 + [1.] + [0.] * 2
tf2 = np.array(numeric_values + one_hot_block)
model16.predict(tf2.reshape(1, 30))

In [None]:
# Second hand-built sample: 7 numeric values, then a 23-slot one-hot block
# whose 21st slot is set (30 values in total).
# NOTE(review): presumably ordered like the training columns — confirm.
numeric_values = [16.685463, 9.125, 2.417177, 5.5666666, 35.5, 27.333334, 1.6867675]
one_hot_block = [0.] * 20 + [1.] + [0.] * 2
tf2 = np.array(numeric_values + one_hot_block)
model64.predict(tf2.reshape(1, 30))

##### Change silt

In [None]:
# Hand-built single-sample feature vector: 7 numeric values followed by a
# 23-slot one-hot block (30 values in total).
# NOTE(review): presumably ordered like the training columns (Average-Min Temp,
# avg-precipitation, avg-windSpeed, pH, Sand, Silt, Hectare, then the State_*
# indicators) — confirm against the feature matrix.
# The vector used to be bound to the name `tf`, which shadowed the
# `import tensorflow as tf` module and broke every later `tf.keras...` call.
sample_1 = np.array(
    [21.69209, 133.52083, 1.4988489, 5.4666667, 59.833332, 29.166667, 0.5454889]
    + [0.] * 11 + [1.] + [0.] * 11
)
# One row; the number of columns is inferred instead of hard-coded
model64.predict(sample_1.reshape(1, -1))

In [None]:
# Second hand-built sample: 7 numeric values, then a 23-slot one-hot block
# whose 21st slot is set (30 values in total).
# NOTE(review): presumably ordered like the training columns — confirm.
numeric_values = [16.685463, 99.125, 2.417177, 5.5666666, 35.5, 50.333334, 1.6867675]
one_hot_block = [0.] * 20 + [1.] + [0.] * 2
tf2 = np.array(numeric_values + one_hot_block)
model16.predict(tf2.reshape(1, 30))

In [None]:
# Second hand-built sample: 7 numeric values, then a 23-slot one-hot block
# whose 21st slot is set (30 values in total).
# NOTE(review): presumably ordered like the training columns — confirm.
numeric_values = [16.685463, 99.125, 2.417177, 5.5666666, 35.5, 50.333334, 1.6867675]
one_hot_block = [0.] * 20 + [1.] + [0.] * 2
tf2 = np.array(numeric_values + one_hot_block)
model64.predict(tf2.reshape(1, 30))