In [1]:
import pandas as pd
import numpy as np
from pathlib import PurePosixPath, PureWindowsPath

In [2]:
def get_directory(path):
    """Return ``path`` as a string formatted for the host operating system.

    Parameters
    ----------
    path : str or os.PathLike
        The path to convert, e.g. ``'./dataset/fertilizer'``.

    Returns
    -------
    str
        ``str(PurePosixPath(path))`` when ``os.name`` is ``'posix'``,
        otherwise ``str(PureWindowsPath(path))``.
    """
    # Imported locally: the notebook's import cell does not import ``os``, so
    # without this line the function raises NameError on a fresh kernel.
    import os

    path_flavour = PurePosixPath if os.name == 'posix' else PureWindowsPath
    return str(path_flavour(path))

In [3]:
# Variables
# Directory that holds the dataset files, passed through get_directory() so the
# string uses the host operating system's path format.
FERTILIZER_LOCATION = get_directory('./dataset/fertilizer')

In [4]:
# Load Crop_recommendation.csv from the dataset directory into a DataFrame.
_fert = pd.read_csv(get_directory(FERTILIZER_LOCATION + '/Crop_recommendation.csv'))

In [5]:
# Preview the first rows of the loaded frame.
_fert.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# Left as the cell's last expression so the notebook renders it as a table,
# the same way the other cells display the frame, instead of wrapped plain text.
_fert.describe()

                 N            P            K  temperature     humidity  \
count  2200.000000  2200.000000  2200.000000  2200.000000  2200.000000   
mean     50.551818    53.362727    48.149091    25.616244    71.481779   
std      36.917334    32.985883    50.647931     5.063749    22.263812   
min       0.000000     5.000000     5.000000     8.825675    14.258040   
25%      21.000000    28.000000    20.000000    22.769375    60.261953   
50%      37.000000    51.000000    32.000000    25.598693    80.473146   
75%      84.250000    68.000000    49.000000    28.561654    89.948771   
max     140.000000   145.000000   205.000000    43.675493    99.981876   

                ph     rainfall  
count  2200.000000  2200.000000  
mean      6.469480   103.463655  
std       0.773938    54.958389  
min       3.504752    20.211267  
25%       5.971693    64.551686  
50%       6.425045    94.867624  
75%       6.923643   124.267508  
max       9.935091   298.560117  


In [7]:
# Integer-encode the crop names held in the 'label' column.
# Distinct crop names, in order of first appearance in the frame.
_fert_labels = _fert['label'].drop_duplicates()
# Crop name -> integer code (0, 1, 2, ...) following that order.
LABEL_DICTIONARY = {crop: code for code, crop in enumerate(_fert_labels)}
# Number of distinct crops; the normalisation step further down reads this.
_iteration = len(LABEL_DICTIONARY)

# Replace every crop name in the frame with its integer code.
_fert['label'] = [LABEL_DICTIONARY[crop] for crop in _fert['label']]
_fert

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,0
1,85,58,41,21.770462,80.319644,7.038096,226.655537,0
2,60,55,44,23.004459,82.320763,7.840207,263.964248,0
3,74,35,40,26.491096,80.158363,6.980401,242.864034,0
4,78,42,42,20.130175,81.604873,7.628473,262.717340,0
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,21
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,21
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,21
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,21


In [8]:
# Normalisation: divide every column by a fixed constant.
# The N, P and K divisors equal the column maxima reported by describe() above;
# the label divisor is the largest integer code (number of crops - 1).
_DIVISORS = {
    'ph': 14,           # PH
    'humidity': 100,
    'rainfall': 300,
    'N': 140,
    'P': 145,
    'K': 205,
    'temperature': 50,
    'label': _iteration - 1,
}
# Each column is overwritten in place, so running this cell a second time
# would divide the values again.
for _column, _divisor in _DIVISORS.items():
    _fert[_column] = _fert[_column] / _divisor

_fert

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,0.642857,0.289655,0.209756,0.417595,0.820027,0.464499,0.676452,0.0
1,0.607143,0.400000,0.200000,0.435409,0.803196,0.502721,0.755518,0.0
2,0.428571,0.379310,0.214634,0.460089,0.823208,0.560015,0.879881,0.0
3,0.528571,0.241379,0.195122,0.529822,0.801584,0.498600,0.809547,0.0
4,0.557143,0.289655,0.204878,0.402603,0.816049,0.544891,0.875724,0.0
...,...,...,...,...,...,...,...,...
2195,0.764286,0.234483,0.156098,0.535493,0.664133,0.484290,0.592582,1.0
2196,0.707143,0.103448,0.131707,0.548342,0.566364,0.434780,0.426415,1.0
2197,0.842857,0.227586,0.146341,0.482636,0.672251,0.454472,0.577743,1.0
2198,0.835714,0.220690,0.165854,0.525448,0.521274,0.482771,0.423918,1.0


In [9]:
# Label set: move the N, P and K columns out of the frame into their own Series.
# pop() removes each column from _fert as it returns it.
_fert_N, _fert_P, _fert_K = (_fert.pop(nutrient) for nutrient in ('N', 'P', 'K'))

_fert   # The remaining columns are our input dataset.

Unnamed: 0,temperature,humidity,ph,rainfall,label
0,0.417595,0.820027,0.464499,0.676452,0.0
1,0.435409,0.803196,0.502721,0.755518,0.0
2,0.460089,0.823208,0.560015,0.879881,0.0
3,0.529822,0.801584,0.498600,0.809547,0.0
4,0.402603,0.816049,0.544891,0.875724,0.0
...,...,...,...,...,...
2195,0.535493,0.664133,0.484290,0.592582,1.0
2196,0.548342,0.566364,0.434780,0.426415,1.0
2197,0.482636,0.672251,0.454472,0.577743,1.0
2198,0.525448,0.521274,0.482771,0.423918,1.0


In [10]:
import tensorflow as tf

# Seed TensorFlow's global generator and the shuffle below so that shuffle
# order and weight initialisation are repeatable from one kernel restart to
# the next; both were previously unseeded.
RANDOM_SEED = 42
tf.random.set_seed(RANDOM_SEED)

# Pair each row of input features with its N value.
dataset_N = tf.data.Dataset.from_tensor_slices((_fert.values, _fert_N.values))
# A shuffle buffer as large as the dataset shuffles all rows together;
# batch(1) then yields one sample per step.
dataset_N = dataset_N.shuffle(len(_fert), seed=RANDOM_SEED).batch(1)

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [11]:
# Define model: a small fully connected regression network that predicts N
# from the input features.
predict_N = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(1),
    # NOTE(review): the N targets in dataset_N were divided by 140 in the
    # normalisation cell, yet this layer multiplies the output by 140 before
    # the loss is computed. Confirm which scale predictions are meant to be in.
    tf.keras.layers.Lambda(lambda x: x * 140)
])
predict_N.compile(
    optimizer = 'adam',
    loss = tf.keras.losses.MAE,
    # The target is continuous, so classification accuracy ('acc') carries no
    # information here; track RMSE alongside the MAE loss instead.
    metrics = [tf.keras.metrics.RootMeanSquaredError(name = 'rmse')]
)

predict_N.fit(dataset_N, epochs = 15)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1b9852bb490>