<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/ageron/handson-ml3/blob/main/02_end_to_end_machine_learning_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
  </td>
  <td>
    <a target="_blank" href="https://kaggle.com/kernels/welcome?src=https://github.com/ageron/handson-ml3/blob/main/02_end_to_end_machine_learning_project.ipynb"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" /></a>
  </td>
</table>

# Import the packages

In [56]:
import shap
import time
import logging
import warnings
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_validate, RepeatedKFold, GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder , LabelEncoder
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, median_absolute_error
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression, RFECV, SelectFromModel
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.multioutput import MultiOutputRegressor
from scipy.stats import wilcoxon, bootstrap
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer
from scipy.stats import wilcoxon, bootstrap, uniform, randint
import joblib
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation
from tensorflow.keras.utils import to_categorical

In [57]:
# Silence library warnings and send log messages through a timestamped format.
warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Seed every random number generator the notebook can draw from (Python,
# NumPy and TensorFlow), not just NumPy, so that Keras weight initialisation
# is reproducible across Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [58]:
# Read the training CSV from the data directory two levels above the notebook.
df = pd.read_csv(filepath_or_buffer='../../data/train.csv')

In [59]:
# Remove the row identifier column. errors='ignore' keeps the cell re-runnable:
# without it a second execution raises KeyError because 'id' is already gone.
df = df.drop(columns='id', errors='ignore')

In [60]:
# Preview the first five rows of the loaded frame.
df.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,37,70,36,Clayey,Sugarcane,36,4,5,28-28
1,27,69,65,Sandy,Millets,30,6,18,28-28
2,29,63,32,Sandy,Millets,24,12,16,17-17-17
3,35,62,54,Sandy,Barley,39,12,4,10-26-26
4,35,58,43,Red,Paddy,37,2,16,DAP


In [61]:
# (rows, columns) of the frame after the id column was dropped.
df.shape

(750000, 9)

In [62]:
# Per-column dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750000 entries, 0 to 749999
Data columns (total 9 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   Temparature      750000 non-null  int64 
 1   Humidity         750000 non-null  int64 
 2   Moisture         750000 non-null  int64 
 3   Soil Type        750000 non-null  object
 4   Crop Type        750000 non-null  object
 5   Nitrogen         750000 non-null  int64 
 6   Potassium        750000 non-null  int64 
 7   Phosphorous      750000 non-null  int64 
 8   Fertilizer Name  750000 non-null  object
dtypes: int64(6), object(3)
memory usage: 51.5+ MB


In [63]:
# One-hot encode the soil and crop columns; all other columns pass through unchanged.
categorical_cols = ['Soil Type', 'Crop Type']
df_encoded = pd.get_dummies(data=df, columns=categorical_cols)

In [64]:
# Display the one-hot encoded frame (pandas elides the middle rows and columns).
df_encoded

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous,Fertilizer Name,Soil Type_Black,Soil Type_Clayey,Soil Type_Loamy,...,Crop Type_Cotton,Crop Type_Ground Nuts,Crop Type_Maize,Crop Type_Millets,Crop Type_Oil seeds,Crop Type_Paddy,Crop Type_Pulses,Crop Type_Sugarcane,Crop Type_Tobacco,Crop Type_Wheat
0,37,70,36,36,4,5,28-28,False,True,False,...,False,False,False,False,False,False,False,True,False,False
1,27,69,65,30,6,18,28-28,False,False,False,...,False,False,False,True,False,False,False,False,False,False
2,29,63,32,24,12,16,17-17-17,False,False,False,...,False,False,False,True,False,False,False,False,False,False
3,35,62,54,39,12,4,10-26-26,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,35,58,43,37,2,16,DAP,False,False,False,...,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
749995,25,69,30,8,16,6,28-28,False,True,False,...,False,False,True,False,False,False,False,False,False,False
749996,37,64,58,38,8,20,17-17-17,False,False,True,...,False,False,False,False,False,False,False,True,False,False
749997,35,68,59,6,11,29,10-26-26,False,False,False,...,False,True,False,False,False,False,False,False,False,False
749998,31,68,29,9,11,12,20-20,False,False,False,...,True,False,False,False,False,False,False,False,False,False


In [66]:
# Learn the fertilizer labels, then overwrite the column in df with their integer class ids.
label_encoder = LabelEncoder().fit(df['Fertilizer Name'])
df['Fertilizer Name'] = label_encoder.transform(df['Fertilizer Name'])

In [71]:
# Check the first few values of the target column after label encoding.
df['Fertilizer Name'].head()

0    4
1    4
2    2
3    0
4    5
Name: Fertilizer Name, dtype: int64

In [82]:
# Build the feature matrix as a float64 array. The encoded frame mixes int64
# and bool columns, so a bare `.values` falls back to a slow, memory-heavy
# object-dtype array; requesting the dtype up front avoids that detour.
X = df_encoded.drop(columns='Fertilizer Name').to_numpy(dtype=np.float64)

# Integer class ids taken from the label-encoded column in df, plus their one-hot form.
y = df['Fertilizer Name'].to_numpy()
y_cat = to_categorical(y)

In [83]:
# Inspect the one-hot encoded target (NumPy abbreviates the printout with "...").
y_cat

array([[0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [84]:
# Number of feature columns in X.
X.shape[1]

22

In [88]:
# Fit the scaler on the training rows only, so that statistics of the held-out
# rows do not leak into preprocessing. Splitting the row indices with the same
# test_size and random_state as the train_test_split call applied to X further
# down reproduces that partition (the shuffle depends only on the row count).
# Keep the two calls in sync.
train_rows = train_test_split(np.arange(X.shape[0]), test_size=0.2, random_state=42)[0]
scaler = StandardScaler()
scaler.fit(X[train_rows])

# Apply the training-set mean and variance to every row.
X = scaler.transform(X)

In [91]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    random_state=42,
)

In [92]:
# Feed-forward classifier: three Dense -> BatchNormalization -> ReLU stages,
# then a softmax layer with one unit per column of the one-hot target.
model = Sequential([
    # Declare the input explicitly; passing `input_shape` to the first layer
    # is deprecated in Keras 3.
    tf.keras.Input(shape=(X.shape[1],)),
    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dense(62),  # NOTE(review): widths go 128 -> 62 -> 32; possibly meant 64 — confirm
    BatchNormalization(),
    Activation('relu'),
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    Dense(y_cat.shape[1], activation='softmax'),
])