### Model

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw dataset from the working directory into a DataFrame.
avo_data = pd.read_csv(filepath_or_buffer="avocado.csv")

In [3]:
# Remove the columns that the later cells of this notebook do not use.
avo_data = avo_data.drop(columns=["Unnamed: 0", "4046", "4225", "4770"])

In [4]:
# Names of the object-dtype (non-numeric) columns, as a plain Python list.
cat = avo_data.select_dtypes(include=["object"]).columns.tolist()

cat

['Date', 'type', 'region']

In [5]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode each object-dtype column.
#
# A separate LabelEncoder is fitted per column and kept in `encoders`, so the
# string -> integer mapping of every column stays available afterwards (for
# example via `encoders["region"].classes_` or `.inverse_transform`).
# Refitting one shared encoder would retain only the mapping of the column
# encoded last.
encoders = {}

# Same column order as before; `label` therefore still ends up bound to the
# encoder fitted on "Date", which is what a single reused encoder held.
for column in ("type", "region", "Date"):
    label = LabelEncoder()
    avo_data[column] = label.fit_transform(avo_data[column])
    encoders[column] = label

In [6]:
# Cast every column to a floating-point dtype.
avo_data = avo_data.astype("float64")

In [7]:
# Per-column first and third quartiles, computed in a single quantile call.
quartiles = avo_data.quantile([0.25, 0.75])
Q1 = quartiles.loc[0.25]
Q3 = quartiles.loc[0.75]

# Interquartile range of every column.
IQR = Q3 - Q1

print(IQR)


Date                84.00
AveragePrice         0.56
Total Volume    422123.71
Total Bags      105694.73
Small Bags       80488.25
Large Bags       21901.78
XLarge Bags        132.50
type                 1.00
year                 2.00
region              27.00
dtype: float64


In [8]:
# Fences for the 1.5 * IQR rule, one value per column.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A row is discarded when any of its columns falls outside its fences.
# NOTE(review): the mask spans every column of avo_data, which includes the
# label-encoded columns and AveragePrice -- confirm that this is intended.
outlier_mask = (avo_data < lower_fence) | (avo_data > upper_fence)
avo_data = avo_data[~outlier_mask.any(axis=1)]

In [9]:
# Predictor columns handed to the model.
feature_columns = ["year", "region", "type", "Total Volume", "Total Bags"]
features = avo_data[feature_columns]

In [10]:
# Column the model is trained to predict.
target = avo_data.loc[:, "AveragePrice"]

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=0,
)

In [12]:
from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestRegressor

In [13]:
from sklearn.feature_selection import f_regression

# Pipeline: feature selection -> standardisation -> random forest regressor.
#
# SelectKBest defaults to the classification scorer f_classif; the target here
# is continuous, so the regression scorer f_regression is passed explicitly.
# With k="all" every feature is kept either way, so the data reaching the
# scaler and the forest is unchanged.
forest = make_pipeline(
    SelectKBest(score_func=f_regression, k="all"),
    StandardScaler(),
    RandomForestRegressor(max_depth=150, n_jobs=-1, random_state=64),
)

In [14]:
# Fit every pipeline step on the training split; the fitted pipeline returned
# by fit() is the value this cell displays.
forest.fit(X=X_train, y=y_train)


Pipeline(steps=[('selectkbest', SelectKBest(k='all')),
                ('standardscaler', StandardScaler()),
                ('randomforestregressor',
                 RandomForestRegressor(max_depth=150, n_jobs=-1,
                                       random_state=64))])

In [15]:
# import sklearn_json as skjson

# skjson.to_json(forest, "model_j.json")


In [16]:
import pickle as pkl

# Serialise the fitted pipeline to model.pkl. The context manager guarantees
# the file handle is flushed and closed even if pickling raises.
# NOTE: pickle files can execute arbitrary code when loaded, so only load
# model.pkl from a trusted source.
with open("model.pkl", "wb") as model_file:
    pkl.dump(forest, model_file)