In [35]:
import pandas as pd
import numpy as np 
import joblib
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import cross_val_score


In [36]:
# Load the land-price training dataset.
# NOTE(review): this is a machine-specific absolute path (the directory name
# ends with a space); it will not resolve elsewhere — consider a configurable
# data directory.
TRAIN_CSV = '/media/prince/5A4E832F4E83034D/testing /new_train.csv'
data = pd.read_csv(TRAIN_CSV)

In [37]:
# Separate the target (labels) from the feature columns.
# NOTE: `data` is rebound to the frame without the target, so re-running this
# cell on its own raises KeyError; re-run the load cell first.
target_col = 'land_value'
land_prices = data[target_col]
data = data.drop(columns=[target_col])

In [38]:
# Factory function that builds the preprocessing pipeline
def build_pipline(num_attribs, cat_atribs):
    """Build the unfitted preprocessing transformer.

    Numeric columns are median-imputed and then standardized; categorical
    columns are one-hot encoded with ``handle_unknown="ignore"`` so that
    categories not seen during fitting do not raise at transform time.

    Parameters
    ----------
    num_attribs
        Column selector for the numeric sub-pipeline (this notebook passes a
        list of column names).
    cat_atribs
        Column selector for the categorical sub-pipeline (this notebook
        passes a list of column names).

    Returns
    -------
    ColumnTransformer
        Unfitted transformer combining both sub-pipelines under the names
        ``"num"`` and ``"cat"``.
    """
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    categorical_steps = [
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]

    # Each sub-pipeline only sees the columns it is assigned here.
    return ColumnTransformer([
        ("num", Pipeline(numeric_steps), num_attribs),
        ("cat", Pipeline(categorical_steps), cat_atribs),
    ])

In [39]:
# Column roles: 'ocean_proximity' is the only categorical feature; every
# other column is routed to the numeric sub-pipeline.
cat_attribs = ['ocean_proximity']
num_attribs = data.drop(columns=cat_attribs).columns.to_list()

# Build the preprocessing transformer, then fit it on the training features
# and transform them in one step.
full_pipline = build_pipline(num_attribs, cat_attribs)
data_prepared = full_pipline.fit_transform(data)

In [40]:
# Artifact filenames (relative to the notebook's working directory).
MODEL_FILE = "model.pkl"      # fitted regressor
PIPLINE_FILE = "pipline.pkl"  # fitted preprocessing transformer

In [41]:
# Train the final model: a random forest regressor.
# The seed is fixed so that a Restart & Run All reproduces the same fitted
# model (and therefore the same predictions); without it every run differs.
RANDOM_STATE = 42
model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(data_prepared, land_prices)

# Persist both artifacts: the fitted preprocessing transformer is saved with
# the model so inference can apply exactly the same transformation.
joblib.dump(model, MODEL_FILE)
joblib.dump(full_pipline, PIPLINE_FILE)
print("done")

done


In [43]:
# Inference: reload the persisted artifacts and score new rows.
pipline = joblib.load(PIPLINE_FILE)
model = joblib.load(MODEL_FILE)

input_data = pd.read_csv('input.csv')

# Only transform() here — the transformer was already fitted on training data.
transformed_input = pipline.transform(input_data)
prediction = model.predict(transformed_input)

# Attach the predictions as a 'land_value' column and write the result.
input_data['land_value'] = prediction
input_data.to_csv('output.csv', index=False)
print("inference is completed")

inference is completed
