In [1]:
#importing for system
import os, sys, pickle
from dataclasses import dataclass
import pandas as pd
import numpy as np

### Production code Deployment Steps

- Training pipeline
1. Data Ingestion
2. Data Transformation
3. Model Evaluation

- Prediction pipeline

- Running in app

# Training Pipeline

# 1. Data Ingestion

In [2]:
# For defining dataclass with Dataingestion config
@dataclass
class DataIngestionconfig:
    """Destination paths for the CSV files produced by data ingestion.

    All defaults are relative paths inside the ``artifacts`` directory.
    """

    # Training split written by the ingestion step.
    train_data_path: str = os.path.join("artifacts", "train.csv")
    # Test split written by the ingestion step.
    test_data_path: str = os.path.join("artifacts", "test.csv")
    # Full dataset, saved before it is split.
    raw_data_path: str = os.path.join("artifacts", "raw.csv")

In [3]:
#for data ingestion step 
from sklearn.model_selection import train_test_split

In [4]:
#Data ingestion class

class DataIngestion:
    """Load the raw mushroom CSV and persist raw, train and test copies.

    Output locations are taken from ``DataIngestionconfig``.
    """

    def __init__(self):
        self.ingestion_config = DataIngestionconfig()

    def initiate_data_ingestion(self):
        """Read ``data/mushrooms.csv``, encode the target and write the splits.

        The ``class`` column is mapped from ``"p"``/``"e"`` to ``1``/``0``,
        the full frame is written to ``raw_data_path``, and a 70/30
        train/test split (``random_state=10``) is written to
        ``train_data_path`` and ``test_data_path``.

        Returns:
            tuple[str, str]: ``(train_data_path, test_data_path)``.

        Raises:
            KeyError: if ``class`` contains a value other than ``"p"`` or
                ``"e"``.
        """
        config = self.ingestion_config
        class_codes = {"p": 1, "e": 0}

        df = pd.read_csv(os.path.join("data", "mushrooms.csv"))
        # Dict lookup (not .map) so an unexpected label fails loudly.
        df["class"] = df["class"].apply(lambda label: class_codes[label])

        # Create the parent directory of every output file, not only the raw
        # one, so customised config paths keep working.
        for out_path in (config.raw_data_path,
                         config.train_data_path,
                         config.test_data_path):
            parent = os.path.dirname(out_path)
            if parent:
                os.makedirs(parent, exist_ok=True)

        df.to_csv(config.raw_data_path, index=False)
        train_set, test_set = train_test_split(df, test_size=0.3, random_state=10)

        # ``header`` takes a bool (or a list of aliases); the train split used
        # to pass the string "True", which only worked because it is truthy.
        train_set.to_csv(config.train_data_path, index=False, header=True)
        test_set.to_csv(config.test_data_path, index=False, header=True)

        return (
            config.train_data_path,
            config.test_data_path
        )

In [5]:
#Function for saving the object
def save_object(file_path, obj):
    """Pickle ``obj`` to ``file_path``, creating the parent directory if needed.

    Args:
        file_path: Destination file. May be a bare file name, in which case
            the file is written to the current working directory.
        obj: Any picklable object.
    """
    dir_path = os.path.dirname(file_path)

    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually has a directory component.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)

    with open(file_path, "wb") as file_obj:
        pickle.dump(obj, file_obj)

# 2. Data Transformation

In [6]:
#Data class for data transformation
@dataclass
class DataTransformationConfig:
    """Configuration for the data-transformation step."""

    # Where the fitted preprocessor is pickled. The type annotation is what
    # makes this a real dataclass field (overridable through the constructor),
    # matching how DataIngestionconfig declares its paths.
    preprocessor_obj_file_path: str = os.path.join("artifacts", "preprocessor.pkl")

In [7]:
# For Data transformation
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer

In [8]:
#Class for Data Transformation
class DataTransformation:
    """Build, fit and persist the feature preprocessor for the mushroom data."""

    def __init__(self):
        self.data_transformation_config = DataTransformationConfig()

    def get_data_transformation_object(self):
        """Assemble the unfitted preprocessing ``ColumnTransformer``.

        Each listed column is ordinal-encoded against its fixed list of
        category codes, and the encoded matrix is then reduced to 10
        components with PCA.

        Returns:
            ColumnTransformer: a single ``"lab_pipeline"`` entry applied to
            the categorical feature columns.
        """
        # Column name -> allowed category codes. Insertion order matters: it
        # fixes both the column order handed to the ColumnTransformer and the
        # per-column category lists handed to OrdinalEncoder.
        codes_by_column = {
            'cap-shape': ['b', 'c', 'x', 'f', 'k', 's'],
            'cap-surface': ['f', 'g', 'y', 's'],
            'cap-color': ['n', 'b', 'c', 'g', 'r', 'p', 'u', 'e', 'w', 'y'],
            'bruises': ['t', 'f'],
            'odor': ['a', 'l', 'c', 'y', 'f', 'm', 'n', 'p', 's'],
            'gill-attachment': ['a', 'f'],
            'gill-spacing': ['c', 'w'],
            'gill-size': ['b', 'n'],
            'gill-color': ['k', 'n', 'b', 'h', 'g', 'r', 'o', 'p', 'u', 'e', 'w', 'y'],
            'stalk-shape': ['e', 't'],
            'stalk-root': ['b', 'c', 'e', 'r', '?'],
            'stalk-surface-above-ring': ['f', 'y', 'k', 's'],
            'stalk-surface-below-ring': ['f', 'y', 'k', 's'],
            'stalk-color-above-ring': ['n', 'b', 'c', 'g', 'o', 'p', 'e', 'w', 'y'],
            'stalk-color-below-ring': ['n', 'b', 'c', 'g', 'o', 'p', 'e', 'w', 'y'],
            'veil-type': ['p'],
            'veil-color': ['w', 'n', 'o', 'y'],
            'ring-number': ['n', 'o', 't'],
            'ring-type': ['e', 'f', 'l', 'n', 'p'],
            'spore-print-color': ['k', 'n', 'b', 'h', 'r', 'o', 'u', 'w', 'y'],
            'population': ['a', 'c', 'n', 's', 'v', 'y'],
            'habitat': ['g', 'l', 'm', 'p', 'u', 'w', 'd'],
        }
        feature_columns = list(codes_by_column.keys())
        encoder = OrdinalEncoder(categories=list(codes_by_column.values()))

        feature_pipeline = Pipeline(
            steps=[
                ("ordinalencoder", encoder),
                ("PCA", PCA(n_components=10)),
            ]
        )

        return ColumnTransformer([
            ("lab_pipeline", feature_pipeline, feature_columns)
        ])

    def initiate_data_transformation(self, train_path, test_path):
        """Fit the preprocessor on the training CSV and transform both splits.

        Args:
            train_path: Path of the training CSV; must contain a ``class`` column.
            test_path: Path of the test CSV; must contain a ``class`` column.

        Returns:
            tuple: ``(train_arr, test_arr, preprocessor_path)``. Each array
            holds the transformed features with the ``class`` values appended
            as the last column. ``preprocessor_path`` is where the fitted
            preprocessor was pickled.
        """
        train_frame = pd.read_csv(train_path)
        test_frame = pd.read_csv(test_path)

        preprocessor = self.get_data_transformation_object()

        label_column = "class"

        train_inputs = train_frame.drop(columns=[label_column])
        train_labels = train_frame[label_column]

        test_inputs = test_frame.drop(columns=[label_column])
        test_labels = test_frame[label_column]

        # Fit on the training split only; the test split is just transformed.
        train_matrix = preprocessor.fit_transform(train_inputs)
        test_matrix = preprocessor.transform(test_inputs)

        # Append the labels as the final column of each array.
        train_arr = np.c_[train_matrix, np.array(train_labels)]
        test_arr = np.c_[test_matrix, np.array(test_labels)]

        artifact_path = self.data_transformation_config.preprocessor_obj_file_path
        save_object(file_path=artifact_path, obj=preprocessor)

        return (train_arr, test_arr, artifact_path)

In [9]:
#for accuracy score
from sklearn.metrics import accuracy_score

In [10]:
#Function for evaluating the model
def evaluate_model(X_train, y_train, X_test, y_test, models):
    """Fit every candidate model and score it on the test split.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``. The estimators are fitted in place.

    Returns:
        dict: model name -> accuracy on the test split, in the same order
        as ``models``.
    """
    report = {}

    # Walk the mapping directly instead of rebuilding list(models.keys()) and
    # list(models.values()) on every iteration.
    for model_name, model in models.items():
        model.fit(X_train, y_train)

        # Score on the held-out data only.
        y_test_pred = model.predict(X_test)
        report[model_name] = accuracy_score(y_test, y_test_pred)

    return report

# 3. Model Trainer

In [11]:
# For model evaluation
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [12]:
#Data class for model trainer configuration
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step."""

    # Where the best-scoring model is pickled. The type annotation is what
    # makes this a real dataclass field (overridable through the constructor),
    # matching how DataIngestionconfig declares its paths.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

In [13]:
#Class for model trainer

class ModelTrainer:
    """Train several classifiers and persist the one with the best test accuracy."""

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_training(self, train_array, test_array, random_state=10):
        """Fit all candidate models, report their accuracy and save the best.

        Args:
            train_array: 2-D array whose last column is the target and whose
                remaining columns are the features.
            test_array: Same layout as ``train_array``, used for scoring.
            random_state: Seed given to the tree and ensemble models so that
                repeated runs select and save the same model. Pass ``None``
                to restore unseeded behaviour.

        Side effects:
            Prints the ``{model name: accuracy}`` report and pickles the
            best-scoring fitted model to ``trained_model_file_path``.
        """
        X_train, y_train = train_array[:, :-1], train_array[:, -1]
        X_test, y_test = test_array[:, :-1], test_array[:, -1]

        models = {
            "LogisticRegression": LogisticRegression(),
            "LogisticRegressionCV": LogisticRegressionCV(),
            "KNN": KNeighborsClassifier(),
            "Decision Tree": DecisionTreeClassifier(random_state=random_state),
            "SVC": SVC(),
            "RandomForest": RandomForestClassifier(random_state=random_state),
            "GradientBoosting": GradientBoostingClassifier(random_state=random_state),
        }

        model_report: dict = evaluate_model(X_train, y_train, X_test, y_test, models)
        print(model_report)

        # First name with the highest score wins; this is the same tie-break
        # as the previous max(sorted(values)) + index lookup.
        best_model_name = max(model_report, key=model_report.get)
        best_model = models[best_model_name]

        save_object(
            file_path=self.model_trainer_config.trained_model_file_path,
            obj=best_model
        )

## Connecting with Training Pipeline

In [14]:
#to connect all above with pipeline
if __name__ == "__main__":
    # Step 1: write the raw/train/test CSV files and get back the two split paths.
    obj = DataIngestion()
    train_data_path, test_data_path = obj.initiate_data_ingestion()
    
    # Step 2: fit the preprocessor on the training split and transform both splits.
    # The third return value (path of the pickled preprocessor) is not needed here.
    data_transformation = DataTransformation()
    train_arr, test_arr, _ = data_transformation.initiate_data_transformation(train_data_path, test_data_path)
    
    # Step 3: train the candidate models, print their scores and save the best one.
    model_trainer=ModelTrainer()
    model_trainer.initiate_model_training(train_arr,test_arr)

{'LogisticRegression': 0.8613617719442166, 'LogisticRegressionCV': 0.8584905660377359, 'KNN': 0.9995898277276456, 'Decision Tree': 0.9856439704675964, 'SVC': 0.9971287940935193, 'RandomForest': 0.9987694831829368, 'GradientBoosting': 0.9840032813781788}


# Prediction Pipeline

In [15]:
#function to load a pickled object from disk
def load_object(file_path):
    """Return the object unpickled from ``file_path``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    this project wrote itself.
    """
    with open(file_path, mode="rb") as pickled_file:
        restored = pickle.load(pickled_file)
    return restored

In [16]:
#Class for predicting pipeline

class PredictPipeline:
    """Run inference with the preprocessor and model saved under ``artifacts``."""

    def __init__(self):
        pass

    def predict(self, features):
        """Transform ``features`` with the saved preprocessor and predict.

        Both artifacts are loaded from disk on every call.

        Args:
            features: Input passed to the preprocessor's ``transform``.

        Returns:
            Whatever the loaded model's ``predict`` returns for the
            transformed input.
        """
        artifact_dir = "artifacts"

        preprocessor = load_object(os.path.join(artifact_dir, "preprocessor.pkl"))
        model = load_object(os.path.join(artifact_dir, "model.pkl"))

        transformed = preprocessor.transform(features)
        return model.predict(transformed)

In [17]:
# Custom data class with the form

class CustomData:
    """One mushroom observation as submitted through the prediction form."""

    # (attribute name, DataFrame column name) pairs, in output column order.
    _ATTR_TO_COLUMN = (
        ("cap_shape", "cap-shape"),
        ("cap_surface", "cap-surface"),
        ("cap_color", "cap-color"),
        ("bruises", "bruises"),
        ("odor", "odor"),
        ("gill_attachment", "gill-attachment"),
        ("gill_spacing", "gill-spacing"),
        ("gill_size", "gill-size"),
        ("gill_color", "gill-color"),
        ("stalk_shape", "stalk-shape"),
        ("stalk_root", "stalk-root"),
        ("stalk_surface_above_ring", "stalk-surface-above-ring"),
        ("stalk_surface_below_ring", "stalk-surface-below-ring"),
        ("stalk_color_above_ring", "stalk-color-above-ring"),
        ("stalk_color_below_ring", "stalk-color-below-ring"),
        ("veil_type", "veil-type"),
        ("veil_color", "veil-color"),
        ("ring_number", "ring-number"),
        ("ring_type", "ring-type"),
        ("spore_print_color", "spore-print-color"),
        ("population", "population"),
        ("habitat", "habitat"),
    )

    def __init__(self,
                 cap_shape:str,
                 cap_surface:str,
                 cap_color:str,
                 bruises:str,
                 odor:str,
                 gill_attachment:str,
                 gill_spacing:str,
                 gill_size:str,
                 gill_color:str,
                 stalk_shape:str,
                 stalk_root:str,
                 stalk_surface_above_ring:str,
                 stalk_surface_below_ring:str,
                 stalk_color_above_ring:str,
                 stalk_color_below_ring:str,
                 veil_type:str,
                 veil_color:str,
                 ring_number:str,
                 ring_type:str,
                 spore_print_color:str,
                 population:str,
                 habitat:str):
        """Store each feature value on the instance under the same name."""
        # Cap
        self.cap_shape, self.cap_surface, self.cap_color = cap_shape, cap_surface, cap_color
        self.bruises, self.odor = bruises, odor
        # Gills
        self.gill_attachment, self.gill_spacing = gill_attachment, gill_spacing
        self.gill_size, self.gill_color = gill_size, gill_color
        # Stalk
        self.stalk_shape, self.stalk_root = stalk_shape, stalk_root
        self.stalk_surface_above_ring = stalk_surface_above_ring
        self.stalk_surface_below_ring = stalk_surface_below_ring
        self.stalk_color_above_ring = stalk_color_above_ring
        self.stalk_color_below_ring = stalk_color_below_ring
        # Veil and ring
        self.veil_type, self.veil_color = veil_type, veil_color
        self.ring_number, self.ring_type = ring_number, ring_type
        # Remaining features
        self.spore_print_color = spore_print_color
        self.population, self.habitat = population, habitat

    def get_data_as_dataframe(self):
        """Return the observation as a one-row DataFrame.

        Column names are the hyphenated forms of the attribute names, in the
        order the attributes appear in the constructor.
        """
        single_row = {
            column: [getattr(self, attribute)]
            for attribute, column in self._ATTR_TO_COLUMN
        }
        return pd.DataFrame(single_row)

# App runner



In [18]:
#for flask & app runner
from flask import Flask,request,render_template

In [19]:
# One Flask instance, reachable as both ``application`` and ``app``.
app = application = Flask(__name__)

@app.route('/')
def home_page():
    """Render the landing page template for the site root."""
    landing_page = render_template("index.html")
    return landing_page

@app.route("/predict", methods = ["GET", "POST"])
def predict_datapoint():
    """Show the input form on GET; classify the submitted mushroom on POST.

    On POST, the 22 form fields are collected into a ``CustomData`` row,
    passed through ``PredictPipeline`` and the matching result template is
    rendered: ``edible.html`` for label 0, ``poisonous.html`` for label 1,
    and ``results.html`` for anything else.
    """
    if request.method == "GET":
        return render_template("form.html")

    data = CustomData(
        cap_shape = request.form.get("cap_shape"),
        cap_surface = request.form.get("cap_surface"),
        cap_color = request.form.get("cap_color"),
        bruises = request.form.get("bruises"),
        odor = request.form.get("odor"),
        gill_attachment = request.form.get("gill_attachment"),
        gill_spacing = request.form.get("gill_spacing"),
        gill_size = request.form.get("gill_size"),
        gill_color = request.form.get("gill_color"),
        stalk_shape = request.form.get("stalk_shape"),
        stalk_root = request.form.get("stalk_root"),
        stalk_color_above_ring = request.form.get("stalk_color_above_ring"),
        stalk_color_below_ring = request.form.get("stalk_color_below_ring"),
        stalk_surface_above_ring = request.form.get("stalk_surface_above_ring"),
        stalk_surface_below_ring = request.form.get("stalk_surface_below_ring"),
        veil_type = request.form.get("veil_type"),
        veil_color = request.form.get("veil_color"),
        ring_number = request.form.get("ring_number"),
        ring_type = request.form.get("ring_type"),
        spore_print_color = request.form.get("spore_print_color"),
        population = request.form.get("population"),
        habitat = request.form.get("habitat")
    )

    final_new_data = data.get_data_as_dataframe()
    predict_pipeline = PredictPipeline()
    pred = predict_pipeline.predict(final_new_data)

    # The frame holds exactly one row, so compare its single predicted label
    # rather than the whole prediction array.
    label = pred[0]

    if label == 0:
        return render_template("edible.html", final_result = "Edible")
    if label == 1:
        return render_template("poisonous.html", final_result = "Poisonous")

    # This branch used to assign ``result`` but render ``results``, which
    # raised UnboundLocalError instead of showing the fallback page.
    return render_template("results.html", final_result = "Data not found")

if __name__=="__main__":
    # Start Flask's built-in server on port 5000. Host 0.0.0.0 makes it listen
    # on all network interfaces, so it is reachable from other machines.
    app.run(host='0.0.0.0',port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.43.246:5000
Press CTRL+C to quit
