In [8]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import os
import joblib

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

# File paths (relative to the current working directory) of the two artifacts
# persisted by this script. Both are written with joblib.dump after training
# and read back with joblib.load before forecasting.
MODEL_FILE = "model.pkl"  # the fitted multi-output regressor
PIPELINE_FILE = "pipeline.pkl"  # the fitted text-preprocessing pipeline

# Factory for the text-preprocessing pipeline applied to product names
def build_pipeline():
    """Return a fresh, unfitted pipeline that turns raw text into features.

    The pipeline chains three steps, in order:

    * ``"tfidf"``   - TF-IDF vectorisation capped at 1000 features.
    * ``"imputer"`` - median imputation of missing values.
    * ``"scaler"``  - variance scaling without centring.

    Returns:
        sklearn.pipeline.Pipeline: the unfitted pipeline.
    """
    vectorizer = TfidfVectorizer(max_features=1000)

    # NOTE(review): TF-IDF output is not expected to contain missing values,
    # so this step probably never changes anything - confirm before removing.
    median_imputer = SimpleImputer(strategy="median")

    # Centring is switched off because the vectoriser emits sparse input.
    variance_scaler = StandardScaler(with_mean=False)

    steps = [
        ("tfidf", vectorizer),
        ("imputer", median_imputer),
        ("scaler", variance_scaler),
    ]
    return Pipeline(steps)

# User interface: greet the user, show the menu and read the numeric choice
# into `a`, which the dispatch below switches on.
print('Hello Sir, How can I help you!')
print("Press---> 1 [For Train the Model]\nPress---> 2 [To Start Forecasting]")

try:
    a = int(input("Enter : "))
except ValueError:
    print("Please enter a valid number.")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive sessions (and shuts down
    # the kernel when run inside Jupyter). A non-zero status also lets a
    # calling shell see that the input was rejected.
    raise SystemExit(1) from None

match a:
    case 1:
        if not os.path.exists(MODEL_FILE):
            print("Training the model...")

            # Load the dataset aslo take user input file
            file_name = input("Enter the path or name of the CSV file: ")
            df = pd.read_csv(file_name)

            # Create income category for stratified sampling
            df['income_cat'] = pd.cut(df['Discount Prices(?)'],
                                      bins=[0.0, 1.5, 3.0, 4.5, 6.0, np.inf],
                                      labels=[1, 2, 3, 4, 5])

            # Stratified train-test split
            split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
            for train_index, test_index in split.split(df, df['income_cat']):
                strat_train_set = df.loc[train_index].drop('income_cat', axis=1)
                strat_test_set = df.loc[test_index].drop('income_cat', axis=1)

            # Prepare training features and labels
            health_features = strat_train_set['Product Name']
            health_labels = strat_train_set.drop(['Product Name', 'Weight/Type'], axis=1)

            # Build and fit pipeline
            pipeline = build_pipeline()
            health_feature_transformed = pipeline.fit_transform(health_features)

            # Train model
            model = MultiOutputRegressor(RandomForestRegressor(random_state=42))
            model.fit(health_feature_transformed, health_labels)

            # Save model and pipeline
            joblib.dump(model, MODEL_FILE)
            joblib.dump(pipeline, PIPELINE_FILE)

            print("Model training complete and saved.")
        else:
            print("Model already exists. Delete 'model.pkl' if you want to retrain.")

    case 2:
        if not os.path.exists(MODEL_FILE) or not os.path.exists(PIPELINE_FILE):
            print("Model not found. Please train it first using option 1.")
            exit()

        # Load model and pipeline
        model = joblib.load(MODEL_FILE)
        pipeline = joblib.load(PIPELINE_FILE)

        # User input for forecasting
        user_input = input("Enter the Product Name: ")
        X_input = pipeline.transform([user_input])
        prediction = model.predict(X_input)[0]

        # Define the columns (same order as training labels)
        predicted_columns = ['Discount Prices(?)', 'Actual Prices', 'Discount Percentage(%)', 'Rating', 'Users']

        print("\n[O_O] Forecasted Output:")
        for col, val in zip(predicted_columns, prediction):
            print(f"{col}: {round(val, 2)}")

    case _:
        print("Please enter a correct option (1 or 2).")


Hello Sir, How can I help you!
Press---> 1 [For Train the Model]
Press---> 2 [To Start Forecasting]


Enter :  2
Enter the Product Name:  Baidyanath Neem Tablets- A known Ayurvedic Her...



[O_O] Forecasted Output:
Discount Prices(?): 419.02
Actual Prices: 433.08
Discount Percentage(%): 37.88
Rating: 4.22
Users: 2825.98
