In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pickle
from sklearn.preprocessing import LabelEncoder

In [None]:
def train_and_save_model(csv_file_path, model_output_path,
                         encoders_output_path="label_encoders.pkl"):
    """Train a Random Forest regressor for ``discount_price`` and pickle it.

    The CSV is loaded, missing values are replaced with 0, every object-dtype
    column is label-encoded, and a 100-tree RandomForestRegressor is fitted on
    an 80/20 train/test split. Test-set MSE and R-squared are printed.

    The fitted encoders are also pickled as a ``{column_name: LabelEncoder}``
    dict, because prediction code has to apply the very same
    category-to-integer mapping that the model was trained on.

    Args:
        csv_file_path: Path of the CSV to train on; it must contain a
            ``discount_price`` column, which is used as the target.
        model_output_path: Where the fitted model is pickled.
        encoders_output_path: Where the dict of fitted encoders is pickled.
            Defaults to ``"label_encoders.pkl"``, the file name that
            ``predict_discount_price`` in this notebook loads. Pass ``None``
            to skip writing the encoders.

    Returns:
        None. Failures are reported with ``print`` rather than raised.
    """
    try:
        # Load the data
        df = pd.read_csv(csv_file_path)

        # Handle missing values (simple zero imputation; swap in another strategy if appropriate)
        df = df.fillna(0)

        # Convert categorical features to numerical using Label Encoding,
        # keeping every fitted encoder so it can be reused at prediction time.
        categorical_cols = df.select_dtypes(include=['object']).columns
        label_encoders = {}
        for col in categorical_cols:
            encoder = LabelEncoder()
            # fillna(0) leaves the integer 0 in text columns that had gaps;
            # casting to str keeps the labels uniformly typed, which
            # LabelEncoder requires (it rejects mixed str/int input).
            df[col] = encoder.fit_transform(df[col].astype(str))
            label_encoders[col] = encoder

        # Define features (X) and target (y)
        X = df.drop(columns='discount_price')  # Features (all columns except discount_price)
        y = df['discount_price']  # Target variable

        # Split data into training and testing sets (80% train, 20% test)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create and train the Random Forest Regressor model
        rf_model = RandomForestRegressor(n_estimators=100, random_state=42)  # You can tune hyperparameters here
        rf_model.fit(X_train, y_train)

        # Evaluate the model on the held-out test set
        y_pred = rf_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        print(f"Mean Squared Error: {mse}")
        print(f"R-squared: {r2}")

        # Save the trained model to a pickle file
        with open(model_output_path, 'wb') as file:
            pickle.dump(rf_model, file)

        print(f"Model saved to {model_output_path}")

        # Save the encoders alongside the model so predictions can reuse them
        if encoders_output_path is not None:
            with open(encoders_output_path, 'wb') as file:
                pickle.dump(label_encoders, file)

            print(f"Label encoders saved to {encoders_output_path}")

    except FileNotFoundError:
        print(f"Error: CSV file '{csv_file_path}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [None]:
def main():
    """Train the model from the Google Drive CSV and pickle it back to Drive.

    Any exception that escapes the training call is printed, not raised.
    """
    source_csv = "/content/drive/MyDrive/postrgesqldata.csv"  # extracted CSV
    pickle_target = "/content/drive/MyDrive/randfor_model.pkl"  # where the model is written
    try:
        train_and_save_model(source_csv, pickle_target)
    except Exception as err:
        print(f"An error occurred in main: {err}")

if __name__ == "__main__":
    main()

Mean Squared Error: 26202114.629885416
R-squared: 0.6793048861296682
Model saved to /content/drive/MyDrive/randfor_model.pkl


In [None]:
def predict_discount_price(input_data, model_path, required_features,
                           encoders_path="label_encoders.pkl"):
    """Load a pickled model and predict on the requested feature columns.

    Args:
        input_data: A DataFrame, or a single record (e.g. a dict) that is
            wrapped into a one-row DataFrame. The caller's object is never
            modified.
        model_path: Path of the pickled model; the loaded object must expose
            ``predict``.
        required_features: Column labels, in order, that are passed to the
            model. Columns outside this list are ignored.
        encoders_path: Path of the pickled ``{column_name: encoder}`` dict.
            It is only opened when one of the required columns has object
            dtype, so purely numeric inputs do not need the file.

    Returns:
        Whatever ``model.predict`` returns on success. On failure a string
        describing the problem is returned instead of raising, so callers
        that simply print the result keep working.
    """
    try:
        # Load the trained model. pickle.load can execute arbitrary code, so
        # only point this at files produced by this notebook.
        try:
            with open(model_path, 'rb') as file:
                model = pickle.load(file)
        except FileNotFoundError:
            return "Error: Model file not found."

        # Ensure we work on a DataFrame
        if isinstance(input_data, pd.DataFrame):
            frame = input_data
        else:
            frame = pd.DataFrame([input_data])

        # Narrow to the model's features first so unrelated text columns are
        # not encoded, and copy so the encoding below cannot leak back into
        # the caller's DataFrame.
        features = frame[required_features].copy()

        # Handle categorical features using the SAME LabelEncoders used during training
        categorical_cols = features.select_dtypes(include=['object']).columns
        if len(categorical_cols) > 0:
            try:
                with open(encoders_path, 'rb') as f:
                    label_encoders = pickle.load(f)
            except FileNotFoundError:
                # Reported separately from a missing model so the message
                # names the file that is actually absent.
                return (f"Error: Label encoders file '{encoders_path}' not found. "
                        "Make sure you saved it during training.")

            for col in categorical_cols:
                if col not in label_encoders:
                    raise ValueError(f"Label encoder for column '{col}' not found. Ensure it was saved during training.")
                features[col] = label_encoders[col].transform(features[col])

        # Make predictions
        return model.predict(features)

    except Exception as e:
        return f"An error occurred: {e}"


In [None]:
def main():
    """Demo: predict from only ``festival`` and ``actual_price``.

    Scores one record given as a dict and then a two-row DataFrame, printing
    whatever ``predict_discount_price`` returns for each. Exceptions are
    printed, not raised.
    """
    model_path = "/content/drive/MyDrive/randfor_model.pkl"
    # The only columns handed to the model in this demo
    required_features = ['festival', 'actual_price']

    try:
        # Single record supplied as a plain dict
        single_record = {'festival': 1, 'actual_price': 15000}
        single_result = predict_discount_price(single_record, model_path, required_features)
        print(f"Predicted discount price: {single_result}")

        # Several records supplied as a DataFrame
        batch_rows = [
            {'festival': 0, 'actual_price': 5000},
            {'festival': 1, 'actual_price': 25000},
        ]
        batch_result = predict_discount_price(pd.DataFrame(batch_rows), model_path, required_features)
        print(f"Predicted discount prices (multiple): {batch_result}")
    except Exception as err:
        print(f"An error occurred in main: {err}")

if __name__ == "__main__":
    main()

Predicted discount price: Error: Model file not found.
Predicted discount prices (multiple): Error: Model file not found.


In [None]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def predict_discount_price(input_data, model_path, required_features=None,
                           encoders_path="label_encoders.pkl"):
    """Load a pickled model and predict on new data.

    Args:
        input_data: A DataFrame, or a single record (e.g. a dict) that is
            wrapped into a one-row DataFrame. The caller's object is never
            modified.
        model_path: Path of the pickled model; the loaded object must expose
            ``predict``.
        required_features: Optional column labels, in order, to pass to the
            model. ``None`` (the default) passes every column. Accepting it
            here keeps calls written for the three-argument version defined
            earlier in this notebook working after this definition shadows it.
        encoders_path: Path of the pickled ``{column_name: encoder}`` dict.
            It is only opened when a column being passed to the model has
            object dtype.

    Returns:
        Whatever ``model.predict`` returns on success. On failure a string
        describing the problem is returned instead of raising.
    """
    try:
        # Load the trained model. pickle.load can execute arbitrary code, so
        # only point this at files produced by this notebook.
        try:
            with open(model_path, 'rb') as file:
                model = pickle.load(file)
        except FileNotFoundError:
            return "Error: Model file not found."

        # Ensure we work on a DataFrame (a dict becomes a one-row frame)
        if isinstance(input_data, pd.DataFrame):
            frame = input_data
        else:
            frame = pd.DataFrame([input_data])

        # Copy so the encoding below cannot leak back into the caller's data
        if required_features is None:
            features = frame.copy()
        else:
            features = frame[required_features].copy()

        # Handle categorical features using the SAME LabelEncoders used during training
        categorical_cols = features.select_dtypes(include=['object']).columns
        if len(categorical_cols) > 0:
            try:
                with open(encoders_path, 'rb') as f:
                    label_encoders = pickle.load(f)
            except FileNotFoundError:
                # Reported separately from a missing model so the message
                # names the file that is actually absent.
                return (f"Error: Label encoders file '{encoders_path}' not found. "
                        "Make sure you saved it during training.")

            for col in categorical_cols:
                if col not in label_encoders:
                    raise ValueError(f"Label encoder for column '{col}' not found. Ensure it was saved during training.")
                features[col] = label_encoders[col].transform(features[col])

        # Make predictions
        return model.predict(features)

    except Exception as e:
        return f"An error occurred: {e}"

def main():
    """Demo: predict for fully specified product records.

    Scores one product given as a dict and then a two-row DataFrame, printing
    whatever ``predict_discount_price`` returns for each. Date strings are
    round-tripped through pandas so they end up in ``YYYY-MM-DD`` form.
    Exceptions are printed, not raised.
    """
    model_path = "/content/drive/MyDrive/randfor_model.pkl"  # saved model location

    try:
        # Example record (replace with your actual data)
        single_product = dict(
            date='2024-10-27',
            name='Example Product',
            main_category='Electronics',
            sub_category='Mobile',
            ratings=4.5,
            no_of_ratings=1000,
            festival=0,
            no_of_purchases=5000,
            actual_price=20000,
        )
        # Normalize the date string
        parsed_date = pd.to_datetime(single_product['date'])
        single_product['date'] = parsed_date.strftime('%Y-%m-%d')

        single_result = predict_discount_price(single_product, model_path)
        print(f"Predicted discount price: {single_result}")

        # Same idea with several rows at once
        product_rows = [
            {'date': '2024-10-27', 'name': 'Product A', 'main_category': 'Electronics',
             'sub_category': 'Mobile', 'ratings': 4.2, 'no_of_ratings': 500, 'festival': 1,
             'no_of_purchases': 2000, 'actual_price': 15000},
            {'date': '2024-10-28', 'name': 'Product B', 'main_category': 'Clothing',
             'sub_category': 'Men', 'ratings': 3.8, 'no_of_ratings': 250, 'festival': 0,
             'no_of_purchases': 1000, 'actual_price': 5000},
        ]
        product_batch = pd.DataFrame(product_rows)
        product_batch['date'] = pd.to_datetime(product_batch['date']).dt.strftime('%Y-%m-%d')

        batch_result = predict_discount_price(product_batch, model_path)
        print(f"Predicted discount prices (multiple): {batch_result}")
    except Exception as err:
        print(f"An error occurred in main: {err}")

if __name__ == "__main__":
    main()

Model Path: /content/randfor_model.pkl
Predicted discount price: Error: Model file not found.
Predicted discount prices (multiple): Error: Model file not found.


In [None]:
# `rf_model` only ever existed as a local variable inside train_and_save_model,
# so it has to be reloaded from the pickle that function wrote before it can
# be inspected here. Only unpickle files produced by this notebook.
with open("/content/drive/MyDrive/randfor_model.pkl", "rb") as model_file:
    rf_model = pickle.load(model_file)
rf_model

NameError: name 'rf_model' is not defined