In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# --- CNN Code for Vegetable Classification ---
def create_cnn_model(input_shape, num_classes):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Parameters
input_shape = (224, 224, 3)
num_classes = 15

# Create and train the CNN model
cnn_model = create_cnn_model(input_shape, num_classes)

# Data augmentation and loading
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    'C:/Users/HP/OneDrive/Desktop/dataset/Vegetable Images', 
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Train the model
cnn_model.fit(train_generator, epochs=10)

# Save the model
cnn_model.save('vegetable_cnn_model.h5')

# Function to classify an image using the trained CNN model
def classify_vegetable(image_path):
    model = load_model('vegetable_cnn_model.h5')

    # Load and preprocess the image
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0) / 255.0

    # Make prediction
    prediction = model.predict(img_array)
    predicted_vegetable_idx = np.argmax(prediction, axis=1)[0]

    # Get class labels from the train_generator
    class_labels = list(train_generator.class_indices.keys())
    class_labels.sort()  # Ensure labels are sorted to match indices

    predicted_vegetable = class_labels[predicted_vegetable_idx]

    print(f"Predicted vegetable: {predicted_vegetable}")
    return predicted_vegetable


Found 3000 images belonging to 15 classes.
Epoch 1/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 1s/step - accuracy: 0.1437 - loss: 4.1713
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 1s/step - accuracy: 0.4722 - loss: 1.6295
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 1s/step - accuracy: 0.6466 - loss: 1.1276
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 1s/step - accuracy: 0.7348 - loss: 0.7936
Epoch 5/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 1s/step - accuracy: 0.7896 - loss: 0.6278
Epoch 6/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 1s/step - accuracy: 0.8269 - loss: 0.5050
Epoch 7/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 1s/step - accuracy: 0.8794 - loss: 0.3652
Epoch 8/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 1s/step - accuracy: 0.8902 - loss: 0.3023
Epoch 9/



In [46]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler

# --- Machine Learning Code (ML part for price prediction) ---

# Function to handle one-hot encoding with consistent columns
def onehot_encode(df, column, training_columns=None):
    dummies = pd.get_dummies(df[column], prefix=column)
    df = df.drop(column, axis=1)
    df = pd.concat([df, dummies], axis=1)

    # Align the columns if training_columns is provided
    if training_columns is not None:
        for col in training_columns:
            if col not in df.columns:
                df[col] = 0  # Add missing columns with default value 0
        df = df[training_columns]  # Reorder columns to match training

    return df

def preprocess_inputs(df, scaler=None, expected_columns=None):
    df = df.copy()

    # Clean 'Vegetable condition' column
    df['Vegetable condition'] = df['Vegetable condition'].replace({'scarp': 'scrap'})

    # Ensure the correct spelling for 'Disaster Happen in last 3 months'
    df['Disaster Happen in last 3 months'] = df['Disaster Happen in last 3 months'].replace({
        'no': 0, 'yes': 1, 'no ': 0
    })

    # Ordinal encoding for 'Month'
    month_mapping = {
        'jan': 1, 'feb': 2, 'march': 3, 'apr': 4, 'may': 5, 'june': 6, 'july': 7,
        'aug': 8, 'sept': 9, 'oct': 10, 'nov': 11, 'dec': 12
    }
    df['Month'] = df['Month'].str.lower().map(month_mapping)

    # Handle missing values in the 'Month' column
    if df['Month'].isnull().any():
        if not df['Month'].mode().empty:
            df['Month'] = df['Month'].fillna(df['Month'].mode()[0])
        else:
            df['Month'] = df['Month'].fillna(1)  # Default to January if mode is empty

    # Handle missing values in numerical columns
    numerical_columns = ['Temp', 'Disaster Happen in last 3 months']
    for col in numerical_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
        df[col] = df[col].fillna(df[col].mean())

    # One-hot encoding for categorical variables
    if expected_columns is None:
        # First call, use the expected columns from training
        df = onehot_encode(df, 'Vegetable')
        expected_columns = df.columns.tolist()  # Store the columns for alignment
        df = onehot_encode(df, 'Season', expected_columns)
        df = onehot_encode(df, 'Vegetable condition', expected_columns)
    else:
        # Subsequent calls, use the stored expected columns
        df = onehot_encode(df, 'Vegetable', expected_columns)
        df = onehot_encode(df, 'Season', expected_columns)
        df = onehot_encode(df, 'Vegetable condition', expected_columns)

    # Ensure all remaining columns are numeric
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.fillna(0)

    # Split df into X and y
    if 'Price per kg' in df.columns:
        y = df['Price per kg']
        X = df.drop('Price per kg', axis=1)
    else:
        X = df
        y = None

    # Scaling
    if scaler is None:
        scaler = StandardScaler()
        X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
    else:
        # Ensure the input has the expected columns
        X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)

    # Ensure the input has the expected columns
    if expected_columns is not None:
        for col in expected_columns:
            if col not in X_scaled.columns:
                X_scaled[col] = 0  # Assign 0 if the column is missing
        X_scaled = X_scaled[expected_columns]  # Reorder columns to match expected

    return X_scaled, y, scaler

# Load dataset
data = pd.read_csv('C:/Users/HP/OneDrive/Desktop/Vegetable_market1.csv')

# Preprocess the data
X_scaled, y, scaler = preprocess_inputs(data)

# Train the XGBoost model
ml_model = XGBRegressor()
ml_model.fit(X_scaled, y)

# Function to predict price
def predict_price(predicted_vegetable, season, month, temperature, disaster_happened, condition):
    input_data = pd.DataFrame({
        'Vegetable': [predicted_vegetable],
        'Season': [season],
        'Month': [month],
        'Temp': [temperature],
        'Disaster Happen in last 3 months': [disaster_happened],
        'Vegetable condition': [condition]
    })

    # Preprocess the input data without 'Price per kg'
    input_data_processed, _, _ = preprocess_inputs(input_data, scaler=scaler, expected_columns=X_scaled.columns)

    # Debugging information
#     print("Input Data for Prediction:")
#     print(input_data_processed)
#     print("Columns in Input Data:", input_data_processed.columns.tolist())

    # Predict the price
    predicted_price = ml_model.predict(input_data_processed)
    return predicted_price[0]

# Step 1: Use the DL model to classify the vegetable
image_path = 'C:/Users/HP/OneDrive/Desktop/dataset/Vegetable Images/Cucumber/1072.jpg'  # Update with your image path
predicted_vegetable = classify_vegetable(image_path)  # Function call to classify the vegetable

# Step 2: Get additional user inputs
season = input("Enter the season (e.g., winter, summer): ").strip().lower()
month = input("Enter the month (e.g., jan, feb): ").strip().lower()
temperature = float(input("Enter the temperature in °C: "))
disaster_happened_input = input("Did any disaster happen in the last 3 months? (yes/no): ").strip().lower()
disaster_happened = 1 if disaster_happened_input == 'yes' else 0
condition = input("Enter the vegetable condition (e.g., fresh, scrap): ").strip().lower()

# Predict the price
predicted_price_value = predict_price(predicted_vegetable, season, month, temperature, disaster_happened, condition)
print(f'Predicted Price per kg for {predicted_vegetable}: {predicted_price_value:.2f}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step
Predicted vegetable: Cucumber
Enter the season (e.g., winter, summer): winter
Enter the month (e.g., jan, feb): feb
Enter the temperature in °C: 23
Did any disaster happen in the last 3 months? (yes/no): no
Enter the vegetable condition (e.g., fresh, scrap): fresh
Predicted Price per kg for Cucumber: 30.12
