In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the commodity price table from the mounted Drive folder
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/commodity_data_with_adjusted_prices.csv')

# Three-letter month abbreviation -> calendar month number (Jan=1 ... Dec=12)
month_mapping = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        start=1,
    )
}

# Replace the abbreviations stored in 'Month' with their numbers.
# NOTE(review): Series.map yields NaN for any value that is not a key of
# month_mapping -- confirm the CSV only contains these twelve spellings.
data['Month'] = data['Month'].map(month_mapping)

# Turn commodity names into integer codes, stored in a new column; the fitted
# encoder is kept so names can be encoded again at prediction time
label_encoder = LabelEncoder()
data['Commodity_Encoded'] = label_encoder.fit_transform(data['Commodity'])

# Feature matrix (X) and regression target (y)
feature_columns = ['Year', 'Month', 'Commodity_Encoded']
X = data[feature_columns]
y = data['Adjusted Retail Price']

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training rows
# (fit() returns the estimator itself, so the calls can be chained)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows: RMSE is the square root of the mean squared error
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")

# Function to predict price based on user input for year, month, and commodity
def predict_price(year, month, commodity):
    """Predict the price of a commodity for a given year and month.

    Args:
        year: Value used for the 'Year' feature.
        month: Month as a number from 1 to 12 (an int or a numeric string
            such as "2"), or as one of the abbreviations that are keys of
            ``month_mapping`` ("May", "may", " MAY "). Abbreviations are
            matched case-insensitively after stripping whitespace.
        commodity: Commodity name, encoded with ``label_encoder``.

    Returns:
        The first (only) value returned by ``rf_model.predict`` for the
        single-row input.

    Raises:
        ValueError: If ``month`` is neither a known abbreviation nor a
            number in the range 1-12. ``label_encoder.transform`` is also
            expected to raise ValueError for a commodity it was not fitted
            on (per scikit-learn's LabelEncoder documentation).
    """
    if isinstance(month, str):
        month_text = month.strip()
        # Case-insensitive view of the abbreviation table so "may" == "May"
        by_lowercase_name = {name.lower(): number for name, number in month_mapping.items()}
        month_num = by_lowercase_name.get(month_text.lower())
        if month_num is None:
            # Not an abbreviation: fall back to a numeric month such as "2"
            try:
                month_num = int(month_text)
            except ValueError:
                raise ValueError(
                    f"Unrecognized month {month!r}; use 1-12 or one of "
                    f"{', '.join(month_mapping)}"
                ) from None
    else:
        # Non-string input is taken as the month number itself
        month_num = month

    # Reject values that are not calendar months instead of predicting on them
    if not 1 <= month_num <= 12:
        raise ValueError(f"Month must be between 1 and 12, got {month!r}")

    # Encode the commodity name with the encoder fitted on the training data
    commodity_encoded = label_encoder.transform([commodity])[0]

    # Single-row frame using the same column names the model was trained on
    input_data = pd.DataFrame({
        'Year': [year],
        'Month': [month_num],
        'Commodity_Encoded': [commodity_encoded],
    })

    # predict() returns one value per row; there is exactly one row here
    predicted_price = rf_model.predict(input_data)[0]

    return predicted_price

# Show the commodities the encoder was fitted on as a numbered menu
commodities = label_encoder.classes_
print("Available commodities:")
for idx, commodity in enumerate(commodities, 1):
    print(f"{idx}. {commodity}")

# Read the commodity choice (a 1-based index into the menu above).
# On bad input, report it and stop by raising SystemExit rather than calling
# exit(): exit() is a helper that the `site` module adds for interactive
# sessions and is not intended for use in programs.
# NOTE(review): under IPython/Jupyter kernels exit() reportedly does not stop
# the rest of the cell immediately -- confirm; SystemExit does not have that
# problem.
try:
    commodity_choice = int(input(f"Enter the number corresponding to the commodity (1-{len(commodities)}): "))
    if not 1 <= commodity_choice <= len(commodities):
        raise ValueError("Invalid choice")
    commodity_input = commodities[commodity_choice - 1]
except ValueError as e:
    print(f"Error: {e}. Please enter a valid number.")
    raise SystemExit(1) from None

# Read the year; int() raises ValueError on non-numeric text
try:
    year_input = int(input("Enter the year (e.g., 2023): "))
except ValueError as e:
    print(f"Error: {e}. Please enter the year as a whole number.")
    raise SystemExit(1) from None

# Surrounding whitespace is dropped so that "May " is treated like "May"
month_input = input("Enter the month (e.g., May): ").strip()

# Predict price; a ValueError here means the month (or commodity) could not
# be turned into model features, so report it instead of showing a traceback
try:
    predicted_price = predict_price(year_input, month_input, commodity_input)
except ValueError as e:
    print(f"Error: could not predict a price for month {month_input!r}: {e}")
    raise SystemExit(1) from None

print(f"Predicted price for {commodity_input} in {month_input} {year_input}: ₹{predicted_price:.2f}")


RMSE: 12.33133750481671
Available commodities:
1. Atta (Wheat)
2. Gram Dal
3. Groundnut Oil
4. Gur
5. Masoor Dal
6. Milk (Rs./Ltr)
7. Moong Dal
8. Mustard Oil
9. Onion
10. Palm Oil
11. Potato
12. Rice
13. Salt
14. Soya Oil
15. Sugar
16. Sunflower Oil
17. Tea
18. Tomato
19. Tur Dal
20. Urad Dal
21. Vanaspati
22. Wheat
Enter the number corresponding to the commodity (1-22): 2
Enter the year (e.g., 2023): 2028
Enter the month (e.g., May): 2
Predicted price for Gram Dal in 2 2028: ₹73.07
