In [None]:
import pandas as pd

In [None]:
# Load the Kalimati price records from the CSV export into a DataFrame.
csv_path = '/content/kalimati-tarkari-prices-from-may-2021-to-september-2023.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to check the column layout.
df.head(5)

Unnamed: 0,Commodity,Date,Unit,mimimum,maximum,Average
0,Tomato Big(Nepali),1/5/2021,Kg,50,60,55.0
1,Tomato Big(Indian),1/5/2021,Kg,50,60,55.0
2,Tomato Small(Local),1/5/2021,Kg,30,35,32.5
3,Tomato Small(Tunnel),1/5/2021,Kg,30,35,32.5
4,Tomato Small(Indian),1/5/2021,KG,40,45,42.5


In [None]:
# Preview the last five rows; compare their formatting with the head of the file.
df.tail(5)

Unnamed: 0,Commodity,Date,Unit,mimimum,maximum,Average
52294,Capsicum,7/2/2022,KG,Rs 120.00,Rs 150.00,Rs 135.00
52295,Carrot(Local),7/2/2022,KG,Rs 90.00,Rs 110.00,Rs 100.00
52296,Cauli Local,7/2/2022,KG,Rs 40.00,Rs 50.00,Rs 45.00
52297,Celery,7/2/2022,KG,Rs 200.00,Rs 250.00,Rs 233.33
52298,Chilli Dry,7/2/2022,KG,Rs 370.00,Rs 380.00,Rs 376.67


In [None]:
# Show every distinct value that occurs in the Commodity column
commodity_names = df['Commodity'].unique()
print(commodity_names)

['Tomato Big(Nepali)' 'Tomato Big(Indian)' 'Tomato Small(Local)'
 'Tomato Small(Tunnel)' 'Tomato Small(Indian)' 'Tomato Small(Terai)'
 'Potato Red' 'Potato Red(Indian)' 'Potato White' 'Onion Dry (Indian)'
 'Carrot(Local)' 'Carrot(Terai)' 'Cabbage(Local)' 'Cabbage(Terai)'
 'Cabbage' 'Cauli Local' 'Cauli Local(Jyapu)' 'Cauli Terai' 'Raddish Red'
 'Raddish White(Local)' 'Raddish White(Hybrid)' 'Brinjal Long'
 'Brinjal Round' 'Green Peas' 'French Bean(Local)' 'French Bean(Hybrid)'
 'Sword Bean' 'Bitter Gourd' 'Bottle Gourd' 'Pumpkin' 'Squash(Long)'
 'Squash(Round)' 'Turnip' 'Okara' 'Sweet Potato' 'Barela' 'Arum'
 'Christophine' 'Brd Leaf Mustard' 'Spinach Leaf' 'Cress Leaf'
 'Mustard Leaf' 'Fenugreek Leaf' 'Onion Green' 'Bakula' 'Yam'
 'Mushroom(Kanya)' 'Mushroom(Button)' 'Brocauli' 'Sugarbeet'
 'Red Cabbbage' 'Lettuce' 'Knolkhol' 'Celery' 'Parseley' 'Fennel Leaf'
 'Mint' 'Turnip A' 'Tamarind' 'Bamboo Shoot' 'Tofu' 'Gundruk'
 'Apple(Jholey)' 'Apple(Fuji)' 'Banana' 'Lime' 'Pomegranate'
 'Gr

In [None]:
# Clean numeric columns (remove 'Rs' and convert to float if needed)
# Some rows hold plain numbers while others carry an "Rs " prefix
# (e.g. "Rs 120.00"). Casting to str first keeps the cleaning safe when a
# column contains non-string values (the .str accessor would turn those into
# NaN) and lets the cell be re-run after the columns are already float.
for price_col in ['mimimum', 'maximum', 'Average']:
    as_text = df[price_col].astype(str).str.replace('Rs ', '', regex=False)
    df[price_col] = as_text.astype(float)

In [None]:
# Convert date
# The dates appear to be day/month/year: the file covers May 2021 onwards and
# its first rows read "1/5/2021". pandas' default would read that as
# month/day/year (5 January), so parse day-first explicitly.
# NOTE(review): confirm that every row of the file uses the same date layout.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

In [None]:
# Feature and target selection
# .copy() gives X its own data, so later cells can add or drop columns on it
# without pandas raising SettingWithCopyWarning for writing into a slice of df.
# NOTE(review): in the first sample rows Average is the midpoint of mimimum and
# maximum, so these two features may almost determine the target — confirm
# before trusting the evaluation scores.
X = df[['Commodity', 'Unit', 'Date', 'mimimum', 'Average']].copy()
y = df['maximum']

In [None]:
# Extract useful date features (year, month, day)
# assign() builds a new frame instead of writing columns into X in place;
# in-place writes on a frame sliced from df can raise SettingWithCopyWarning.
# The guard keeps the cell re-runnable once Date has already been replaced.
if 'Date' in X.columns:
    X = X.assign(
        Year=X['Date'].dt.year,
        Month=X['Date'].dt.month,
        Day=X['Date'].dt.day,
    ).drop(columns='Date')  # Drop original Date column

In [None]:
# Column groups: each list is handled by its own preprocessing transformer
numeric_cols = [
    'mimimum',
    'Average',
    'Year',
    'Month',
    'Day',
]
categorical_cols = [
    'Commodity',
    'Unit',
]

We need a column transformation here because the data mixes numeric features (like price or temperature) and categorical features (like names of vegetables or units), and each kind has to be converted into a numerical form the model can use.
**Categorical Transformation:**
We use OneHotEncoder() to convert categorical columns like 'Commodity' and 'Unit' into a binary vector form.

For example, "Tomato" → [1, 0, 0], "Potato" → [0, 1, 0], etc.

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Transformers
# One single-step pipeline per column kind: scaling for numeric columns and
# one-hot encoding (unknown categories ignored) for categorical columns.
scaler_steps = [('scaler', StandardScaler())]
onehot_steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))]
numeric_transformer = Pipeline(steps=scaler_steps)
categorical_transformer = Pipeline(steps=onehot_steps)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
# Route each column group to its matching transformer.
column_routes = [
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_routes)

In [None]:
# Pipeline: preprocessing first, then a linear regression on its output
model_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=model_steps)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing (fixed seed), then fit on the rest
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
model.fit(X_train, y_train)

In [None]:
from datetime import datetime

def predict_max_price_interactive():
    """Prompt for a commodity and a date, then print the predicted maximum price.

    Reads two values from standard input, looks up the most recent row for the
    commodity in the notebook-level ``df`` to obtain its unit, minimum and
    average price, and passes those together with the requested date's
    year/month/day to the fitted notebook-level ``model``.

    Returns:
        None. Either the prediction or a message explaining why none was made
        (unreadable date, date before today, unknown commodity) is printed.
    """
    # Take inputs; stray surrounding whitespace would otherwise break the
    # exact commodity match further down.
    commodity = input("Enter Commodity (e.g., Cauli Local): ").strip()
    date_str = input("Enter future date (YYYY-MM-DD): ").strip()

    # Parse and validate date. Unparseable text raises ValueError and an empty
    # string parses to NaT; both are reported instead of crashing or feeding
    # NaN date features to the model.
    try:
        date = pd.to_datetime(date_str)
    except (ValueError, TypeError):
        date = pd.NaT

    if pd.isna(date):
        print(f"Could not read '{date_str}' as a date. Please use the YYYY-MM-DD format.")
        return

    today = pd.to_datetime(datetime.today().date())

    if date < today:
        print(f"The date {date.date()} is in the past. Please enter a future date.")
        return

    # Filter most recent known record of the commodity from the dataset
    commodity_rows = df[df['Commodity'] == commodity]

    if commodity_rows.empty:
        print(f"Commodity '{commodity}' not found in the dataset.")
        return

    latest_record = commodity_rows.sort_values(by='Date', ascending=False).iloc[0]

    # Prepare input for model: the latest known unit and prices stand in for
    # the unknown values on the requested date.
    input_data = pd.DataFrame({
        'Commodity': [commodity],
        'Unit': [latest_record['Unit']],
        'mimimum': [latest_record['mimimum']],
        'Average': [latest_record['Average']],
        'Year': [date.year],
        'Month': [date.month],
        'Day': [date.day]
    })

    # Predict
    predicted = model.predict(input_data)[0]
    print(f"\n🔮 Predicted Maximum Price for {commodity} on {date.date()} is: Rs {predicted:.2f}")

# Call the function (blocks until both prompts are answered)
predict_max_price_interactive()



In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the fitted pipeline on the held-out rows
y_pred = model.predict(X_test)

# Error metrics; RMSE is derived from MSE
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Assemble the report, then print it in one call
report_lines = [
    " Model Evaluation Metrics:",
    f" Mean Absolute Error (MAE): Rs {mae:.2f}",
    f" Mean Squared Error (MSE): Rs {mse:.2f}",
    f" Root Mean Squared Error (RMSE): Rs {rmse:.2f}",
    f" R² Score: {r2:.4f}",
]
print("\n".join(report_lines))


 Model Evaluation Metrics:
 Mean Absolute Error (MAE): Rs 0.25
 Mean Squared Error (MSE): Rs 0.65
 Root Mean Squared Error (RMSE): Rs 0.81
 R² Score: 1.0000
