In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
from datetime import timedelta

# Step 1: Load the CSV file
df = pd.read_csv(r"E:\pbl temprun 3\Data\original.csv")

# Step 2: Clean column names and drop unnecessary columns
df.columns = df.columns.str.strip()
df = df.drop(['Code No', 'Unit','Date'], axis=1)

# Step 3: Add today's date for feature engineering
df['Date'] = pd.to_datetime('today').normalize()

# Step 4: Extract date-based features
df['day'] = df['Date'].dt.day
df['month'] = df['Date'].dt.month
df['year'] = df['Date'].dt.year
df['dayofweek'] = df['Date'].dt.dayofweek

# Step 5: Encode the 'Item' (vegetable name)
le = LabelEncoder()
df['Item'] = le.fit_transform(df['Item'])

# Step 6: Define features (X) and targets (y)
X = df[['Item', 'Quantity', 'day', 'month', 'year', 'dayofweek']]
y_min = df['Min']
y_max = df['Max']

# Step 7: Split the data into training and testing sets
X_train, X_test, y_min_train, y_min_test = train_test_split(X, y_min, test_size=0.2, random_state=42)
_, _, y_max_train, y_max_test = train_test_split(X, y_max, test_size=0.2, random_state=42)

# Step 8: Train Random Forest models
min_model = RandomForestRegressor()
max_model = RandomForestRegressor()
min_model.fit(X_train, y_min_train)
max_model.fit(X_train, y_max_train)

# Step 9: Evaluate the models
min_preds = min_model.predict(X_test)
max_preds = max_model.predict(X_test)

print("🔍 Min Price Prediction")
print("MAE:", mean_absolute_error(y_min_test, min_preds))
print("R² Score:", r2_score(y_min_test, min_preds))

print("\n🔍 Max Price Prediction")
print("MAE:", mean_absolute_error(y_max_test, max_preds))
print("R² Score:", r2_score(y_max_test, max_preds))

# Step 10: Prepare for future predictions
encoded_items = df['Item'].unique()
item_name_map = dict(zip(le.transform(le.classes_), le.classes_))
avg_quantity = df.groupby('Item')['Quantity'].mean().to_dict()

# Predict for the next 7 days with quantity variation
start_date = pd.to_datetime('today').normalize()
future_preds = []

for i in range(7):
    date = start_date + timedelta(days=i)
    for item_code in encoded_items:
        item_name = item_name_map.get(item_code, "Unknown")
        base_quantity = avg_quantity.get(item_code, 1000)
        
        # Add random variation to quantity (+/- 10%)
        quantity = base_quantity * np.random.uniform(0.9, 1.1)

        features = pd.DataFrame({
            'Item': [item_code],
            'Quantity': [quantity],
            'day': [date.day],
            'month': [date.month],
            'year': [date.year],
            'dayofweek': [date.dayofweek]
        })

        min_price = min_model.predict(features)[0]
        max_price = max_model.predict(features)[0]

        future_preds.append({
            'Date': date.date(),
            'Item': item_name,
            'Predicted Min': round(min_price, 2),
            'Predicted Max': round(max_price, 2)
        })

# Step 11: Convert predictions to DataFrame and display
future_df = pd.DataFrame(future_preds)
print("\n📅 Predicted Prices for the Next 7 Days:")
print(future_df)

# Save to CSV
future_df.to_csv("E:\\future_predictions.csv", index=False)


🔍 Min Price Prediction
MAE: 347.0547146897413
R² Score: 0.9356150684921194

🔍 Max Price Prediction
MAE: 559.647340413436
R² Score: 0.9080677897904381

📅 Predicted Prices for the Next 7 Days:
            Date        Item  Predicted Min  Predicted Max
0     2025-04-14       Onion        1379.00        2856.00
1     2025-04-14      Potato        1134.00        1922.00
2     2025-04-14      Garlic        4125.00       11090.00
3     2025-04-14      Ginger         985.00        2502.00
4     2025-04-14  Ladyfinger        2411.50        6125.08
...          ...         ...            ...            ...
1220  2025-04-20        आवऴा        1314.40        2958.92
1221  2025-04-20     Coconut         433.00         818.00
1222  2025-04-20       Aboli         416.00         709.00
1223  2025-04-20      Sesame        2292.00         664.15
1224  2025-04-20      Litchi        2663.65        2989.08

[1225 rows x 4 columns]


In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score
from datetime import timedelta

# Step 1: Load the CSV file
df = pd.read_csv(r"E:\pbl temprun 3\Data\original.csv")

# Step 2: Clean column names and drop unnecessary columns
df.columns = df.columns.str.strip()
df = df.drop(['Code No', 'Unit'], axis=1)

# Step 3: Convert 'Date' to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Step 4: Extract date-based features
df['day'] = df['Date'].dt.day
df['month'] = df['Date'].dt.month
df['year'] = df['Date'].dt.year
df['dayofweek'] = df['Date'].dt.dayofweek
df['dayofyear'] = df['Date'].dt.dayofyear
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

# Optional: Rolling and Lag features
df.sort_values(['Item', 'Date'], inplace=True)
df['rolling_min_3'] = df.groupby('Item')['Min'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
df['rolling_max_3'] = df.groupby('Item')['Max'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
df['lag_min'] = df.groupby('Item')['Min'].shift(1)
df['lag_max'] = df.groupby('Item')['Max'].shift(1)

# Drop rows with NaN caused by lag
df.dropna(inplace=True)

# Step 5: Encode the 'Item' (vegetable name)
le = LabelEncoder()
df['Item'] = le.fit_transform(df['Item'])

# Step 6: Define features (X) and targets (y)
feature_cols = [
    'Item', 'Quantity', 'day', 'month', 'year', 'dayofweek', 'dayofyear', 'is_weekend',
    'rolling_min_3', 'rolling_max_3', 'lag_min', 'lag_max'
]

X = df[feature_cols]
y_min = df['Min']
y_max = df['Max']

# Step 7: Split the data into training and testing sets
X_train, X_test, y_min_train, y_min_test = train_test_split(X, y_min, test_size=0.2, random_state=42)
_, _, y_max_train, y_max_test = train_test_split(X, y_max, test_size=0.2, random_state=42)

# Step 8: Train Random Forest models
min_model = RandomForestRegressor()
max_model = RandomForestRegressor()
min_model.fit(X_train, y_min_train)
max_model.fit(X_train, y_max_train)

# Step 9: Evaluate the models
min_preds = min_model.predict(X_test)
max_preds = max_model.predict(X_test)

print("🔍 Min Price Prediction")
print("MAE:", mean_absolute_error(y_min_test, min_preds))
print("R² Score:", r2_score(y_min_test, min_preds))

print("\n🔍 Max Price Prediction")
print("MAE:", mean_absolute_error(y_max_test, max_preds))
print("R² Score:", r2_score(y_max_test, max_preds))

# Step 10: Prepare for future predictions
encoded_items = df['Item'].unique()
item_name_map = dict(zip(le.transform(le.classes_), le.classes_))
avg_quantity = df.groupby('Item')['Quantity'].mean().to_dict()
avg_rolling_min = df.groupby('Item')['rolling_min_3'].mean().to_dict()
avg_rolling_max = df.groupby('Item')['rolling_max_3'].mean().to_dict()
avg_lag_min = df.groupby('Item')['lag_min'].mean().to_dict()
avg_lag_max = df.groupby('Item')['lag_max'].mean().to_dict()

# Predict for the next 7 days with quantity variation
start_date = pd.to_datetime('today').normalize()
future_preds = []

for i in range(7):
    date = start_date + timedelta(days=i)
    dayofweek = date.dayofweek
    dayofyear = date.timetuple().tm_yday
    is_weekend = int(dayofweek in [5, 6])

    for item_code in encoded_items:
        item_name = item_name_map.get(item_code, "Unknown")
        base_quantity = avg_quantity.get(item_code, 1000)

        # Add random variation to quantity (+/- 10%)
        quantity = base_quantity * np.random.uniform(0.9, 1.1)

        features = pd.DataFrame({
            'Item': [item_code],
            'Quantity': [quantity],
            'day': [date.day],
            'month': [date.month],
            'year': [date.year],
            'dayofweek': [dayofweek],
            'dayofyear': [dayofyear],
            'is_weekend': [is_weekend],
            'rolling_min_3': [avg_rolling_min.get(item_code, 0)],
            'rolling_max_3': [avg_rolling_max.get(item_code, 0)],
            'lag_min': [avg_lag_min.get(item_code, 0)],
            'lag_max': [avg_lag_max.get(item_code, 0)]
        })

        min_price = min_model.predict(features)[0]
        max_price = max_model.predict(features)[0]

        future_preds.append({
            'Date': date.date(),
            'Item': item_name,
            'Predicted Min': round(min_price, 2),
            'Predicted Max': round(max_price, 2)
        })

# Step 11: Convert predictions to DataFrame and display
future_df = pd.DataFrame(future_preds)
print("\n📅 Predicted Prices for the Next 7 Days:")
print(future_df)

# Save to CSV
future_df.to_csv("E:\pbl temprun 3\Data\future_predictions.csv", index=False)


  future_df.to_csv("E:\pbl temprun 3\Data\\future_predictions.csv", index=False)


🔍 Min Price Prediction
MAE: 158.73314722617354
R² Score: 0.9784104498473912

🔍 Max Price Prediction
MAE: 263.0056934566145
R² Score: 0.9657253544776554

📅 Predicted Prices for the Next 7 Days:
            Date                  Item  Predicted Min  Predicted Max
0     2025-04-14              Amaranth        3584.77        4268.00
1     2025-04-14       Amaranth Leaves        1231.00        2027.00
2     2025-04-14                  Amla        2425.00        3092.00
3     2025-04-14             Anthurium          73.70         125.40
4     2025-04-14        Apple - Shimla        3031.00       12407.00
...          ...                   ...            ...            ...
1178  2025-04-20   White Chrysanthemum          43.60          66.66
1179  2025-04-20            Wood Apple        1000.00        2083.00
1180  2025-04-20                   Yam         833.00        1502.00
1181  2025-04-20  Yellow Chrysanthemum          44.00          66.96
1182  2025-04-20              Zucchini        17