<a href="https://colab.research.google.com/github/Vinayyarakalapudi/miniproject/blob/main/price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Prints a fixed greeting; presumably a leftover kernel sanity check -- TODO confirm it is still needed.
print("hello")

hello


In [2]:
!pip install scikit-learn pandas numpy matplotlib seaborn



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [4]:
# Load the raw price table. The path is relative to the kernel's working
# directory, so the CSV must be uploaded/placed there before this cell runs.
DATA_PATH = "crop_price_dataset.csv"

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Same exception type as before, but with a message that says what to do.
    raise FileNotFoundError(
        f"'{DATA_PATH}' was not found in the working directory. "
        "Upload the dataset (e.g. via the Colab file panel) or update DATA_PATH, "
        "then re-run this cell."
    ) from exc

df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'crop_price_dataset.csv'

In [None]:
# Convert the 'Date' column to datetime values, then order the rows by date.
df = df.assign(Date=pd.to_datetime(df['Date'])).sort_values(by='Date')

df.head()

In [None]:
# IQR-based outlier filter on 'Price': keep only rows whose price lies within
# 1.5 * IQR below the first quartile and 1.5 * IQR above the third quartile
# (both bounds inclusive).
Q1, Q3 = df['Price'].quantile([0.25, 0.75])
IQR = Q3 - Q1

price_floor = Q1 - 1.5*IQR
price_ceiling = Q3 + 1.5*IQR

df = df[df['Price'].between(price_floor, price_ceiling)]

In [None]:
# Build the model inputs: calendar month, the three previous prices, and
# rolling statistics of *previous* prices.
#
# Every price-derived feature is computed from Price.shift(1) or older, so no
# feature for a row contains that row's own Price (the prediction target).
# Rolling directly over 'Price' would include the current value and leak the
# target into the inputs.

# Work on an independent copy: df is a boolean-filtered selection at this
# point, and adding columns to it can raise SettingWithCopyWarning.
df = df.copy()

df['Month'] = df['Date'].dt.month

previous_price = df['Price'].shift(1)

df['Lag1'] = previous_price
df['Lag2'] = df['Price'].shift(2)
df['Lag3'] = df['Price'].shift(3)

# Windows end at the previous row (t-1), never at the current row.
df['Rolling_Mean_3'] = previous_price.rolling(3).mean()
df['Rolling_Mean_6'] = previous_price.rolling(6).mean()
df['Rolling_Std_3'] = previous_price.rolling(3).std()

# The leading rows lack a full lag/rolling history and are dropped here.
df = df.dropna()

df.head()

In [None]:
# Input column names, grouped by kind. The order of `features` is the column
# order of X.
lag_features = ['Lag1', 'Lag2', 'Lag3']
rolling_features = ['Rolling_Mean_3', 'Rolling_Mean_6', 'Rolling_Std_3']
features = ['Month', *lag_features, *rolling_features]

# X holds the model inputs, y the price to predict.
X, y = df[features], df['Price']

In [None]:
# Cross-validation splitter used by the hyper-parameter search.
N_CV_SPLITS = 5
tscv = TimeSeriesSplit(n_splits=N_CV_SPLITS)

In [None]:
# Candidate hyper-parameters for the random forest; every combination is
# scored with the time-series splitter using (negated) RMSE.
param_grid = dict(
    n_estimators=[400, 600],
    max_depth=[15, 25, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    max_features=['sqrt'],
)

rf = RandomForestRegressor(random_state=42)

# fit() returns the fitted search object, so construction and fitting chain.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=tscv,
    n_jobs=-1,
).fit(X, y)

best_model = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)

In [None]:
# best_model is the estimator GridSearchCV refit on all of X, so predicting on
# X scores the model on rows it was trained on. These in-sample numbers are
# optimistic and are labelled as such; the cross-validated RMSE below is the
# estimate of performance on unseen periods.
predictions = best_model.predict(X)

rmse = np.sqrt(mean_squared_error(y, predictions))
mae = mean_absolute_error(y, predictions)
r2 = r2_score(y, predictions)

# The search was scored with 'neg_root_mean_squared_error', so best_score_ is
# the negated mean RMSE over the validation folds for the winning parameters.
cv_rmse = -grid_search.best_score_

print("In-sample RMSE:", rmse)
print("In-sample MAE:", mae)
print("In-sample R2 Score:", r2)
print("Cross-validated RMSE:", cv_rmse)

In [None]:
# Overlay the final 50 target values and the model's predictions for the
# same positions.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y.values[-50:], label="Actual")
ax.plot(predictions[-50:], label="Predicted")
ax.set_title("Actual vs Predicted Crop Prices")
ax.legend()
plt.show()

In [None]:
# Horizontal bar chart of the fitted forest's importance score per input column.
importance = best_model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(features, importance)
ax.set_title("Feature Importance")
plt.show()

In [None]:
# Recursive 6-step forecast.
#
# Each step builds a complete feature row for the *next* period from the
# running price history (observed prices followed by earlier predictions),
# predicts it, and appends the prediction to the history. All features are
# refreshed every step: the month advances, the lags shift, and the rolling
# statistics are recomputed from the latest prices.
#
# NOTE(review): stepping one calendar month per prediction assumes one row per
# month, as the "6-Month Future Price Trend" plot of these values implies --
# confirm against the dataset.
history = y.tolist()
last_date = df['Date'].iloc[-1]

assert len(history) >= 6, "need at least 6 prices to compute the 6-period rolling mean"

future_prices = []

for step in range(1, 7):
    next_date = last_date + pd.DateOffset(months=step)
    recent = pd.Series(history[-6:])

    # Named columns in training order, so the model receives the same feature
    # layout (and names) it was fitted with.
    next_row = pd.DataFrame(
        [{
            'Month': next_date.month,
            'Lag1': history[-1],
            'Lag2': history[-2],
            'Lag3': history[-3],
            'Rolling_Mean_3': recent.iloc[-3:].mean(),
            'Rolling_Mean_6': recent.mean(),
            'Rolling_Std_3': recent.iloc[-3:].std(),  # sample std, as pandas rolling().std()
        }],
        columns=features,
    )

    pred = float(best_model.predict(next_row)[0])
    future_prices.append(pred)

    # The prediction becomes the most recent "known" price for the next step.
    history.append(pred)

future_prices

In [None]:
# Fit a straight line through the forecast values; its slope gives the trend.
slope = np.polyfit(range(len(future_prices)), future_prices, 1)[0]

# A least-squares slope is almost never exactly 0.0 even for a flat forecast,
# so treat slopes within floating-point noise of zero as "Stable".
is_flat = np.isclose(slope, 0.0)

if slope > 0 and not is_flat:
    trend = "Upward 📈"
elif slope < 0 and not is_flat:
    trend = "Downward 📉"
else:
    trend = "Stable ➖"

print("Predicted Trend:", trend)

In [None]:
# Line chart of the six forecast values against their step number (1-6).
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(range(1, 7), future_prices, marker='o')
ax.set(
    title="6-Month Future Price Trend",
    xlabel="Future Months",
    ylabel="Predicted Price",
)
plt.show()

In [None]:
# Persist the fitted model. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open("rf_trend_model.pkl", "wb") as model_file:
    pickle.dump(best_model, model_file)