In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt

# Step 1: Load and Preprocess the Data

# Load the CSV file
data = pd.read_csv(r'C:\Users\sarav\OneDrive\Desktop\SIH DATAS\tomato_whole_sail.csv', encoding='ISO-8859-1')

# Inspect the columns to see what they are named
print("Columns in the dataset:", data.columns)

# Clean up column names (remove leading/trailing spaces)
data.columns = data.columns.str.strip()

# Display the first few rows to understand the data structure
print("First few rows of the dataset:")
print(data.head())

# Assuming the first column is the date or month and the rest are the prices
# Adjust this according to the actual structure of your CSV
# Replace 'Date' with the correct column name if it's different
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'], format='%Y-%m')
    data.set_index('Date', inplace=True)
elif 'Month' in data.columns:  # If the column is named 'Month'
    data['Month'] = pd.to_datetime(data['Month'], format='%Y-%m')
    data.set_index('Month', inplace=True)
else:
    print("Date column not found. Please check the column names.")
    print(data.columns)

# Select the relevant columns (e.g., Zone-wise prices)
# Modify this part according to your dataset structure
prices = data.iloc[:, :].values  # Assuming all columns are prices

# Step 2: Feature Scaling
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_prices = scaler.fit_transform(prices)

# Step 3: Prepare the Data for LSTM Model
X = []
y = []
n_future = 1   # Number of months we want to predict into the future
n_past = 12    # Number of past months we want to use to predict the future

for i in range(n_past, len(scaled_prices) - n_future + 1):
    X.append(scaled_prices[i - n_past:i, 0:data.shape[1]])
    y.append(scaled_prices[i + n_future - 1:i + n_future, 0])

X, y = np.array(X), np.array(y)

# Step 4: Build the LSTM Model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(n_past, data.shape[1])))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=data.shape[1]))

model.compile(optimizer='adam', loss='mean_squared_error')

# Step 5: Train the Model
model.fit(X, y, epochs=20, batch_size=32)

# Step 6: Make Predictions for the Future
predictions = model.predict(X)

# Inverse transform to get actual prices
predicted_prices = scaler.inverse_transform(predictions)

# Step 7: Visualize the Results
plt.figure(figsize=(10, 6))
plt.plot(data.index[-len(predicted_prices):], predicted_prices, color='red', label='Predicted Prices')
plt.plot(data.index, prices, color='blue', label='Actual Prices')
plt.title('Price Prediction')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


Columns in the dataset: Index(['Department of Consumer Affairs (Price Monitoring Division)',
       'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5',
       'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10',
       'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14',
       'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18',
       'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22',
       'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26',
       'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30',
       'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34',
       'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38',
       'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42',
       'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46',
       'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50',
       'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54',
       'Unnamed

ValueError: could not convert string to float: 'Zonewise Monthly Average Wholesale Prices of\xa0\xa0 Onion '