In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib

In [None]:
# Load the dataset from 'dataset.csv' (path is relative to the current working
# directory). Later cells read its 'Commodity' and 'Average' columns.
data = pd.read_csv('dataset.csv')

In [None]:
# Feature matrix: a one-column DataFrame holding the commodity name
X = data.loc[:, ['Commodity']]
# Target vector: the average price the model learns to predict
y = data.loc[:, 'Average']

In [None]:
# One-hot encode 'Commodity'; drop_first=True leaves out the first category's
# indicator column, so that category is represented by all-zero indicators.
X_encoded = pd.get_dummies(
    data=X,
    columns=['Commodity'],
    drop_first=True,
)

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create a linear regression model (scikit-learn LinearRegression with its
# default settings; nothing is fitted yet).
model = LinearRegression()

In [None]:
# Fit the model on the training split only; X_test / y_test stay held out.
model.fit(X_train, y_train)

In [None]:
# Persist the fitted model to 'trained_model.joblib' in the working directory.
# NOTE(review): only the estimator is saved; the one-hot column layout
# (X_encoded.columns) is not stored with it, so whoever loads this file must
# rebuild identical columns before predicting -- confirm how it is consumed.
joblib.dump(model, 'trained_model.joblib')

In [None]:
# Commodity to predict the price for. The later cells look up the dummy column
# named 'Commodity_<vegetable_name>', so this must match a value of the
# 'Commodity' column exactly.
vegetable_name = "Onion Green"

In [None]:
# Create a single-row DataFrame with the same columns as X_encoded, with every
# indicator initialised to 0. Starting from one all-zero numeric row (instead
# of an empty object-dtype frame) means the input is already a valid sample:
# all zeros is exactly how the category dropped by drop_first=True is encoded,
# and no missing values have to be patched up afterwards.
input_data_encoded = pd.DataFrame(0, index=[0], columns=X_encoded.columns)

In [None]:
# Set the value to 1 for the commodity you're predicting.
# X_encoded was built with drop_first=True, so one commodity (the baseline)
# has no indicator column and is encoded as a row of all zeros. Assigning to a
# column that does not exist would add an extra column, and the row would no
# longer match the features the model was fitted on, so the three cases are
# handled explicitly.
commodity_column = f'Commodity_{vegetable_name}'
if commodity_column in input_data_encoded.columns:
    input_data_encoded.loc[0, commodity_column] = 1
elif (data['Commodity'] == vegetable_name).any():
    # Baseline commodity: it is in the dataset but has no indicator column,
    # so the correct encoding is every indicator set to 0.
    input_data_encoded.loc[0] = 0
else:
    # Not a commodity the model has ever seen (typo or new item).
    raise ValueError(
        f"Unknown commodity {vegetable_name!r}: it does not appear in the "
        "'Commodity' column of the dataset"
    )

In [None]:
# Ensure that the input data does not contain missing values.
# The columns may be object dtype when the row was added by enlargement, so
# cast to float before filling: fillna() then works on numeric columns rather
# than relying on pandas' deprecated object-dtype downcasting. The result is
# rebound instead of mutated in place so the cell is safe to re-run.
input_data_encoded = input_data_encoded.astype(float).fillna(0)

In [None]:
# Run the fitted model on the encoded row and report the first (only)
# prediction, formatted to two decimal places.
predicted_price = model.predict(input_data_encoded)
print(
    f"Predicted average price of {vegetable_name} is: {predicted_price[0]:.2f}"
)