In [1]:
# Cell 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pickle
import io

In [17]:
# Cell 2: Load the raw price-index table from the CSV file
df = pd.read_csv(filepath_or_buffer='price_prediction.csv')

In [18]:
# Cell 3: Reshape from wide to long format.
# 'Crop' stays as the identifier column; every other column header becomes a
# value in 'Year' and the corresponding cell becomes 'Price Index'.
df_melted = df.melt(id_vars=['Crop'], var_name='Year', value_name='Price Index')

In [19]:
# Cell 4: Convert 'Year' to numeric
# The year labels originate from the CSV column headers, so they start out as
# text. Any '-' is stripped (literal match, not a regex) before casting to int.
# Casting to str first keeps this cell safe to re-run: once 'Year' is already
# an int column, calling `.str` on it directly would raise AttributeError.
df_melted['Year'] = (
    df_melted['Year']
    .astype(str)
    .str.replace('-', '', regex=False)
    .astype(int)
)
df_melted

Unnamed: 0,Crop,Year,Price Index
0,Rice,2004,100
1,Wheat,2004,100
2,Coarse Cereals,2004,100
3,Pulses,2004,100
4,Vegetables,2004,100
...,...,...,...
91,"Eggs, Fish and Meat",2011,137
92,Oilseeds,2011,102
93,Sugarcane,2011,107
94,Fibers,2011,140


In [20]:
# Cell 5: One-hot encode the 'Crop' column
# sparse_output=False makes the encoder return a dense NumPy array.
encoder = OneHotEncoder(sparse_output=False)
encoded_crops = encoder.fit_transform(df_melted[['Crop']])
# Reuse df_melted's index so the column-wise concat below pairs each encoded
# row with its source row. A fresh RangeIndex would misalign (and introduce
# NaNs) if df_melted were ever filtered or re-indexed upstream.
encoded_df = pd.DataFrame(
    encoded_crops,
    columns=encoder.get_feature_names_out(['Crop']),
    index=df_melted.index,
)
# Swap the text 'Crop' column for its indicator columns.
df_encoded = pd.concat([df_melted.drop(columns='Crop'), encoded_df], axis=1)

In [21]:
# Cell 6: Separate the target column (y) from the feature columns (X)
y = df_encoded['Price Index']
X = df_encoded.drop(columns=['Price Index'])

In [22]:
# Cell 7: Hold out 20% of the rows for testing (seeded for reproducibility)
# NOTE(review): train_test_split shuffles rows by default, so years are mixed
# across both sets — confirm that is intended for this data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [23]:
# Cell 8: Standardize the feature columns
# The scaler's statistics are learned from the training rows only and then
# applied to both splits.
# NOTE(review): the later cells fit and predict with the unscaled X_train /
# X_test, so these scaled arrays are not consumed in the visible cells.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [24]:
# Cell 9: Create the Gradient Boosting regressor (fixed seed, other settings left at defaults)
gbr = GradientBoostingRegressor(
    random_state=42,
)

In [25]:
# Cell 10: Fit the regressor on the training split
gbr.fit(X=X_train, y=y_train)

In [26]:
# Cell 11: Predict the price index for the held-out test rows
y_pred = gbr.predict(X=X_test)

In [27]:
# Cell 12: Score the test-set predictions
# Both metrics compare the true test targets with the model's predictions.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# One print call, one metric per line.
print(f'Mean Squared Error: {mse}', f'R-squared: {r2}', sep='\n')

Mean Squared Error: 55.65365518402736
R-squared: 0.7189883478255098


In [28]:
# Cell 13: Serialize the fitted regressor to disk with pickle
# NOTE(review): only `gbr` is written here; the fitted `encoder` is not saved.
with open('gradient_boosting_model_onehot.pkl', mode='wb') as model_file:
    pickle.dump(gbr, model_file)

In [32]:
# Cell 14: Example prediction: Rice price index for the year 2010.
# The crop and year are named once so the model input and the printed message
# cannot drift apart.
crop = 'Rice'
year = 2010

# Hand the encoder a DataFrame with the same 'Crop' column name it was fitted
# on; a bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning.
crop_feature = pd.DataFrame(
    encoder.transform(pd.DataFrame({'Crop': [crop]})),
    columns=encoder.get_feature_names_out(['Crop']),
)

# Same layout as the training features: 'Year' first, then the crop indicators.
example_input = pd.concat([pd.DataFrame({'Year': [year]}), crop_feature], axis=1)

predicted_price_index = gbr.predict(example_input)
print(f"Predicted {crop} Price Index for {year}: {predicted_price_index[0]}")

Predicted Rice Price Index for 2010: 117.67191673511438


