In [10]:
# importing the libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score


### MAE=Measures the average absolute difference between actual and predicted values.

## RMSE =Interpretable in the same unit as the output variable.

## R2=Represents the proportion of variance explained by the model.

In [11]:
pre=pd.read_csv('Prediction.csv')

Feature=pre[['Area harvested','Yield','Year']]
Target=pre['Production']

Feature_train,Feature_test,Target_train,Target_test=train_test_split(Feature,Target,test_size=0.2,random_state=42)

## Linear Regression

In [12]:
lr = LinearRegression()
lr.fit(Feature_train, Target_train)
y_pred_lr = lr.predict(Feature_test)

print("Linear Regression:")
print("R2 Score:", r2_score(Target_test, y_pred_lr))
print("MAE:", mean_absolute_error(Target_test, y_pred_lr))
print("MSE:", mean_squared_error(Target_test, y_pred_lr))

Linear Regression:
R2 Score: 0.56492334463544
MAE: 775200.9003662724
MSE: 32454962924921.99


## Random_Forest

In [13]:
RF_Model=RandomForestRegressor(n_estimators=100,random_state=42)
RF_Model.fit(Feature_train,Target_train)

Target_predict=RF_Model.predict(Feature_test)

print(f"The Mean Absolute Error: {mean_absolute_error(Target_test,Target_predict)}")
print(f"The Mean Square Error : {mean_squared_error(Target_test,Target_predict)}")
print(f"The Root Mean Square Error: {np.sqrt(mean_absolute_error(Target_test,Target_predict))}")
print(f"The R2 Score : {r2_score(Target_test,Target_predict)}")

The Mean Absolute Error: 239580.6109116484
The Mean Square Error : 3419431519988.837
The Root Mean Square Error: 489.4697242032937
The R2 Score : 0.9541606369291986


In [14]:
results = {
    'Model': ['Linear Regression', 'Random Forest'],
    'R2 Score': [r2_score(Target_test, y_pred_lr), r2_score(Target_test, Target_predict)],
    'MAE': [mean_absolute_error(Target_test, y_pred_lr), mean_absolute_error(Target_test, Target_predict)],
    'MSE': [mean_squared_error(Target_test, y_pred_lr), mean_squared_error(Target_test, Target_predict)],
}

import pandas as pd
results_df = pd.DataFrame(results)
print(results_df)


               Model  R2 Score            MAE           MSE
0  Linear Regression  0.564923  775200.900366  3.245496e+13
1      Random Forest  0.954161  239580.610912  3.419432e+12


In [15]:
joblib.dump(RF_Model, 'random_forest_model.pkl')

['random_forest_model.pkl']