In [1]:
import pickle

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [13]:
# Load the freshness dataset; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('fruit_vegetable_freshness_dataset.csv')

In [9]:
# Show the frame's dimensions as (rows, columns).
df.shape

(100, 6)

In [10]:
# Preview the first five rows to check column names and value ranges.
df.head()

Unnamed: 0,item,color_score,shape_score,wrinkle_score,mark_score,freshness_score
0,Apple,16.18987,16.640118,-1167.152144,85.278758,10
1,Banana,29.197536,12.398433,-1451.876934,81.03326,5
2,Tomato,29.190293,10.794806,-1409.98124,80.442877,2
3,Potato,9.411628,18.285855,-1134.794454,95.33217,9
4,Carrot,20.703719,13.238283,-1164.451912,98.568411,3


In [14]:
# Count how often each distinct full row (all columns combined) occurs.
# NOTE(review): in the recorded output every count is 1, so this says nothing
# about how many samples each item has — presumably
# df['item'].value_counts() was intended; confirm before relying on it.
df.value_counts()

item    color_score  shape_score  wrinkle_score  mark_score  freshness_score
Apple   10.19        17.44        -1410.75       82.79       4                  1
Potato  14.66        19.24        -1433.30       76.32       1                  1
        13.65        16.00        -1320.70       78.25       4                  1
        13.69        15.51        -1407.88       79.02       7                  1
        13.83        19.00        -1485.49       75.32       4                  1
                                                                               ..
Banana  19.33        16.68        -1364.29       82.24       7                  1
                     16.87        -1376.20       72.19       1                  1
        19.39        15.42        -1399.35       80.88       7                  1
        19.49        17.66        -1461.12       72.05       4                  1
Tomato  39.96        11.71        -1047.65       98.39       10                 1
Name: count, Length: 

In [15]:
# Turn the item names into integer codes so they can sit alongside the numeric
# score columns. The encoder is fitted first and then applied to the same
# column, which yields the same codes as a single fit_transform call.
# NOTE(review): the codes are later fed to a linear model as an ordinary
# numeric feature, which implies an ordering between items — confirm that is
# acceptable for this use case.
label_encoder = LabelEncoder()
label_encoder.fit(df['item'])
df['item_encoded'] = label_encoder.transform(df['item'])

In [17]:
# Model inputs: the four score columns plus the integer-encoded item.
feature_columns = [
    "color_score",
    "shape_score",
    "wrinkle_score",
    "mark_score",
    "item_encoded",
]
X = df[feature_columns]

# Regression target.
Y = df["freshness_score"]

In [18]:
# Write the fitted encoder to disk so the same item-to-integer mapping can be
# reloaded later; joblib.dump returns the list of file names it wrote.
joblib.dump(label_encoder, "label_encoder.pkl")

['label_encoder.pkl']

In [19]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Linear regression estimator created with scikit-learn's default settings.
model = LinearRegression()

In [21]:
# Fit on the training split only; the test split is kept aside for evaluation.
model.fit(X_train, Y_train)

In [24]:
# Score the held-out test split with the fitted model.
Y_pred = model.predict(X_test)

# The two metrics are computed independently from the same predictions.
r2 = r2_score(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)

# Report MSE first, then R-squared.
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 4.962729252282254
R-squared: 0.5124433684522691


In [26]:
# Pair every input column with its fitted weight, one row per feature.
# (The model's intercept is not part of this table.)
coefficients = pd.DataFrame({"Feature": X.columns}).assign(
    Coefficient=model.coef_
)

print(coefficients)

         Feature  Coefficient
0    color_score     0.146475
1    shape_score    -0.138801
2  wrinkle_score     0.005014
3     mark_score     0.042981
4   item_encoded     0.028261


In [None]:
# Build a single hand-made sample to run through the fitted model.
new_item = "Banana"

# Reuse the fitted encoder so the item gets the same integer code it had
# during training; transform() returns an array, hence the [0].
encoded_item = label_encoder.transform([new_item])[0]

# The model was fitted on a DataFrame, so the sample is passed as a one-row
# DataFrame carrying the same column names in the same order. A bare nested
# list would make scikit-learn warn that X has no valid feature names and
# would silently depend on positional order.
new_data = pd.DataFrame(
    [
        {
            "color_score": 35.0,
            "shape_score": 12.0,
            "wrinkle_score": -1100.0,
            "mark_score": 95.0,
            "item_encoded": encoded_item,
        }
    ]
)

In [31]:
# Predict the freshness score for the single sample held in `new_data`.
predicted_freshness = model.predict(new_data)

# predict() returns one value per input row, so index 0 is the only prediction.
print(f"Predicted Freshness Score: {predicted_freshness[0]}")

Predicted Freshness Score: 9.815461966815313




In [38]:
# Persist the fitted regression model so it can be loaded without retraining.
# NOTE: only unpickle this file from a trusted source — pickle.load can
# execute arbitrary code embedded in the file.
with open('regression_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [2]:
# Hand-entered list of four numeric values.
# NOTE(review): the regression above was fitted on five features (four scores
# plus item_encoded) — presumably an encoded item is appended before this is
# used for prediction; confirm.
x = [39.10938145750104, 12.5, -1093.912836651054, 99.1455649882904]