In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

# Read the fish measurements CSV (path is relative to the notebook's working directory)
csv_path = 'Fish.csv'
fish_data = pd.read_csv(csv_path)

# Preview the first five rows as the cell's displayed output
fish_data.head(n=5)


Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [18]:

# --- Feature / target selection ---
# Weight is the quantity being predicted, so it becomes the target. Species is
# removed along with it; every other column is kept as a model input.
y = fish_data['Weight']
X = fish_data.drop(columns=['Species', 'Weight'])

# --- Hold-out split ---
# 20% of the rows are reserved for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model fitting ---
# fit() returns the fitted estimator, so construction and training are chained.
regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation on the held-out rows ---
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # square root of the MSE, i.e. error in the target's own units
print(f"Model RMSE: {rmse}")

# --- Persist the fitted model ---
# Left as the cell's last expression, so the list joblib.dump returns is displayed.
model_filename = 'fish_weight_regressor.pkl'
joblib.dump(regressor, model_filename)

Model RMSE: 67.3207305924408


['fish_weight_regressor.pkl']