# Abalone Age Prediction using Linear Regression

In [1]:
###! METADATA !###

#     Author  :  Abu Bakar Siddique Arman
#      Email  :  abubakar.arman.cse@gmail.com
#     Github  :  https://github.com/abubakar-arman
#   LinkedIn  :  https://www.linkedin.com/in/abubakar-arman/
#    Youtube  :  https://www.youtube.com/@arman-bhaai
#   Facebook  :  https://www.facebook.com/arman.bhaai/

#    Dataset  :  https://www.kaggle.com/datasets/rodolfomendes/abalone-dataset

# Import Packages

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load Dataset

In [3]:
# df = pd.read_csv('dataset/abalone.csv')
df = pd.read_csv('https://raw.githubusercontent.com/arman-bhaai/dataset/refs/heads/main/abalone.csv')
df

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


# Handle Null Values

In [4]:
df.isnull().sum()

Unnamed: 0,0
Sex,0
Length,0
Diameter,0
Height,0
Whole weight,0
Shucked weight,0
Viscera weight,0
Shell weight,0
Rings,0


# Convert Categorical Values into Numerical Values

In [5]:
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
df

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,2,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,2,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,0,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,2,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,1,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,0,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,2,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,2,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,0,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


# Separate Input Features and Target Variables

In [6]:
x = df.drop('Rings', axis=1)
y = df['Rings']
x, y

(      Sex  Length  Diameter  Height  Whole weight  Shucked weight  \
 0       2   0.455     0.365   0.095        0.5140          0.2245   
 1       2   0.350     0.265   0.090        0.2255          0.0995   
 2       0   0.530     0.420   0.135        0.6770          0.2565   
 3       2   0.440     0.365   0.125        0.5160          0.2155   
 4       1   0.330     0.255   0.080        0.2050          0.0895   
 ...   ...     ...       ...     ...           ...             ...   
 4172    0   0.565     0.450   0.165        0.8870          0.3700   
 4173    2   0.590     0.440   0.135        0.9660          0.4390   
 4174    2   0.600     0.475   0.205        1.1760          0.5255   
 4175    0   0.625     0.485   0.150        1.0945          0.5310   
 4176    2   0.710     0.555   0.195        1.9485          0.9455   
 
       Viscera weight  Shell weight  
 0             0.1010        0.1500  
 1             0.0485        0.0700  
 2             0.1415        0.2100  
 3    

# Split Train and Test Dataset

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# x_train, x_test, y_train, y_test


In [8]:
lr = LinearRegression()
lr

# Train Model

In [9]:
lr.fit(x_train, y_train)

# Model Evaluation

In [10]:
lr.score(x_test, y_test)

0.5323381317508205