In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [4]:
df = pd.read_csv(r"C:\Users\yashs\OneDrive\Desktop\MIDS  code\abalone - abalone.csv")

In [5]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [6]:
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])  # 'M':0, 'F':1, 'I':2

In [7]:
X = df.drop("Rings", axis=1)
y = df["Rings"]

In [8]:
age = y + 1.5

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
reg = LinearRegression()
reg.fit(X_train, y_train)

In [11]:
y_pred = reg.predict(X_test)

In [12]:
print("Linear Regression Results for Ring Prediction:")
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

Linear Regression Results for Ring Prediction:
MSE: 5.0625379540952355
R² Score: 0.5323381317508206


In [13]:
df['AgeGroup'] = pd.cut(y, bins=[0, 9, 19, df['Rings'].max()], labels=['Young', 'Adult', 'Old'])

In [14]:
y_class = df['AgeGroup']
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(X, y_class, test_size=0.2, random_state=42)

In [15]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_class_train, y_class_train)

In [16]:
y_class_pred = clf.predict(X_class_test)

In [18]:
print("\nClassification Results:")
print(confusion_matrix(y_class_test, y_class_pred))
print(classification_report(y_class_test, y_class_pred, zero_division=0))
    



Classification Results:
[[320   0  81]
 [ 14   0   0]
 [ 94   0 327]]
              precision    recall  f1-score   support

       Adult       0.75      0.80      0.77       401
         Old       0.00      0.00      0.00        14
       Young       0.80      0.78      0.79       421

    accuracy                           0.77       836
   macro avg       0.52      0.52      0.52       836
weighted avg       0.76      0.77      0.77       836



In [19]:
X_age_train, X_age_test, y_age_train, y_age_test = train_test_split(X, age, test_size=0.2, random_state=42)

In [20]:
age_model = LinearRegression()
age_model.fit(X_age_train, y_age_train)

In [21]:
age_pred = age_model.predict(X_age_test)

In [22]:
print("Age Prediction Using Linear Regression:")
print("MSE:", mean_squared_error(y_age_test, age_pred))
print("R² Score:", r2_score(y_age_test, age_pred))

Age Prediction Using Linear Regression:
MSE: 5.0625379540952355
R² Score: 0.5323381317508206
