In [1]:
# import necessary libraries
import numpy as np
import pandas as pd  
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score, confusion_matrix  

In [2]:
# load the dataset from a CSV file into a pandas DataFrame
df = pd.read_csv('abalone.csv')

# add column names
df.columns = ['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 
              'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']

# test opening the file and column names
print(df.head())

  Sex  Length  Diameter  Height  Whole weight  Shucked weight  Viscera weight  \
0   M   0.350     0.265   0.090        0.2255          0.0995          0.0485   
1   F   0.530     0.420   0.135        0.6770          0.2565          0.1415   
2   M   0.440     0.365   0.125        0.5160          0.2155          0.1140   
3   I   0.330     0.255   0.080        0.2050          0.0895          0.0395   
4   I   0.425     0.300   0.095        0.3515          0.1410          0.0775   

   Shell weight  Rings  
0         0.070      7  
1         0.210      9  
2         0.155     10  
3         0.055      7  
4         0.120      8  


In [3]:
# get minimum and maximum Abalone rings from the dataset
min_rings, max_rings = min(df['Rings']), max(df['Rings'])

# print the minimum and maximum Abalone rings from the dataset
print(f'min: {min_rings}, max: {max_rings}')

# convert the numeric 'Rings' variable to a categorical representation by splitting the 
# values into five groups of rings.
for i in df.index:
    group = df.loc[i, 'Rings'] % 5
    df.loc[i, 'Rings'] = group

# test changed 'Rings' column
print(df.head())

min: 1, max: 29
  Sex  Length  Diameter  Height  Whole weight  Shucked weight  Viscera weight  \
0   M   0.350     0.265   0.090        0.2255          0.0995          0.0485   
1   F   0.530     0.420   0.135        0.6770          0.2565          0.1415   
2   M   0.440     0.365   0.125        0.5160          0.2155          0.1140   
3   I   0.330     0.255   0.080        0.2050          0.0895          0.0395   
4   I   0.425     0.300   0.095        0.3515          0.1410          0.0775   

   Shell weight  Rings  
0         0.070      2  
1         0.210      4  
2         0.155      0  
3         0.055      2  
4         0.120      3  


In [4]:
# convert all categorical variables in the dataset to numeric
for col in df.columns:
    labels, uniques = pd.factorize(df[col]) 
    df[col] = labels 

print(df.head())

y = df['Rings'] # define the target variable
X = df.drop(columns=['Rings', 'Sex']) # define the feature variables, 'Sex' has a low importance

# split the dataset into training and test subsets using 80-20 split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

   Sex  Length  Diameter  Height  Whole weight  Shucked weight  \
0    0       0         0       0             0               0   
1    1       1         1       1             1               1   
2    0       2         2       2             2               2   
3    2       3         3       3             3               3   
4    2       4         4       4             4               4   

   Viscera weight  Shell weight  Rings  
0               0             0      0  
1               1             1      1  
2               2             2      2  
3               3             3      0  
4               4             4      3  


In [5]:
# initialize a Random Forest Classifier
rfc = RandomForestClassifier(n_estimators=500, max_depth=10, random_state=42)

# train the Random Forest model on the training data
rfc.fit(X_train, y_train)

# evaluate the trained Random Forest model
predictions = rfc.predict(X_test)

# calculate the accuracy of the model
print("Accuracy score:", accuracy_score(y_test, predictions))

# calculate the confusion matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))

Accuracy score: 0.2834928229665072
Confusion Matrix:
 [[14 37 23 37 25]
 [ 4 71 25 40 31]
 [12 57 49 31 41]
 [16 59 18 54 16]
 [22 26 46 33 49]]
