In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Download the Abalone dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
column_names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Shell_weight', 'Rings']
abalone_data = pd.read_csv(url, names=column_names)

# Convert 'Sex' to numerical values using LabelEncoder
le = LabelEncoder()
abalone_data['Sex'] = le.fit_transform(abalone_data['Sex'])

# Split the dataset into features (X) and target variable (y)
X = abalone_data.drop('Rings', axis=1)
y = abalone_data['Rings']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear regression model
linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)

# Predict the number of rings
y_pred = linear_reg.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("a) Predict the number of rings either as a continuous value or as a classification problem.")
print("b) Predict the age of abalone from physical measurements using linear regression using python.")
print("\nMean Squared Error:", mse)
print("R-squared Score:", r2)


a) Predict the number of rings either as a continuous value or as a classification problem.
b) Predict the age of abalone from physical measurements using linear regression using python.

Mean Squared Error: 5.062537954095235
R-squared Score: 0.5323381317508207
