# In [4]:
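# Using robust linear regression (statsmodels RLM with Tukey's biweight norm).
# NOTE: this is an M-estimator, not a Theil-Sen estimator, despite the naming used below.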

import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_percentage_error

# Fit a robust linear model of PRODUCT_LENGTH on PRODUCT_TYPE_ID, report a
# hold-out score, and write a submission file with predictions for test.csv.
#
# NOTE: the estimator is statsmodels' RLM with a Tukey biweight norm (an
# M-estimator), not Theil-Sen. The output filename is kept unchanged so that
# anything consuming the submission file keeps working.

FEATURES = ['PRODUCT_TYPE_ID']
TARGET = 'PRODUCT_LENGTH'

# Load data
train_df = pd.read_csv('/kaggle/input/ml-datasets/train.csv')
test_df = pd.read_csv('/kaggle/input/ml-datasets/test.csv')

# Rows without a label cannot be used for fitting or scoring; imputing the
# target with 0 would bias the fit and make the percentage error explode.
train_df = train_df.dropna(subset=[TARGET])

# Replace the remaining NaN values with 0
train_df = train_df.fillna(0)
test_df = test_df.fillna(0)

# Select features
X_train = train_df[FEATURES]
X_test = test_df[FEATURES]
y_train = train_df[TARGET]

# Estimate the score on a reproducible 20% hold-out of the labelled data.
# NOTE(review): test.csv is assumed to carry no PRODUCT_LENGTH labels (the
# previous version scored predictions against PRODUCT_TYPE_ID, which is a
# feature, not the target) - confirm against the dataset.
val_df = train_df.sample(frac=0.2, random_state=42)
fit_df = train_df.drop(index=val_df.index)

# has_constant='add' forces the intercept column even when the feature happens
# to be constant in a frame; the default ('skip') would silently omit it and
# leave the design matrix with a different number of columns than the model.
val_result = sm.RLM(
    fit_df[TARGET],
    sm.add_constant(fit_df[FEATURES], has_constant='add'),
    M=sm.robust.norms.TukeyBiweight(),
).fit()
val_pred = val_result.predict(sm.add_constant(val_df[FEATURES], has_constant='add'))
score = max(0, 100 * (1 - mean_absolute_percentage_error(val_df[TARGET], val_pred)))
print(score)

# Train the final robust regression model on all labelled rows
model = sm.RLM(
    y_train,
    sm.add_constant(X_train, has_constant='add'),
    M=sm.robust.norms.TukeyBiweight(),
)
result = model.fit()

# Make predictions on test data
y_pred = result.predict(sm.add_constant(X_test, has_constant='add'))

# Create submission file
sub_df = pd.DataFrame({'PRODUCT_ID': test_df['PRODUCT_ID'], 'PRODUCT_LENGTH': y_pred})
sub_df.set_index('PRODUCT_ID', inplace=True)
sub_df.to_csv('Theil-Sen Regression Result Submission.csv')


0
