# In [4]:
import pandas as pd
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_absolute_percentage_error

# PLS regression baseline: predict PRODUCT_LENGTH from PRODUCT_TYPE_ID,
# estimate the score on a hold-out slice of the training data, then write a
# submission file with predictions for the test set.

# Load data
train_df = pd.read_csv('/kaggle/input/ml-datasets/train.csv')
test_df = pd.read_csv('/kaggle/input/ml-datasets/test.csv')

# Combine train and test data (not used for modelling below; kept, with NaNs
# replaced by 0, in case other cells rely on it)
full_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)
full_df.fillna(0, inplace=True)

# Select features. The NaN -> 0 replacement is applied to the feature frames
# themselves: filling only `full_df` (as before) never reached the data that
# is actually passed to the model.
X_train = train_df[['PRODUCT_TYPE_ID']].fillna(0)
X_test = test_df[['PRODUCT_TYPE_ID']].fillna(0)
y_train = train_df['PRODUCT_LENGTH']
# True test targets, or None when the test file has no PRODUCT_LENGTH column.
# The previous code stored the PRODUCT_TYPE_ID feature here and scored the
# predictions against it, which made the printed score meaningless.
y_test = test_df.get('PRODUCT_LENGTH')

# Hold out a reproducible 20% of the labelled rows for validation.
val_index = train_df.sample(frac=0.2, random_state=42).index
is_val = train_df.index.isin(val_index)
X_fit, y_fit = X_train[~is_val], y_train[~is_val]
X_val, y_val = X_train[is_val], y_train[is_val]

# Train PLS Regression model on the fitting split and score it on the
# validation split (predicted length vs. true length).
n_components = 1  # Choose the number of components
pls = PLSRegression(n_components=n_components)
pls.fit(X_fit, y_fit)
val_pred = np.ravel(pls.predict(X_val))
score = max(0, 100*(1-mean_absolute_percentage_error(y_val, val_pred)))
print(score)

# Refit on every labelled row so the submission model uses all available data
pls = PLSRegression(n_components=n_components)
pls.fit(X_train, y_train)

# Make predictions on test data
y_pred = pls.predict(X_test)

# Create submission file
sub_df = pd.DataFrame({'PRODUCT_ID': test_df['PRODUCT_ID'], 'PRODUCT_LENGTH': np.ravel(y_pred)})
sub_df.set_index('PRODUCT_ID', inplace=True)
sub_df.to_csv('PLS Regression Result Submission.csv')


0
