In [41]:
import pandas as pd
import numpy as np

# Parse train.csv once and derive both the feature frame and the target from it
# (the file was previously read twice).
train = pd.read_csv("train.csv")

# drop() returns a new frame, so later in-place feature engineering on X leaves `train` untouched.
X = train.drop(columns=["SampleID", "target_price"])
y = train["target_price"]

def features(df):
    """Add engineered columns to ``df`` in place and return the same frame.

    Columns read: ``canvas_size`` (text of the form ``"<width>x<height>"``),
    ``image_quality``, ``stroke_density``, ``complexity``, ``uses_gold_leaf``,
    ``has_signature``, ``num_colors``, ``colorfulness``, ``contrast`` and
    ``brightness``.

    Columns written:
      * ``width`` / ``height`` -- integers parsed from ``canvas_size``, which is
        then removed from the frame.
      * ``area`` -- ``width * height``.
      * ``image_quality`` -- ordinal code (low=0, medium=1, high=2); labels that
        are missing or not in the map become 0.
      * ``AAS`` -- integer score: weighted sum of the threshold rules below minus
        a one-point lighting penalty.

    The function is safe to call more than once on the same frame (for example
    when a notebook cell is re-run): ``canvas_size`` is parsed only while it is
    still present, and ``image_quality`` is encoded only while it is not yet
    numeric.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If ``canvas_size`` is absent and ``width``/``height`` do not already
        exist, or if any other required column is missing.
    """
    if "canvas_size" in df.columns:
        # Split once and reuse the pieces for both dimensions.
        dims = df["canvas_size"].str.split("x")
        df["width"] = dims.str[0].astype(int)
        df["height"] = dims.str[1].astype(int)
        df.drop(columns=["canvas_size"], inplace=True)
    elif not {"width", "height"}.issubset(df.columns):
        raise KeyError(
            "features() needs either 'canvas_size' or both 'width' and 'height'"
        )

    quality_map = {"low": 0, "medium": 1, "high": 2}

    # Re-mapping an already encoded (numeric) column would turn every value into
    # NaN and then 0, so only encode while the column still holds labels.
    if not pd.api.types.is_numeric_dtype(df["image_quality"]):
        df["image_quality"] = (
            df["image_quality"].map(quality_map).fillna(0).astype(int)
        )

    df["area"] = df["width"] * df["height"]

    # Each rule contributes its weight when the condition holds, 0 otherwise.
    dense_strokes = 2 * (df["stroke_density"] > 0.7).astype(int)
    high_complexity = 2 * (df["complexity"] > 0.65).astype(int)
    gold_leaf = df["uses_gold_leaf"].eq(True).astype(int)
    signed = df["has_signature"].eq(True).astype(int)
    rich_palette = 2 * (
        (df["num_colors"] > 65) & (df["colorfulness"] > 0.7)
    ).astype(int)

    # Penalty: low contrast, or brightness outside the [0.45, 0.75] band.
    bad_lighting = (
        (df["contrast"] < 0.4)
        | (df["brightness"] < 0.45)
        | (df["brightness"] > 0.75)
    )
    lighting_penalty = bad_lighting.astype(int)

    df["AAS"] = (
        dense_strokes
        + high_complexity
        + gold_leaf
        + signed
        + rich_palette
        - lighting_penalty
    )

    return df

# Apply the feature engineering to the training frame. features() adds its columns to X
# in place, so the result is not assigned; as the cell's last expression the returned
# frame is displayed.
features(X)

Unnamed: 0,is_oil_painting,brush_type,num_colors,colorfulness,complexity,brightness,contrast,stroke_density,has_signature,is_framed,...,auction_house,image_quality,brightness_log,complexity_x_stroke,fake_style_score,painter_style_score,width,height,area,AAS
0,True,medium,71,0.616240,0.755582,0.647338,0.587923,0.702118,True,True,...,Online,0,0.499161,0.530508,0.349718,0.536437,60,50,3000,5
1,True,medium,60,0.660715,0.474923,0.538822,0.599076,0.528112,True,False,...,Online,0,0.431017,0.250813,0.258722,0.163906,80,90,7200,1
2,True,fine,64,0.684877,0.380591,0.608029,0.500152,0.508521,True,False,...,Sothebys,0,0.475009,0.193539,0.797662,0.137732,80,50,4000,1
3,True,medium,56,0.427938,0.581636,0.562086,0.483896,0.550152,True,False,...,Local,0,0.446022,0.319988,0.569981,0.394542,80,50,4000,1
4,True,fine,55,0.481406,0.629780,0.476093,0.493429,0.681710,True,True,...,Local,1,0.389398,0.429328,0.723147,0.321858,80,130,10400,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
955,True,medium,67,0.761182,0.740730,0.747183,0.635812,0.871171,True,False,...,Local,0,0.558005,0.645302,0.356524,0.734686,60,90,5400,7
956,True,mixed,44,0.406706,0.574378,0.558835,0.566975,0.746025,False,True,...,Sothebys,1,0.443938,0.428500,0.619575,0.362523,100,70,7000,2
957,True,medium,62,0.594876,0.715957,0.601200,0.548615,0.725513,False,True,...,Sothebys,1,0.470754,0.519436,0.442087,0.956256,80,110,8800,5
958,True,mixed,64,0.599719,0.732753,0.500665,0.532287,0.674759,False,False,...,Local,2,0.405908,0.494432,0.328414,0.952113,80,50,4000,2


In [42]:
# Preview the first rows of X after the features(X) call in the previous cell.
X.head()

Unnamed: 0,is_oil_painting,brush_type,num_colors,colorfulness,complexity,brightness,contrast,stroke_density,has_signature,is_framed,...,auction_house,image_quality,brightness_log,complexity_x_stroke,fake_style_score,painter_style_score,width,height,area,AAS
0,True,medium,71,0.61624,0.755582,0.647338,0.587923,0.702118,True,True,...,Online,0,0.499161,0.530508,0.349718,0.536437,60,50,3000,5
1,True,medium,60,0.660715,0.474923,0.538822,0.599076,0.528112,True,False,...,Online,0,0.431017,0.250813,0.258722,0.163906,80,90,7200,1
2,True,fine,64,0.684877,0.380591,0.608029,0.500152,0.508521,True,False,...,Sothebys,0,0.475009,0.193539,0.797662,0.137732,80,50,4000,1
3,True,medium,56,0.427938,0.581636,0.562086,0.483896,0.550152,True,False,...,Local,0,0.446022,0.319988,0.569981,0.394542,80,50,4000,1
4,True,fine,55,0.481406,0.62978,0.476093,0.493429,0.68171,True,True,...,Local,1,0.389398,0.429328,0.723147,0.321858,80,130,10400,1


In [43]:
from sklearn.cluster import KMeans
from sklearn.compose import TransformedTargetRegressor, make_column_selector, make_column_transformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor

# Price model. The regressor is trained on log1p(target) and its predictions are mapped
# back with expm1, so fit/predict/scoring all work in the original price units.
pipeline = TransformedTargetRegressor(
    regressor=make_pipeline(
        make_column_transformer(
            # handle_unknown="ignore": a category that is absent from the training fold
            # (or only present in test.csv) is encoded as all zeros instead of making
            # predict() raise.
            (
                OneHotEncoder(handle_unknown="ignore"),
                make_column_selector(dtype_include=object),
            ),
            # Every non-object column is forwarded unchanged.
            ("passthrough", make_column_selector(dtype_exclude=object)),
        ),
        XGBRegressor(
            n_estimators=3000,
            n_jobs=-1,
            learning_rate=0.01,
            max_depth=5,
        ),
    ),
    func=np.log1p,
    inverse_func=np.expm1,
)


# StandardScaler is publicly exported by sklearn.preprocessing. Importing it from
# sklearn.discriminant_analysis only works because that module happens to import it
# internally, so bind the name from its documented location here.
from sklearn.preprocessing import StandardScaler

# Clustering model: standardise the numeric columns (all other columns are dropped by the
# column transformer's default remainder) and group the rows into 5 clusters.
pipeline2 = make_pipeline(
    make_column_transformer(
        (StandardScaler(), make_column_selector(dtype_include=[np.number])),
    ),
    KMeans(
        n_clusters=5,
        # Pinned explicitly: the library default for n_init differs between scikit-learn
        # releases, which would otherwise make the clustering depend on the installed version.
        n_init=10,
        random_state=42,
    ),
)

# 5-fold cross-validation of the price pipeline. The "neg_mean_absolute_error" scorer
# returns negated errors, so the leading minus turns them back into positive per-fold MAE.
-cross_val_score(pipeline, X, y, scoring="neg_mean_absolute_error", cv=5)

array([1032.6262207 , 1148.67333984, 1282.8572998 , 1024.89831543,
       1337.66052246])

In [44]:
# Load the test set. SampleID is set aside for the submission file and is not part of
# the model input.
test = pd.read_csv("test.csv")
sample_ids = test["SampleID"]
X_test = test.drop(columns=["SampleID"])

# Fit the price model on all training rows, then give the test frame the same engineered
# columns (features() works in place and returns the very same frame).
pipeline.fit(X, y)
X_test = features(X_test)
y_pred = pipeline.predict(X_test)


def _label_from_score(score):
    """Turn an AAS value into the Task1 answer string (scores of 5 or more vs. the rest)."""
    if score >= 5:
        return "Autentic"
    return "Incert"


# One (subtaskID, answers) pair per task, in the order they appear in the submission:
# Task1 = label derived from AAS, Task2 = cluster id from pipeline2, Task3 = predicted price.
answers_by_task = [
    ("Task1", X_test["AAS"].map(_label_from_score)),
    ("Task2", pipeline2.fit_predict(X_test)),
    ("Task3", y_pred),
]

# Stack the per-task frames vertically and write them without the index column.
submission = pd.concat(
    [
        pd.DataFrame({"SampleID": sample_ids, "subtaskID": task_id, "Answer": answers})
        for task_id, answers in answers_by_task
    ]
)
submission.to_csv("submission.csv", index=None)