In [1]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
import numpy as np

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

#import lightgbm as lgb
import xgboost as xgb
from pandas import DataFrame
import math
import matplotlib.pyplot as plt


2022-10-10 01:03:26.321970: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#cutdown_rows = 1000
cutdown_rows = 0

LABEL = 'Price'

floats = ['location.latitude', 'location.longitude', 'bedrooms', 'bathrooms',
          'nearestStation', 'nearestTram', 'nearestUnderground', 'nearestOverground',
          ]

categories = ['tenure.tenureType',
              'analyticsProperty.soldSTC',
              'analyticsProperty.preOwned',
              #'sharedOwnership.sharedOwnership',
              #
              'analyticsProperty.propertyType',  # 'propertyType',
              #'analyticsProperty.propertySubType',
              'borough',
              ]
custom = [
    'sharedOwnership.sharedOwnership',
    'analyticsProperty.priceQualifier',
    'keyFeatures'
]
#categories = []

features = floats.copy()
features.extend(categories)
features.extend(custom)
features.insert(0, LABEL)
features

['Price',
 'location.latitude',
 'location.longitude',
 'bedrooms',
 'bathrooms',
 'nearestStation',
 'nearestTram',
 'nearestUnderground',
 'nearestOverground',
 'tenure.tenureType',
 'analyticsProperty.soldSTC',
 'analyticsProperty.preOwned',
 'analyticsProperty.propertyType',
 'borough',
 'sharedOwnership.sharedOwnership',
 'analyticsProperty.priceQualifier',
 'keyFeatures']

In [3]:
def get_source_dataframe(rows=cutdown_rows):
    try:
        df = pd.read_csv('../data/source/df_listings.csv', on_bad_lines='skip', index_col=0)
    except:
        df = pd.read_csv('https://raw.githubusercontent.com/jayportfolio/capstone_streamlit/main/data/final/df_listings.csv', on_bad_lines='skip', index_col=0)
    df = df[features]

    if rows and rows > 0:
        df = df[:rows]
    return df


def create_train_test_data(df_orig, return_index=False, drop_nulls=True):
    df = df_orig.copy()

    if drop_nulls:
        df.dropna(inplace=True)

    if return_index:
        df.reset_index(inplace=True)

    for column in categories:
        df = pd.concat([df, pd.get_dummies(df[column], prefix=column)], axis=1)
        df.drop([column], axis=1, inplace=True)  # now drop the original column (you don't need it anymore),

    ins = df.pop('index')
    df.insert(1, 'index2', ins)
    df.insert(0, 'index', ins)

    #features = df[df.columns[1:]].values
    features = df[df.columns[2:]].values
    #labels = df[LABEL].values
    labels = df.iloc[:, 0:2].values

    if not return_index:
        return train_test_split(features, labels, train_size=0.9, random_state=RANDOM_STATE)
    else:
        X_train1, X_test1, y_train1, y_test1 = train_test_split(features, labels, train_size=0.9, random_state=RANDOM_STATE)
        X_train_index = X_train1[:, 0].reshape(-1, 1)
        y_train_index = y_train1[:, 0].reshape(-1, 1)
        X_test_index = X_test1[:, 0].reshape(-1, 1)
        y_test_index = y_test1[:, 0].reshape(-1, 1)
        #X_train1 = X_train1[:,3:]
        X_train1 = X_train1[:, 1:]
        y_train1 = y_train1[:, 1].reshape(-1, 1)
        #X_test1 = X_test1[:,3:]
        X_test1 = X_test1[:, 1:]
        y_test1 = y_test1[:, 1].reshape(-1, 1)

        # X_train_index = pd.to_numeric(X_train_index, 'coerce').astype(int)
        # y_train_index = pd.to_numeric(y_train_index, 'coerce').astype(int)
        # X_test_index = pd.to_numeric(X_test_index, 'coerce').astype(int)
        # y_test_index = pd.to_numeric(y_test_index, 'coerce').astype(int)

        return X_train1, X_test1, y_train1, y_test1, X_train_index, X_test_index, y_train_index, y_test_index


X_train, X_test, y_train, y_test, X_train_index, X_test_index, y_train_index, y_test_index = create_train_test_data(
    get_source_dataframe(), return_index=True, drop_nulls=False)
X_train.shape, X_test.shape, y_train.shape, y_test.shape, X_train_index.shape, X_test_index.shape, y_train_index.shape, y_test_index.shape,
X_train[0]

array([51.467869, -0.086037, 1.0, 1.0, 0.1523658055113279, nan, nan,
       0.1523658055113279, False, 'Offers Over',
       "['Bright One Bedroom Top Floor Flat', 'Scope For Further Upgrade To Taste','Light And Spacious Reception Room', 'Separate Well Equipped Kitchen','Bathroom With White Suite', 'Gas Central Heating', 'Close To Denmark HillStation And Bus Routes', 'Easy Access Shops And Amenities', 'No Onward Chain','Convenient Denmark Hill Location & Camberwell Grove Conservation Area']",
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0], dtype=object)

In [4]:
df = get_source_dataframe()
df_orig = df.copy()
print(df.shape)
df[:5]

(26633, 17)


Unnamed: 0,Price,location.latitude,location.longitude,bedrooms,bathrooms,nearestStation,nearestTram,nearestUnderground,nearestOverground,tenure.tenureType,analyticsProperty.soldSTC,analyticsProperty.preOwned,analyticsProperty.propertyType,borough,sharedOwnership.sharedOwnership,analyticsProperty.priceQualifier,keyFeatures
14520525,550000,51.52995,-0.20702,3.0,1.0,0.274316,,0.274316,0.274316,LEASEHOLD,False,Resale,Flats / Apartments,Westminster,False,,"['Maisonette', 'Private Balcony', 'Spacious / ..."
27953107,400000,51.54939,-0.4826,2.0,2.0,0.305845,,0.305845,,LEASEHOLD,False,Resale,Flats / Apartments,Hillingdon,False,Offers in Excess of,"['TWO DOUBLE BEDROOMS', 'LOUNGE AREA', 'MODERN..."
33593487,579950,51.44718,-0.33877,2.0,1.0,0.438045,,,0.438045,FREEHOLD,False,Resale,Houses,Richmond upon Thames,False,Offers in Excess of,"['Mid Terrace Victorian Home', '2 Double Bedro..."
35271294,370000,51.449568,-0.140154,2.0,1.0,0.399307,,0.399307,0.704048,LEASEHOLD,False,Resale,Flats / Apartments,Lambeth,False,Offers in Region of,"['2 bedroom second floor flat', 'Great locatio..."
35429088,599950,51.57703,-0.14123,2.0,1.0,0.238187,,0.238187,1.054653,,False,Resale,Flats / Apartments,Haringey,False,,[]


In [5]:
if 'nearestTram' in features:
    df['nearestTram'] = df['nearestTram'].fillna(99)
    df['nearestOverground'] = df['nearestOverground'].fillna(99)
    df['nearestUnderground'] = df['nearestUnderground'].fillna(99)
    df['nearestStation'] = df['nearestStation'].fillna(99)

    imputer = SimpleImputer(strategy='constant', fill_value=99)
    imputer.fit(df['nearestTram'].values.reshape(-1, 1))
    df['nearestTram'] = imputer.transform(df['nearestTram'].values.reshape(-1, 1))
    df['nearestUnderground'] = imputer.transform(df['nearestUnderground'].values.reshape(-1, 1))
    df['nearestOverground'] = imputer.transform(df['nearestOverground'].values.reshape(-1, 1))
    df['nearestStation'] = imputer.transform(df['nearestStation'].values.reshape(-1, 1))

if 'keyFeatures' in features:
    df['keyFeatures'] = df['keyFeatures'].str.lower()

if 'analyticsProperty.priceQualifier' in features:

    if 'keyFeatures' in features:

        # df[df['keyFeatures'].str.contains('shared ownership')]

        df['sharedOwnership'] = (
                (df['sharedOwnership.sharedOwnership'] == True) |
                (df['analyticsProperty.priceQualifier'] == 'Shared ownership') |
                (df['keyFeatures'].str.contains('shared ownership'))
        )

        df.drop(['keyFeatures'], axis=1, inplace=True)
    else:
        df['sharedOwnership'] = (
                (df['sharedOwnership.sharedOwnership'] == True) |
                (df['analyticsProperty.priceQualifier'] == 'Shared ownership')
        )

    df['sharedOwnership'] = pd.to_numeric(df['sharedOwnership'], 'coerce').dropna().astype(int)
    df.drop(['sharedOwnership.sharedOwnership'], axis=1, inplace=True)

    if 'analyticsProperty.priceQualifier' not in categories:
        df.drop(['analyticsProperty.priceQualifier'], axis=1, inplace=True)

    #df.drop(['shared_ownership'], axis=1, inplace=True)

df

Unnamed: 0,Price,location.latitude,location.longitude,bedrooms,bathrooms,nearestStation,nearestTram,nearestUnderground,nearestOverground,tenure.tenureType,analyticsProperty.soldSTC,analyticsProperty.preOwned,analyticsProperty.propertyType,borough,sharedOwnership
14520525,550000,51.529950,-0.207020,3.0,1.0,0.274316,99.000000,0.274316,0.274316,LEASEHOLD,False,Resale,Flats / Apartments,Westminster,0
27953107,400000,51.549390,-0.482600,2.0,2.0,0.305845,99.000000,0.305845,99.000000,LEASEHOLD,False,Resale,Flats / Apartments,Hillingdon,0
33593487,579950,51.447180,-0.338770,2.0,1.0,0.438045,99.000000,99.000000,0.438045,FREEHOLD,False,Resale,Houses,Richmond upon Thames,0
35271294,370000,51.449568,-0.140154,2.0,1.0,0.399307,99.000000,0.399307,0.704048,LEASEHOLD,False,Resale,Flats / Apartments,Lambeth,0
35429088,599950,51.577030,-0.141230,2.0,1.0,0.238187,99.000000,0.238187,1.054653,,False,Resale,Flats / Apartments,Haringey,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121677479,450000,51.483748,-0.388508,3.0,2.0,0.709062,99.000000,0.709062,99.000000,FREEHOLD,False,Resale,Houses,Hounslow,0
121677749,300000,51.532990,0.036340,2.0,1.0,0.178077,99.000000,0.178077,99.000000,,True,Resale,Flats / Apartments,Newham,0
121678004,475000,51.393990,0.045193,2.0,2.0,0.444410,99.000000,99.000000,0.444410,LEASEHOLD,False,Resale,Flats / Apartments,Bromley,0
121678103,200000,51.410742,-0.225795,,1.0,0.205738,0.917064,99.000000,0.205738,LEASEHOLD,True,Resale,Flats / Apartments,Merton,0


In [6]:
#df[df['keyFeatures'].str.contains('shared ownership')]
#df['keyFeatures'] = df['keyFeatures'].apply(lambda x: x.astype(str).str.upper())
#df

In [7]:

df

Unnamed: 0,Price,location.latitude,location.longitude,bedrooms,bathrooms,nearestStation,nearestTram,nearestUnderground,nearestOverground,tenure.tenureType,analyticsProperty.soldSTC,analyticsProperty.preOwned,analyticsProperty.propertyType,borough,sharedOwnership
14520525,550000,51.529950,-0.207020,3.0,1.0,0.274316,99.000000,0.274316,0.274316,LEASEHOLD,False,Resale,Flats / Apartments,Westminster,0
27953107,400000,51.549390,-0.482600,2.0,2.0,0.305845,99.000000,0.305845,99.000000,LEASEHOLD,False,Resale,Flats / Apartments,Hillingdon,0
33593487,579950,51.447180,-0.338770,2.0,1.0,0.438045,99.000000,99.000000,0.438045,FREEHOLD,False,Resale,Houses,Richmond upon Thames,0
35271294,370000,51.449568,-0.140154,2.0,1.0,0.399307,99.000000,0.399307,0.704048,LEASEHOLD,False,Resale,Flats / Apartments,Lambeth,0
35429088,599950,51.577030,-0.141230,2.0,1.0,0.238187,99.000000,0.238187,1.054653,,False,Resale,Flats / Apartments,Haringey,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121677479,450000,51.483748,-0.388508,3.0,2.0,0.709062,99.000000,0.709062,99.000000,FREEHOLD,False,Resale,Houses,Hounslow,0
121677749,300000,51.532990,0.036340,2.0,1.0,0.178077,99.000000,0.178077,99.000000,,True,Resale,Flats / Apartments,Newham,0
121678004,475000,51.393990,0.045193,2.0,2.0,0.444410,99.000000,99.000000,0.444410,LEASEHOLD,False,Resale,Flats / Apartments,Bromley,0
121678103,200000,51.410742,-0.225795,,1.0,0.205738,0.917064,99.000000,0.205738,LEASEHOLD,True,Resale,Flats / Apartments,Merton,0


In [8]:
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26633 entries, 14520525 to 121678256
Data columns (total 15 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Price                           26633 non-null  int64  
 1   location.latitude               26628 non-null  float64
 2   location.longitude              26633 non-null  float64
 3   bedrooms                        25670 non-null  float64
 4   bathrooms                       24915 non-null  float64
 5   nearestStation                  26633 non-null  float64
 6   nearestTram                     26633 non-null  float64
 7   nearestUnderground              26633 non-null  float64
 8   nearestOverground               26633 non-null  float64
 9   tenure.tenureType               24823 non-null  object 
 10  analyticsProperty.soldSTC       26633 non-null  bool   
 11  analyticsProperty.preOwned      26633 non-null  object 
 12  analyticsProperty.pro

Unnamed: 0,Price,location.latitude,location.longitude,bedrooms,bathrooms,nearestStation,nearestTram,nearestUnderground,nearestOverground,sharedOwnership
count,26633.0,26628.0,26633.0,25670.0,24915.0,26633.0,26633.0,26633.0,26633.0,26633.0
mean,410493.420719,51.497605,-0.109379,1.976237,1.180935,0.446379,92.810566,47.078052,11.806279,0.038073
std,113954.799595,0.075702,0.7243,0.828547,0.407462,1.259971,23.892593,49.146208,31.417627,0.191376
min,100000.0,51.29832,-0.497861,1.0,1.0,0.0,0.005282,0.006731,0.0,0.0
25%,325000.0,51.442726,-0.218288,1.0,1.0,0.220947,99.0,0.421345,0.273931,0.0
50%,415000.0,51.50105,-0.10618,2.0,1.0,0.359822,99.0,1.485177,0.460805,0.0
75%,500000.0,51.553926,-0.013215,2.0,1.0,0.556251,99.0,99.0,0.787954,0.0
max,600000.0,51.68306,51.558746,7.0,5.0,192.431869,99.0,99.0,192.431869,1.0


In [9]:
X_train, X_test, y_train, y_test, X_train_index, X_test_index, y_train_index, y_test_index = create_train_test_data(df,
                                                                                                                    return_index=True,
                                                                                                                    drop_nulls=True)
#X_train[:5]


print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, X_train_index.shape, X_test_index.shape,
      y_train_index.shape, y_test_index.shape)
#X_train_index
print(type(X_train))
X_train[0]

(15826, 54) (6784, 54) (15826, 1) (6784, 1) (15826, 1) (6784, 1) (15826, 1) (6784, 1)
<class 'numpy.ndarray'>


array([ 5.14125350e+01, -6.63180000e-02,  2.00000000e+00,  1.00000000e+00,
        4.75393115e-02,  9.90000000e+01,  9.90000000e+01,  4.75393115e-02,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  1.00000000e+00,
        0.00000000e+00,  1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        1.00000000e+00,  0.00000000e+00,  1.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [10]:
#imputer = SimpleImputer(strategy='mean')
#imputer.fit(X_train[6])
#X_train[6] = imputer.transform(X_train[6])

In [11]:
from sklearn.tree import DecisionTreeRegressor

decision_tree_model = DecisionTreeRegressor()
decision_tree_model.fit(X_train, y_train)

model = decision_tree_model

In [12]:
result = model.predict(X_test)

In [13]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

result = decision_tree_model.predict(X_test)
R2 = r2_score(y_test,result)
MSE = mean_squared_error(y_test,result)
RMSE = math.sqrt(MSE)
print('-'*10+'XGB'+'-'*10)
print('R square Accuracy: ',R2)
print('Mean Squared Error Accuracy: ',MSE)
print('Root Mean Squared Error: ',RMSE)

----------XGB----------
R square Accuracy:  0.4321972175045714
Mean Squared Error Accuracy:  7160677775.06299
Root Mean Squared Error:  84620.78807871616


In [14]:
print(result)

[400000. 375000. 425000. ... 550000. 475000. 325000.]


In [15]:
result = result.reshape((-1, 1))

print(y_test_index.reshape((-1, 1)).shape)
print(result.reshape((-1, 1)).shape)
print(y_test.shape)

print(y_test_index.shape)
print(result.shape)
print(y_test.shape)

(6784, 1)
(6784, 1)
(6784, 1)
(6784, 1)
(6784, 1)
(6784, 1)


In [16]:
compare = np.hstack((y_test_index, y_test, result))
#compare[0:4]

In [17]:
compare_df = DataFrame(compare, columns=['reference', 'actual', 'predicted'])
compare_df['difference'] = abs(compare_df['actual'] - compare_df['predicted'])
compare_df['diff 1 %'] = abs((compare_df['actual'] - compare_df['predicted']) / compare_df['actual'] * 100)
compare_df['diff 2 %'] = abs((compare_df['actual'] - compare_df['predicted']) / compare_df['predicted']) * 100
compare_df['reference'] = compare_df['reference'].astype(int)
compare_df.set_index('reference', inplace=True)
compare_df

Unnamed: 0_level_0,actual,predicted,difference,diff 1 %,diff 2 %
reference,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
85649265,380000.0,400000.0,20000.0,5.263158,5.000000
116387315,515000.0,375000.0,140000.0,27.184466,37.333333
116397896,425000.0,425000.0,0.0,0.000000,0.000000
119980703,240000.0,270000.0,30000.0,12.500000,11.111111
112424627,400000.0,338000.0,62000.0,15.500000,18.343195
...,...,...,...,...,...
119998832,400000.0,250000.0,150000.0,37.500000,60.000000
114708059,375000.0,375000.0,0.0,0.000000,0.000000
87572395,499950.0,550000.0,50050.0,10.011001,9.100000
85679046,550000.0,475000.0,75000.0,13.636364,15.789474


In [18]:
compare_df.join(df_orig)
# 85514838
# 115470422

Unnamed: 0_level_0,actual,predicted,difference,diff 1 %,diff 2 %,Price,location.latitude,location.longitude,bedrooms,bathrooms,...,nearestUnderground,nearestOverground,tenure.tenureType,analyticsProperty.soldSTC,analyticsProperty.preOwned,analyticsProperty.propertyType,borough,sharedOwnership.sharedOwnership,analyticsProperty.priceQualifier,keyFeatures
reference,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85649265,380000.0,400000.0,20000.0,5.263158,5.000000,380000,51.379815,0.121288,3.0,1.0,...,,1.219964,FREEHOLD,True,Resale,Houses,Bromley,False,,"['3 Bedroom Semi-Detached House', 'Well Locate..."
116387315,515000.0,375000.0,140000.0,27.184466,37.333333,515000,51.413710,-0.306050,2.0,1.0,...,,0.188849,LEASEHOLD,True,Resale,Flats / Apartments,Kingston upon Thames,False,,"['2 Bedroom Apartment', 'Open Plan Living/Dini..."
116397896,425000.0,425000.0,0.0,0.000000,0.000000,425000,51.539220,-0.314613,2.0,1.0,...,0.419789,,LEASEHOLD,False,Resale,Flats / Apartments,Ealing,False,,"['Double Glazed', 'Wood Floors', 'Secured buil..."
119980703,240000.0,270000.0,30000.0,12.500000,11.111111,240000,51.483145,0.141816,2.0,1.0,...,,0.803049,LEASEHOLD,True,Resale,Flats / Apartments,Bexley,False,,"['Two double bedrooms', 'Second floor', '88 ye..."
112424627,400000.0,338000.0,62000.0,15.500000,18.343195,400000,51.548604,-0.007113,1.0,1.0,...,0.487875,0.273223,LEASEHOLD,True,Resale,Flats / Apartments,Newham,False,Guide Price,"['Stylish 1 Bedroom Apartment', 'Sleek And Mod..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119998832,400000.0,250000.0,150000.0,37.500000,60.000000,400000,51.481892,-0.035423,1.0,1.0,...,,1.207218,LEASEHOLD,False,Resale,Flats / Apartments,Lewisham,False,,[]
114708059,375000.0,375000.0,0.0,0.000000,0.000000,375000,51.485806,0.020277,2.0,1.0,...,,0.132331,LEASEHOLD,False,Resale,Flats / Apartments,Greenwich,False,Guide Price,['Beautiful Three Bedroom First Floor Maisonet...
87572395,499950.0,550000.0,50050.0,10.011001,9.100000,499950,51.465090,-0.080560,2.0,1.0,...,,0.242780,SHARE_OF_FREEHOLD,False,Resale,Flats / Apartments,Southwark,False,Offers in Excess of,"['Two bedrooms ', 'Private garden ', 'Victoria..."
85679046,550000.0,475000.0,75000.0,13.636364,15.789474,550000,51.405561,-0.022164,3.0,2.0,...,,0.421950,SHARE_OF_FREEHOLD,True,Resale,Flats / Apartments,Bromley,False,,"['Three Bedrooms', 'Central Heating & Double G..."


In [19]:
model.score(X_test, y_test)

0.4321972175045714