# 範例 : 計程車費率預測
https://www.kaggle.com/c/new-york-city-taxi-fare-prediction

# [作業目標]
- 使用並觀察特徵組合, 在計程車費率預測競賽的影響

# [作業重點]
- 仿造範例並參考今日課程內容, 使用經緯度一圈的長度比的概念造出新特徵, 觀察有什麼影響 (見下方「作業1」)
- 只使用上面所造的這個新特徵, 觀察有什麼影響 (見下方「作業2」)

In [1]:
# 做完特徵工程前的所有準備
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

import warnings
warnings.filterwarnings('ignore')

# Load the taxi data set and separate the regression target from the features.
data_path = '../data/'
df = pd.read_csv(f'{data_path}taxi_data1.csv')

# 'fare_amount' is the value to predict; the remaining columns stay in df.
train_Y = df['fare_amount']
df = df.drop(columns=['fare_amount'])
df.head()

Unnamed: 0,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
0,2011-10-21 23:54:10 UTC,-73.99058,40.761071,-73.981128,40.758634,2
1,2015-02-03 10:42:03 UTC,-73.988403,40.723431,-73.989647,40.741695,1
2,2014-03-16 18:58:58 UTC,-74.015785,40.71511,-74.012029,40.707888,2
3,2009-06-13 16:10:54 UTC,-73.977322,40.787275,-73.95803,40.778838,3
4,2014-06-12 03:25:56 UTC,-73.989683,40.729717,-73.98249,40.761887,3


In [2]:
# Decompose the pickup timestamp into numeric calendar/clock features.
# The column is parsed once with pd.to_datetime and each field is read through
# the vectorized .dt accessor, instead of formatting every row back to a string
# with strftime and re-parsing that string as an integer.
df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'], format='%Y-%m-%d %H:%M:%S UTC')
pickup_time = df['pickup_datetime'].dt
for unit in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    # Cast explicitly to int64, the dtype these columns had before.
    df[f'pickup_{unit}'] = getattr(pickup_time, unit).astype('int64')
df.head()

Unnamed: 0,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,pickup_year,pickup_month,pickup_day,pickup_hour,pickup_minute,pickup_second
0,2011-10-21 23:54:10,-73.99058,40.761071,-73.981128,40.758634,2,2011,10,21,23,54,10
1,2015-02-03 10:42:03,-73.988403,40.723431,-73.989647,40.741695,1,2015,2,3,10,42,3
2,2014-03-16 18:58:58,-74.015785,40.71511,-74.012029,40.707888,2,2014,3,16,18,58,58
3,2009-06-13 16:10:54,-73.977322,40.787275,-73.95803,40.778838,3,2009,6,13,16,10,54
4,2014-06-12 03:25:56,-73.989683,40.729717,-73.98249,40.761887,3,2014,6,12,3,25,56


In [3]:
# Baseline: evaluate linear regression and gradient boosting separately.
# The raw timestamp column is dropped before the features are min-max scaled.
df = df.drop(columns=['pickup_datetime'])
scaler = MinMaxScaler()
train_X = scaler.fit_transform(df)

Linear = LinearRegression()
linear_score = cross_val_score(Linear, train_X, train_Y, cv=5).mean()
print(f'Linear Reg Score : {linear_score}')

GDBT = GradientBoostingRegressor()
gdbt_score = cross_val_score(GDBT, train_X, train_Y, cv=5).mean()
print(f'Gradient Boosting Reg Score : {gdbt_score}')

Linear Reg Score : 0.026876871475641616
Gradient Boosting Reg Score : 0.7116501637791268


In [4]:
# Add three features: longitude difference, latitude difference, and the
# straight-line distance between pickup and dropoff in raw coordinate units.
lon_delta = df['dropoff_longitude'] - df['pickup_longitude']
lat_delta = df['dropoff_latitude'] - df['pickup_latitude']
df['longitude_diff'] = lon_delta
df['latitude_diff'] = lat_delta
df['distance_2D'] = (lon_delta ** 2 + lat_delta ** 2) ** 0.5

# Preview the new columns next to the coordinates they were derived from.
preview_cols = [
    'distance_2D', 'longitude_diff', 'latitude_diff',
    'pickup_longitude', 'pickup_latitude',
    'dropoff_longitude', 'dropoff_latitude',
]
df[preview_cols].head()

Unnamed: 0,distance_2D,longitude_diff,latitude_diff,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude
0,0.009761,0.009452,-0.002437,-73.99058,40.761071,-73.981128,40.758634
1,0.018307,-0.001244,0.018265,-73.988403,40.723431,-73.989647,40.741695
2,0.00814,0.003756,-0.007222,-74.015785,40.71511,-74.012029,40.707888
3,0.021056,0.019292,-0.008437,-73.977322,40.787275,-73.95803,40.778838
4,0.032964,0.007193,0.03217,-73.989683,40.729717,-73.98249,40.761887


In [5]:
# Re-score both models with the difference/distance features included.
# (Author's note on the result: accuracy goes up.)
train_X = scaler.fit_transform(df)

linear_cv = cross_val_score(Linear, train_X, train_Y, cv=5)
print(f'Linear Reg Score : {linear_cv.mean()}')

gdbt_cv = cross_val_score(GDBT, train_X, train_Y, cv=5)
print(f'Gradient Boosting Reg Score : {gdbt_cv.mean()}')

Linear Reg Score : 0.027479693774541868
Gradient Boosting Reg Score : 0.8045463729988545


# 作業1
* 參考今日教材，試著使用經緯度一圈的長度比這一概念，組合出一個新特徵，再觀察原特徵加上新特徵是否提升了正確率?

In [30]:
# Debug dump: print the pickup latitude, then the pickup longitude, of every row.
# Iterating the two columns directly avoids building a temporary Series per row
# and no longer requires the index labels to be exactly 0..len(df)-1, which
# df.loc[i, ...] driven by a positional counter silently assumed.
for pickup_lat, pickup_lng in zip(df['pickup_latitude'], df['pickup_longitude']):
    print(pickup_lat)
    print(pickup_lng)


40.761071
-73.99058000000002
40.72343063354492
-73.9884033203125
40.715109999999996
-74.015785
40.787275
-73.977322
40.729717
-73.989683
40.721804999999996
-73.99763100000001
40.789303000000004
-73.95474300000002
40.764668
-73.982279
40.769971999999996
-73.965039
40.734727
-73.98657
40.77404
-73.953742
40.76879
-73.982057
40.727033
-73.983368
40.728124
-74.005307
40.783983
-73.970473
40.714289
-73.951788
40.761494
-73.999353
40.730875
-74.001532
40.757627
-73.966994
40.803832
-73.96703199999997
40.731349
-73.98850300000002
40.810140000000004
-73.9585
40.761511999999996
-73.966323
40.767047999999996
-73.989962
40.781613
-73.95843
40.766
-73.979752
40.75905
-73.994117
40.780442
-73.980308
40.770263
-73.980436
40.769698
-73.95154000000002
40.778872
-73.956138
40.721392
-73.98824499999998
40.728907
-73.991792
40.782242
-73.979015
40.769675
-73.954822
40.719572
-73.991192
40.778162
-73.98218299999998
40.74077
-73.98186
40.740137
-73.995867
40.745531
-73.993543
40.780597
-73.95257
40.780448


40.733337
-73.867062
40.7594
-73.995457
40.750282287597656
-73.98795318603516
40.714278
-73.961434
40.730059000000004
-73.98343
40.726805999999996
-73.999902
40.757807
-73.986137
40.727102
-74.005612
40.76572
-73.992182
40.807656
-73.96442900000002
40.762646000000004
-74.001144
40.729865999999994
-73.993267
40.74195861816406
-74.00446319580078
40.774189
-73.873065
40.75918960571289
-73.97224426269531
40.781057000000004
-73.954459
40.802628999999996
-73.967287
40.77366
-73.870712
40.736629486083984
-73.9972915649414
40.745688
-73.944971
40.75395
-73.988682
40.774135
-73.954577
40.778937
-73.981815
40.75354
-73.98345
40.73284
-74.003888
40.793073
-73.940612
40.75736
-73.975841
40.762062
-73.969938
40.75546
-73.964977
40.744957
-74.008225
40.745413
-74.008452
40.75523
-73.98013
40.725921630859375
-73.98320770263672
40.752935
-73.974715
40.771815000000004
-73.956331
40.766510009765625
-73.98204040527344
40.744274
-73.97931
40.749024
-73.97992099999998
40.728752
-73.992367
40.762622
-73.968

40.753097
-73.978033
40.742082
-73.97466
40.78008300000001
-73.984478
40.755965
-73.973293
40.75796
-73.972815
40.75957800000001
-73.965417
40.766942
-73.956723
40.787603000000004
-73.974885
40.711017
-73.951015
40.756322
-73.940252
40.740898
-73.994105
40.742242
-73.974272
40.73881149291992
-73.9960708618164
40.70501327514648
-74.0106201171875
40.731725
-74.001037
40.776302
-73.953017
40.77101500000001
-73.979702
40.71374
-73.959412
40.764627000000004
-73.97271500000002
40.710384999999995
-74.007458
40.779576
-73.95782
40.750657000000004
-73.994497
40.764233000000004
-73.966645
40.738269
-74.008282
40.75639
-73.969971
40.76869
-73.987462
0.0
0.0
40.757065000000004
-73.96691700000002
40.740477
-73.996558
40.759774
-73.983903
40.665543
-73.78975799999998
40.79629
-73.97086
40.765732
-73.957463
40.773609
-73.870814
40.777891
-73.951916
40.744534
-73.983273
40.766428999999995
-73.921789
40.757507000000004
-73.989722
40.807732
-73.964338
40.76664
-73.98180500000002
40.736332
-73.994328
40.

40.753632
-73.978838
40.72053
-74.010212
40.775957
-73.947125
40.791554999999995
-73.95311
40.771829
-73.964938
40.78615
-73.952955
40.761053000000004
-73.975227
40.741351
-73.993853
40.779659
-73.97713
40.755316
-73.96513399999998
40.769076
-73.862677
40.719747
-73.99000500000002
40.751636
-73.980325
40.739588
-74.00223100000002
40.774296
-73.98917800000002
40.740284
-73.976112
40.74322
-73.99632700000002
40.76390900000001
-73.973338
40.729921000000004
-73.98683299999998
40.758663
-73.97225999999998
40.764058
-73.998514
40.717737
-74.000106
40.735834000000004
-74.001327
40.720861
-74.000861
40.76077651977539
-73.97310638427734
0.0
0.0
40.712548
-74.009567
40.782001
-73.957875
40.737164
-73.975112
40.741045
-74.00151
40.759625
-73.974497
40.787227
-73.968012
40.704758
-74.01674799999998
40.77410507202149
-73.87461853027344
40.737787
-74.001735
40.766059999999996
-73.954325
40.750165
-73.97480999999998
40.720287
-74.005167
40.744341
-73.988975
40.769056
-73.985395
40.767868
-73.98248000

40.725636
-74.005427
40.757049
-73.989915
40.78086471557617
-73.94967651367188
40.760589599609375
-73.99800872802734
40.747257
-73.981137
40.724452
-73.98472
40.75169372558594
-73.98087310791014
40.728515
-73.987767
40.772606
-73.982663
40.761983
-73.979502
40.764129
-73.961826
40.646122999999996
-73.776878
40.730275
-73.980464
40.76441
-73.964688
40.780864
-73.949663
40.748676
-73.973423
40.775293
-73.954842
40.734996
-73.990459
40.747067
-73.995195
40.781746999999996
-73.958239
40.752237
-73.993351
40.762007000000004
-73.993812
40.750887
-73.99070999999998
40.752642
-73.993515
40.71722000000001
-74.01643100000003
40.778797
-73.977633
40.711215
-74.01615699999998
40.752644000000004
-73.975139
40.73012924194336
-73.98348236083984
40.777387
-73.981935
40.754257
-73.94216999999998
40.739585
-73.989263
40.756242
-73.967598
40.778234999999995
-73.95235500000003
40.739995
-74.00588
40.738147
-73.992192
40.75289
-73.98049
40.782203674316406
-73.97606658935547
0.0
0.0
40.761635999999996
-73.9

40.760619
-73.99941700000002
40.75874
-73.97018100000003
0.0
0.0
40.7486572265625
-73.9760971069336
40.712799072265625
-74.00961303710938
40.751695
-73.993438
40.768335
-73.98269300000003
40.75605
-73.968414
40.768611
-73.982384
40.769708
-73.96452099999998
40.761457
-73.971172
40.73787700000001
-74.00012199999998
40.768291999999995
-73.981628
40.778743
-73.985136
40.76488300000001
-73.975465
40.762453
-73.96515500000002
40.766524
-73.965132
40.758012
-73.975527
40.767507
-73.964382
0.0
0.0
0.0
0.0
40.734074
-73.993044
40.750192
-73.993037
40.771259
-73.950631
40.740747999999996
-74.007702
40.745403
-74.00223199999998
40.751832
-74.00115100000002
40.756645
-73.97415699999998
40.781982
-73.981324
40.771291999999995
-73.956462
40.771558
-73.96529
40.789206
-73.954499
40.73922
-74.002668
40.800937
-73.957803
40.739470000000004
-73.987032
40.750374
-73.98283599999998
40.770525
-73.991788
40.764058
-73.966881
40.776161
-73.94999200000002
40.763987
-73.955993
40.77234
-73.98226700000002
40.7

40.742965999999996
-73.992563
40.769745
-73.86381800000002
40.753172
-73.98943
40.729606
-73.998595
40.721538
-74.000262
40.758497
-73.965852
40.748422999999995
-73.988798
40.704547
-74.014453
40.750002
-73.99330400000002
40.746503999999995
-73.988527
40.785032
-73.977508
40.777282
-73.97835699999997
40.747203999999996
-73.94241099999998
40.755727
-73.98751700000003
40.77339600000001
-73.945984
40.738928
-73.983198
40.735568
-73.98994
40.757687
-74.00045899999998
40.735433
-73.989698
40.764625
-73.96153699999998
40.783227000000004
-73.959149
40.724003
-73.99212299999998
40.783464
-73.980468
40.767925
-73.99603
40.738592
-73.980335
40.74847800000001
-73.98871899999997
40.81371
-73.959952
40.768072
-73.957227
40.750937
-73.970635
40.720384
-73.99799499999997
40.71805
-73.957479
40.760838
-73.96096899999998
40.740105
-73.97616500000002
40.756091999999995
-73.967225
40.767117
-73.97969599999998
40.767402000000004
-73.98249
40.769872
-73.863423
40.756065
-73.976849
40.72111892700195
-74.000

-73.984724
40.740604
-73.998225
40.744796
-74.006167
40.74097442626953
-74.00798034667969
40.774085
-73.872874
40.778532
-73.981483
40.752435999999996
-73.975433
40.791313
-73.973733
40.76943588256836
-73.9628677368164
40.763642
-73.977938
40.733090000000004
-73.993037
40.779393
-73.95555
40.740018
-73.9822
40.738049
-73.992485
40.777345000000004
-73.942415
40.724082
-73.954107
40.747873999999996
-73.97268299999998
40.738027
-74.00816
40.74518966674805
-73.99468231201173
40.75037800000001
-73.987983
40.756316999999996
-73.982457
40.759831
-73.976449
40.760644
-73.958398
40.75155
-73.99278699999998
40.782249
-73.972059
40.707221999999994
-74.00414
40.738533000000004
-73.991936
40.755755
-73.974017
40.766109
-73.98554
40.730614
-74.009006
40.763237
-73.97378
40.746009
-73.979588
40.727513
-73.988552
40.71976
-73.998403
40.731118
-73.982372
40.843037
-73.94391999999998
40.723167
-73.989572
40.768868
-73.992062
40.802512
-73.96422700000002
40.716858
-74.014315
40.774135
-73.87299200000002


40.752572
-73.97296700000003
40.724687
-73.987505
40.729316
-73.98928000000002
40.7784
-73.9779
40.752162
-73.97999300000002
40.766822
-73.982843
40.777986999999996
-73.951538
40.763954999999996
-73.96929300000002
40.768051
-73.981176
40.76943
-73.98916
40.731654
-73.997433
40.768516999999996
-73.955573
40.737874
-73.99229100000002
40.75018
-73.99431700000002
40.744049
-73.99578000000002
40.733015
-73.996317
40.737939000000004
-73.992326
40.686792
-73.996086
40.762795000000004
-73.98224599999998
40.734355
-73.98343100000002
40.718727
-74.000075
40.782252
-73.94868199999998
40.751557
-73.978158
40.750847
-73.991321
40.756536
-73.997294
40.715275
-74.015353
40.776449
-73.94369
40.711040000000004
-74.00950999999998
40.729378000000004
-73.984037
40.771144
-73.86455
40.764342
-73.973523
40.768871999999995
-73.955217
40.764965000000004
-73.984342
40.764435
-73.982497
40.726114
-73.989559
40.791363
-73.974277
40.715022999999995
-74.00806800000002
40.800656
-73.965905
40.766647
-73.98673199999

40.75666
-73.966432
40.759862
-73.969385
40.755293
-73.97959399999998
40.785913
-73.97856800000002
40.731386
-73.988677
40.736759
-73.99727299999998
40.759840000000004
-73.9723
40.644687
-73.781932
40.730545
-74.00160699999998
40.758927
-73.98883000000002
40.741372
-73.98983299999998
40.63019
-73.928115
40.751651
-73.974621
40.76100540161133
-73.96701049804686
40.75185
-73.970979
40.636441999999995
-73.916392
40.713489
-73.958286
40.773775
-73.87106
40.7386
-73.99969499999997
40.75235
-74.000945
40.744395000000004
-73.980655
40.74159
-74.001347
40.761862
-73.96535
40.733375
-73.989908
40.763021
-73.981881
40.733092
-74.000303
40.751163
-73.980259
40.777535
-73.97869
40.769486
-73.95810999999998
40.774842
-73.962689
40.770179
-73.86466300000002
40.781172
-73.95444499999998
40.768807
-73.862785
40.764891999999996
-73.987998
40.7771
-73.964113
40.733112
-74.005981
40.738849
-73.987228
0.0
0.0
40.744575
-73.998931
40.772324
-73.963072
40.753754
-73.97711
40.734797
-73.983407
40.747065
-73.

40.775395
-73.961015
40.767025
-73.971377
40.773668
-73.870852
40.728334000000004
-74.001811
40.756046999999995
-73.982602
40.79903
-73.969222
40.762955
-73.985028
40.73502
-73.998727
40.755093
-73.96528
40.788455
-73.97658
40.7552
-73.9731
40.772452
-73.967177
40.760938
-73.971338
40.734619140625
-73.98606872558595
40.744721999999996
-74.00277
0.0
0.0
40.760934999999996
-73.973058
40.751197999999995
-73.978813
40.737569
-73.9839
40.724396999999996
-73.981711
40.712885
-74.01055
40.764146000000004
-73.973085
40.721738
-74.00541
40.774203
-73.963823
40.746576
-73.987627
40.80727005004883
-73.96460723876955
40.770897
-73.95365699999998
40.720157
-73.988405
40.775089263916016
-73.9627914428711
40.76441
-73.968707
40.75606
-73.986496
40.810947
-74.08775200000002
40.773934999999994
-73.954422
40.738517
-73.980343
40.75830078125
-73.97068786621094
40.72055200000001
-74.010103
40.720079999999996
-74.012059
40.735721999999996
-73.979547
40.711771
-74.008401
40.724973
-73.992243
40.757469
-73.9

40.779927
-73.98459
40.750405
-73.99103000000002
40.755573
-73.990972
40.771738
-73.959692
40.773784
-73.945865
40.754452
-73.983947
40.74455
-74.002738
40.791357
-73.951757
40.780262
-73.956855
40.70933400000001
-74.014723
40.772123
-73.960852
40.748208
-73.977735
40.72153854370117
-73.989013671875
40.729608
-73.989857
40.71318054199219
-73.9584732055664
40.739815
-73.987427
40.795348
-73.944062
40.765168
-73.98236700000002
40.770555
-73.866212
40.74389266967773
-74.00655364990234
40.736302
-74.00666
40.754845
-73.975544
40.77302
-73.946143
40.760746999999995
-73.984735
40.750913
-73.97591
40.807446999999996
-73.964455
40.771361999999996
-73.953397
40.767037
-73.95360500000002
40.761796999999994
-73.96821800000002
40.740111999999996
-73.995005
40.739461999999996
-73.993732
40.747795
-73.989111
0.0
0.0
40.726246
-73.986388
40.782181
-73.955844
40.78499
-73.949253
40.761885
-73.979925
40.76146
-73.957823
40.76200485229492
-73.9750747680664
40.66765
-73.993573
40.779286
-73.962183
40.774

40.738224
-73.983549
40.722439
-73.987277
40.757771999999996
-73.973512
40.767171999999995
-73.967995
40.748187
-73.984875
40.756568
-73.985241
40.783687
-73.950237
40.784427642822266
-73.9471664428711
40.646913
-73.790018
40.78284
-73.959474
40.72661
-73.996278
40.746155
-73.981798
40.752767
-73.97476
40.792347
-73.973585
40.770403
-73.865401
40.73838
-73.989573
40.780747
-73.98156999999998
40.735731
-73.992397
40.745487
-73.99826999999998
40.7215
-73.8444
40.773982000000004
-73.872912
40.756444
-73.991461
40.778535
-73.982112
40.762621
-73.981984
40.756271999999996
-73.98756800000002
40.75864791870117
-73.97001647949219
40.737148
-73.979098
40.760436999999996
-73.978952
40.742385
-73.984911
40.763009999999994
-73.977322
40.714089
-73.951335
40.778549
-73.975312
40.790931
-73.96531999999998
40.755421999999996
-73.975515
40.725584999999995
-73.996553
40.74171
-73.98039
40.754486
-73.969077
40.760462
-73.982383
40.755527
-73.998457
40.75019836425781
-73.99170684814453
40.694715
-73.9837

40.741338
-73.98128299999998
0.0
0.0
40.750532
-73.968572
40.789374
-73.98754
40.72652
-73.99426
40.744068
-74.00674000000002
40.750135
-73.983272
40.752731
-73.99296
40.773996999999994
-73.87474499999998
40.71029
-74.011213
40.806212
-73.942453
40.740313
-73.98612800000002
40.739112
-73.98293199999998
40.74288940429688
-73.98224639892578
40.716892
-74.00822099999998
40.794872
-73.94433599999998
40.75117
-73.980568
40.710327
-74.01125
40.784415
-73.947125
40.767376
-73.98461
40.745528
-73.982535
40.74526596069336
-73.99472045898438
40.743445
-74.007568
40.720062
-73.98834699999998
40.748896
-74.003
40.755006
-73.982075
40.727235
-73.85380500000002
40.750211
-73.99129
40.711847
-74.007162
40.786848
-73.97773000000002
40.80305
-73.967935
40.774535
-73.961487
40.769920349121094
-73.86347961425781
40.773765999999995
-73.957769
40.759287
-73.991764
40.715137
-74.007777
40.726045
-73.996112
40.756953
-73.975698
40.752555
-73.990323
40.75519
-73.965067
40.76558
-73.97502800000002
40.739067999

In [37]:
import math
"""
Your Code Here, set new character at df['distance_real']
"""
# Equatorial radius of the Earth, in kilometres.
EARTH_RADIUS = 6378.137


def rad(d):
    """Convert an angle from degrees to radians."""
    return d * math.pi / 180.0


def GetDistance(lat1, lng1, lat2, lng2, radius=EARTH_RADIUS):
    """Return the great-circle (haversine) distance between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lng1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lng2: Longitude of the second point, in degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to EARTH_RADIUS (kilometres).

    Returns:
        The distance along the surface of the sphere, as a float. NaN inputs
        yield NaN.
    """
    radLat1 = rad(lat1)
    radLat2 = rad(lat2)
    lat_delta = radLat1 - radLat2
    lng_delta = rad(lng1) - rad(lng2)

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlng/2).
    h = (math.sin(lat_delta / 2) ** 2
         + math.cos(radLat1) * math.cos(radLat2) * math.sin(lng_delta / 2) ** 2)

    # Rounding can push h marginally above 1.0 for (near-)antipodal points,
    # which would make math.asin raise ValueError. A plain comparison is used
    # (rather than min()) so that NaN is left untouched and propagates.
    if h > 1.0:
        h = 1.0

    return 2 * math.asin(math.sqrt(h)) * radius

# Compute the great-circle trip distance for every row and store it as a new
# feature. The four coordinate columns are walked in lockstep, so the result
# does not depend on the index labels being 0..len(df)-1 (which df.loc[i, ...]
# with a positional counter assumed) and no per-row Series is built.
Distance_Real = [
    GetDistance(pickup_lat, pickup_lng, dropoff_lat, dropoff_lng)
    for pickup_lat, pickup_lng, dropoff_lat, dropoff_lng in zip(
        df['pickup_latitude'], df['pickup_longitude'],
        df['dropoff_latitude'], df['dropoff_longitude'],
    )
]

df['distance_real'] = Distance_Real
print(df['distance_real'].head())

# Observe the result: re-score both models with 'distance_real' added.
train_X = scaler.fit_transform(df)

linear_cv = cross_val_score(Linear, train_X, train_Y, cv=5)
print(f'Linear Reg Score : {linear_cv.mean()}')

gdbt_cv = cross_val_score(GDBT, train_X, train_Y, cv=5)
print(f'Gradient Boosting Reg Score : {gdbt_cv.mean()}')

0    0.8419
1    2.0359
2    0.8642
3    1.8779
4    3.6322
Name: distance_real, dtype: float64
Linear Reg Score : 0.3673179345860566
Gradient Boosting Reg Score : 0.805564554406844


# 作業2
* 試著只使用新特徵估計目標值(忽略原特徵)，與作業1的結果相比，效果如何?

In [35]:
# Score both models using only the 'distance_real' feature.
distance_only = df[['distance_real']]
train_X = scaler.fit_transform(distance_only)

linear_cv = cross_val_score(Linear, train_X, train_Y, cv=5)
print(f'Linear Reg Score : {linear_cv.mean()}')

gdbt_cv = cross_val_score(GDBT, train_X, train_Y, cv=5)
print(f'Gradient Boosting Reg Score : {gdbt_cv.mean()}')

Linear Reg Score : 0.0011536096142396256
Gradient Boosting Reg Score : 0.715704780543987
