# Predicting Sale Price from NYC Property Sale Data
## Berkeley SAAS CX Fall 2020 Kaggle Competition
## Ming Fong and Yifan Zhang


Helpful reference:

https://www.kaggle.com/sahilrider/learn-regression-nyc#4.-Modelling

In [116]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import linear_model
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [117]:
# Do not truncate columns when a DataFrame is rendered (None = no column limit).
pd.options.display.max_columns = None

In [118]:
# Read the training features, training targets and test features (in that order).
X_train, y_train, X_test = map(
    pd.read_csv,
    (
        "data/train_features.csv",
        "data/train_targets.csv",
        "data/test_features.csv",
    ),
)
# sample_submission = pd.read_csv("output/sample_submission.csv")

In [119]:
# X_train.set_index("id")
# y_train.set_index("id")
# X_test.set_index("id")
# X_train.head()

In [120]:
# Attach the sale price (target) to each training row by matching on "id".
# An inner join keeps only ids present in BOTH files, so no row can end up with
# missing features or a missing target, and the rows stay in the order of the
# features file (an outer join may reorder rows by key on recent pandas versions).
df = X_train.merge(right=y_train, on="id", how="inner")

# train_merged = X_train.merge(right = y_train, on = "id")

In [121]:
# Keep only the training rows whose gross AND land square footage are both non-zero.
df = df[(df["GROSS SQUARE FEET"] != 0) & (df["LAND SQUARE FEET"] != 0)]

In [122]:
# Stack the test features underneath the training rows and renumber the index 0..n-1,
# so the preprocessing that follows is applied to both sets at once.
df = pd.concat((df, X_test), ignore_index=True)

In [123]:
# Use the "id" column as the row index (it is removed from the regular columns).
df = df.set_index(keys="id")

In [124]:
# "EASE-MENT" is an empty column, delete it
# Besides the empty EASE-MENT column, also discard the sale date and the
# LOT / ADDRESS / APARTMENT NUMBER columns.
df = df.drop(columns=["EASE-MENT", "SALE DATE", "LOT", "ADDRESS", "APARTMENT NUMBER"])

In [125]:
# Correlation between features

# corr = train_merged.corr()
# print(corr['SALE PRICE'].sort_values(ascending=False))
# sns.heatmap(corr)

In [126]:
# Print each remaining column with its non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31209 entries, train0 to test9288
Data columns (total 19 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   BOROUGH                         31209 non-null  int64  
 1   NEIGHBORHOOD                    31209 non-null  object 
 2   BUILDING CLASS CATEGORY         31209 non-null  object 
 3   TAX CLASS AT PRESENT            31209 non-null  object 
 4   BLOCK                           31209 non-null  int64  
 5   BUILDING CLASS AT PRESENT       31209 non-null  object 
 6   ZIP CODE                        31209 non-null  int64  
 7   RESIDENTIAL UNITS               31209 non-null  int64  
 8   COMMERCIAL UNITS                31209 non-null  int64  
 9   TOTAL UNITS                     31209 non-null  int64  
 10  LAND SQUARE FEET                31209 non-null  float64
 11  GROSS SQUARE FEET               31209 non-null  float64
 12  YEAR BUILT                   

In [127]:
# Columns treated as categorical.
categorical_features = [
    "BOROUGH",
    "ZIP CODE",
    "BUILDING CLASS CATEGORY",
    "TAX CLASS AT TIME OF SALE",
    "BUILDING CLASS AT TIME OF SALE",
]

# Drop the "AT PRESENT" class columns (the "AT TIME OF SALE" versions are kept as
# categorical features) and two of the location columns -- there are too many
# features for location. ZIP CODE is deliberately kept.
df = df.drop(
    columns=[
        "BUILDING CLASS AT PRESENT",
        "TAX CLASS AT PRESENT",
        "BLOCK",
        "NEIGHBORHOOD",
    ]
)

In [128]:
# One-hot encoding: replace every column in `categorical_features` with one
# indicator column per distinct value.
df = pd.get_dummies(df, columns=categorical_features)

# List every column with its non-null count and dtype.
# `show_counts` replaces the old `null_counts` keyword, which was deprecated in
# pandas 1.2 and no longer exists in pandas 2.x.
df.info(verbose=True, memory_usage=True, show_counts=True)

             31209 non-null  uint8  
 180 ZIP CODE_11428                                                        31209 non-null  uint8  
 181 ZIP CODE_11429                                                        31209 non-null  uint8  
 182 ZIP CODE_11432                                                        31209 non-null  uint8  
 183 ZIP CODE_11433                                                        31209 non-null  uint8  
 184 ZIP CODE_11434                                                        31209 non-null  uint8  
 185 ZIP CODE_11435                                                        31209 non-null  uint8  
 186 ZIP CODE_11436                                                        31209 non-null  uint8  
 187 ZIP CODE_11691                                                        31209 non-null  uint8  
 188 ZIP CODE_11692                                                        31209 non-null  uint8  
 189 ZIP CODE_11693                                                     

# Regression

In [129]:
# Number of rows in the test set; used to separate the test rows from the
# training rows in the combined frame.
test_row_count = len(X_test)
test_row_count

9289

In [130]:
# Preview the training feature matrix: every row except the last `test_row_count`
# rows, without the target column. `columns=` is used because passing the axis
# positionally (`drop("SALE PRICE", 1)`) is no longer accepted by pandas 2.x.
df.iloc[: len(df) - test_row_count].drop(columns="SALE PRICE")

Unnamed: 0_level_0,RESIDENTIAL UNITS,COMMERCIAL UNITS,TOTAL UNITS,LAND SQUARE FEET,GROSS SQUARE FEET,YEAR BUILT,MEDIAN INCOME,MEAN INCOME,POPULATION,BOROUGH_1,BOROUGH_2,BOROUGH_3,BOROUGH_4,BOROUGH_5,ZIP CODE_10001,ZIP CODE_10002,ZIP CODE_10003,ZIP CODE_10004,ZIP CODE_10005,ZIP CODE_10006,ZIP CODE_10007,ZIP CODE_10009,ZIP CODE_10010,ZIP CODE_10011,ZIP CODE_10012,ZIP CODE_10013,ZIP CODE_10014,ZIP CODE_10016,ZIP CODE_10017,ZIP CODE_10018,ZIP CODE_10019,ZIP CODE_10021,ZIP CODE_10022,ZIP CODE_10023,ZIP CODE_10024,ZIP CODE_10025,ZIP CODE_10026,ZIP CODE_10027,ZIP CODE_10028,ZIP CODE_10029,ZIP CODE_10030,ZIP CODE_10031,ZIP CODE_10032,ZIP CODE_10033,ZIP CODE_10034,ZIP CODE_10035,ZIP CODE_10036,ZIP CODE_10037,ZIP CODE_10038,ZIP CODE_10039,ZIP CODE_10040,ZIP CODE_10044,ZIP CODE_10065,ZIP CODE_10075,ZIP CODE_10128,ZIP CODE_10301,ZIP CODE_10302,ZIP CODE_10303,ZIP CODE_10304,ZIP CODE_10305,ZIP CODE_10306,ZIP CODE_10307,ZIP CODE_10308,ZIP CODE_10309,ZIP CODE_10310,ZIP CODE_10312,ZIP CODE_10314,ZIP CODE_10451,ZIP CODE_10452,ZIP CODE_10453,ZIP CODE_10454,ZIP CODE_10455,ZIP CODE_10456,ZIP CODE_10457,ZIP CODE_10458,ZIP CODE_10459,ZIP CODE_10460,ZIP CODE_10461,ZIP CODE_10462,ZIP CODE_10463,ZIP CODE_10464,ZIP CODE_10465,ZIP CODE_10466,ZIP CODE_10467,ZIP CODE_10468,ZIP CODE_10469,ZIP CODE_10470,ZIP CODE_10471,ZIP CODE_10472,ZIP CODE_10473,ZIP CODE_10474,ZIP CODE_10475,ZIP CODE_10803,ZIP CODE_11001,ZIP CODE_11004,ZIP CODE_11005,ZIP CODE_11040,ZIP CODE_11101,ZIP CODE_11102,ZIP CODE_11103,ZIP CODE_11104,ZIP CODE_11105,ZIP CODE_11106,ZIP CODE_11201,ZIP CODE_11203,ZIP CODE_11204,ZIP CODE_11205,ZIP CODE_11206,ZIP CODE_11207,ZIP CODE_11208,ZIP CODE_11209,ZIP CODE_11210,ZIP CODE_11211,ZIP CODE_11212,ZIP CODE_11213,ZIP CODE_11214,ZIP CODE_11215,ZIP CODE_11216,ZIP CODE_11217,ZIP CODE_11218,ZIP CODE_11219,ZIP CODE_11220,ZIP CODE_11221,ZIP CODE_11222,ZIP CODE_11223,ZIP CODE_11224,ZIP CODE_11225,ZIP CODE_11226,ZIP CODE_11228,ZIP CODE_11229,ZIP CODE_11230,ZIP CODE_11231,ZIP CODE_11232,ZIP 
CODE_11233,ZIP CODE_11234,ZIP CODE_11235,ZIP CODE_11236,ZIP CODE_11237,ZIP CODE_11238,ZIP CODE_11239,ZIP CODE_11354,ZIP CODE_11355,ZIP CODE_11356,ZIP CODE_11357,ZIP CODE_11358,ZIP CODE_11360,ZIP CODE_11361,ZIP CODE_11362,ZIP CODE_11363,ZIP CODE_11364,ZIP CODE_11365,ZIP CODE_11366,ZIP CODE_11367,ZIP CODE_11368,ZIP CODE_11369,ZIP CODE_11370,ZIP CODE_11372,ZIP CODE_11373,ZIP CODE_11374,ZIP CODE_11375,ZIP CODE_11377,ZIP CODE_11378,ZIP CODE_11379,ZIP CODE_11385,ZIP CODE_11411,ZIP CODE_11412,ZIP CODE_11413,ZIP CODE_11414,ZIP CODE_11415,ZIP CODE_11416,ZIP CODE_11417,ZIP CODE_11418,ZIP CODE_11419,ZIP CODE_11420,ZIP CODE_11421,ZIP CODE_11422,ZIP CODE_11423,ZIP CODE_11426,ZIP CODE_11427,ZIP CODE_11428,ZIP CODE_11429,ZIP CODE_11432,ZIP CODE_11433,ZIP CODE_11434,ZIP CODE_11435,ZIP CODE_11436,ZIP CODE_11691,ZIP CODE_11692,ZIP CODE_11693,ZIP CODE_11694,BUILDING CLASS CATEGORY_01 ONE FAMILY DWELLINGS,BUILDING CLASS CATEGORY_02 TWO FAMILY DWELLINGS,BUILDING CLASS CATEGORY_03 THREE FAMILY DWELLINGS,BUILDING CLASS CATEGORY_04 TAX CLASS 1 CONDOS,BUILDING CLASS CATEGORY_05 TAX CLASS 1 VACANT LAND,BUILDING CLASS CATEGORY_06 TAX CLASS 1 - OTHER,BUILDING CLASS CATEGORY_07 RENTALS - WALKUP APARTMENTS,BUILDING CLASS CATEGORY_08 RENTALS - ELEVATOR APARTMENTS,BUILDING CLASS CATEGORY_09 COOPS - WALKUP APARTMENTS,BUILDING CLASS CATEGORY_10 COOPS - ELEVATOR APARTMENTS,BUILDING CLASS CATEGORY_11 SPECIAL CONDO BILLING LOTS,BUILDING CLASS CATEGORY_11A CONDO-RENTALS,BUILDING CLASS CATEGORY_12 CONDOS - WALKUP APARTMENTS,BUILDING CLASS CATEGORY_13 CONDOS - ELEVATOR APARTMENTS,BUILDING CLASS CATEGORY_14 RENTALS - 4-10 UNIT,BUILDING CLASS CATEGORY_15 CONDOS - 2-10 UNIT RESIDENTIAL,BUILDING CLASS CATEGORY_16 CONDOS - 2-10 UNIT WITH COMMERCIAL UNIT,BUILDING CLASS CATEGORY_17 CONDO COOPS,BUILDING CLASS CATEGORY_21 OFFICE BUILDINGS,BUILDING CLASS CATEGORY_22 STORE BUILDINGS,BUILDING CLASS CATEGORY_23 LOFT BUILDINGS,BUILDING CLASS CATEGORY_25 LUXURY HOTELS,BUILDING CLASS CATEGORY_26 OTHER HOTELS,BUILDING 
CLASS CATEGORY_27 FACTORIES,BUILDING CLASS CATEGORY_29 COMMERCIAL GARAGES,BUILDING CLASS CATEGORY_30 WAREHOUSES,BUILDING CLASS CATEGORY_31 COMMERCIAL VACANT LAND,BUILDING CLASS CATEGORY_32 HOSPITAL AND HEALTH FACILITIES,BUILDING CLASS CATEGORY_33 EDUCATIONAL FACILITIES,BUILDING CLASS CATEGORY_34 THEATRES,BUILDING CLASS CATEGORY_35 INDOOR PUBLIC AND CULTURAL FACILITIES,BUILDING CLASS CATEGORY_36 OUTDOOR RECREATIONAL FACILITIES,BUILDING CLASS CATEGORY_37 RELIGIOUS FACILITIES,BUILDING CLASS CATEGORY_38 ASYLUMS AND HOMES,BUILDING CLASS CATEGORY_40 SELECTED GOVERNMENTAL FACILITIES,BUILDING CLASS CATEGORY_41 TAX CLASS 4 - OTHER,BUILDING CLASS CATEGORY_42 CONDO CULTURAL/MEDICAL/EDUCATIONAL/ETC,BUILDING CLASS CATEGORY_43 CONDO OFFICE BUILDINGS,BUILDING CLASS CATEGORY_44 CONDO PARKING,BUILDING CLASS CATEGORY_46 CONDO STORE BUILDINGS,BUILDING CLASS CATEGORY_47 CONDO NON-BUSINESS STORAGE,BUILDING CLASS CATEGORY_48 CONDO TERRACES/GARDENS/CABANAS,TAX CLASS AT TIME OF SALE_1,TAX CLASS AT TIME OF SALE_2,TAX CLASS AT TIME OF SALE_4,BUILDING CLASS AT TIME OF SALE_A0,BUILDING CLASS AT TIME OF SALE_A1,BUILDING CLASS AT TIME OF SALE_A2,BUILDING CLASS AT TIME OF SALE_A3,BUILDING CLASS AT TIME OF SALE_A4,BUILDING CLASS AT TIME OF SALE_A5,BUILDING CLASS AT TIME OF SALE_A6,BUILDING CLASS AT TIME OF SALE_A7,BUILDING CLASS AT TIME OF SALE_A9,BUILDING CLASS AT TIME OF SALE_B1,BUILDING CLASS AT TIME OF SALE_B2,BUILDING CLASS AT TIME OF SALE_B3,BUILDING CLASS AT TIME OF SALE_B9,BUILDING CLASS AT TIME OF SALE_C0,BUILDING CLASS AT TIME OF SALE_C1,BUILDING CLASS AT TIME OF SALE_C2,BUILDING CLASS AT TIME OF SALE_C3,BUILDING CLASS AT TIME OF SALE_C4,BUILDING CLASS AT TIME OF SALE_C5,BUILDING CLASS AT TIME OF SALE_C6,BUILDING CLASS AT TIME OF SALE_C7,BUILDING CLASS AT TIME OF SALE_C9,BUILDING CLASS AT TIME OF SALE_CM,BUILDING CLASS AT TIME OF SALE_D1,BUILDING CLASS AT TIME OF SALE_D2,BUILDING CLASS AT TIME OF SALE_D3,BUILDING CLASS AT TIME OF SALE_D4,BUILDING CLASS AT TIME OF SALE_D5,BUILDING CLASS 
AT TIME OF SALE_D6,BUILDING CLASS AT TIME OF SALE_D7,BUILDING CLASS AT TIME OF SALE_D9,BUILDING CLASS AT TIME OF SALE_E1,BUILDING CLASS AT TIME OF SALE_E2,BUILDING CLASS AT TIME OF SALE_E7,BUILDING CLASS AT TIME OF SALE_E9,BUILDING CLASS AT TIME OF SALE_F1,BUILDING CLASS AT TIME OF SALE_F2,BUILDING CLASS AT TIME OF SALE_F4,BUILDING CLASS AT TIME OF SALE_F5,BUILDING CLASS AT TIME OF SALE_F9,BUILDING CLASS AT TIME OF SALE_G0,BUILDING CLASS AT TIME OF SALE_G1,BUILDING CLASS AT TIME OF SALE_G2,BUILDING CLASS AT TIME OF SALE_G4,BUILDING CLASS AT TIME OF SALE_G5,BUILDING CLASS AT TIME OF SALE_G6,BUILDING CLASS AT TIME OF SALE_G7,BUILDING CLASS AT TIME OF SALE_G8,BUILDING CLASS AT TIME OF SALE_G9,BUILDING CLASS AT TIME OF SALE_GU,BUILDING CLASS AT TIME OF SALE_GW,BUILDING CLASS AT TIME OF SALE_H1,BUILDING CLASS AT TIME OF SALE_H2,BUILDING CLASS AT TIME OF SALE_H3,BUILDING CLASS AT TIME OF SALE_H4,BUILDING CLASS AT TIME OF SALE_H6,BUILDING CLASS AT TIME OF SALE_H8,BUILDING CLASS AT TIME OF SALE_H9,BUILDING CLASS AT TIME OF SALE_HB,BUILDING CLASS AT TIME OF SALE_HH,BUILDING CLASS AT TIME OF SALE_HR,BUILDING CLASS AT TIME OF SALE_HS,BUILDING CLASS AT TIME OF SALE_I1,BUILDING CLASS AT TIME OF SALE_I3,BUILDING CLASS AT TIME OF SALE_I4,BUILDING CLASS AT TIME OF SALE_I5,BUILDING CLASS AT TIME OF SALE_I6,BUILDING CLASS AT TIME OF SALE_I7,BUILDING CLASS AT TIME OF SALE_I9,BUILDING CLASS AT TIME OF SALE_J1,BUILDING CLASS AT TIME OF SALE_J4,BUILDING CLASS AT TIME OF SALE_J8,BUILDING CLASS AT TIME OF SALE_J9,BUILDING CLASS AT TIME OF SALE_K1,BUILDING CLASS AT TIME OF SALE_K2,BUILDING CLASS AT TIME OF SALE_K3,BUILDING CLASS AT TIME OF SALE_K4,BUILDING CLASS AT TIME OF SALE_K5,BUILDING CLASS AT TIME OF SALE_K6,BUILDING CLASS AT TIME OF SALE_K7,BUILDING CLASS AT TIME OF SALE_K8,BUILDING CLASS AT TIME OF SALE_K9,BUILDING CLASS AT TIME OF SALE_L1,BUILDING CLASS AT TIME OF SALE_L3,BUILDING CLASS AT TIME OF SALE_L8,BUILDING CLASS AT TIME OF SALE_L9,BUILDING CLASS AT TIME OF SALE_M1,BUILDING 
CLASS AT TIME OF SALE_M2,BUILDING CLASS AT TIME OF SALE_M3,BUILDING CLASS AT TIME OF SALE_M4,BUILDING CLASS AT TIME OF SALE_M9,BUILDING CLASS AT TIME OF SALE_N2,BUILDING CLASS AT TIME OF SALE_N9,BUILDING CLASS AT TIME OF SALE_O1,BUILDING CLASS AT TIME OF SALE_O2,BUILDING CLASS AT TIME OF SALE_O3,BUILDING CLASS AT TIME OF SALE_O4,BUILDING CLASS AT TIME OF SALE_O5,BUILDING CLASS AT TIME OF SALE_O6,BUILDING CLASS AT TIME OF SALE_O7,BUILDING CLASS AT TIME OF SALE_O8,BUILDING CLASS AT TIME OF SALE_O9,BUILDING CLASS AT TIME OF SALE_P2,BUILDING CLASS AT TIME OF SALE_P5,BUILDING CLASS AT TIME OF SALE_P6,BUILDING CLASS AT TIME OF SALE_P8,BUILDING CLASS AT TIME OF SALE_P9,BUILDING CLASS AT TIME OF SALE_Q8,BUILDING CLASS AT TIME OF SALE_R0,BUILDING CLASS AT TIME OF SALE_R1,BUILDING CLASS AT TIME OF SALE_R2,BUILDING CLASS AT TIME OF SALE_R3,BUILDING CLASS AT TIME OF SALE_R4,BUILDING CLASS AT TIME OF SALE_R6,BUILDING CLASS AT TIME OF SALE_R8,BUILDING CLASS AT TIME OF SALE_R9,BUILDING CLASS AT TIME OF SALE_RA,BUILDING CLASS AT TIME OF SALE_RB,BUILDING CLASS AT TIME OF SALE_RG,BUILDING CLASS AT TIME OF SALE_RK,BUILDING CLASS AT TIME OF SALE_RP,BUILDING CLASS AT TIME OF SALE_RR,BUILDING CLASS AT TIME OF SALE_RS,BUILDING CLASS AT TIME OF SALE_RT,BUILDING CLASS AT TIME OF SALE_S0,BUILDING CLASS AT TIME OF SALE_S1,BUILDING CLASS AT TIME OF SALE_S2,BUILDING CLASS AT TIME OF SALE_S3,BUILDING CLASS AT TIME OF SALE_S4,BUILDING CLASS AT TIME OF SALE_S5,BUILDING CLASS AT TIME OF SALE_S9,BUILDING CLASS AT TIME OF SALE_V0,BUILDING CLASS AT TIME OF SALE_V1,BUILDING CLASS AT TIME OF SALE_V2,BUILDING CLASS AT TIME OF SALE_V9,BUILDING CLASS AT TIME OF SALE_W1,BUILDING CLASS AT TIME OF SALE_W2,BUILDING CLASS AT TIME OF SALE_W3,BUILDING CLASS AT TIME OF SALE_W4,BUILDING CLASS AT TIME OF SALE_W8,BUILDING CLASS AT TIME OF SALE_W9,BUILDING CLASS AT TIME OF SALE_Y1,BUILDING CLASS AT TIME OF SALE_Y3,BUILDING CLASS AT TIME OF SALE_Z0,BUILDING CLASS AT TIME OF SALE_Z9
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1
train0,1,0,1,2760.0,1056.0,1925,48469.7389,57368.0989,68662,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train1,1,0,1,4317.0,2424.0,1960,67952.9473,77076.9976,81033,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train2,1,0,1,1975.0,1460.0,1915,55000.8962,69161.9360,67349,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train3,5,0,5,2200.0,7040.0,1930,61468.5107,79249.3305,49085,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train4,2,0,2,2136.0,1272.0,1935,48469.7389,57368.0989,68662,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
train27857,2,0,2,1710.0,1944.0,1960,34729.0604,44634.0699,87324,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train27858,0,1,1,4950.0,4900.0,1958,49978.8076,60262.6681,87015,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train27860,1,0,1,4000.0,1768.0,1920,55200.2725,73658.3837,40169,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
train27861,1,0,1,2000.0,1036.0,1925,67952.9473,77076.9976,81033,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [131]:
# Split the labelled rows into a training part and a validation part
from sklearn.model_selection import train_test_split

# The labelled rows are everything except the last `test_row_count` rows of df.
train_row_count = len(df) - test_row_count
y = df["SALE PRICE"].iloc[:train_row_count]
# `columns=` is required: the positional axis form `drop("SALE PRICE", 1)` was
# removed in pandas 2.0.
X = df.iloc[:train_row_count].drop(columns="SALE PRICE")

# Hold out 20% of the labelled rows for validation (fixed seed for repeatability).
# NOTE: this rebinds X_train / y_train, which previously held the raw CSV frames.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

In [132]:
from sklearn.metrics import mean_squared_error
# Root mean squared error
def rmse(y_test,y_pred):
    """Return the root mean squared error between true and predicted values.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values.

    Returns
    -------
    The square root of ``mean_squared_error(y_test, y_pred)``.
    """
    mse = mean_squared_error(y_test, y_pred)
    return np.sqrt(mse)

In [133]:
# Ordinary least-squares baseline: fit on the training split, then report the
# RMSE on the validation split.
linreg = linear_model.LinearRegression().fit(X_train, y_train)
y_pred_lin = linreg.predict(X_valid)
rmse(y_valid, y_pred_lin)

16597434.487994341

In [134]:
# Predict the sale price for the test rows (the last `test_row_count` rows of df)
# with the linear model and write a submission file indexed by the test ids.
# `drop(columns=...)` replaces the positional-axis form removed in pandas 2.0.
test_features = df.iloc[len(df) - test_row_count:].drop(columns="SALE PRICE")
linear_submission = pd.DataFrame(
    {"SALE PRICE": linreg.predict(test_features)},
    index=X_test["id"],
)
linear_submission.to_csv("output/linear_submission.csv")
linear_submission.head()

Unnamed: 0_level_0,SALE PRICE
id,Unnamed: 1_level_1
test0,13817220.0
test1,-7227431.0
test2,321974.3
test3,-6935860.0
test4,-853137.8


In [135]:
# Lasso regression with penalty strength `alpha`, limited to 500 solver iterations;
# fit on the training split and report the RMSE on the validation split.
alpha = 12
lasso_regr = linear_model.Lasso(alpha=alpha, max_iter=500).fit(X_train, y_train)
Y_pred_lasso = lasso_regr.predict(X_valid)
rmse(y_valid, Y_pred_lasso)

16590548.748000665

In [136]:
# Predict the sale price for the test rows (the last `test_row_count` rows of df)
# with the lasso model and write a submission file indexed by the test ids.
# `drop(columns=...)` replaces the positional-axis form removed in pandas 2.0.
test_features = df.iloc[len(df) - test_row_count:].drop(columns="SALE PRICE")
lasso_submission = pd.DataFrame(
    {"SALE PRICE": lasso_regr.predict(test_features)},
    index=X_test["id"],
)
# Disabled experiment: overwrite predictions below 1000 with the training median.
# lasso_submission[lasso_submission["SALE PRICE"] < 1000] = y_train.median()
lasso_submission.to_csv("output/lasso_submission.csv")
lasso_submission.head()

Unnamed: 0_level_0,SALE PRICE
id,Unnamed: 1_level_1
test0,13833070.0
test1,1410426.0
test2,326645.3
test3,1697883.0
test4,7809926.0


In [137]:
# Ridge regression on scaled features.
# `Ridge(normalize=True)` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so the scaling is done explicitly in a pipeline instead.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The old `normalize=True` divided each centred column by its L2 norm, whereas
# StandardScaler divides by the standard deviation (a factor of sqrt(n_samples)
# apart). Multiplying alpha by the number of training samples keeps the penalty
# equivalent to the original alpha = 0.01; this is the replacement recipe given
# in scikit-learn's deprecation notice for Ridge.
ridge = make_pipeline(
    StandardScaler(with_mean=False),
    linear_model.Ridge(alpha=0.01 * len(X_train)),
)
ridge.fit(X_train, y_train)
Y_pred_ridge = ridge.predict(X_valid)
rmse(y_valid, Y_pred_ridge)

16527272.936116837

In [138]:
# Predict the sale price for the test rows (the last `test_row_count` rows of df)
# with the ridge model and write a submission file indexed by the test ids.
# `drop(columns=...)` replaces the positional-axis form removed in pandas 2.0.
test_features = df.iloc[len(df) - test_row_count:].drop(columns="SALE PRICE")
ridge_submission = pd.DataFrame(
    {"SALE PRICE": ridge.predict(test_features)},
    index=X_test["id"],
)
ridge_submission.to_csv("output/ridge_submission.csv")
ridge_submission.head()

Unnamed: 0_level_0,SALE PRICE
id,Unnamed: 1_level_1
test0,13634520.0
test1,1129067.0
test2,334909.5
test3,1274455.0
test4,7513601.0


In [139]:
from sklearn.ensemble import RandomForestRegressor

In [140]:
# Random forest with default hyper-parameters.
# The seed is fixed (same value as the train/validation split) so that the
# validation RMSE and the submission file are reproducible across runs.
rf_regr = RandomForestRegressor(random_state=42)
rf_regr.fit(X_train, y_train)
Y_pred_rf = rf_regr.predict(X_valid)
rmse(y_valid, Y_pred_rf)

16398086.072808791

In [141]:
# Predict the sale price for the test rows (the last `test_row_count` rows of df)
# with the random forest and write a submission file indexed by the test ids.
# `drop(columns=...)` replaces the positional-axis form removed in pandas 2.0.
test_features = df.iloc[len(df) - test_row_count:].drop(columns="SALE PRICE")
rf_submission = pd.DataFrame(
    {"SALE PRICE": rf_regr.predict(test_features)},
    index=X_test["id"],
)
rf_submission.to_csv("output/rf_submission.csv")
rf_submission.head()

Unnamed: 0_level_0,SALE PRICE
id,Unnamed: 1_level_1
test0,9233541.0
test1,825590.7
test2,540474.6
test3,1703580.0
test4,2653210.0


## Bad Linear Regression

In [85]:
# X = X_train[["LAND SQUARE FEET", "GROSS SQUARE FEET", "MEDIAN INCOME", "MEAN INCOME"]]
# Y = y_train["SALE PRICE"]

In [86]:
# model = linear_model.LinearRegression()
# model.fit(X,Y)
# print('Intercept: \n', model.intercept_)
# print('Coefficients: \n', model.coef_)

In [87]:
# x_test_predictions = pd.DataFrame(model.predict(X_test[["LAND SQUARE FEET", "GROSS SQUARE FEET", "MEDIAN INCOME", "MEAN INCOME"]]))
# x_test_predictions.index = X_test["id"]
# x_test_predictions.columns = ["SALE PRICE"]
# x_test_predictions.to_csv("output/submission.csv")
# x_test_predictions.head()

# Submit to Kaggle
Uncomment and run the command below to send a submission to the Kaggle competition.

In [112]:
# !kaggle competitions submit -c saas-2020-fall-cx-kaggle-compeition -f output/lasso_submission.csv -m "lasso remove zero gross sqft and land sqft"

Successfully submitted to SAAS 2020 Fall CX Kaggle Compeition

  0%|          | 0.00/257k [00:00<?, ?B/s]
  3%|▎         | 8.00k/257k [00:00<00:03, 78.8kB/s]
 34%|███▍      | 88.0k/257k [00:00<00:01, 107kB/s] 
 44%|████▎     | 112k/257k [00:00<00:01, 102kB/s] 
 53%|█████▎    | 136k/257k [00:00<00:01, 99.4kB/s]
 59%|█████▉    | 152k/257k [00:00<00:01, 97.9kB/s]
 65%|██████▌   | 168k/257k [00:01<00:00, 94.7kB/s]
 72%|███████▏  | 184k/257k [00:01<00:00, 92.9kB/s]
 78%|███████▊  | 200k/257k [00:01<00:00, 94.5kB/s]
 84%|████████▍ | 216k/257k [00:01<00:00, 91.3kB/s]
 90%|█████████ | 232k/257k [00:01<00:00, 93.3kB/s]
 97%|█████████▋| 248k/257k [00:02<00:00, 91.6kB/s]
100%|██████████| 257k/257k [00:05<00:00, 52.4kB/s]
