In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Модели и обработка
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_log_error # Используем метрику, близкую к Kaggle (RMSLE -> MSLE)
from sklearn.model_selection import train_test_split # Для оценки на отложенной выборке


In [19]:
# Настройки для визуализации
#%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')
pd.set_option('display.max_columns', None) # Показывать все колонки

In [20]:
# Загрузка данных
train_df = pd.read_csv('train_hw.csv', sep=',')
test_df = pd.read_csv('test_hw.csv', sep=',')

In [21]:
print(f"Размер трейна: {train_df.shape}")
print(f"Размер теста: {test_df.shape}")

Размер трейна: (1168, 81)
Размер теста: (292, 80)


In [56]:
# Сохраним ID для финального файла предсказаний
train_ids = train_df['Id']
test_ids = test_df['Id']

In [16]:
train_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1,190,RL,75.0,10382,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,2fmCon,SLvl,6,5,1958,1958,Hip,CompShg,HdBoard,HdBoard,BrkFace,105.0,TA,Fa,CBlock,TA,TA,Gd,ALQ,513,Unf,0,75,588,GasA,TA,Y,SBrkr,1095,0,0,1095,1,0,1,0,2,1,TA,6,Typ,0,,Attchd,1958.0,RFn,1,264,TA,TA,Y,0,0,0,0,0,0,,,,0,3,2006,ConLD,Normal
1,2,20,RL,,10708,Pave,,IR1,Lvl,AllPub,Inside,Gtl,ClearCr,Norm,Norm,1Fam,1Story,5,5,1955,1993,Hip,CompShg,Wd Sdng,Wd Sdng,,0.0,Gd,TA,CBlock,TA,TA,No,LwQ,379,BLQ,768,470,1617,GasA,Ex,Y,FuseA,1867,0,0,1867,1,0,1,0,2,1,TA,7,Typ,3,Gd,Attchd,1955.0,Fin,1,303,TA,TA,Y,476,0,0,0,142,0,,GdWo,,0,11,2009,COD,Normal
2,3,60,RL,,24682,Pave,,IR3,Lvl,AllPub,CulDSac,Gtl,Gilbert,RRAn,Norm,1Fam,2Story,6,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,841,841,GasA,Ex,Y,SBrkr,892,783,0,1675,0,0,2,1,3,1,TA,7,Typ,1,TA,BuiltIn,1999.0,Fin,2,502,TA,TA,Y,0,103,0,0,0,0,,,,0,6,2009,WD,Normal
3,4,160,RM,42.0,3964,Pave,,Reg,Lvl,AllPub,Inside,Gtl,MeadowV,Norm,Norm,TwnhsE,2Story,6,4,1973,1973,Gable,CompShg,CemntBd,CmentBd,,0.0,TA,TA,CBlock,Gd,TA,No,ALQ,837,Unf,0,105,942,GasA,Gd,Y,SBrkr,1291,1230,0,2521,1,0,2,1,5,1,TA,10,Maj1,1,Gd,Attchd,1973.0,Fin,2,576,TA,TA,Y,728,20,0,0,0,0,,GdPrv,,0,6,2006,WD,Normal
4,5,20,RL,75.0,10125,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,1Story,6,6,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,TA,TA,No,ALQ,641,LwQ,279,276,1196,GasA,TA,Y,SBrkr,1279,0,0,1279,0,1,2,0,3,1,TA,6,Typ,2,Fa,Detchd,1980.0,Unf,2,473,TA,TA,Y,238,83,0,0,0,0,,MnPrv,,0,2,2008,WD,Normal


In [22]:
# Логарифмируем целевую переменную (SalePrice) - ВАЖНЫЙ ШАГ!
# Используем log1p для обработки возможных нулей (хотя в ценах их нет)
train_df['SalePrice'] = np.log1p(train_df['SalePrice'])
y_train_log = train_df['SalePrice']
train_features = train_df.drop('SalePrice', axis=1)
test_features = test_df.copy()

In [23]:
# Объединим трейн и тест для одинаковой обработки признаков
all_features = pd.concat((train_features, test_features)).reset_index(drop=True)
print(f"Размер объединенных данных: {all_features.shape}")


Размер объединенных данных: (1460, 80)


In [25]:

# Определяем типы колонок
numeric_cols = all_features.select_dtypes(include=np.number).columns.tolist()
categorical_cols = all_features.select_dtypes(include='object').columns.tolist()
categorical_cols

['MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'SaleType',
 'SaleCondition']

In [26]:
numeric_cols

['Id',
 'MSSubClass',
 'LotFrontage',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageYrBlt',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MiscVal',
 'MoSold',
 'YrSold']

In [27]:

# Для некоторых категориальных фичей NaN имеет смысл 'None' (нет гаража, подвала и т.д.)
cols_fill_none = [
    'PoolQC', 'MiscFeature', 'Alley', 'Fence', 'MasVnrType', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2'
]
for col in cols_fill_none:
    if col in all_features.columns:
        all_features[col] = all_features[col].fillna('None')


In [None]:
# Оставшиеся категориальные пропуски заполняем модой
for col in categorical_cols:
    if all_features[col].isnull().any():
        mode_val = all_features[col].mode()[0]
        all_features[col] = all_features[col].fillna(mode_val)
        print(f"   - Категориальный столбец '{col}' заполнен модой ('{mode_val}')")


   - Категориальный столбец 'Electrical' заполнен модой ('SBrkr')


In [29]:
# Числовые пропуски заполняем медианой (более устойчива к выбросам)
for col in numeric_cols:
    if all_features[col].isnull().any():
        median_val = all_features[col].median()
        all_features[col] = all_features[col].fillna(median_val)
        print(f"   - Числовой столбец '{col}' заполнен медианой ({median_val:.2f})")


   - Числовой столбец 'LotFrontage' заполнен медианой (69.00)
   - Числовой столбец 'MasVnrArea' заполнен медианой (0.00)
   - Числовой столбец 'GarageYrBlt' заполнен медианой (1980.00)


In [30]:
print(f"\nПропусков после заполнения: {all_features.isnull().sum().sum()}")


Пропусков после заполнения: 0


In [34]:
# Логарифмирование числовых признаков с высокой асимметрией
# Это помогает сделать их распределение более нормальным, что полезно для линейных моделей
skewness = all_features[numeric_cols].apply(lambda x: x.skew()).sort_values(ascending=False)
high_skew = skewness[abs(skewness) > 0.75] # Порог можно подбирать
skewed_features = high_skew.index
print(f"   - Найдено {len(skewed_features)} асимметричных признаков для логарифмирования.")

   - Найдено 12 асимметричных признаков для логарифмирования.


In [35]:
for col in skewed_features:
    all_features[col] = np.log1p(all_features[col])

In [None]:
# Кодирование категориальных признаков (One-Hot Encoding)
all_features_encoded = pd.get_dummies(all_features, columns=categorical_cols, drop_first=True, dtype=int) # Используем dtype=int для экономии памяти
print(f"   - Размер данных после OHE: {all_features_encoded.shape}")




2.3 Кодирование категориальных признаков (One-Hot Encoding)...
   - Размер данных после OHE: (1460, 260)


In [37]:
X_train_processed = all_features_encoded[:len(train_df)]
X_test_processed = all_features_encoded[len(train_df):]

print(f"\nРазмер обработанного трейна: {X_train_processed.shape}")
print(f"Размер обработанного теста: {X_test_processed.shape}")



Размер обработанного трейна: (1168, 260)
Размер обработанного теста: (292, 260)


In [38]:
# StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_processed)
X_test_scaled = scaler.transform(X_test_processed) # Применяем тот же scaler к тесту

In [39]:
# Преобразуем обратно в DataFrame для удобства (хотя для моделей это не обязательно)
# Имена колонок теряются после scaler, создаем новые или используем старые
X_train_scaled_df = pd.DataFrame(X_train_scaled, index=X_train_processed.index, columns=X_train_processed.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, index=X_test_processed.index, columns=X_test_processed.columns)

In [46]:
X_train_scaled_df.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Pave,Alley_None,Alley_Pave,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,Utilities_NoSeWa,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_Stone,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_None,BsmtQual_TA,BsmtCond_Gd,BsmtCond_None,BsmtCond_Po,BsmtCond_TA,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_None,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_None,BsmtFinType1_Rec,BsmtFinType1_Unf,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_None,BsmtFinType2_Rec,BsmtFinType2_Unf,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,CentralAir_Y,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_TA,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_None,FireplaceQu_Po,FireplaceQu_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None,GarageFinish_None,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_None,GarageQual_Po,GarageQual_TA,GarageCond_Fa,GarageCond_Gd,GarageCond_None,GarageCond_Po,GarageCond_TA,PavedDrive_P,PavedDrive_Y,PoolQC_Fa,PoolQC_Gd,PoolQC_None,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_None,MiscFeature_None,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,-1.730569,-1.092671,0.130859,-0.050488,-0.784733,0.414663,-0.060571,-0.721184,0.838219,0.827618,-0.354424,0.027123,0.143065,-0.31724,-0.839023,-0.135024,-1.051651,1.10532,-0.238149,-1.002515,-0.758692,0.20773,-0.198015,-0.922638,-0.958183,-0.239496,0.295763,1.967848,-0.930014,-1.089403,-0.409035,-0.125051,-0.292162,-0.077645,-0.192872,0.619846,0.900748,-0.220168,-0.114059,0.523181,-0.416976,0.071858,0.254405,-0.170514,-0.16512,-0.071858,-1.312483,-0.197853,-0.156721,0.350823,-0.029273,-0.246686,-0.188311,-0.058621,0.623557,-0.22441,-0.088121,-0.041416,-0.092928,-0.20476,-0.125109,-0.328876,-0.188311,-0.261951,-0.228588,-0.159565,-0.110144,-0.190735,2.335174,-0.083045,-0.232705,-0.170514,-0.244727,-0.294129,-0.125109,-0.232705,-0.211477,-0.258199,-0.135309,-0.173154,-0.088121,-0.246686,0.399851,-0.071858,-0.114059,-0.092928,-0.128593,-0.041416,-0.058621,-0.071858,0.114059,-0.029273,-0.041416,-0.029273,-0.029273,-0.041416,-0.138554,-0.195505,-0.173154,-0.299253,-0.106092,0.963012,-0.071858,-0.092928,-0.650119,-0.162364,-0.209258,0.536434,-0.083045,-0.505879,-0.058621,-0.041416,0.14173,0.0,-0.029273,-0.029273,-0.088121,-0.065568,-0.071858,-0.029273,-0.029273,-0.193133,-0.029273,-0.209258,-0.429633,-0.029273,2.358346,-0.276582,-0.041416,-0.128593,-0.735457,-0.404162,-0.128593,-0.050746,-0.065568,-0.135309,-0.029273,-0.20702,-0.404162,-0.088121,2.374103,-0.029273,-0.327275,-0.065568,-0.128593,-0.720316,-0.39262,-0.167836,1.513321,-1.210014,-0.317566,-0.110144,-0.710742,0.790239,-0.138554,-0.319197,-0.029273,0.359991,1.147807,-0.909866,-0.117851,-0.050746,-0.029273,-0.162364,-0.856118,-0.150888,1.129957,-0.218022,-0.150888,-0.041416,0.338384,-0.31593,-0.290678,0.717576,-0.153829,3.055533,-0.634162,-0.234742,-0.150888,-0.325669,-0.65679,-0.150888,-0.106092,-0.178331,-0.153829,-0.18586,0.394071,0.125109,-0.077648,-0.071858,-0.029273,-0.050746,-0.197853,-0.44215,-0.029273,1.560543,0.263813,-0.144841,-0.050746,-0.029273,0.310988,-0.17576,-0.819121,1.008599,-0.058621,-0.156721,-0.147893,-0.097506,-0.029273,0.271163,-0.144841,-0.601087,1.065497,-0.125109,-0.523181,-1.236179,-0.110144,-0.236765,-0.088121,1.67839,-0.244727,-0.244727,-0.627531,1.205729,-0.190735,-0.106092,-0.244727,-0.050746,0.347737,-0.162364,-0.071858,-0.244727,-0.077648,0.324059,-0.150888,0.297551,-0.041416,-0.050746,0.077648,-0.18586,3.025975,-0.092928,-2.111384,0.195505,-0.029273,-0.188311,-0.029273,-0.050746,-0.029273,-0.065568,-0.058621,-0.058621,-0.310988,-0.041416,0.391166,-0.050746,-0.092928,-0.114059,0.476331,-0.31593
1,-1.727603,1.844188,-3.367363,-2.651943,-0.784733,0.414663,-0.060571,-0.721184,1.371061,0.801363,-0.354424,-3.301939,0.030787,-1.151818,1.116728,-0.135024,-0.128486,-0.818694,-0.238149,-1.002515,1.237146,0.20773,-0.198015,0.32247,-0.958183,-0.363545,0.295763,-0.172237,-0.930014,0.583638,-0.409035,-0.125051,-0.292162,-0.077645,-0.192872,-0.48603,0.900748,-0.220168,-0.114059,-1.911383,2.398217,0.071858,0.254405,-0.170514,-0.16512,-0.071858,0.761915,-0.197853,-0.156721,0.350823,-0.029273,-0.246686,-0.188311,-0.058621,0.623557,-0.22441,-0.088121,-0.041416,10.761041,-0.20476,-0.125109,-0.328876,-0.188311,-0.261951,-0.228588,-0.159565,-0.110144,-0.190735,-0.428234,-0.083045,-0.232705,-0.170514,-0.244727,-0.294129,-0.125109,-0.232705,-0.211477,-0.258199,-0.135309,-0.173154,-0.088121,-0.246686,0.399851,-0.071858,-0.114059,-0.092928,-0.128593,-0.041416,-0.058621,-0.071858,0.114059,-0.029273,-0.041416,-0.029273,-0.029273,-0.041416,-0.138554,-0.195505,-0.173154,3.341656,-0.106092,-1.038408,-0.071858,-0.092928,1.538179,-0.162364,-0.209258,0.536434,-0.083045,-0.505879,-0.058621,-0.041416,0.14173,0.0,-0.029273,-0.029273,-0.088121,-0.065568,-0.071858,-0.029273,-0.029273,-0.193133,-0.029273,-0.209258,2.32757,-0.029273,-0.424026,-0.276582,-0.041416,-0.128593,-0.735457,-0.404162,-0.128593,-0.050746,-0.065568,-0.135309,-0.029273,-0.20702,2.474258,-0.088121,-0.421212,-0.029273,-0.327275,-0.065568,-0.128593,-0.720316,-0.39262,-0.167836,-0.660798,0.826436,-0.317566,-0.110144,-0.710742,0.790239,-0.138554,-0.319197,-0.029273,0.359991,1.147807,-0.909866,-0.117851,-0.050746,-0.029273,-0.162364,-0.856118,-0.150888,1.129957,-0.218022,-0.150888,-0.041416,0.338384,-0.31593,-0.290678,0.717576,-0.153829,-0.327275,-0.634162,4.259993,-0.150888,-0.325669,-0.65679,-0.150888,-0.106092,-0.178331,-0.153829,-0.18586,0.394071,0.125109,-0.077648,-0.071858,-0.029273,-0.050746,-0.197853,-0.44215,-0.029273,1.560543,0.263813,-0.144841,-0.050746,-0.029273,0.310988,-0.17576,-0.819121,1.008599,-0.058621,6.380775,-0.147893,-0.097506,-0.029273,-3.687818,-0.144841,-0.601087,1.065497,-0.125109,-0.523181,0.808944,-0.110144,-0.236765,-0.088121,-0.595809,-0.244727,-0.244727,-0.627531,1.205729,-0.190735,-0.106092,-0.244727,-0.050746,0.347737,-0.162364,-0.071858,-0.244727,-0.077648,0.324059,-0.150888,0.297551,-0.041416,-0.050746,0.077648,-0.18586,-0.330472,-0.092928,0.473623,0.195505,-0.029273,-0.188311,-0.029273,-0.050746,-0.029273,-0.065568,-0.058621,-0.058621,-0.310988,-0.041416,0.391166,-0.050746,-0.092928,-0.114059,0.476331,-0.31593
2,-1.724637,0.444838,0.57026,0.166967,-0.072559,0.414663,-0.260328,-1.011697,1.42527,0.759351,2.545494,-3.301939,0.038996,-1.021233,1.209453,-0.135024,0.3664,1.10532,-0.238149,-1.002515,1.237146,1.451981,-0.198015,0.32247,-0.958183,-0.611643,0.295763,-0.162972,1.30526,1.162287,-0.409035,-0.125051,3.443043,-0.077645,-0.192872,-0.117405,0.14873,-0.220168,-0.114059,0.523181,-0.416976,0.071858,0.254405,-0.170514,-0.16512,-0.071858,0.761915,-0.197853,-0.156721,0.350823,-0.029273,-0.246686,-0.188311,-0.058621,0.623557,4.456135,-0.088121,-0.041416,-0.092928,-0.20476,-0.125109,-0.328876,-0.188311,-0.261951,-0.228588,-0.159565,-0.110144,-0.190735,2.335174,-0.083045,-0.232705,-0.170514,-0.244727,-0.294129,-0.125109,-0.232705,-0.211477,-0.258199,-0.135309,-0.173154,-0.088121,-0.246686,0.399851,-0.071858,-0.114059,-0.092928,-0.128593,-0.041416,-0.058621,-0.071858,0.114059,-0.029273,-0.041416,-0.029273,-0.029273,-0.041416,-0.138554,-0.195505,-0.173154,-0.299253,-0.106092,-1.038408,-0.071858,-0.092928,1.538179,-0.162364,-0.209258,0.536434,-0.083045,-0.505879,-0.058621,-0.041416,0.14173,0.0,-0.029273,-0.029273,-0.088121,-0.065568,-0.071858,-0.029273,-0.029273,-0.193133,-0.029273,-0.209258,2.32757,-0.029273,-0.424026,-0.276582,-0.041416,-0.128593,-0.735457,-0.404162,-0.128593,-0.050746,-0.065568,-0.135309,-0.029273,-0.20702,2.474258,-0.088121,-0.421212,-0.029273,-0.327275,-0.065568,-0.128593,-0.720316,-0.39262,-0.167836,1.513321,-1.210014,-0.317566,-0.110144,-0.710742,0.790239,-0.138554,-0.319197,-0.029273,0.359991,1.147807,-0.909866,-0.117851,-0.050746,-0.029273,-0.162364,-0.856118,-0.150888,1.129957,-0.218022,-0.150888,-0.041416,0.338384,3.16526,-0.290678,-1.39358,-0.153829,-0.327275,1.576885,-0.234742,-0.150888,-0.325669,-0.65679,-0.150888,-0.106092,5.607535,-0.153829,-0.18586,-2.537615,0.125109,-0.077648,-0.071858,-0.029273,-0.050746,-0.197853,-0.44215,-0.029273,1.560543,0.263813,-0.144841,-0.050746,-0.029273,0.310988,-0.17576,-0.819121,1.008599,-0.058621,-0.156721,-0.147893,-0.097506,-0.029273,0.271163,-0.144841,-0.601087,1.065497,-0.125109,-0.523181,0.808944,-0.110144,-0.236765,-0.088121,-0.595809,-0.244727,-0.244727,1.593548,-0.829374,-0.190735,-0.106092,-0.244727,-0.050746,0.347737,-0.162364,-0.071858,-0.244727,-0.077648,0.324059,-0.150888,0.297551,-0.041416,-0.050746,0.077648,-0.18586,-0.330472,-0.092928,0.473623,0.195505,-0.029273,-0.188311,-0.029273,-0.050746,-0.029273,-0.065568,-0.058621,-0.058621,-0.310988,-0.041416,0.391166,-0.050746,-0.092928,-0.114059,0.476331,-0.31593
3,-1.721671,1.432378,0.130859,-1.374147,-0.072559,-0.500887,0.90492,0.682963,1.488175,0.848957,-0.354424,0.215966,0.2239,0.301786,-0.839023,-0.135024,-0.454086,1.10532,-0.238149,0.807281,-0.758692,-1.03652,-0.198015,-0.922638,-0.958183,0.835598,0.295763,-0.348261,-0.930014,1.560981,-0.409035,-0.125051,-0.292162,-0.077645,-0.192872,-0.117405,0.900748,-0.220168,-0.114059,-1.911383,2.398217,0.071858,0.254405,-0.170514,-0.16512,-0.071858,0.761915,-0.197853,-0.156721,0.350823,-0.029273,-0.246686,5.310367,-0.058621,-1.603704,-0.22441,-0.088121,-0.041416,-0.092928,-0.20476,-0.125109,-0.328876,-0.188311,-0.261951,-0.228588,-0.159565,-0.110144,5.242881,-0.428234,-0.083045,-0.232705,-0.170514,-0.244727,-0.294129,-0.125109,-0.232705,-0.211477,-0.258199,-0.135309,-0.173154,-0.088121,-0.246686,0.399851,-0.071858,-0.114059,-0.092928,-0.128593,-0.041416,-0.058621,-0.071858,0.114059,-0.029273,-0.041416,-0.029273,-0.029273,-0.041416,-0.138554,-0.195505,-0.173154,3.341656,-0.106092,0.963012,-0.071858,-0.092928,-0.650119,-0.162364,-0.209258,-1.864161,-0.083045,1.976756,-0.058621,-0.041416,0.14173,0.0,-0.029273,-0.029273,-0.088121,-0.065568,-0.071858,-0.029273,-0.029273,-0.193133,-0.029273,-0.209258,-0.429633,-0.029273,-0.424026,-0.276582,-0.041416,-0.128593,1.359699,-0.404162,-0.128593,-0.050746,-0.065568,-0.135309,-0.029273,-0.20702,-0.404162,-0.088121,-0.421212,-0.029273,-0.327275,-0.065568,-0.128593,1.388279,-0.39262,-0.167836,1.513321,-1.210014,-0.317566,-0.110144,-0.710742,0.790239,-0.138554,-0.319197,-0.029273,0.359991,-0.871227,1.099063,-0.117851,-0.050746,-0.029273,-0.162364,-0.856118,-0.150888,-0.884989,-0.218022,-0.150888,-0.041416,0.338384,-0.31593,-0.290678,0.717576,-0.153829,-0.327275,1.576885,-0.234742,-0.150888,-0.325669,-0.65679,-0.150888,-0.106092,-0.178331,-0.153829,-0.18586,0.394071,0.125109,-0.077648,-0.071858,-0.029273,-0.050746,-0.197853,-0.44215,-0.029273,-0.640803,0.263813,-0.144841,-0.050746,-0.029273,0.310988,-0.17576,-0.819121,1.008599,-0.058621,-0.156721,-0.147893,-0.097506,-0.029273,0.271163,-0.144841,-0.601087,1.065497,-0.125109,-0.523181,0.808944,-0.110144,-0.236765,-0.088121,-0.595809,-0.244727,-0.244727,-0.627531,-0.829374,-0.190735,-0.106092,-0.244727,-0.050746,0.347737,-0.162364,-0.071858,-0.244727,-0.077648,0.324059,-0.150888,0.297551,-0.041416,-0.050746,0.077648,-0.18586,-0.330472,-0.092928,0.473623,0.195505,-0.029273,-0.188311,-0.029273,-0.050746,-0.029273,-0.065568,-0.058621,-0.058621,-0.310988,-0.041416,0.391166,-0.050746,-0.092928,-0.114059,0.476331,-0.31593
4,-1.718705,0.853708,0.130859,0.901,-0.784733,1.330212,-0.260328,-1.011697,0.884612,0.894862,-0.354424,-0.042566,0.199624,0.114252,-0.839023,-0.135024,-0.635119,1.10532,-0.238149,-1.002515,-0.758692,0.20773,-0.198015,-0.300084,0.589753,-0.611643,0.295763,0.031581,1.163836,-1.089403,-0.409035,-0.125051,-0.292162,-0.077645,-0.192872,-0.854656,1.652766,-0.220168,-0.114059,0.523181,-0.416976,0.071858,0.254405,-0.170514,-0.16512,-0.071858,-1.312483,-0.197853,-0.156721,0.350823,-0.029273,-0.246686,-0.188311,-0.058621,-1.603704,-0.22441,-0.088121,-0.041416,-0.092928,-0.20476,-0.125109,-0.328876,-0.188311,-0.261951,-0.228588,-0.159565,-0.110144,-0.190735,2.335174,-0.083045,-0.232705,-0.170514,-0.244727,-0.294129,-0.125109,-0.232705,-0.211477,-0.258199,-0.135309,-0.173154,-0.088121,-0.246686,0.399851,-0.071858,-0.114059,-0.092928,-0.128593,-0.041416,-0.058621,-0.071858,0.114059,-0.029273,-0.041416,-0.029273,-0.029273,-0.041416,-0.138554,-0.195505,-0.173154,-0.299253,-0.106092,-1.038408,-0.071858,-0.092928,-0.650119,-0.162364,4.77878,-1.864161,-0.083045,1.976756,-0.058621,-0.041416,0.14173,0.0,-0.029273,-0.029273,-0.088121,-0.065568,-0.071858,-0.029273,-0.029273,-0.193133,-0.029273,-0.209258,-0.429633,-0.029273,-0.424026,-0.276582,-0.041416,-0.128593,-0.735457,2.474258,-0.128593,-0.050746,-0.065568,-0.135309,-0.029273,-0.20702,2.474258,-0.088121,-0.421212,-0.029273,-0.327275,-0.065568,-0.128593,-0.720316,-0.39262,-0.167836,1.513321,-1.210014,-0.317566,-0.110144,-0.710742,0.790239,-0.138554,-0.319197,-0.029273,0.359991,-0.871227,1.099063,-0.117851,-0.050746,-0.029273,-0.162364,-0.856118,-0.150888,1.129957,-0.218022,-0.150888,-0.041416,0.338384,-0.31593,-0.290678,-1.39358,-0.153829,-0.327275,1.576885,-0.234742,-0.150888,-0.325669,-0.65679,-0.150888,-0.106092,-0.178331,-0.153829,-0.18586,0.394071,0.125109,-0.077648,-0.071858,-0.029273,-0.050746,-0.197853,-0.44215,-0.029273,1.560543,0.263813,-0.144841,-0.050746,-0.029273,0.310988,-0.17576,-0.819121,1.008599,-0.058621,-0.156721,-0.147893,-0.097506,-0.029273,0.271163,-0.144841,1.663653,-0.938529,-0.125109,-0.523181,0.808944,-0.110144,-0.236765,-0.088121,-0.595809,-0.244727,-0.244727,1.593548,-0.829374,-0.190735,-0.106092,-0.244727,-0.050746,0.347737,-0.162364,-0.071858,-0.244727,-0.077648,0.324059,-0.150888,0.297551,-0.041416,-0.050746,0.077648,-0.18586,-0.330472,-0.092928,0.473623,0.195505,-0.029273,-0.188311,-0.029273,-0.050746,-0.029273,-0.065568,-0.058621,-0.058621,-0.310988,-0.041416,0.391166,-0.050746,-0.092928,-0.114059,0.476331,-0.31593


In [47]:
lr = LinearRegression()
lr.fit(X_train_scaled, y_train_log)

In [48]:
# RidgeCV автоматически подбирает лучший alpha из списка с помощью кросс-валидации
alphas_ridge = [0.01, 0.1, 1.0, 5.0, 10.0, 20.0, 50.0, 100.0] # Примерный набор alpha
ridge_cv = RidgeCV(alphas=alphas_ridge, cv=5) # cv=5 означает 5-fold Cross-Validation
ridge_cv.fit(X_train_scaled, y_train_log)
print(f"     - Лучший alpha для Ridge: {ridge_cv.alpha_}")

     - Лучший alpha для Ridge: 100.0


In [49]:
#Lasso Регрессия (L1 регуляризация)
alphas_lasso = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] # Lasso требует меньшие alpha обычно
lasso_cv = LassoCV(alphas=alphas_lasso, cv=5, max_iter=5000, random_state=42) # Увеличим max_iter для сходимости
lasso_cv.fit(X_train_scaled, y_train_log)
print(f"     - Лучший alpha для Lasso: {lasso_cv.alpha_}")

     - Лучший alpha для Lasso: 0.005


In [50]:
# Оценка моделей (на трейне)

In [51]:
def rmsle(y_true_log, y_pred_log):
    # Используем MSLE, так как RMSLE = sqrt(MSLE)
    # Предсказания и таргет уже в логарифмах
    return mean_squared_log_error(np.expm1(y_true_log), np.expm1(y_pred_log))



In [53]:
pred_lr_log = lr.predict(X_train_scaled)
pred_ridge_log = ridge_cv.predict(X_train_scaled)
pred_lasso_log = lasso_cv.predict(X_train_scaled)

msle_lr = rmsle(y_train_log, pred_lr_log)
msle_ridge = rmsle(y_train_log, pred_ridge_log)
msle_lasso = rmsle(y_train_log, pred_lasso_log)
print(f"   - Linear Regression MSLE (train): {msle_lr:.5f} (RMSLE: {np.sqrt(msle_lr):.5f})")
print(f"   - RidgeCV MSLE (train):           {msle_ridge:.5f} (RMSLE: {np.sqrt(msle_ridge):.5f})")
print(f"   - LassoCV MSLE (train):           {msle_lasso:.5f} (RMSLE: {np.sqrt(msle_lasso):.5f})")


   - Linear Regression MSLE (train): 0.00819 (RMSLE: 0.09049)
   - RidgeCV MSLE (train):           0.01067 (RMSLE: 0.10332)
   - LassoCV MSLE (train):           0.01362 (RMSLE: 0.11673)


In [54]:
best_model = ridge_cv
print(f"\n   - Выбрана модель: RidgeCV (alpha={best_model.alpha_})")


   - Выбрана модель: RidgeCV (alpha=100.0)


In [None]:
# Предсказываем на масштабированных тестовых данных
test_predictions_log = best_model.predict(X_test_scaled)

# Возвращаем предсказания из логарифмической шкалы в обычную
test_predictions = np.expm1(test_predictions_log)

# Убедимся, что нет отрицательных предсказаний (хотя маловероятно после expm1)
test_predictions[test_predictions < 0] = 0

# Создаем DataFrame для отправки
submission_df = pd.DataFrame({
    'Id': test_ids,
    'SalePrice': test_predictions
})


In [None]:
submission_filename = 'submission_simplified_ridge.csv'
submission_df.to_csv(submission_filename, index=False)

print(f"   - Файл предсказаний '{submission_filename}' успешно сохранен.")
print(f"   - Пример предсказаний:\n{submission_df.head()}")



   - Файл предсказаний 'submission_simplified_ridge.csv' успешно сохранен.
   - Пример предсказаний:
   Id      SalePrice
0   1  144252.118591
1   2  193839.330062
2   3  170875.259179
3   4  169768.801385
4   5  164963.195069
