In [32]:
import numpy as np
import pandas as pd
import patsy
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.model_selection import cross_val_score
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [33]:
train = pd.read_csv('./train.csv')

In [34]:
y = train['SalePrice']
X = train.drop(['SalePrice'], axis = "columns")

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

In [9]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1025, 80) (1025,)
(1026, 80) (1026,)


In [10]:
null_count = X_train.isnull().sum().to_frame('nulls')

In [11]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1025 entries, 1620 to 1839
Data columns (total 80 columns):
Id                 1025 non-null int64
PID                1025 non-null int64
MS SubClass        1025 non-null int64
MS Zoning          1025 non-null object
Lot Frontage       866 non-null float64
Lot Area           1025 non-null int64
Street             1025 non-null object
Alley              73 non-null object
Lot Shape          1025 non-null object
Land Contour       1025 non-null object
Utilities          1025 non-null object
Lot Config         1025 non-null object
Land Slope         1025 non-null object
Neighborhood       1025 non-null object
Condition 1        1025 non-null object
Condition 2        1025 non-null object
Bldg Type          1025 non-null object
House Style        1025 non-null object
Overall Qual       1025 non-null int64
Overall Cond       1025 non-null int64
Year Built         1025 non-null int64
Year Remod/Add     1025 non-null int64
Roof Style         1

In [12]:
mask = null_count['nulls'] != 0
null_list = null_count[mask]
null_list

Unnamed: 0,nulls
Lot Frontage,159
Alley,952
Mas Vnr Type,9
Mas Vnr Area,9
Bsmt Qual,28
Bsmt Cond,28
Bsmt Exposure,30
BsmtFin Type 1,28
BsmtFin SF 1,1
BsmtFin Type 2,29


In [13]:
for i in range(80): 
    if [X_train.iloc[: ,i].dtypes!=object] == True:
        mean = X_train.iloc[: ,i].mean()
        X_train.iloc[: ,i].fillna(mean, inplace=True)
    else:
        pass


In [14]:
str_cols = X_train.columns[X_train.dtypes==object]
#X_train[str_cols] = X_train[str_cols].fillna('None')
#X_train.fillna(0,inplace=True)

In [15]:
str_cols

Index(['MS Zoning', 'Street', 'Alley', 'Lot Shape', 'Land Contour',
       'Utilities', 'Lot Config', 'Land Slope', 'Neighborhood', 'Condition 1',
       'Condition 2', 'Bldg Type', 'House Style', 'Roof Style', 'Roof Matl',
       'Exterior 1st', 'Exterior 2nd', 'Mas Vnr Type', 'Exter Qual',
       'Exter Cond', 'Foundation', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure',
       'BsmtFin Type 1', 'BsmtFin Type 2', 'Heating', 'Heating QC',
       'Central Air', 'Electrical', 'Kitchen Qual', 'Functional',
       'Fireplace Qu', 'Garage Type', 'Garage Finish', 'Garage Qual',
       'Garage Cond', 'Paved Drive', 'Pool QC', 'Fence', 'Misc Feature',
       'Sale Type'],
      dtype='object')

In [16]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1025 entries, 1620 to 1839
Data columns (total 80 columns):
Id                 1025 non-null int64
PID                1025 non-null int64
MS SubClass        1025 non-null int64
MS Zoning          1025 non-null object
Lot Frontage       866 non-null float64
Lot Area           1025 non-null int64
Street             1025 non-null object
Alley              73 non-null object
Lot Shape          1025 non-null object
Land Contour       1025 non-null object
Utilities          1025 non-null object
Lot Config         1025 non-null object
Land Slope         1025 non-null object
Neighborhood       1025 non-null object
Condition 1        1025 non-null object
Condition 2        1025 non-null object
Bldg Type          1025 non-null object
House Style        1025 non-null object
Overall Qual       1025 non-null int64
Overall Cond       1025 non-null int64
Year Built         1025 non-null int64
Year Remod/Add     1025 non-null int64
Roof Style         1

In [17]:
X_train.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
1620,815,906226140,90,RL,64.0,7007,Pave,,Reg,Bnk,...,0,0,0,,,,0,6,2009,WD
1485,283,908225320,50,RL,52.0,8741,Pave,,Reg,Lvl,...,0,0,0,,MnPrv,,0,4,2010,WD
840,1924,535177020,20,RL,70.0,9100,Pave,,Reg,Lvl,...,0,0,0,,,,0,6,2007,WD
475,2900,916475100,20,RL,85.0,14331,Pave,,Reg,Lvl,...,0,0,0,,,,0,5,2006,WD
281,1075,528186130,120,RL,89.0,8232,Pave,,IR1,Lvl,...,0,0,0,,,,0,6,2008,New


In [18]:
str_cols = X_train.columns[X_train.dtypes==object]
X_train[str_cols] = X_train[str_cols].fillna('AANone')
X_train.fillna(0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [19]:
X_train.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
1620,815,906226140,90,RL,64.0,7007,Pave,AANone,Reg,Bnk,...,0,0,0,AANone,AANone,AANone,0,6,2009,WD
1485,283,908225320,50,RL,52.0,8741,Pave,AANone,Reg,Lvl,...,0,0,0,AANone,MnPrv,AANone,0,4,2010,WD
840,1924,535177020,20,RL,70.0,9100,Pave,AANone,Reg,Lvl,...,0,0,0,AANone,AANone,AANone,0,6,2007,WD
475,2900,916475100,20,RL,85.0,14331,Pave,AANone,Reg,Lvl,...,0,0,0,AANone,AANone,AANone,0,5,2006,WD
281,1075,528186130,120,RL,89.0,8232,Pave,AANone,IR1,Lvl,...,0,0,0,AANone,AANone,AANone,0,6,2008,New


In [20]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1025 entries, 1620 to 1839
Data columns (total 80 columns):
Id                 1025 non-null int64
PID                1025 non-null int64
MS SubClass        1025 non-null int64
MS Zoning          1025 non-null object
Lot Frontage       1025 non-null float64
Lot Area           1025 non-null int64
Street             1025 non-null object
Alley              1025 non-null object
Lot Shape          1025 non-null object
Land Contour       1025 non-null object
Utilities          1025 non-null object
Lot Config         1025 non-null object
Land Slope         1025 non-null object
Neighborhood       1025 non-null object
Condition 1        1025 non-null object
Condition 2        1025 non-null object
Bldg Type          1025 non-null object
House Style        1025 non-null object
Overall Qual       1025 non-null int64
Overall Cond       1025 non-null int64
Year Built         1025 non-null int64
Year Remod/Add     1025 non-null int64
Roof Style       

In [21]:
for col in ['Low Qual Fin SF', 'Gr Liv Area', 'Lot Area', '1st Flr SF', '2nd Flr SF', 'Wood Deck SF', 'Open Porch SF', 'Enclosed Porch', '3Ssn Porch', 'Screen Porch', 'Pool Area', 'Misc Val']:
    X_train[col] = X_train[col].astype('float64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [22]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1025 entries, 1620 to 1839
Data columns (total 80 columns):
Id                 1025 non-null int64
PID                1025 non-null int64
MS SubClass        1025 non-null int64
MS Zoning          1025 non-null object
Lot Frontage       1025 non-null float64
Lot Area           1025 non-null float64
Street             1025 non-null object
Alley              1025 non-null object
Lot Shape          1025 non-null object
Land Contour       1025 non-null object
Utilities          1025 non-null object
Lot Config         1025 non-null object
Land Slope         1025 non-null object
Neighborhood       1025 non-null object
Condition 1        1025 non-null object
Condition 2        1025 non-null object
Bldg Type          1025 non-null object
House Style        1025 non-null object
Overall Qual       1025 non-null int64
Overall Cond       1025 non-null int64
Year Built         1025 non-null int64
Year Remod/Add     1025 non-null int64
Roof Style     

In [24]:
int_cols = X_train.columns[X_train.dtypes=='int64']

In [25]:
int_cols

Index(['MS SubClass', 'Overall Qual', 'Overall Cond', 'Year Built',
       'Year Remod/Add', 'Full Bath', 'Half Bath', 'Bedroom AbvGr',
       'Kitchen AbvGr', 'TotRms AbvGrd', 'Fireplaces', 'Mo Sold', 'Yr Sold'],
      dtype='object')

In [26]:
X_train[int_cols] = X_train[int_cols].astype('object')

In [28]:
len(X_train.columns)

78

**Cleaning Done**

**Cleaning Function**

In [36]:
def clean_me(df):
    #Replaces Null values outside of object columns 
    for i in range(len(df.columns)): 
        if [df.iloc[: ,i].dtypes!=object] == True:
            mean = df.iloc[: ,i].mean()
            df.iloc[: ,i].fillna(mean, inplace=True)
        else:
            pass
        #Replaces other null values with AANone
    str_cols = df.columns[df.dtypes==object]
    df[str_cols] = df[str_cols].fillna('AANone')
    df.fillna(0,inplace=True)
        #Change selected columns from Int to float
    for col in ['Low Qual Fin SF', 'Gr Liv Area', 'Lot Area', '1st Flr SF', '2nd Flr SF',
                'Wood Deck SF', 'Open Porch SF', 'Enclosed Porch', '3Ssn Porch', 'Screen Porch', 'Pool Area', 'Misc Val']:
                df[col] = df[col].astype('float64')
        #Makes remaining int colomns objects 
    int_cols = df.columns[df.dtypes=='int64']
    df[int_cols] = df[int_cols].astype('object')
    df = df.drop(['Id', 'PID'], axis=1)
    return df

In [37]:
X_train.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
1579,2742,905452140,50,RL,75.0,9525,Pave,,Reg,Lvl,...,0,0,0,,,,0,10,2006,WD
1251,1316,902306130,70,RM,60.0,9600,Pave,,Reg,Lvl,...,0,0,0,,,,0,10,2008,WD
1857,1434,907135260,20,RL,57.0,9245,Pave,,IR2,Lvl,...,0,0,0,,,,0,2,2008,WD
798,312,914476520,20,RL,129.0,9196,Pave,,IR1,Lvl,...,0,0,0,,,,0,4,2010,WD
561,956,916176030,20,RL,,14375,Pave,,IR1,Lvl,...,0,233,0,,,,0,1,2009,COD


In [38]:
clean_me(X_train).head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
1579,50,RL,75.0,9525.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,10,2006,WD
1251,70,RM,60.0,9600.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,10,2008,WD
1857,20,RL,57.0,9245.0,Pave,AANone,IR2,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,2,2008,WD
798,20,RL,129.0,9196.0,Pave,AANone,IR1,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,4,2010,WD
561,20,RL,0.0,14375.0,Pave,AANone,IR1,Lvl,NoSeWa,CulDSac,...,0.0,233.0,0.0,AANone,AANone,AANone,0.0,1,2009,COD


In [None]:
X_train.info()

In [39]:
clean_me(X_test).head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
720,20,RL,80.0,9547.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,7,2007,WD
997,20,RL,65.0,8450.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,MnPrv,AANone,0.0,11,2006,COD
1581,60,RL,82.0,9430.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,180.0,0.0,AANone,AANone,AANone,0.0,7,2009,WD
1712,70,RM,50.0,9060.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,MnPrv,AANone,0.0,3,2007,WD
849,20,RL,0.0,10456.0,Pave,AANone,IR1,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,5,2010,WD


**Linear Regression Numbers**

In [40]:
test = pd.read_csv('./test.csv')

In [41]:
test_id = test['Id']

In [42]:
clean_me(test)

Unnamed: 0,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,190,RM,69.0,9142.0,Pave,Grvl,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,4,2006,WD
1,90,RL,0.0,9662.0,Pave,AANone,IR1,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,8,2006,WD
2,60,RL,58.0,17104.0,Pave,AANone,IR1,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,9,2006,New
3,30,RM,60.0,8520.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,7,2007,WD
4,20,RL,0.0,9500.0,Pave,AANone,IR1,Lvl,AllPub,Inside,...,0.0,185.0,0.0,AANone,AANone,AANone,0.0,7,2009,WD
5,160,RM,21.0,1890.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,6,2010,WD
6,20,RM,52.0,8516.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,5,2008,WD
7,20,RL,0.0,9286.0,Pave,AANone,IR1,Lvl,AllPub,CulDSac,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,10,2009,WD
8,160,FV,39.0,3515.0,Pave,Pave,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,AANone,AANone,0.0,1,2010,WD
9,20,RL,75.0,10125.0,Pave,AANone,Reg,Lvl,AllPub,Inside,...,0.0,0.0,0.0,AANone,MnPrv,AANone,0.0,2,2008,WD


In [333]:
float_cols = X_train.columns[X_train.dtypes=='float64']
num_X_train = X_train[float_cols]

In [334]:
float_cols = X_test.columns[X_test.dtypes=='float64']
num_X_test = X_test[float_cols]

In [346]:
float_cols = test.columns[test.dtypes=='float64']
num_test = test[float_cols]

In [335]:
slr = LinearRegression()
slr.fit(num_X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [336]:
print(slr.score(num_X_test, y_test))

0.6453783148657568


In [347]:
model =lr.fit(num_X_train, y_train)
y_hat = model.predict(num_test)

In [361]:
y_hat

array([171364.2180142 , 235724.37803808, 162939.84714622, 115388.36355483,
       210879.13884148,  87797.85285988, 105438.58272157, 133331.7264873 ,
       175910.7603706 , 181308.27051106, 160656.90339758, 135310.45119475,
       171496.85248588, 327327.73457504, 152558.1598886 , 106584.9149097 ,
       171424.67561288, 124672.20716271, 245769.62679878, 194581.23832728,
       134452.13951968, 158148.43731508, 215133.86250518, 160585.40761393,
       171219.16924362,  98959.36416841, 141470.81174676, 121002.93139551,
       163598.65994532,  44473.91397125, 104905.05948729, 102090.97793872,
       280415.23523349, 172202.96906256, 215747.96598764, 147592.63032012,
       153303.68209683,  75147.79620219,  83947.82554632, 191602.45707499,
       147489.62353372, 176917.54963707, 173768.37732472, 167529.17335845,
       232532.5543174 ,  90703.84208578, 209079.18051281, 123716.97560018,
       113412.21551904, 119391.92001135, 105976.97007817, 199591.06686341,
       244185.11821119, 1

In [360]:
num_trial = pd.DataFrame(y_hat, index = test_id)
num_trial.columns = ['SalePrice']
num_trial.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
2658,171364.218014
2718,235724.378038
2414,162939.847146
1989,115388.363555
625,210879.138841


In [363]:
num_trial.to_csv("./num_trial.csv")

In [340]:
coefs = pd.DataFrame([model.coef_], columns=num_X_train.columns)
coefs.T

Unnamed: 0,0
Lot Frontage,108.427736
Lot Area,0.035611
Mas Vnr Area,59.893763
BsmtFin SF 1,21.756085
BsmtFin SF 2,10.590919
Bsmt Unf SF,3.027448
Total Bsmt SF,35.374453
1st Flr SF,41.41754
2nd Flr SF,36.547534
Low Qual Fin SF,-49.232246


**Linear Regression Numbers 2nd attampt**

In [130]:
cor_df = num_X_train

NameError: name 'num_X_train' is not defined

In [365]:
cor_df['sales'] = y_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [370]:
cor_num = cor_df.corr()[['sales']]

In [371]:
cor_num

Unnamed: 0,sales
Lot Frontage,0.1792
Lot Area,0.250532
Mas Vnr Area,0.535928
BsmtFin SF 1,0.416168
BsmtFin SF 2,0.025242
Bsmt Unf SF,0.18603
Total Bsmt SF,0.662023
1st Flr SF,0.651799
2nd Flr SF,0.275068
Low Qual Fin SF,-0.054607


In [373]:
mask = abs(cor_num['sales']) > .5 
new_list = cor_num[mask]
new_list

Unnamed: 0,sales
Mas Vnr Area,0.535928
Total Bsmt SF,0.662023
1st Flr SF,0.651799
Gr Liv Area,0.723401
Garage Cars,0.648702
Garage Area,0.654627
sales,1.0


In [374]:
num_X_train_2 = num_X_train[['Mas Vnr Area', 'Total Bsmt SF', '1st Flr SF', 'Gr Liv Area', 'Garage Cars', 'Garage Area']]

In [375]:
num_X_test_2 = num_X_test[['Mas Vnr Area', 'Total Bsmt SF', '1st Flr SF', 'Gr Liv Area', 'Garage Cars', 'Garage Area']]

In [376]:
num_test_2 = num_test[['Mas Vnr Area', 'Total Bsmt SF', '1st Flr SF', 'Gr Liv Area', 'Garage Cars', 'Garage Area']]

In [378]:
slr.fit(num_X_train_2, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [380]:
print(slr.score(num_X_test_2, y_test))

0.6166057608771992


In [381]:
model =lr.fit(num_X_train_2, y_train)
y_hat = model.predict(num_test_2)

In [382]:
num_trial_2 = pd.DataFrame(y_hat, index = test_id)
num_trial_2.columns = ['SalePrice']
num_trial_2.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
2658,188290.083192
2718,265923.010941
2414,150810.389689
1989,137557.887548
625,210518.029107


In [383]:
num_trial_2.to_csv("./num_trial_2.csv")

**Polynomial Variables from second Numbers**

In [399]:
poly = PolynomialFeatures(include_bias = False)
X_poly = poly.fit_transform(num_X_train_2)

In [400]:
poly = PolynomialFeatures(include_bias = False)
test_poly = poly.fit_transform(num_test_2)

In [401]:
ss = StandardScaler()
ss.fit(X_poly)
X_scaled = ss.transform(X_poly)

In [402]:
ss = StandardScaler()
ss.fit(test_poly)
test_scaled = ss.transform(test_poly)

In [403]:
lr.fit(X_scaled, y_train)
lr.score(X_scaled, y_train)

0.8069073477493527

In [406]:
model =slr.fit(X_scaled, y_train)
y_hat = model.predict(test_scaled)

In [407]:
y_hat

array([175391.78442994, 274015.85100384, 160743.9254336 , 136433.28828036,
       205635.93034983, 113157.15505099, 130865.75689708, 152881.71322823,
       176864.05876616, 172070.57511927, 167661.95270392, 144743.9242907 ,
       185581.7248876 , 321603.84144112, 159257.87015643,  89078.53456812,
       153837.11736767, 134043.06983794, 212994.85433942, 183277.87957678,
       132517.88252029, 113021.83825433, 221333.56666354, 136605.12914067,
       169533.48288287, 110291.69944968, 159980.96832188, 144880.92593425,
       147773.04617867,  75367.64925099,  93312.33619157, 140879.34173456,
       302579.72639704, 161819.66312676, 222286.3478919 , 159745.35234147,
       157771.45164578,  89724.70974346,  88477.90674185, 190243.74915615,
       139090.12270344, 192822.09529514, 179369.23114758, 153987.84846464,
       224670.51748189, 103661.02097834, 200955.61510282, 119384.46899037,
       119351.5869215 , 130476.5032869 , 133582.89365776, 194432.48890144,
       269343.92315757, 1

In [408]:
poly_trial = pd.DataFrame(y_hat, index = test_id)
poly_trial.columns = ['SalePrice']
poly_trial.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
2658,175391.78443
2718,274015.851004
2414,160743.925434
1989,136433.28828
625,205635.93035


In [409]:
poly_trial.to_csv("./poly_trial.csv")