# Linear Regression Practice

Adapted from http://www.ritchieng.com/machine-learning-linear-regression/

## 1. Reading Data using Pandas

### 1.1 Importing libraries

In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing

### 1.2 Analyzing correlation with SalePrice

In [2]:
# Reading training data
data = pd.read_csv('data/train.csv', index_col=0)
#Getting correlation matrix
corrmat = data.corr().abs()
# Ordering parameters acording correlation
corrmat2=corrmat['SalePrice'].sort_values(ascending=False)
corrmat2

SalePrice        1.000000
OverallQual      0.790982
GrLivArea        0.708624
GarageCars       0.640409
GarageArea       0.623431
TotalBsmtSF      0.613581
1stFlrSF         0.605852
FullBath         0.560664
TotRmsAbvGrd     0.533723
YearBuilt        0.522897
YearRemodAdd     0.507101
GarageYrBlt      0.486362
MasVnrArea       0.477493
Fireplaces       0.466929
BsmtFinSF1       0.386420
LotFrontage      0.351799
WoodDeckSF       0.324413
2ndFlrSF         0.319334
OpenPorchSF      0.315856
HalfBath         0.284108
LotArea          0.263843
BsmtFullBath     0.227122
BsmtUnfSF        0.214479
BedroomAbvGr     0.168213
KitchenAbvGr     0.135907
EnclosedPorch    0.128578
ScreenPorch      0.111447
PoolArea         0.092404
MSSubClass       0.084284
OverallCond      0.077856
MoSold           0.046432
3SsnPorch        0.044584
YrSold           0.028923
LowQualFinSF     0.025606
MiscVal          0.021190
BsmtHalfBath     0.016844
BsmtFinSF2       0.011378
Name: SalePrice, dtype: float64

In [3]:
# List of parameters with correlation greater than a threshold
Ncvar=corrmat2[corrmat2<0.35].index

### 1.3 Merging training and test datasets for pre-processing

In [4]:
# Extracting targets
Y=data['SalePrice']
# Separating input parameters
data=data.drop(columns=['SalePrice'])
# Reading test data
tdata = pd.read_csv('data/test.csv', index_col=0)
# Merging train and test datasets for pre-processing
frames = [data, tdata]
adata = pd.concat(frames)
adata.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,,0,2,2008,WD,Normal
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,,0,5,2007,WD,Normal
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,9,2008,WD,Normal
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,,0,2,2006,WD,Abnorml
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,,0,12,2008,WD,Normal


## 2. Finding and deleting non-relevant parameters

### 2.1 Analyzing by percentage of Nan content

In [5]:
#Analysing missing data in parameters
total = adata.isnull().sum().sort_values(ascending=False)
percent = (adata.isnull().sum()/adata.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data.head(20)

Unnamed: 0,Total,Percent
PoolQC,2909,0.996574
MiscFeature,2814,0.964029
Alley,2721,0.932169
Fence,2348,0.804385
FireplaceQu,1420,0.486468
LotFrontage,486,0.166495
GarageCond,159,0.054471
GarageQual,159,0.054471
GarageYrBlt,159,0.054471
GarageFinish,159,0.054471


### 2.2 Deleting parameters

In [6]:
# Dropping parameters by Nan content
adata=adata.drop(columns=['PoolQC','MiscFeature','Alley','Fence','FireplaceQu'])
#Dropping parameters by low correlation
adata=adata.drop(columns=Ncvar)

In [7]:
adata.head()

Unnamed: 0_level_0,MSZoning,LotFrontage,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,RL,65.0,Pave,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,...,Attchd,2003.0,RFn,2.0,548.0,TA,TA,Y,WD,Normal
2,RL,80.0,Pave,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,...,Attchd,1976.0,RFn,2.0,460.0,TA,TA,Y,WD,Normal
3,RL,68.0,Pave,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,...,Attchd,2001.0,RFn,2.0,608.0,TA,TA,Y,WD,Normal
4,RL,60.0,Pave,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,...,Detchd,1998.0,Unf,3.0,642.0,TA,TA,Y,WD,Abnorml
5,RL,84.0,Pave,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,...,Attchd,2000.0,RFn,3.0,836.0,TA,TA,Y,WD,Normal


## 2. Filling lost data

In [8]:
# Searching at each parameter for Nan data 
for var in adata:
        if adata[var].dtype!='O':
            # For int and floats we use the mean
            adata[var].fillna((adata[var].mean()), inplace=True)
        else:
            # For string data we use the mode
            adata[var].fillna((adata[var].mode()[0]), inplace=True)
adata_m=adata
adata_m.head()

Unnamed: 0_level_0,MSZoning,LotFrontage,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,...,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,RL,65.0,Pave,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,...,Attchd,2003.0,RFn,2.0,548.0,TA,TA,Y,WD,Normal
2,RL,80.0,Pave,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,...,Attchd,1976.0,RFn,2.0,460.0,TA,TA,Y,WD,Normal
3,RL,68.0,Pave,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,...,Attchd,2001.0,RFn,2.0,608.0,TA,TA,Y,WD,Normal
4,RL,60.0,Pave,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,...,Detchd,1998.0,Unf,3.0,642.0,TA,TA,Y,WD,Abnorml
5,RL,84.0,Pave,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,...,Attchd,2000.0,RFn,3.0,836.0,TA,TA,Y,WD,Normal


## 3. Encoding categorical variables and normalization

### 3.1 Encoding categorical variables

In [9]:
# Creating a list with all the categorical variables in the dataset
categorical_var=[]
for var in adata_m:
    if adata_m[var].dtype=='O':
        categorical_var.append(var)
# Using the function get_dummies we use the list "categorical_var" and encode the categorical variables. 
adata_m = pd.get_dummies( adata_m, columns = categorical_var)
adata_m.head()

Unnamed: 0_level_0,LotFrontage,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,TotalBsmtSF,1stFlrSF,GrLivArea,FullBath,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,65.0,7,2003,2003,196.0,706.0,856.0,856,1710,2,...,0,0,0,1,0,0,0,0,1,0
2,80.0,6,1976,1976,0.0,978.0,1262.0,1262,1262,2,...,0,0,0,1,0,0,0,0,1,0
3,68.0,7,2001,2002,162.0,486.0,920.0,920,1786,2,...,0,0,0,1,0,0,0,0,1,0
4,60.0,7,1915,1970,0.0,216.0,756.0,961,1717,1,...,0,0,0,1,1,0,0,0,0,0
5,84.0,8,2000,2000,350.0,655.0,1145.0,1145,2198,2,...,0,0,0,1,0,0,0,0,1,0


### 3.2 Normalization

In [10]:
for var in adata_m:
    adata_m[var]=(adata_m[var]-adata_m[var].min())/(adata_m[var].max()-adata_m[var].min())
#adata_m = preprocessing.scale(adata_m)
adata_m

Unnamed: 0_level_0,LotFrontage,OverallQual,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,TotalBsmtSF,1stFlrSF,GrLivArea,FullBath,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.150685,0.666667,0.949275,0.883333,0.122500,0.125089,0.140098,0.109641,0.259231,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.202055,0.555556,0.753623,0.433333,0.000000,0.173281,0.206547,0.194917,0.174830,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.160959,0.666667,0.934783,0.866667,0.101250,0.086109,0.150573,0.123083,0.273549,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.133562,0.666667,0.311594,0.333333,0.000000,0.038271,0.123732,0.131695,0.260550,0.25,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
5,0.215753,0.777778,0.927536,0.833333,0.218750,0.116052,0.187398,0.170342,0.351168,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
6,0.219178,0.444444,0.876812,0.750000,0.000000,0.129695,0.130278,0.097038,0.193670,0.25,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
7,0.184932,0.777778,0.956522,0.916667,0.116250,0.242558,0.275941,0.285654,0.256217,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
8,0.165431,0.666667,0.731884,0.383333,0.150000,0.152197,0.181178,0.162361,0.330821,0.50,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
9,0.102740,0.666667,0.427536,0.000000,0.000000,0.000000,0.155810,0.144507,0.271289,0.50,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
10,0.099315,0.444444,0.485507,0.000000,0.000000,0.150780,0.162193,0.156060,0.139977,0.25,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


## 4. Spliting data

### 4.1 Training-Test data

In [11]:
# Getting train data
data2 = adata_m.iloc[:data.shape[0],:]
# Getting test data
tdata2 = adata_m.iloc[data.shape[0]:,:]

### 4.2 Input-output

Spliting input-output only for training data

In [12]:
Y=Y.iloc[:].values;
Y=np.reshape(Y, (Y.shape[0], 1))
X=data2.iloc[:,:].values;
X.shape

(1460, 249)

In [13]:
X=np.column_stack((np.ones((X.shape[0],1)),X))

### 4.3 Training-Validation

In [14]:
# Generating a random array to get random elements in the training-validation data
ordering= np.arange(X.shape[0])
np.random.shuffle(ordering)

In [15]:
X.shape

(1460, 250)

In [16]:
# 80% training-20% validation
train=ordering[0:int(X.shape[0]*0.8)]
test=ordering[int(X.shape[0]*0.8):X.shape[0]]
X_train=X[train,:]
X_test=X[test,:]
Y_train=Y[train]
Y_test=Y[test]

In [17]:
X_train.shape

(1168, 250)

## 5. Training

In [18]:
############################################
# PENDIENTE
# normal equation : (X'X)^-1.X'Y
############################################

In [19]:
############################################
# BATCH
############################################
# Setting learning rate, regularization factor and number of epochs
alfa=0.0001
epochs=800000
# Initializing weights
W=np.random.rand(1,X_train.shape[1])/X_train.shape[1]
for epoch in range(epochs):
    # Calculation of weights updating
    err=np.transpose(Y_train)-np.matmul(W,np.transpose(X_train))
    dw=alfa*np.matmul(err,X_train)/X.shape[0]
    W+=dw
    # Validation error (only for visualization)
    if epoch%1000==0:
        y_pred=np.matmul(X_test,np.transpose(W))
        err_ep=np.sqrt(np.mean((np.log(1+y_pred) - np.log(1+Y_test))**2));
        #y_pred=np.matmul(X_test,np.transpose(W))*(Y2.max()-Y2.min())+Y2.min()
        #y_test=Y_test*(Y2.max()-Y2.min())+Y2.min()
        #err_ep=np.sqrt(np.mean((np.log(1+y_pred) - np.log(1+y_test))**2));
        print('Epoca: ',epoch)
        print('err_t: ',err_ep)
print(W)


Epoca:  0
err_t:  6.020544521733581
Epoca:  1000
err_t:  0.33081940298800455
Epoca:  2000
err_t:  0.31879116512616085
Epoca:  3000
err_t:  0.30132250360582574
Epoca:  4000
err_t:  0.2849843443898903
Epoca:  5000
err_t:  0.2716439793329115
Epoca:  6000
err_t:  0.26092890249225364
Epoca:  7000
err_t:  0.25229734221409017
Epoca:  8000
err_t:  0.24528503481386002
Epoca:  9000
err_t:  0.2395205284143783
Epoca:  10000
err_t:  0.23471362088594303
Epoca:  11000
err_t:  0.23064115252768103
Epoca:  12000
err_t:  0.22713361649321223
Epoca:  13000
err_t:  0.22406344889927962
Epoca:  14000
err_t:  0.22133525954674277
Epoca:  15000
err_t:  0.21887796716205232
Epoca:  16000
err_t:  0.21663864035362562
Epoca:  17000
err_t:  0.21457777672941472
Epoca:  18000
err_t:  0.21266574426553456
Epoca:  19000
err_t:  0.21088013345126827
Epoca:  20000
err_t:  0.2092038069634219
Epoca:  21000
err_t:  0.2076234743426549
Epoca:  22000
err_t:  0.20612865656543936
Epoca:  23000
err_t:  0.204710937172706
Epoca:  24000


Epoca:  197000
err_t:  0.15480569085215728
Epoca:  198000
err_t:  0.1547327072796234
Epoca:  199000
err_t:  0.15466041592221277
Epoca:  200000
err_t:  0.15458880908381625
Epoca:  201000
err_t:  0.15451787917079676
Epoca:  202000
err_t:  0.15444761869054555
Epoca:  203000
err_t:  0.15437802025005554
Epoca:  204000
err_t:  0.15430907655451565
Epoca:  205000
err_t:  0.15424078040592357
Epoca:  206000
err_t:  0.1541731247017164
Epoca:  207000
err_t:  0.1541061024334202
Epoca:  208000
err_t:  0.15403970668531752
Epoca:  209000
err_t:  0.15397393063313194
Epoca:  210000
err_t:  0.15390876754273092
Epoca:  211000
err_t:  0.15384421076884444
Epoca:  212000
err_t:  0.15378025375380228
Epoca:  213000
err_t:  0.1537168900262863
Epoca:  214000
err_t:  0.15365411320010045
Epoca:  215000
err_t:  0.1535919169729561
Epoca:  216000
err_t:  0.15353029512527389
Epoca:  217000
err_t:  0.1534692415190018
Epoca:  218000
err_t:  0.15340875009644783
Epoca:  219000
err_t:  0.1533488148791294
Epoca:  220000
err

Epoca:  392000
err_t:  0.14807338078582136
Epoca:  393000
err_t:  0.14806056459668537
Epoca:  394000
err_t:  0.14804786768393985
Epoca:  395000
err_t:  0.14803528915151654
Epoca:  396000
err_t:  0.1480228281109749
Epoca:  397000
err_t:  0.14801048368142103
Epoca:  398000
err_t:  0.14799825498942942
Epoca:  399000
err_t:  0.1479861411689641
Epoca:  400000
err_t:  0.14797414136130133
Epoca:  401000
err_t:  0.14796225471495414
Epoca:  402000
err_t:  0.14795048038559638
Epoca:  403000
err_t:  0.14793881753598892
Epoca:  404000
err_t:  0.14792726533590622
Epoca:  405000
err_t:  0.14791582296206485
Epoca:  406000
err_t:  0.1479044895980511
Epoca:  407000
err_t:  0.1478932644342513
Epoca:  408000
err_t:  0.14788214666778218
Epoca:  409000
err_t:  0.1478711355024223
Epoca:  410000
err_t:  0.14786023014854469
Epoca:  411000
err_t:  0.14784942982304952
Epoca:  412000
err_t:  0.14783873374929835
Epoca:  413000
err_t:  0.14782814115704931
Epoca:  414000
err_t:  0.14781765128239263
Epoca:  415000
e

Epoca:  586000
err_t:  0.147059335303327
Epoca:  587000
err_t:  0.1470589673970456
Epoca:  588000
err_t:  0.14705862919470347
Epoca:  589000
err_t:  0.14705832048743137
Epoca:  590000
err_t:  0.14705804106779582
Epoca:  591000
err_t:  0.14705779072978897
Epoca:  592000
err_t:  0.1470575692688177
Epoca:  593000
err_t:  0.1470573764816935
Epoca:  594000
err_t:  0.14705721216662207
Epoca:  595000
err_t:  0.14705707612319338
Epoca:  596000
err_t:  0.14705696815237107
Epoca:  597000
err_t:  0.14705688805648262
Epoca:  598000
err_t:  0.14705683563920985
Epoca:  599000
err_t:  0.14705681070557808
Epoca:  600000
err_t:  0.14705681306194732
Epoca:  601000
err_t:  0.14705684251600187
Epoca:  602000
err_t:  0.14705689887674092
Epoca:  603000
err_t:  0.14705698195446917
Epoca:  604000
err_t:  0.14705709156078703
Epoca:  605000
err_t:  0.14705722750858147
Epoca:  606000
err_t:  0.1470573896120165
Epoca:  607000
err_t:  0.1470575776865243
Epoca:  608000
err_t:  0.14705779154879525
Epoca:  609000
err

Epoca:  778000
err_t:  0.1473522861814303
Epoca:  779000
err_t:  0.1473550186193791
Epoca:  780000
err_t:  0.14735775824132152
Epoca:  781000
err_t:  0.14736050498971162
Epoca:  782000
err_t:  0.14736325880738418
Epoca:  783000
err_t:  0.14736601963755247
Epoca:  784000
err_t:  0.14736878742380588
Epoca:  785000
err_t:  0.14737156211010705
Epoca:  786000
err_t:  0.1473743436407896
Epoca:  787000
err_t:  0.14737713196055596
Epoca:  788000
err_t:  0.14737992701447478
Epoca:  789000
err_t:  0.14738272874797823
Epoca:  790000
err_t:  0.14738553710685995
Epoca:  791000
err_t:  0.14738835203727282
Epoca:  792000
err_t:  0.147391173485726
Epoca:  793000
err_t:  0.1473940013990835
Epoca:  794000
err_t:  0.1473968357245608
Epoca:  795000
err_t:  0.1473996764097234
Epoca:  796000
err_t:  0.1474025234024843
Epoca:  797000
err_t:  0.14740537665110157
Epoca:  798000
err_t:  0.14740823610417603
Epoca:  799000
err_t:  0.14741110171064936
[[ 4.45090104e+03  3.76975274e+03  5.01245564e+04  1.12091354e+

In [20]:
#W = np.loadtxt('test1.txt', dtype=float)
#W

In [21]:
y_pred=np.matmul(X_test,np.transpose(W))
y_test=Y_test

In [22]:
##### RMSLE-KAGGLE
np.sqrt(np.mean((np.log(1+y_pred) - np.log(1+y_test))**2))

0.14741397054506106

## 6. TESTEO

In [23]:
tdata2.shape

(1459, 249)

In [24]:
X_send=tdata2.iloc[:,:].values;
X_send=np.column_stack((np.ones((X_send.shape[0],1)),X_send))
X_send.shape

(1459, 250)

In [25]:
Y_send=np.matmul(X_send,np.transpose(W))
Y_send

array([[111808.44493275],
       [147246.20399631],
       [174507.72070899],
       ...,
       [146267.76566032],
       [108050.43432041],
       [222388.1923131 ]])

In [26]:
df = pd.DataFrame(Y_send,columns = ["SalePrice"])
df.index = np.arange(1461, 1461+len(df))
df.to_csv("outputs/predictions2.csv")