In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [55]:
from sklearn.model_selection import cross_val_score,train_test_split,GridSearchCV
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error

In [61]:
from sklearn.linear_model import LinearRegression,RidgeCV,LassoCV
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor

In [54]:
def write_sumb(name,prediction,start_id=1461):
    """Write a submission CSV with the columns ``Id`` and ``SalePrice``.

    Parameters
    ----------
    name : str or path-like
        Destination path of the CSV file.
    prediction : array-like
        Predicted sale prices, one per row. Multi-dimensional input such as
        an ``(n, 1)`` column vector is flattened to 1-D first.
    start_id : int, default 1461
        ``Id`` assigned to the first row; later rows are numbered consecutively.
    """
    # Flatten to a plain 1-D array: this accepts column vectors and prevents a
    # pandas Series from being index-aligned against the Id column.
    prices = np.asarray(prediction).ravel()
    # Size the Id column from the data instead of assuming a fixed row count.
    ids = range(start_id, start_id + len(prices))
    pd.DataFrame({'Id':ids,'SalePrice':prices}).to_csv(name,index=False)

In [40]:
# Read the training and test tables from the two *_clean.csv files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train_clean.csv', 'test_clean.csv'))

In [41]:
# Split the labelled data into a fitting part and a held-out part;
# random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop(columns='SalePrice'),  # predictors: every column except the target
    train['SalePrice'],               # target
    random_state=5,
)

In [42]:
# Pick columns by dtype family and refer to them by name. Comparing dtypes
# against the 'int' / 'float' aliases depends on the platform's default integer
# width and silently skips other numeric widths; positional indices would also
# pick the wrong columns if a later frame had a different column order.
categorical_features = X_train.select_dtypes(include='object').columns.tolist()
# Categories unseen during fit are encoded as all-zero rows instead of raising.
categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

numeric_features = X_train.select_dtypes(include='number').columns.tolist()
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

# Scale the numeric columns and one-hot encode the object columns; any column
# in neither list is dropped (ColumnTransformer's default remainder).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

In [58]:
# Learn scaling statistics and one-hot categories from the training split only...
X_train_prep = preprocessor.fit_transform(X_train)
# ...then reuse the fitted transformer on the held-out split and the submission data.
X_test_prep, test_prep = (preprocessor.transform(frame) for frame in (X_test, test))

In [59]:
# Ordinary least squares fit; its predictions on the submission data go to linear.csv.
linear = LinearRegression().fit(X_train_prep, y_train)
pred = linear.predict(test_prep)
write_sumb('linear.csv', pred)

In [62]:
# Lasso with its regularisation strength chosen by built-in cross-validation.
lasso = LassoCV(random_state=5).fit(X_train_prep, y_train)
pred = lasso.predict(test_prep)
write_sumb('lasso.csv', pred)

In [63]:
# Ridge regression with built-in cross-validation over its default alpha grid.
ridge = RidgeCV().fit(X_train_prep, y_train)
pred = ridge.predict(test_prep)
write_sumb('ridge.csv', pred)

In [64]:
# Random forest with default hyper-parameters; random_state fixes the randomness.
forest = RandomForestRegressor(random_state=5).fit(X_train_prep, y_train)
pred = forest.predict(test_prep)
write_sumb('forest.csv', pred)

In [65]:
# Gradient-boosted trees with default hyper-parameters; random_state fixes the randomness.
gbr = GradientBoostingRegressor(random_state=5).fit(X_train_prep, y_train)
pred = gbr.predict(test_prep)
write_sumb('gbr.csv', pred)

In [68]:
import tensorflow as tf
import tensorflow.keras

In [116]:
def _to_dense(matrix):
    """Return ``matrix`` as a dense ndarray, whether it arrives sparse or dense."""
    # ColumnTransformer only returns a sparse matrix when the stacked output is
    # sparse enough (its ``sparse_threshold``); a dense ndarray has no
    # ``.toarray()``, so calling it unconditionally would raise AttributeError.
    return matrix.toarray() if hasattr(matrix, 'toarray') else np.asarray(matrix)

# Dense copies of the preprocessed matrices for conversion to TensorFlow tensors.
X_train_ = _to_dense(X_train_prep)
X_test_ = _to_dense(X_test_prep)
test_ = _to_dense(test_prep)

In [170]:
# Wrap the target and the dense feature matrices as float64 tensors, the same
# dtype the Keras layers are declared with.
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.float64)
X_train_tf = tf.convert_to_tensor(X_train_, dtype=tf.float64)
X_test_tf = tf.convert_to_tensor(X_test_, dtype=tf.float64)
test_tf = tf.convert_to_tensor(test_, dtype=tf.float64)

In [171]:
# Display the target tensor to check its shape and dtype before training.
y_train_tf

<tf.Tensor: shape=(1095,), dtype=float64, numpy=array([204000., 185000., 143000., ...,  91000., 143900., 129000.])>

In [172]:
# Fix TensorFlow's global seed so repeated runs start from the same random
# state, matching the random_state=5 used for the scikit-learn models.
tf.random.set_seed(5)

# Baseline network: one 128-unit ReLU hidden layer feeding a single linear
# output (the predicted price). All layers compute in float64.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(dtype='float64'),
        tf.keras.layers.Dense(128,activation='relu',dtype='float64'),
        tf.keras.layers.Dense(1,dtype='float64')
    ]
)
# `metrics` is passed as a list, the form compile() documents, rather than as a
# bare callable.
model.compile(optimizer='adam',loss=tf.keras.losses.MeanSquaredError(),metrics=[tf.keras.metrics.mean_squared_error])

In [177]:
# Train on the full training tensors for 1000 epochs with Keras' default batch size.
model.fit(x=X_train_tf, y=y_train_tf, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x154624a60>

In [221]:
# Predict on the submission data and flatten the result to 1-D so it fits a
# single CSV column.
pred = np.array(model.predict(test_tf)).ravel()
write_sumb('keras.csv', pred)

In [253]:
# Fix TensorFlow's global seed so repeated runs start from the same random
# state, matching the random_state=5 used for the scikit-learn models.
tf.random.set_seed(5)

# Deeper network: 256 -> 128 -> 32 ReLU units, with batch normalisation and
# 20% dropout after each of the first two hidden layers, then one linear output.
# NOTE: this rebinds `model`, replacing the network built earlier in the notebook.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(dtype='float64'),
        tf.keras.layers.Dense(256,activation='relu',dtype='float64'),
        tf.keras.layers.BatchNormalization(dtype='float64'),
        tf.keras.layers.Dropout(0.2,dtype='float64'),
        tf.keras.layers.Dense(128,activation='relu',dtype='float64'),
        tf.keras.layers.BatchNormalization(dtype='float64'),
        tf.keras.layers.Dropout(0.2,dtype='float64'),
        tf.keras.layers.Dense(32,activation='relu',dtype='float64'),
        tf.keras.layers.Dense(1,dtype='float64')
    ]
)
# `metrics` is passed as a list, the form compile() documents, rather than as a
# bare callable.
model.compile(optimizer='adam',loss=tf.keras.losses.MeanSquaredError(),metrics=[tf.keras.metrics.mean_squared_error])

In [254]:
# Train for 500 epochs in mini-batches of 16 samples.
model.fit(x=X_train_tf, y=y_train_tf, batch_size=16, epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x15520aaf0>

In [255]:
# Predict on the submission data and flatten the result to 1-D so it fits a
# single CSV column.
pred = np.array(model.predict(test_tf)).ravel()
# Use a separate file name: an earlier cell already writes 'keras.csv' for the
# first network, and reusing that name would silently overwrite its submission.
write_sumb('keras_deep.csv',pred)

In [256]:
# Display the flattened predictions as a quick sanity check of their magnitude.
pred

array([127141.41522967, 198232.39744689, 192388.46632104, ...,
       171863.46881001, 118952.26567613, 212171.06678593])