In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from table_reader import TableReader
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LassoCV, RidgeCV
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Pull the property feature table and the geodata table from the TableReader.
# The reader is closed in a `finally` so it is released even when one of the
# two queries raises.
# NOTE(review): `include_amenitites` is spelled as in the original call —
# presumably it matches TableReader's own keyword; verify before "fixing" it.
tr = TableReader()
try:
    df = tr.properties_vector(include_amenitites=True)
    geodata = tr.geodata_vector()
finally:
    tr.close()


In [None]:
# Left-join the geodata columns onto the property rows by listingID.
# Caveat: `df` is overwritten with the joined frame, so re-running this cell
# without re-running the load cell joins geodata onto it a second time.
df = df.merge(geodata, how='left', on='listingID')

In [None]:
# Histogram of the zipcode column; labelled so the figure stands on its own.
ax = df['zipcode'].hist()
# Trailing semicolon keeps the list returned by `ax.set` out of the cell output.
ax.set(title='Distribution of zipcode', xlabel='zipcode', ylabel='count');

In [None]:
# Preview the merged frame instead of rendering the whole table.
df.head()

In [None]:
# Model inputs: every column except the target and the two columns excluded
# from modelling here (listingID, zipcode).
features = df.drop(columns=['price', 'listingID', 'zipcode'])
# Regression target.
label = df.loc[:, 'price']

In [None]:
# Baseline: 10-fold cross-validation of a default Ridge on the feature columns
# as-is (no scaling step). No `scoring` is passed, so the estimator's default
# scorer is used.
cross_val_score(
    estimator=Ridge(),
    X=features,
    y=label,
    cv=10,
)

In [None]:
# Feature selection driven by a default ElasticNet: fit the selector on all
# features, then print the names of the columns it keeps.
# NOTE(review): features are passed unscaled; coefficient-based selection is
# scale-sensitive — confirm this is intended.
model = ElasticNet()
esfm = SelectFromModel(model).fit(features, label)
print(list(features.columns[esfm.get_support()]))

In [None]:
# Same selection procedure with a default Lasso as the scoring model.
# `model` and `sfm` are rebound here; the previous cell's `model` is replaced.
model = Lasso()
sfm = SelectFromModel(model).fit(features, label)
print(list(features.columns[sfm.get_support()]))

In [None]:
# Same selection procedure with a default Ridge as the scoring model.
# `model` and `sfm` are rebound again, replacing the Lasso-based objects.
model = Ridge()
sfm = SelectFromModel(model).fit(features, label)
print(list(features.columns[sfm.get_support()]))

In [None]:
def build_poly(model, degree=1):
    """Wrap ``model`` in a pipeline that standardizes the features first.

    Parameters
    ----------
    model : estimator
        Final pipeline step, registered under the name ``"reg"``.
    degree : int, default=1
        Degree handed to ``PolynomialFeatures``. When it equals 1 the
        polynomial step is left out of the pipeline entirely.

    Returns
    -------
    Pipeline
        Steps ``"std"`` -> (``"poly"`` unless ``degree == 1``) -> ``"reg"``.
    """
    steps = [("std", StandardScaler())]
    if degree != 1:
        # The expansion is applied to the already standardized features.
        steps.append(("poly", PolynomialFeatures(degree)))
    steps.append(("reg", model))
    return Pipeline(steps)

In [None]:
# 10-fold CV of a standardize -> Ridge pipeline (degree=1, so no polynomial step).
# NOTE(review): the variable is named `lasso_reg` but wraps a Ridge estimator —
# confirm whether the name or the model is the one that was intended.
lasso_reg = build_poly(model=Ridge(), degree=1)
cross_val_score(
    estimator=lasso_reg,
    X=features,
    y=label,
    cv=10,
)

In [None]:
# Binary flag: 1 where price is strictly greater than 100, otherwise 0.
# Computed with a vectorized comparison rather than a per-row Python lambda;
# the explicit int64 cast keeps the 0/1 integer column the lambda produced.
df['costlier_than_100'] = (df['price'] > 100).astype('int64')

In [None]:
# Spot-check the new flag against the price it was derived from
# (first rows only, rather than rendering the whole frame).
df[['costlier_than_100', 'price']].head(10)