In [233]:
import pandas as pd
from dotenv import load_dotenv
from utils import get_engine
from sqlalchemy import text
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [234]:
load_dotenv()
engine = get_engine(echo_arg=True)

In [235]:
with engine.begin() as conn:
    query = text("""SELECT * FROM "CO2_bulding_filtered_feauturs_selection"; """)
    df = pd.read_sql(query, conn)

2023-06-15 20:27:49,997 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2023-06-15 20:27:50,000 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-06-15 20:27:50,043 INFO sqlalchemy.engine.Engine select current_schema()
2023-06-15 20:27:50,045 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-06-15 20:27:50,090 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2023-06-15 20:27:50,092 INFO sqlalchemy.engine.Engine [raw sql] {}
2023-06-15 20:27:50,134 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-06-15 20:27:50,145 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

In [236]:
X_cols = ['YearBuilt', 'Have_Stream_Energy', 'Have_Electricity_Energy',
       'Have_NaturalGas_Energy', 'BuildingType', 'PrimaryPropertyType',
       'Neighborhood', 'NumberofBuildings',
       'NumberofFloors', 'PropertyGFATotal', 'PropertyGFAParking',
       'PropertyGFABuilding_s_', 'LargestPropertyUseTypeGFA']
X = df[X_cols]

In [237]:
y = df['TotalGHGEmissions']

In [238]:
bool_cols = ['Have_Stream_Energy', 'Have_Electricity_Energy','Have_NaturalGas_Energy']
X[bool_cols]= X[bool_cols].apply(lambda x: x.apply(lambda x: 1 if x else 0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[bool_cols]= X[bool_cols].apply(lambda x: x.apply(lambda x: 1 if x else 0))


In [239]:
X_cat = X.select_dtypes(include=[object])
X_num = X.select_dtypes(exclude=[object])

In [240]:
preparation = ColumnTransformer(transformers=[
    ('tf_cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), X_cat.columns),
    ('tf_num', RobustScaler(), X_num.columns)
])

In [241]:
from keras import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint

In [243]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_tf_train = preparation.fit_transform(X_train)
X_tf_test = preparation.fit_transform(X_test)

In [244]:
NN_model = Sequential()

# The Input Layer :
NN_model.add(Dense(128, kernel_initializer='normal',input_dim = X_tf_train.shape[1], activation='relu'))

# The Hidden Layers :
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))

# The Output Layer :
NN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

# Compile the network :
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
NN_model.summary()

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_71 (Dense)            (None, 128)               7808      
                                                                 
 dense_72 (Dense)            (None, 256)               33024     
                                                                 
 dense_73 (Dense)            (None, 256)               65792     
                                                                 
 dense_74 (Dense)            (None, 256)               65792     
                                                                 
 dense_75 (Dense)            (None, 1)                 257       
                                                                 
Total params: 172,673
Trainable params: 172,673
Non-trainable params: 0
_________________________________________________________________


In [None]:
# checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5' 
# checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')
# callbacks_list = [checkpoint]

In [246]:
NN_model.fit(X_tf_train, y, epochs=100, batch_size=32, validation_split = 0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7fae763d9850>

In [247]:
y_pred = NN_model.predict(X_tf_test)



In [248]:
from sklearn.metrics import r2_score
print(r2_score(y_test, y_pred))

-0.17956495164765673
