In [1]:
import pandas as pd

df = pd.read_csv('EnergyEfficiency.csv')

In [5]:
df['GlazingAreaDistribution'].unique()

array(['None', 'Uniform', 'North', 'East', 'South', 'West'], dtype=object)

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.utils import shuffle

df = shuffle(df)

label_encoder = LabelEncoder()
df['GlazingAreaDistribution'] = label_encoder.fit_transform(df['GlazingAreaDistribution'])

column_transformer = ColumnTransformer([('encoder', OneHotEncoder(), [5])], remainder='passthrough')
df = column_transformer.fit_transform(df)

X = df[:, :-2]
y_heat = df[:, -2]
y_cool = df[:, -1]

imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

regressor_heat = LinearRegression()
regressor_cool = LinearRegression()

regressor_heat.fit(X, y_heat)
regressor_cool.fit(X, y_cool)

dummy_data = {
    'Compactness': [0.62],
    'SurfaceArea': [808.5],
    'WallArea': [367.5],
    'RoofArea': [220.5],
    'Height': [3.5],
    'Orientation': ['East'],
    'GlazingLevel': [0.1],
    'GlazingAreaDistribution': ['North'],
    'HeatingLoad': [0.0],
    'CoolingLoad': [0.0]
}

dummy_df = pd.DataFrame(dummy_data)

dummy_df['GlazingAreaDistribution'] = label_encoder.transform(dummy_df['GlazingAreaDistribution'])

dummy_data_transformed = column_transformer.transform(dummy_df)[:, :-2] 

dummy_pred_heat = regressor_heat.predict(dummy_data_transformed)
dummy_pred_cool = regressor_cool.predict(dummy_data_transformed)

r2_heat = r2_score(y_heat, regressor_heat.predict(X))
mse_heat = mean_squared_error(y_heat, regressor_heat.predict(X))
r2_cool = r2_score(y_cool, regressor_cool.predict(X))
mse_cool = mean_squared_error(y_cool, regressor_cool.predict(X))

print('Predicted Heating Load:', dummy_pred_heat)
print('Predicted Cooling Load:', dummy_pred_cool)
print('R-squared value (Heating Load):', r2_heat)
print('Mean Squared Error (Heating Load):', mse_heat)
print('R-squared value (Cooling Load):', r2_cool)
print('Mean Squared Error (Cooling Load):', mse_cool)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices