In [None]:
# Environment tweak applied before `import tensorflow` below.
# NOTE(review): presumably lowers TensorFlow's native log verbosity — confirm
# the meaning of level '3' against the TensorFlow docs.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Plotting stack: matplotlib figures rendered inline, with seaborn's
# "darkgrid" style applied globally.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style("darkgrid")

# Numerical, tabular and modelling imports.
# NOTE(review): `tf` and `layers` are not referenced in the cells reviewed
# here — presumably used by later modelling cells; verify.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import RobustScaler

In [None]:
# Paths of the six input tables: three buildings, then the carbon-intensity,
# pricing and weather series (this order is relied on by the numbered output
# of the inspection cells).
file_paths = [
    f"/kaggle/input/citylearn-aicrowd/{stem}.csv"
    for stem in (
        "Building_1",
        "Building_2",
        "Building_3",
        "carbon_intensity",
        "pricing",
        "weather",
    )
]

# Load each CSV into its own DataFrame, in the same order as file_paths
dataframes = list(map(pd.read_csv, file_paths))

In [None]:
# Report the (rows, columns) shape of every loaded table, numbered from 1
for i, df in enumerate(dataframes):
    print("Shape of DataFrame %d:" % (i + 1), df.shape)

In [None]:
# Per-column count of missing entries for every loaded table
for i, df in enumerate(dataframes):
    missing_values = df.isna().sum()  # isna() is an alias of isnull()
    print(f"Missing values in DataFrame {i + 1}:")
    print(missing_values)

In [None]:
# Column dtypes of every loaded table, numbered from 1
for i, df in enumerate(dataframes):
    data_types = df.dtypes
    print("Data types in DataFrame " + str(i + 1) + ":", data_types, sep="\n")

In [None]:
def calculate_score_forecast(episode_forecasts, episode_truth, w, b):
    """
    Calculate the forecast track score: the mean, over variables, of each
    variable's RMSE.

    For every variable the squared forecast errors over all n time steps and
    the first `w` positions of the window axis are averaged and square-rooted;
    the resulting per-variable RMSEs are then averaged over the V variables.

    Parameters:
    - episode_forecasts: Array-like of shape (n, w, V) containing episode forecasts.
    - episode_truth: Array-like of shape (n, w, V) containing episode ground truth.
    - w: Length of the forecasting window (48hrs). Only the first `w` positions
      of the window axis are scored.
    - b: Total number of buildings. Not used by the computation; kept so that
      existing calls keep working.

    Returns:
    - The forecast track score.

    Raises:
    - ValueError: If the inputs are not 3-D arrays of identical shape, if they
      contain no time steps or no variables, or if `w` is not between 1 and the
      length of the window axis.
    """
    forecasts = np.asarray(episode_forecasts, dtype=float)
    truth = np.asarray(episode_truth, dtype=float)

    if forecasts.ndim != 3 or forecasts.shape != truth.shape:
        raise ValueError(
            "episode_forecasts and episode_truth must be 3-D arrays of the same "
            f"shape, got {forecasts.shape} and {truth.shape}"
        )

    n, window, V = forecasts.shape
    if n == 0 or V == 0:
        raise ValueError("inputs must contain at least one time step and one variable")
    if not 0 < w <= window:
        raise ValueError(f"w must be between 1 and {window}, got {w}")

    # Squared error for every (time step, window position, variable) triple,
    # restricted to the first w window positions.
    squared_errors = (forecasts[:, :w, :] - truth[:, :w, :]) ** 2

    # Averaging over axes 0 and 1 divides by n * w, giving one RMSE per variable.
    per_variable_rmse = np.sqrt(squared_errors.mean(axis=(0, 1)))

    score_forecast = per_variable_rmse.mean()
    return score_forecast

In [None]:
n = 10  # Number of time steps
w = 48  # Forecasting window length
b = 2   # Total number of buildings
V = 3 * b + 2  # Total number of variables

# Seeded generator so the printed score is the same on every Restart & Run All
rng = np.random.default_rng(42)

# Generate random episode forecasts and truth data (uniform in [0, 1))
episode_forecasts = rng.random((n, w, V))  # Replace with your actual forecasts
episode_truth = rng.random((n, w, V))      # Replace with your actual ground truth

score = calculate_score_forecast(episode_forecasts, episode_truth, w, b)
print("Forecast Track Score:", score)

In [None]:
# Example data
n = 20 # Number of time steps
w = 48  # Forecasting window length
b = 2   # Total number of buildings
V = 3 * b + 2  # Total number of variables

# Seeded generator so the printed score is the same on every Restart & Run All
rng = np.random.default_rng(42)

# Generate example forecasts and truth data (replace with your actual data)
# For simplicity, we'll use random values (uniform in [0, 1)) as an example.
episode_forecasts = rng.random((n, w, V))  # Replace with your actual forecasts
episode_truth = rng.random((n, w, V))      # Replace with your actual ground truth

# Calculate the forecast track score for the example data
score = calculate_score_forecast(episode_forecasts, episode_truth, w, b)
print("Forecast Track Score:", score)

In [None]:
# Example data
n = 20 # Number of time steps
w = 48  # Forecasting window length
b = 3   # Total number of buildings
V = 3 * b + 2  # Total number of variables

# Seeded generator so the printed score is the same on every Restart & Run All
rng = np.random.default_rng(42)

# Generate example forecasts and truth data (replace with your actual data)
# For simplicity, we'll use random values (uniform in [0, 1)) as an example.
episode_forecasts = rng.random((n, w, V))  # Replace with your actual forecasts
episode_truth = rng.random((n, w, V))      # Replace with your actual ground truth

# Calculate the forecast track score for the example data
score = calculate_score_forecast(episode_forecasts, episode_truth, w, b)
print("Forecast Track Score:", score)

# Column Names

In [None]:
# Collect the header of every CSV. nrows=0 parses only the header row, so the
# data rows are not loaded again just to read the column names.
all_column_names = []
for file_path in file_paths:
    column_names = pd.read_csv(file_path, nrows=0).columns.tolist()
    all_column_names.extend(column_names)

# De-duplicate while keeping first-seen order. A set would order the names by
# string hash, which changes between kernel restarts and makes the numbered
# listing below non-reproducible.
unique_column_names = list(dict.fromkeys(all_column_names))

# Print the unique column names
for idx, column_name in enumerate(unique_column_names, start=1):
    print(f"Column {idx}: {column_name}")

In [None]:
# For each CSV, list its columns (numbered from 1) under a per-file heading,
# followed by a blank separator line.
for i, file_path in enumerate(file_paths, start=1):
    df = pd.read_csv(file_path)
    column_names = df.columns.tolist()
    report = [f"DataFrame {i} - Column Names:"]
    report += [
        f"  Column {idx}: {column_name}"
        for idx, column_name in enumerate(column_names, start=1)
    ]
    report.append("")  # trailing empty entry yields the separator line
    print("\n".join(report))

# First Two Rows of Each DataFrame

In [None]:
# Show the first two rows of every CSV under a numbered heading; the empty
# string passed last produces the blank separator line between tables.
for i, file_path in enumerate(file_paths, start=1):
    df = pd.read_csv(file_path)
    print(f"DataFrame {i} - First 2 Rows:", df.head(2), "", sep="\n")

In [None]:
# Load the six tables into individually named frames: the three buildings,
# then carbon intensity (cint), pricing (pr) and weather (weat).
b1, b2, b3, cint, pr, weat = (
    pd.read_csv(f"/kaggle/input/citylearn-aicrowd/{stem}.csv")
    for stem in (
        "Building_1",
        "Building_2",
        "Building_3",
        "carbon_intensity",
        "pricing",
        "weather",
    )
)

In [None]:
# Preview the first two rows of the Building_1 table
b1.head(2)

In [None]:
# Preview the first two rows of the Building_2 table
b2.head(2)

In [None]:
# Preview the first two rows of the Building_3 table
b3.head(2)

# Total Number of Time Steps

In [None]:
# Paths of the six input tables (buildings, carbon intensity, pricing, weather)
file_paths = [
    f"/kaggle/input/citylearn-aicrowd/{stem}.csv"
    for stem in (
        "Building_1",
        "Building_2",
        "Building_3",
        "carbon_intensity",
        "pricing",
        "weather",
    )
]

# Read every CSV into a DataFrame, in the order of file_paths
dataframes = list(map(pd.read_csv, file_paths))

# The row count of the longest table is reported as the number of time steps
max_time_steps = max(len(frame) for frame in dataframes)

print("Total number of time steps:", max_time_steps)

# Data Visualization

## Heatmap

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = b1.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Building 1")
plt.show()

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = b2.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Building 2")
plt.show()

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = b3.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Building 3")
plt.show()

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = cint.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Carbon Intensity")
plt.show()

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = pr.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Pricing")
plt.show()

In [None]:
# Calculate the correlation matrix over the numeric columns only.
# On pandas >= 2.0 DataFrame.corr() raises if the frame still contains
# non-numeric columns, so they are filtered out explicitly first.
corr_matrix = weat.select_dtypes(include='number').corr()

# Create an annotated heatmap; the title names the dataset so this figure can
# be told apart from the other correlation heatmaps in the notebook.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap - Weather")
plt.show()

## Histograms

In [None]:
# Three 10-bin histograms of the b1 table, laid out on a 2x2 grid whose
# fourth slot is left without an axes.
fig = plt.figure(figsize=(12, 6))

# (column to plot, bar colour) — one entry per panel, in grid order
hist_panels = [
    ("Indoor Temperature (C)", 'skyblue'),
    ("Equipment Electric Power (kWh)", 'salmon'),
    ("Cooling Load (kWh)", 'lightgreen'),
]

for slot, (column, colour) in enumerate(hist_panels, start=1):
    ax = fig.add_subplot(2, 2, slot)
    ax.hist(b1[column], bins=10, color=colour, edgecolor='black')
    ax.set_xlabel(column)
    ax.set_ylabel("Frequency")
    ax.set_title(f"Histogram of {column}")

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Three 10-bin histograms of the b2 table, laid out on a 2x2 grid whose
# fourth slot is left without an axes.
fig = plt.figure(figsize=(12, 6))

# (column to plot, bar colour) — one entry per panel, in grid order
hist_panels = [
    ("Indoor Temperature (C)", 'skyblue'),
    ("Equipment Electric Power (kWh)", 'salmon'),
    ("Cooling Load (kWh)", 'lightgreen'),
]

for slot, (column, colour) in enumerate(hist_panels, start=1):
    ax = fig.add_subplot(2, 2, slot)
    ax.hist(b2[column], bins=10, color=colour, edgecolor='black')
    ax.set_xlabel(column)
    ax.set_ylabel("Frequency")
    ax.set_title(f"Histogram of {column}")

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Three 10-bin histograms of the b3 table, laid out on a 2x2 grid whose
# fourth slot is left without an axes.
fig = plt.figure(figsize=(12, 6))

# (column to plot, bar colour) — one entry per panel, in grid order
hist_panels = [
    ("Indoor Temperature (C)", 'skyblue'),
    ("Equipment Electric Power (kWh)", 'salmon'),
    ("Cooling Load (kWh)", 'lightgreen'),
]

for slot, (column, colour) in enumerate(hist_panels, start=1):
    ax = fig.add_subplot(2, 2, slot)
    ax.hist(b3[column], bins=10, color=colour, edgecolor='black')
    ax.set_xlabel(column)
    ax.set_ylabel("Frequency")
    ax.set_title(f"Histogram of {column}")

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

## Scatterplots

In [None]:
# Two side-by-side scatter plots of the b1 table, both with indoor
# temperature on the x-axis.
fig = plt.figure(figsize=(12, 4))

x_column = "Indoor Temperature (C)"

# (y column, marker colour, panel title) — one entry per panel, left to right
scatter_panels = [
    ("Equipment Electric Power (kWh)", 'skyblue',
     "Scatter Plot: Indoor Temperature vs. Equipment Electric Power"),
    ("Cooling Load (kWh)", 'salmon',
     "Scatter Plot: Indoor Temperature vs. Cooling Load"),
]

for slot, (y_column, colour, title) in enumerate(scatter_panels, start=1):
    ax = fig.add_subplot(1, 2, slot)
    ax.scatter(b1[x_column], b1[y_column], color=colour)
    ax.set_xlabel(x_column)
    ax.set_ylabel(y_column)
    ax.set_title(title)

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Two side-by-side scatter plots of the b2 table, both with indoor
# temperature on the x-axis.
fig = plt.figure(figsize=(12, 4))

x_column = "Indoor Temperature (C)"

# (y column, marker colour, panel title) — one entry per panel, left to right
scatter_panels = [
    ("Equipment Electric Power (kWh)", 'skyblue',
     "Scatter Plot: Indoor Temperature vs. Equipment Electric Power"),
    ("Cooling Load (kWh)", 'salmon',
     "Scatter Plot: Indoor Temperature vs. Cooling Load"),
]

for slot, (y_column, colour, title) in enumerate(scatter_panels, start=1):
    ax = fig.add_subplot(1, 2, slot)
    ax.scatter(b2[x_column], b2[y_column], color=colour)
    ax.set_xlabel(x_column)
    ax.set_ylabel(y_column)
    ax.set_title(title)

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Two side-by-side scatter plots of the b3 table, both with indoor
# temperature on the x-axis.
fig = plt.figure(figsize=(12, 4))

x_column = "Indoor Temperature (C)"

# (y column, marker colour, panel title) — one entry per panel, left to right
scatter_panels = [
    ("Equipment Electric Power (kWh)", 'skyblue',
     "Scatter Plot: Indoor Temperature vs. Equipment Electric Power"),
    ("Cooling Load (kWh)", 'salmon',
     "Scatter Plot: Indoor Temperature vs. Cooling Load"),
]

for slot, (y_column, colour, title) in enumerate(scatter_panels, start=1):
    ax = fig.add_subplot(1, 2, slot)
    ax.scatter(b3[x_column], b3[y_column], color=colour)
    ax.set_xlabel(x_column)
    ax.set_ylabel(y_column)
    ax.set_title(title)

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

## Bar Plots

In [None]:
# Three side-by-side bar charts of value counts in the b1 table, with the
# categories sorted by their value rather than by frequency.
fig = plt.figure(figsize=(12, 6))

# (column to count, short label used for axis and title, bar colour)
bar_panels = [
    ('Month', 'Month', 'skyblue'),
    ('Day Type', 'Day Type', 'salmon'),
    ('HVAC Mode (Off/Cooling/Heating)', 'HVAC Mode', 'lightgreen'),
]

for slot, (column, label, colour) in enumerate(bar_panels, start=1):
    ax = fig.add_subplot(1, 3, slot)
    counts = b1[column].value_counts().sort_index()
    counts.plot(kind='bar', color=colour, ax=ax)
    ax.set_xlabel(label)
    ax.set_ylabel('Count')
    ax.set_title(f'Bar Plot: {label}')

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Three side-by-side bar charts of value counts in the b2 table, with the
# categories sorted by their value rather than by frequency.
fig = plt.figure(figsize=(12, 6))

# (column to count, short label used for axis and title, bar colour)
bar_panels = [
    ('Month', 'Month', 'skyblue'),
    ('Day Type', 'Day Type', 'salmon'),
    ('HVAC Mode (Off/Cooling/Heating)', 'HVAC Mode', 'lightgreen'),
]

for slot, (column, label, colour) in enumerate(bar_panels, start=1):
    ax = fig.add_subplot(1, 3, slot)
    counts = b2[column].value_counts().sort_index()
    counts.plot(kind='bar', color=colour, ax=ax)
    ax.set_xlabel(label)
    ax.set_ylabel('Count')
    ax.set_title(f'Bar Plot: {label}')

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

In [None]:
# Three side-by-side bar charts of value counts in the b3 table, with the
# categories sorted by their value rather than by frequency.
fig = plt.figure(figsize=(12, 6))

# (column to count, short label used for axis and title, bar colour)
bar_panels = [
    ('Month', 'Month', 'skyblue'),
    ('Day Type', 'Day Type', 'salmon'),
    ('HVAC Mode (Off/Cooling/Heating)', 'HVAC Mode', 'lightgreen'),
]

for slot, (column, label, colour) in enumerate(bar_panels, start=1):
    ax = fig.add_subplot(1, 3, slot)
    counts = b3[column].value_counts().sort_index()
    counts.plot(kind='bar', color=colour, ax=ax)
    ax.set_xlabel(label)
    ax.set_ylabel('Count')
    ax.set_title(f'Bar Plot: {label}')

# Avoid overlapping labels between panels, then render
fig.tight_layout()
plt.show()

# Robust Scaling

In [None]:
from sklearn.preprocessing import RobustScaler

In [None]:
# Initialize the RobustScaler with its default parameters.
# This single instance is shared by the scaling cells that follow; each of
# them calls fit_transform on it, so it is re-fitted for every table.
scaler = RobustScaler()

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = b1.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(b1[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_b1 = pd.DataFrame(scaled_data, columns=numeric_columns, index=b1.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_b1 / b1), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = b1.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_b1[col] = b1[col]

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = b2.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(b2[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_b2 = pd.DataFrame(scaled_data, columns=numeric_columns, index=b2.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_b2 / b2), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = b2.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_b2[col] = b2[col]

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = b3.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(b3[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_b3 = pd.DataFrame(scaled_data, columns=numeric_columns, index=b3.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_b3 / b3), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = b3.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_b3[col] = b3[col]

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = cint.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(cint[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_cint = pd.DataFrame(scaled_data, columns=numeric_columns, index=cint.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_cint / cint), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = cint.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_cint[col] = cint[col]

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = pr.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(pr[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_pr = pd.DataFrame(scaled_data, columns=numeric_columns, index=pr.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_pr / pr), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = pr.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_pr[col] = pr[col]

In [None]:
# Select only the numeric columns to be scaled
numeric_columns = weat.select_dtypes(include='number').columns

# Re-fit the shared scaler on this table and scale its numeric columns
scaled_data = scaler.fit_transform(weat[numeric_columns])

# Rebuild a DataFrame from the scaled array. The source index is kept so the
# non-numeric columns assigned below align row-for-row.
scaled_weat = pd.DataFrame(scaled_data, columns=numeric_columns, index=weat.index)

# Carry the non-numeric columns over unchanged. The assignment must target this
# cell's own frames (scaled_weat / weat), not a leftover loop variable from an
# earlier cell.
non_numeric_columns = weat.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    scaled_weat[col] = weat[col]

In [None]:
# Preview the first two rows of the scaled Building 1 table
scaled_b1.head(2)

In [None]:
# Preview the first two rows of the scaled Building 2 table
scaled_b2.head(2)

In [None]:
# Preview the first two rows of the scaled Building 3 table
scaled_b3.head(2)

In [None]:
# Preview the first two rows of the scaled carbon-intensity table
scaled_cint.head(2)

In [None]:
# Preview the first two rows of the scaled pricing table
scaled_pr.head(2)

In [None]:
# Preview the first two rows of the scaled weather table
scaled_weat.head(2)

In [None]:
# (rows, columns) of the scaled Building 1 table
scaled_b1.shape

In [None]:
# (rows, columns) of the scaled Building 2 table
scaled_b2.shape

In [None]:
# (rows, columns) of the scaled Building 3 table
scaled_b3.shape

In [None]:
# (rows, columns) of the scaled carbon-intensity table
scaled_cint.shape

In [None]:
# (rows, columns) of the scaled pricing table
scaled_pr.shape

In [None]:
# (rows, columns) of the scaled weather table
scaled_weat.shape

In [None]:
# Write the scaled Building 1 table to scaled_b1.csv (relative path), without the row index
scaled_b1.to_csv('scaled_b1.csv', index=False)

In [None]:
# Write the scaled Building 2 table to scaled_b2.csv (relative path), without the row index
scaled_b2.to_csv('scaled_b2.csv', index=False)

In [None]:
# Write the scaled Building 3 table to scaled_b3.csv (relative path), without the row index
scaled_b3.to_csv('scaled_b3.csv', index=False)

In [None]:
# Write the scaled carbon-intensity table to scaled_cint.csv (relative path), without the row index
scaled_cint.to_csv('scaled_cint.csv', index=False)

In [None]:
# Write the scaled pricing table to scaled_pr.csv (relative path), without the row index
scaled_pr.to_csv('scaled_pr.csv', index=False)

In [None]:
# Write the scaled weather table to scaled_weat.csv (relative path), without the row index
scaled_weat.to_csv('scaled_weat.csv', index=False)