In [98]:
!pip install -q huggingface_hub pandas Dataset datasets scikit-learn



## 2. Real-Prediction comparison

In [100]:
import os
from huggingface_hub import hf_hub_download
import joblib

# Get the token from environment variables
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

if not HUGGINGFACE_TOKEN:
    raise ValueError("HUGGINGFACE_TOKEN is not set in the environment variables.")



### 1. Retrieve Todays Prediction

In [101]:
import pandas as pd
import numpy as np
from datasets import load_dataset
from datasets import Dataset

import matplotlib.pyplot as plt
from datetime import timedelta

# Step 1: Load the seating dataset
repo_name_seating = "davnas/occupancy_perc"
hf_dataset_seating = load_dataset(repo_name_seating)

# Combine all splits into a single DataFrame with appropriate data types
df_seating = pd.concat(
    [split.to_pandas().astype({'KTH Library': int, 'South-East Gallery': int, 'North Gallery': int,
                               'South Gallery': int, 'Ångdomen': int, 'Newton': int})
     for split in hf_dataset_seating.values()],
    ignore_index=True
)
df_seating.set_index('index', inplace=True)
df_seating.index = pd.to_datetime(df_seating.index)  # Ensure the index is datetime

# Resample to 30-minute intervals and fill missing data (while keeping original rounding)
df_seating_resampled = df_seating.resample('30T').mean().interpolate(method='linear').round().astype(int)

# Filter data to only include today and tomorrow
today = pd.Timestamp.today().normalize()  # Today's date without time
tomorrow_midnight = (today + timedelta(days=1)).normalize()  # Tomorrow's midnight
end_of_tomorrow = tomorrow_midnight + timedelta(days=1)  # End of tomorrow, for the full 24 hours

# Get the last available entry for today
last_valid_entry = df_seating_resampled[df_seating_resampled.index.date == today.date()].index[-1]

# Create the datetime range from the last entry until tomorrow midnight
time_range = pd.date_range(start=last_valid_entry + timedelta(minutes=30), end=end_of_tomorrow, freq='30T')

# Create a DataFrame for this time range with NaN values
nan_df = pd.DataFrame(np.nan, index=time_range, columns=df_seating_resampled.columns)

# Concatenate the original data with the NaN data (after the last valid entry until tomorrow midnight)
df_seating_combined = pd.concat([df_seating_resampled, nan_df])

# Ensure the data has the correct format, with NaN entries for after today until tomorrow midnight
df_seating_combined = df_seating_combined.sort_index()

# Filter to keep only data from today and tomorrow
df_today_tomorrow = df_seating_combined[(df_seating_combined.index >= today) & (df_seating_combined.index < end_of_tomorrow)]

# Split into downloaded data (non-NaN) and null data (NaN)
df_today_real = df_today_tomorrow[df_today_tomorrow.notna().all(axis=1)].copy()
df_null = df_today_tomorrow[df_today_tomorrow.isna().any(axis=1)].copy()

# Recombine into the full DataFrame
df_full = pd.concat([df_today_real, df_null]).sort_index()

# Plot the data for today and tomorrow
#plt.figure(figsize=(10, 6))
#plt.plot(df_full.index, df_full['South-East Gallery'], label="South-East Gallery")
#plt.title("South-East Gallery Occupancy (Today's and Tomorrow's Data)")
#plt.xlabel("Time")
#plt.ylabel("Occupancy")
#plt.legend()
#plt.xticks(rotation=45)
#plt.grid(True)
#plt.tight_layout()
#plt.show()

# Optionally, print the split DataFrames to check
df_today_real.tail()




IndexError: index -1 is out of bounds for axis 0 with size 0

In [57]:
# Check if the last entry's time is before 21:30
last_entry_time = df_today_real.index[-1].time()

# Set the threshold time as 21:30
threshold_time = pd.to_datetime("21:30").time()

if last_entry_time < threshold_time:
    raise ValueError("Not enough data in df_todays_meas")


In [58]:
# Filter data for time between 07:30 and 21:30
start_time = pd.to_datetime("07:30").time()
end_time = pd.to_datetime("21:30").time()

df_filtered = df_today_real.between_time(start_time, end_time)
df_today_real = df_filtered.copy()
df_today_real.tail()

Unnamed: 0,KTH Library,South-East Gallery,North Gallery,South Gallery,Ångdomen,Newton
2025-01-07 19:30:00,14.0,60.0,22.0,10.0,14.0,6.0
2025-01-07 20:00:00,11.0,57.0,17.0,10.0,10.0,6.0
2025-01-07 20:30:00,7.0,53.0,13.0,10.0,7.0,6.0
2025-01-07 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2025-01-07 21:30:00,0.0,0.0,0.0,0.0,0.0,0.0


### 2. Retrieve past Prediction for today:


In [None]:
# Step 2: Load the full forecast
repo_name = "davnas/library-occupancy"
hf_full_forecast = load_dataset(repo_name)
df_full_forecast = pd.DataFrame(hf_full_forecast['train'])
df_full_forecast.tail()

Generating train split:   0%|          | 0/232 [00:00<?, ? examples/s]

Unnamed: 0,CommitTime,Time,Occupancy_main,Occupancy_southEast,Occupancy_north,Occupancy_south,Occupancy_angdomen,Occupancy_newton,Prediction_date
227,2025-01-07 22:09:17,08:00,23,10,9,13,0,14,2025-01-08
228,2025-01-07 22:09:17,07:30,0,0,0,0,0,0,2025-01-08
229,2025-01-07 22:09:17,21:00,0,0,0,0,0,0,2025-01-08
230,2025-01-07 22:09:17,14:00,63,43,49,56,10,28,2025-01-08
231,2025-01-07 22:09:17,21:30,0,0,0,0,0,0,2025-01-08


In [60]:
today = pd.Timestamp.now().date()
df_today_predicted = df_full_forecast[df_full_forecast['Prediction_date'] == str(today)]
df_today_predicted = df_today_predicted.sort_values('Time')
df_today_predicted.tail()

Unnamed: 0,CommitTime,Time,Occupancy_main,Occupancy_southEast,Occupancy_north,Occupancy_south,Occupancy_angdomen,Occupancy_newton,Prediction_date
181,2025-01-06 22:19:07,19:30,16,37,21,12,7,19,2025-01-07
180,2025-01-06 22:19:07,20:00,17,38,20,11,6,15,2025-01-07
188,2025-01-06 22:19:07,20:30,14,36,18,11,6,13,2025-01-07
199,2025-01-06 22:19:07,21:00,0,0,0,0,0,0,2025-01-07
195,2025-01-06 22:19:07,21:30,0,0,0,0,0,0,2025-01-07


### 3. Merging the predicted and reality

In [61]:
df_today_real

Unnamed: 0,KTH Library,South-East Gallery,North Gallery,South Gallery,Ångdomen,Newton
2025-01-07 07:30:00,0.0,0.0,0.0,0.0,0.0,0.0
2025-01-07 08:00:00,7.0,3.0,11.0,3.0,3.0,0.0
2025-01-07 08:30:00,15.0,5.0,29.0,11.0,8.0,2.0
2025-01-07 09:00:00,27.0,17.0,51.0,21.0,11.0,13.0
2025-01-07 09:30:00,40.0,28.0,72.0,26.0,16.0,31.0
2025-01-07 10:00:00,50.0,44.0,86.0,35.0,21.0,57.0
2025-01-07 10:30:00,61.0,64.0,98.0,43.0,30.0,58.0
2025-01-07 11:00:00,67.0,86.0,95.0,46.0,39.0,59.0
2025-01-07 11:30:00,70.0,99.0,97.0,47.0,42.0,66.0
2025-01-07 12:00:00,59.0,100.0,83.0,36.0,34.0,61.0


In [62]:
df_today_real.tail()

Unnamed: 0,KTH Library,South-East Gallery,North Gallery,South Gallery,Ångdomen,Newton
2025-01-07 19:30:00,14.0,60.0,22.0,10.0,14.0,6.0
2025-01-07 20:00:00,11.0,57.0,17.0,10.0,10.0,6.0
2025-01-07 20:30:00,7.0,53.0,13.0,10.0,7.0,6.0
2025-01-07 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2025-01-07 21:30:00,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
df_today_predicted.tail()

Unnamed: 0,CommitTime,Time,Occupancy_main,Occupancy_southEast,Occupancy_north,Occupancy_south,Occupancy_angdomen,Occupancy_newton,Prediction_date
181,2025-01-06 22:19:07,19:30,16,37,21,12,7,19,2025-01-07
180,2025-01-06 22:19:07,20:00,17,38,20,11,6,15,2025-01-07
188,2025-01-06 22:19:07,20:30,14,36,18,11,6,13,2025-01-07
199,2025-01-06 22:19:07,21:00,0,0,0,0,0,0,2025-01-07
195,2025-01-06 22:19:07,21:30,0,0,0,0,0,0,2025-01-07


In [64]:
# Step 1: Format df_today_real (ensure Date, Time, and values are correctly aligned)
df_today_real.rename(columns={
    'KTH Library': 'Occupancy_main_real',
    'South-East Gallery': 'Occupancy_southEast_real',
    'North Gallery': 'Occupancy_north_real',
    'South Gallery': 'Occupancy_south_real',
    'Ångdomen': 'Occupancy_angdomen_real',
    'Newton': 'Occupancy_newton_real'
}, inplace=True)

df_today_real['Date'] = df_today_real.index.date
df_today_real['Time'] = df_today_real.index.strftime('%H:%M')
df_today_real.tail()

Unnamed: 0,Occupancy_main_real,Occupancy_southEast_real,Occupancy_north_real,Occupancy_south_real,Occupancy_angdomen_real,Occupancy_newton_real,Date,Time
2025-01-07 19:30:00,14.0,60.0,22.0,10.0,14.0,6.0,2025-01-07,19:30
2025-01-07 20:00:00,11.0,57.0,17.0,10.0,10.0,6.0,2025-01-07,20:00
2025-01-07 20:30:00,7.0,53.0,13.0,10.0,7.0,6.0,2025-01-07,20:30
2025-01-07 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0,2025-01-07,21:00
2025-01-07 21:30:00,0.0,0.0,0.0,0.0,0.0,0.0,2025-01-07,21:30


In [65]:
# Step 3: Rename the columns in df_today_predicted to match the desired output
df_today_predicted.rename(columns={
    'Occupancy_main': 'Occupancy_main_predicted',
    'Occupancy_southEast': 'Occupancy_southEast_predicted',
    'Occupancy_north': 'Occupancy_north_predicted',
    'Occupancy_south': 'Occupancy_south_predicted',
    'Occupancy_angdomen': 'Occupancy_angdomen_predicted',
    'Occupancy_newton': 'Occupancy_newton_predicted'
}, inplace=True)

# Step 4: Format df_today_predicted (ensure Date, Time, and values are correctly aligned)
df_today_predicted['Date'] = pd.to_datetime(df_today_predicted['Prediction_date']).dt.date
df_today_predicted['Time'] = df_today_predicted['Time']
df_today_predicted.tail()

Unnamed: 0,CommitTime,Time,Occupancy_main_predicted,Occupancy_southEast_predicted,Occupancy_north_predicted,Occupancy_south_predicted,Occupancy_angdomen_predicted,Occupancy_newton_predicted,Prediction_date,Date
181,2025-01-06 22:19:07,19:30,16,37,21,12,7,19,2025-01-07,2025-01-07
180,2025-01-06 22:19:07,20:00,17,38,20,11,6,15,2025-01-07,2025-01-07
188,2025-01-06 22:19:07,20:30,14,36,18,11,6,13,2025-01-07,2025-01-07
199,2025-01-06 22:19:07,21:00,0,0,0,0,0,0,2025-01-07,2025-01-07
195,2025-01-06 22:19:07,21:30,0,0,0,0,0,0,2025-01-07,2025-01-07


In [None]:
df_merged = pd.merge(df_today_real, df_today_predicted, on=['Date', 'Time'], how='outer')


# Step 6: Reorder and finalize the columns
final_columns = ['Date', 'Time', 'Occupancy_main_real', 'Occupancy_main_predicted',
                 'Occupancy_southEast_real', 'Occupancy_southEast_predicted',
                 'Occupancy_north_real', 'Occupancy_north_predicted',
                 'Occupancy_south_real', 'Occupancy_south_predicted',
                 'Occupancy_angdomen_real', 'Occupancy_angdomen_predicted',
                 'Occupancy_newton_real', 'Occupancy_newton_predicted']

df_final = df_merged[final_columns]
df_final.to_csv("df_final.csv", index=False)
df_final.tail()

Unnamed: 0,Date,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted
24,2025-01-07,19:30,14.0,16,60.0,37,22.0,21,10.0,12,14.0,7,6.0,19
25,2025-01-07,20:00,11.0,17,57.0,38,17.0,20,10.0,11,10.0,6,6.0,15
26,2025-01-07,20:30,7.0,14,53.0,36,13.0,18,10.0,11,7.0,6,6.0,13
27,2025-01-07,21:00,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0
28,2025-01-07,21:30,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0


In [67]:
print(df_final.isnull().sum())  # Check for NaN values
#print(df_final.describe())  # Check for zero values or anomalies
df = df_final.copy()

Date                             0
Time                             0
Occupancy_main_real              0
Occupancy_main_predicted         0
Occupancy_southEast_real         0
Occupancy_southEast_predicted    0
Occupancy_north_real             0
Occupancy_north_predicted        0
Occupancy_south_real             0
Occupancy_south_predicted        0
Occupancy_angdomen_real          0
Occupancy_angdomen_predicted     0
Occupancy_newton_real            0
Occupancy_newton_predicted       0
dtype: int64


### 4. Calculate Metrics

In [None]:
#f = pd.read_csv("df_final.csv")
#f

Unnamed: 0,Date,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted
0,2025-01-07,07:30,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0
1,2025-01-07,08:00,7.0,15,3.0,25,11.0,8,3.0,3,3.0,0,0.0,0
2,2025-01-07,08:30,15.0,21,5.0,31,29.0,16,11.0,9,8.0,0,2.0,1
3,2025-01-07,09:00,27.0,26,17.0,34,51.0,23,21.0,11,11.0,1,13.0,0
4,2025-01-07,09:30,40.0,32,28.0,34,72.0,28,26.0,14,16.0,3,31.0,2
5,2025-01-07,10:00,50.0,44,44.0,36,86.0,34,35.0,45,21.0,5,57.0,10
6,2025-01-07,10:30,61.0,47,64.0,38,98.0,44,43.0,45,30.0,6,58.0,26
7,2025-01-07,11:00,67.0,52,86.0,52,95.0,50,46.0,46,39.0,9,59.0,37
8,2025-01-07,11:30,70.0,51,99.0,51,97.0,46,47.0,40,42.0,9,66.0,37
9,2025-01-07,12:00,59.0,45,100.0,51,83.0,31,36.0,24,34.0,5,61.0,37


In [103]:
from io import StringIO
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error

In [104]:
last_date = df['Date'].iloc[-1]

# Columns to calculate metrics for
columns = [
    "Occupancy_main",
    "Occupancy_southEast",
    "Occupancy_north",
    "Occupancy_south",
    "Occupancy_angdomen",
    "Occupancy_newton",
]

# Initialize results dictionary
results = {"RME": [], "MAPE": []}

# Calculate RME and MAPE for each column
for col in columns:
    real = df[f"{col}_real"].to_numpy()
    predicted = df[f"{col}_predicted"].to_numpy()
    
    if 'newton' in col.lower():
        # For Newton, only consider non-zero real values
        mask = (real > 0)  # Only include where real values are greater than 0
        real_filtered = real[mask]
        predicted_filtered = predicted[mask]
        
        if len(real_filtered) > 0:
            # Calculate RME using filtered values
            rme = 100 * np.sqrt(np.mean((real_filtered - predicted_filtered) ** 2)) / np.mean(real_filtered)
            
            # Calculate custom MAPE only for non-zero real values
            abs_perc_errors = np.abs((real_filtered - predicted_filtered) / real_filtered) * 100
            mape = np.mean(abs_perc_errors)
        else:
            rme = 0.0
            mape = 0.0
    else:
        # For other columns, use original logic but avoid division by zero
        mask = (real != 0) | (predicted != 0)
        real_filtered = real[mask]
        predicted_filtered = predicted[mask]
        
        if len(real_filtered) > 0:
            # Calculate RME
            rme = 100 * np.sqrt(np.mean((real_filtered - predicted_filtered) ** 2)) / np.mean(real_filtered)
            
            # Calculate MAPE
            mape = mean_absolute_percentage_error(real_filtered, predicted_filtered) * 100
        else:
            rme = 0.0
            mape = 0.0

    # Format all numbers to 2 decimal places
    results["RME"].append(round(rme, 2))
    results["MAPE"].append(round(mape, 2))

# Prepare rows to append to the DataFrame
rme_row = [last_date, "RME"] + [value for rme in results["RME"] for value in (rme, rme)]
mape_row = [last_date, "MAPE"] + [value for mape in results["MAPE"] for value in (mape, mape)]

# Append rows to the DataFrame
df.loc[len(df)] = rme_row
df.loc[len(df)] = mape_row

df.tail()

Unnamed: 0,Date,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted
26,2025-01-07,20:30,7.0,14.0,53.0,36.0,13.0,18.0,10.0,11.0,7.0,6.0,6.0,13.0
27,2025-01-07,21:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28,2025-01-07,21:30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,2025-01-07,RME,25.61,25.61,49.87,49.87,53.37,53.37,19.11,19.11,83.91,83.91,74.83,74.83
30,2025-01-07,MAPE,30.44,30.44,88.43,88.43,42.72,42.72,17.24,17.24,72.68,72.68,294.52,294.52


### 5. Upload to HugginFace

In [105]:
csv_file_path = "Real_vs_Predicted_Occupancy_Data.csv"
df.to_csv(csv_file_path, index=False)

In [106]:
import pandas as pd
from huggingface_hub import HfApi, HfFolder

# Hugging Face token and repository details
repo_name = "davnas/library-occupancy"  # Replace with your repository name
#csv_file_path = "data.csv"  # Replace with your CSV file path

# Authenticate using the token
HfFolder.save_token(HUGGINGFACE_TOKEN)
api = HfApi()

# Upload the CSV file
api.upload_file(
    path_or_fileobj=csv_file_path,
    path_in_repo="Real_vs_Predicted_Occupancy_Data.csv",  # The name of the file in the repository
    repo_id=repo_name,
    repo_type="dataset",  # Indicates this is a dataset repository
    token=HUGGINGFACE_TOKEN
)


print(f"CSV file successfully uploaded to Hugging Face repository: {repo_name}")



No files have been modified since last commit. Skipping to prevent empty commit.


CSV file successfully uploaded to Hugging Face repository: davnas/library-occupancy


In [107]:
from datetime import datetime

df['Day'] = datetime.now().date()
df

Unnamed: 0,Date,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted,Day
0,2025-01-07,07:30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2025-01-08
1,2025-01-07,08:00,7.0,15.0,3.0,25.0,11.0,8.0,3.0,3.0,3.0,0.0,0.0,0.0,2025-01-08
2,2025-01-07,08:30,15.0,21.0,5.0,31.0,29.0,16.0,11.0,9.0,8.0,0.0,2.0,1.0,2025-01-08
3,2025-01-07,09:00,27.0,26.0,17.0,34.0,51.0,23.0,21.0,11.0,11.0,1.0,13.0,0.0,2025-01-08
4,2025-01-07,09:30,40.0,32.0,28.0,34.0,72.0,28.0,26.0,14.0,16.0,3.0,31.0,2.0,2025-01-08
5,2025-01-07,10:00,50.0,44.0,44.0,36.0,86.0,34.0,35.0,45.0,21.0,5.0,57.0,10.0,2025-01-08
6,2025-01-07,10:30,61.0,47.0,64.0,38.0,98.0,44.0,43.0,45.0,30.0,6.0,58.0,26.0,2025-01-08
7,2025-01-07,11:00,67.0,52.0,86.0,52.0,95.0,50.0,46.0,46.0,39.0,9.0,59.0,37.0,2025-01-08
8,2025-01-07,11:30,70.0,51.0,99.0,51.0,97.0,46.0,47.0,40.0,42.0,9.0,66.0,37.0,2025-01-08
9,2025-01-07,12:00,59.0,45.0,100.0,51.0,83.0,31.0,36.0,24.0,34.0,5.0,61.0,37.0,2025-01-08


In [108]:
import pandas as pd
from huggingface_hub import HfApi, HfFolder

# Load existing log from Hugging Face
try:
    csv_url = "https://huggingface.co/datasets/davnas/library-occupancy/resolve/main/Real_vs_Predicted_Occupancy_Data_LOG.csv"
    df_LOG = pd.read_csv(csv_url)
except:
    df_LOG = pd.DataFrame(columns=df.columns)

df_LOG.columns

Index(['Date', 'Time', 'Occupancy_main_real', 'Occupancy_main_predicted',
       'Occupancy_southEast_real', 'Occupancy_southEast_predicted',
       'Occupancy_north_real', 'Occupancy_north_predicted',
       'Occupancy_south_real', 'Occupancy_south_predicted',
       'Occupancy_angdomen_real', 'Occupancy_angdomen_predicted',
       'Occupancy_newton_real', 'Occupancy_newton_predicted', 'Day'],
      dtype='object')

In [109]:
# Convert Day column to string for consistent comparison
df_LOG['Day'] = df_LOG['Day'].astype(str)
df['Day'] = df['Day'].astype(str)

# Find records with days that don't exist in the log
existing_days = set(df_LOG['Day'])
new_records = df[~df['Day'].isin(existing_days)]

if len(new_records) == 0:
    print("No new records to add.")
else:
    # Concatenate with existing log
    df_LOG = pd.concat([df_LOG, new_records], ignore_index=True)
    
    # Save updated log locally
    csv_file_path = "Real_vs_Predicted_Occupancy_Data_LOG.csv"
    df_LOG.to_csv(csv_file_path, index=False)
    
    # Upload to Hugging Face
    HfFolder.save_token(HUGGINGFACE_TOKEN)
    api = HfApi()
    
    api.upload_file(
        path_or_fileobj=csv_file_path,
        path_in_repo="Real_vs_Predicted_Occupancy_Data_LOG.csv",
        repo_id="davnas/library-occupancy",
        repo_type="dataset",
        token=HUGGINGFACE_TOKEN
    )
    
    print(f"Added {len(new_records)} new records to the log.")
    print(f"CSV file successfully uploaded to Hugging Face repository")

No new records to add.


In [91]:
'''
Final CSV format
Date,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted
2024-12-26,08:00,0,-2,5,4,3,6,4,1,2,3,0,-1
2024-12-26,08:30,0,-4,6,6,4,-1,3,1,2,-3,5,7
2024-12-26,09:00,28,33,35,32,30,30,25,27,20,22,10,11
2024-12-26,09:30,40,44,45,41,38,34,32,37,28,28,22,19
2024-12-26,10:00,54,50,50,54,48,49,42,47,38,40,35,30
2024-12-26,10:30,64,60,60,65,58,55,52,53,48,49,45,44
2024-12-26,11:00,71,66,68,71,65,64,60,62,55,50,50,54
2024-12-26,11:30,70,69,67,72,64,61,59,57,54,59,49,50
2024-12-26,12:00,51,55,50,47,48,51,45,44,43,47,40,38
2024-12-26,12:30,53,50,52,53,50,49,47,49,45,44,42,40
2024-12-26,13:00,64,63,62,67,60,58,55,59,50,54,48,47
2024-12-26,13:30,68,69,66,64,64,65,59,63,55,57,53,54
2024-12-26,14:00,70,68,68,69,65,67,60,59,55,57,53,55
2024-12-26,14:30,71,70,69,67,67,71,62,64,58,57,56,55
2024-12-26,15:00,71,66,70,74,68,69,63,61,59,55,57,54
2024-12-26,15:30,70,65,69,66,67,66,62,64,58,55,56,53
2024-12-26,16:00,64,61,63,62,60,57,58,56,55,56,53,54
2024-12-26,16:30,55,54,54,58,52,57,50,53,48,51,45,49
2024-12-26,17:00,88,83,85,87,82,79,80,77,78,80,75,78
2024-12-26,17:30,35,36,34,39,32,29,30,33,28,27,25,24
2024-12-26,18:00,29,33,28,30,27,31,25,27,23,21,20,21
2024-12-26,18:30,24,22,23,26,22,20,20,22,18,19,15,18
2024-12-26,19:00,20,24,19,18,18,22,16,20,15,12,12,15
2024-12-26,19:30,15,11,14,17,13,10,12,10,10,11,8,6
2024-12-26,20:00,11,12,10,8,9,8,8,6,7,10,5,6
2024-12-26,20:30,7,5,6,8,5,3,4,7,3,6,2,50
2024-12-26,21:00,0,1,0,0,0,0,0,0,0,0,0,0
2024-12-26,RME,15.24,14.76,15.67,16.24,14.45,13.67,14.56,14.89,13.42,14.11,13.78,13.21
2024-12-26,MAPE,8.54,8.24,9.15,9.67,8.11,8.45,8.78,8.56,7.89,8.12,7.67,7.98
'''


'\nFinal CSV format\nDate,Time,Occupancy_main_real,Occupancy_main_predicted,Occupancy_southEast_real,Occupancy_southEast_predicted,Occupancy_north_real,Occupancy_north_predicted,Occupancy_south_real,Occupancy_south_predicted,Occupancy_angdomen_real,Occupancy_angdomen_predicted,Occupancy_newton_real,Occupancy_newton_predicted\n2024-12-26,08:00,0,-2,5,4,3,6,4,1,2,3,0,-1\n2024-12-26,08:30,0,-4,6,6,4,-1,3,1,2,-3,5,7\n2024-12-26,09:00,28,33,35,32,30,30,25,27,20,22,10,11\n2024-12-26,09:30,40,44,45,41,38,34,32,37,28,28,22,19\n2024-12-26,10:00,54,50,50,54,48,49,42,47,38,40,35,30\n2024-12-26,10:30,64,60,60,65,58,55,52,53,48,49,45,44\n2024-12-26,11:00,71,66,68,71,65,64,60,62,55,50,50,54\n2024-12-26,11:30,70,69,67,72,64,61,59,57,54,59,49,50\n2024-12-26,12:00,51,55,50,47,48,51,45,44,43,47,40,38\n2024-12-26,12:30,53,50,52,53,50,49,47,49,45,44,42,40\n2024-12-26,13:00,64,63,62,67,60,58,55,59,50,54,48,47\n2024-12-26,13:30,68,69,66,64,64,65,59,63,55,57,53,54\n2024-12-26,14:00,70,68,68,69,65,67,60,59,55