In [7]:
from prophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [8]:
# Load the CSV, parse the 'time' column as datetimes, then rename to the
# 'ds' (timestamp) / 'y' (target) column names used for fitting below.
# Any other columns in the file are carried along unchanged.
df = (
    pd.read_csv("data.csv")
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .rename(columns={'time': 'ds', 'relative_humidity_2m (%)': 'y'})
)

In [9]:
# Fit a Prophet model with default settings on the humidity series
# (df must expose the 'ds' timestamp and 'y' target columns).
model_humid = Prophet()
model_humid.fit(df)

# Extend the training timeline by 45 hourly steps for forecasting.
# Use the lowercase 'h' alias: uppercase 'H' is deprecated in pandas >= 2.2
# and emits a FutureWarning from the underlying pd.date_range call.
future = model_humid.make_future_dataframe(periods=45, freq='h')

19:44:57 - cmdstanpy - INFO - Chain [1] start processing
19:44:58 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


In [10]:
# Predict over the full timeline (history + future rows)
forecast = model_humid.predict(future)

# Keep only the rows strictly after the last observed timestamp
future_forecast = forecast.loc[forecast['ds'].gt(df['ds'].max())]

# Show the point forecast together with its uncertainty bounds
future_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
3168,2025-05-13 00:00:00,89.257326,78.010223,100.414611
3169,2025-05-13 01:00:00,81.24515,69.227502,92.592222
3170,2025-05-13 02:00:00,71.845843,60.912489,83.118949
3171,2025-05-13 03:00:00,63.635776,52.312228,74.314385
3172,2025-05-13 04:00:00,58.124331,46.722313,69.297116
3173,2025-05-13 05:00:00,55.106026,43.837697,66.215377
3174,2025-05-13 06:00:00,53.480733,42.491986,64.579366
3175,2025-05-13 07:00:00,52.67439,41.853206,64.427164
3176,2025-05-13 08:00:00,53.288388,41.523155,65.171546
3177,2025-05-13 09:00:00,56.403885,45.116966,67.085888


In [11]:
# Columns written to the export, in output order
output_columns = ['ds', 'relative_humidity_2m (%)', 'source']

# Forecast rows: keep the point forecast under the humidity column name,
# tag them as predicted, and render timestamps as 'YYYY-MM-DD HH:MM:SS' text
predicted_df = (
    future_forecast[['ds', 'yhat']]
    .rename(columns={'yhat': 'relative_humidity_2m (%)'})
    .assign(
        source='predicted',
        ds=lambda frame: frame['ds'].dt.strftime('%Y-%m-%d %H:%M:%S'),
    )
)

# Observed rows: restore the humidity column name, tag them as actual,
# and apply the same timestamp text format
original_df = (
    df.rename(columns={'y': 'relative_humidity_2m (%)'})
    .assign(
        source='actual',
        ds=lambda frame: frame['ds'].dt.strftime('%Y-%m-%d %H:%M:%S'),
    )
)

# Stack observed and predicted rows, then order by the timestamp string
# (the zero-padded format sorts lexicographically in chronological order)
combined_df = pd.concat(
    [original_df[output_columns], predicted_df[output_columns]],
    ignore_index=True,
).sort_values('ds')

# Write the combined series to disk without the index column
combined_df.to_csv("data_with_predictions_2.csv", index=False)

In [12]:
# Persist the fitted humidity model to disk as a pickle.
# NOTE(review): Prophet's documentation recommends prophet.serialize.model_to_json
# over pickle for saving models; the pickle format is kept here so existing
# consumers of humid_model.pkl keep working. Only unpickle this file from a trusted source.
with open("humid_model.pkl", "wb") as model_file:
    pickle.dump(model_humid, model_file)