# Delhi Climate Forecasting

In [1]:
# Data handling
import numpy as np
import pandas as pd

# Plotting libraries
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns

# Global plot styling: wide seaborn figures on a dark grid, and the dark Plotly template.
sns.set_theme(style='darkgrid', font_scale=1.5, rc={'figure.figsize': (11, 4)})
pio.templates.default = 'plotly_dark'

In [2]:
# Load the training split, parsing `date` as datetime and using it as the index.
# The path uses a forward slash: the original 'archive\D...' literal depended on '\D'
# being an unrecognised escape sequence and only resolved as a path on Windows.
df = pd.read_csv(
    'archive/DailyDelhiClimateTrain.csv',
    parse_dates=['date'],
    index_col='date',
)
df.head()

Unnamed: 0_level_0,meantemp,humidity,wind_speed,meanpressure
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,10.0,84.5,0.0,1015.666667
2013-01-02,7.4,92.0,2.98,1017.8
2013-01-03,7.166667,87.0,4.633333,1018.666667
2013-01-04,8.666667,71.333333,1.233333,1017.166667
2013-01-05,6.0,86.833333,3.7,1016.5


In [3]:
# Summarise the index range, column dtypes and non-null counts of the training frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1462 entries, 2013-01-01 to 2017-01-01
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   meantemp      1462 non-null   float64
 1   humidity      1462 non-null   float64
 2   wind_speed    1462 non-null   float64
 3   meanpressure  1462 non-null   float64
dtypes: float64(4)
memory usage: 57.1 KB


In [4]:
# Dimensions of the training frame as (rows, columns).
df.shape

(1462, 4)

In [5]:
# Approximate number of whole years covered: distinct dates divided by 365 days per year.
# (The previous divisor, 356, was a transposed-digit typo.)
print(f"Total number of years of data present: {df.index.nunique()//365}")

Total number of years of data present: 4


In [6]:
# Summary statistics with one row per variable (transposed for readability).
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
meantemp,1462.0,25.495521,7.348103,6.0,18.857143,27.714286,31.305804,38.714286
humidity,1462.0,60.771702,16.769652,13.428571,50.375,62.625,72.21875,100.0
wind_speed,1462.0,6.802209,4.561602,0.0,3.475,6.221667,9.238235,42.22
meanpressure,1462.0,1011.104548,180.231668,-3.041667,1001.580357,1008.563492,1014.944901,7679.333333


## EDA

In [7]:
# Daily mean temperature as a line chart, coloured by calendar year.
fig = px.line(
    df,
    x=df.index,
    y='meantemp',
    color=df.index.year,
    title="MeanTemp distribution over the year",
)
fig.show()

In [8]:
# Daily mean temperature as points, coloured by month number to expose the seasonal cycle.
fig = px.scatter(
    df,
    x=df.index,
    y='meantemp',
    color=df.index.month,
    title="MeanTemp distribution over the days",
)
fig.show()

In [9]:
# Monthly mean of the daily mean temperature, kept as a one-column frame indexed by
# `date` so that further monthly columns can be attached to it later.
monthly_meantemp = df['meantemp'].resample('M').mean()
df_resample = monthly_meantemp.reset_index().set_index('date')

In [10]:
# Monthly mean temperature as bars, coloured by month number.
# `data_frame` is the monthly frame itself: every plotted value comes from `df_resample`,
# so passing the daily `df` (as before) named a frame that did not match the plotted data.
fig = px.bar(
    data_frame=df_resample,
    x=df_resample.index,
    y='meantemp',
    color=df_resample.index.month,
    title="MeanTemp distribution over the month",
)
fig.show()

1. **Late May to Early June (Last 2 weeks of May to first 2 weeks of June):** High temperatures due to the peak of the summer season.

2. **July to Early September (July to first week of September):** Fluctuating temperatures due to the rainy season.

3. **Overall Trend (January to June, September to December):** Increasing temperatures from January to June and decreasing temperatures from September to December due to seasonal impact.

[Reference](https://delhitourism.gov.in/delhitourism/aboutus/seasons_of_delhi.jsp)

In [11]:
# Daily humidity as points, coloured by month number.
fig = px.scatter(
    df,
    x=df.index,
    y='humidity',
    color=df.index.month,
    title="Humidity distribution over the days",
)
fig.show()

In [12]:
# Attach the monthly mean humidity to the monthly frame (aligned on the month-end dates).
monthly_humidity = df['humidity'].resample('M').mean()
df_resample['humidity'] = monthly_humidity

In [13]:
# Monthly mean humidity as bars, coloured by month number.
# `data_frame` is the monthly frame itself: every plotted value comes from `df_resample`,
# so passing the daily `df` (as before) named a frame that did not match the plotted data.
fig = px.bar(
    data_frame=df_resample,
    x=df_resample.index,
    y='humidity',
    color=df_resample.index.month,
    title="Humidity distribution over the month",
)
fig.show()

In [14]:
# Overlay daily humidity (line) on daily mean temperature (points) along a shared date axis.
fig = px.scatter(df, x=df.index, y='meantemp', title="Temp. vs Humidity")
humidity_trace = go.Scatter(x=df.index, y=df['humidity'], mode='lines', name='Humidity')
fig.add_trace(humidity_trace)
fig.show()

In [15]:
# Monthly mean temperature against monthly mean humidity, with an OLS trend line in white.
fig = px.scatter(
    df_resample,
    x='humidity',
    y='meantemp',
    title="Temp. vs Humidity",
    trendline='ols',
    trendline_color_override='white',
)
fig.show()

In [16]:
# Pairwise correlation between daily humidity and daily mean temperature.
humidity_and_temp = df[['humidity', 'meantemp']]
humidity_and_temp.corr()

Unnamed: 0,humidity,meantemp
humidity,1.0,-0.571951
meantemp,-0.571951,1.0


The correlation coefficient of about -0.57 indicates a moderate negative relationship: when the temperature rises, the humidity tends to fall, and the reverse is also true.

$\text{temperature} \propto \dfrac{1}{\text{humidity}}$

- [Reference1](https://www.vedantu.com/geography/relation-between-temperature-and-humidity)
- [Reference2](https://www.indianclimate.com/relative-humidity-data.php?baithak=1559964334)

In [17]:
# Daily wind speed as a line chart, coloured by calendar year.
fig = px.line(
    df,
    x=df.index,
    y='wind_speed',
    color=df.index.year,
    title="wind_speed distribution over the days",
)
fig.show()

In [18]:
# Attach the monthly mean wind speed to the monthly frame (aligned on the month-end dates).
monthly_wind_speed = df['wind_speed'].resample('M').mean()
df_resample['wind_speed'] = monthly_wind_speed

In [19]:
# Monthly mean wind speed as bars, coloured by month number.
fig = px.bar(
    df_resample,
    x=df_resample.index,
    y='wind_speed',
    color=df_resample.index.month,
    title="wind_speed distribution over the month",
)
fig.show()

- On average, the most wind is seen in May to June.
- On average, the least wind is seen in November to December.

[Reference](https://weather-and-climate.com/average-monthly-Wind-speed,New-Delhi,India)

In [20]:
# Monthly mean temperature against monthly mean wind speed, with an OLS trend line in white.
fig = px.scatter(
    df_resample,
    x='wind_speed',
    y='meantemp',
    title="Temp. vs wind_speed",
    trendline='ols',
    trendline_color_override='white',
)
fig.show()

- $\text{wind\_speed} \propto \text{meantemp}$
- $\implies \text{wind\_speed} \propto \dfrac{1}{\text{humidity}}$

In [21]:
# Daily mean pressure as a line chart, coloured by calendar year.
fig = px.line(
    df,
    x=df.index,
    y='meanpressure',
    color=df.index.year,
    title="meanpressure distribution over the days",
)
fig.show()

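The mean pressure data is almost stationary, apart from a few extreme values on March 28, 2016, on June 9, and on 2-3 days between August and December. These outliers are not useful for trend analysis and would need to be replaced (for example, with the mean) before `meanpressure` could be used; in this notebook the column is instead dropped before forecasting.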

### Forecasting using Prophet

In [22]:
# Load the held-out test split, parsing `date` as datetime and using it as the index.
# The path uses a forward slash: the original 'archive\D...' literal depended on '\D'
# being an unrecognised escape sequence and only resolved as a path on Windows.
df_test = pd.read_csv(
    'archive/DailyDelhiClimateTest.csv',
    parse_dates=['date'],
    index_col='date',
)

In [23]:
# Work on a copy so the frame used for EDA (`df`) is not changed by the Prophet preparation steps.
data=df.copy()

In [24]:
# Exclude mean pressure from the modelling frame.
data = data.drop(columns=['meanpressure'])

In [25]:
# Turn the `date` index back into an ordinary column.
data = data.reset_index()

In [26]:
# Rename to the `ds` (timestamp) and `y` (target) column names used when fitting Prophet below.
data = data.rename(columns={'date': 'ds', 'meantemp': 'y'})

In [27]:
# Preview the modelling frame after the drop / reset / rename steps.
data.head()

Unnamed: 0,ds,y,humidity,wind_speed
0,2013-01-01,10.0,84.5,0.0
1,2013-01-02,7.4,92.0,2.98
2,2013-01-03,7.166667,87.0,4.633333
3,2013-01-04,8.666667,71.333333,1.233333
4,2013-01-05,6.0,86.833333,3.7


In [28]:
# Log-transform the target. The value is derived from the untouched `df['meantemp']`
# rather than from `data['y']` itself, so re-running this cell cannot apply the log twice.
# `.to_numpy()` assigns positionally: `data` was built from `df` row-for-row but carries a
# fresh integer index, so label alignment must be avoided.
data['y'] = np.log(df['meantemp'].to_numpy())

In [29]:
# Preview the modelling frame again; `y` now holds the natural log of the mean temperature.
data.head()

Unnamed: 0,ds,y,humidity,wind_speed
0,2013-01-01,2.302585,84.5,0.0
1,2013-01-02,2.00148,92.0,2.98
2,2013-01-03,1.969441,87.0,4.633333
3,2013-01-04,2.159484,71.333333,1.233333
4,2013-01-05,1.791759,86.833333,3.7


In [30]:
from prophet import Prophet


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



Uni-Variate Forecasting

In [31]:
# Fit a Prophet model with default settings. No regressors are registered on this model,
# so it forecasts `y` (log mean temperature) from `ds` alone.
# NOTE(review): `data` also carries humidity and wind_speed columns; presumably Prophet
# ignores columns that were not registered via add_regressor — confirm.
model = Prophet()
model.fit(data)

23:01:14 - cmdstanpy - INFO - Chain [1] start processing
23:01:16 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1ef412b1810>

In [32]:
# Build the prediction dates with a horizon of 113 daily periods.
# NOTE(review): 113 is presumably chosen to reach the end of the test split — confirm.
future_data = model.make_future_dataframe(periods=113, freq = 'D')

In [33]:
# Predict for every date in `future_data`. The model was fitted on log(meantemp),
# so the yhat columns are still on the log scale at this point.
forecast_data = model.predict(future_data)

In [34]:
# Convert the forecast and its interval bounds from the log scale back to temperature units.
# A copy is taken first: plain assignment would only alias `forecast_data`, so the exp
# would overwrite the log-scale forecast and re-running the cell would exponentiate twice.
forecast_data_orig = forecast_data.copy()
yhat_cols = ['yhat', 'yhat_lower', 'yhat_upper']
forecast_data_orig[yhat_cols] = np.exp(forecast_data_orig[yhat_cols])

In [35]:
# Back-transformed forecast (history plus horizon) with the observed training temperatures overlaid.
fig = px.line(
    forecast_data_orig,
    x=forecast_data_orig['ds'],
    y=forecast_data_orig['yhat'],
    title="Actual vs Predicted",
)
actual_trace = go.Scatter(x=df.index, y=df['meantemp'], mode='lines', name='Actual')
fig.add_trace(actual_trace)
fig.update_layout(xaxis_title='Date', yaxis_title='Temperature')
fig.show()

In [45]:
# Keep only the last 114 forecast rows for comparison with the test split.
# NOTE(review): 114 is one more than the 113-period horizon requested earlier; presumably
# the extra row is the final training date, so the slice lines up with df_test — confirm.
forecast_future_data_orig=forecast_data_orig.tail(114)

In [48]:
# Forecast over the held-out period with the observed test temperatures overlaid.
fig = px.line(
    forecast_future_data_orig,
    x=forecast_future_data_orig['ds'],
    y=forecast_future_data_orig['yhat'],
    title="Future Actual vs Predicted",
)
actual_trace = go.Scatter(x=df_test.index, y=df_test['meantemp'], mode='lines', name='Actual')
fig.add_trace(actual_trace)
fig.update_layout(xaxis_title='Date', yaxis_title='Temperature')
fig.show()

Multi-Variate Forecasting

In [36]:
# Separate Prophet instance for the forecast that uses extra regressors.
model_mulvar = Prophet()

In [37]:
# Register humidity and wind speed as additional regressors; both are columns of `data`.
# These must be registered before the model is fitted.
model_mulvar.add_regressor('humidity')
model_mulvar.add_regressor('wind_speed')

<prophet.forecaster.Prophet at 0x1ef410549d0>

In [38]:
# Fit the regressor-aware model on the same frame (ds, log-scale y, humidity, wind_speed).
model_mulvar.fit(data)

23:01:19 - cmdstanpy - INFO - Chain [1] start processing
23:01:19 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1ef410549d0>

In [39]:
# Build the prediction dates from the multivariate model (the original called the
# univariate `model` here): its training dates plus a 113-day horizon.
future_data_mulvar = model_mulvar.make_future_dataframe(periods=113, freq='D')

In [40]:
# Predict with the regressor-aware model. The original called the univariate `model`,
# which made this "multi-variate" forecast identical to the univariate one.
# `model_mulvar` needs a value for each registered regressor on every prediction date,
# so humidity and wind speed are looked up from the train and test frames by date.
# NOTE(review): assumes df_test covers every date of the forecast horizon; a missing
# date would leave NaN regressors and make predict() fail — confirm.
# NOTE(review): this feeds observed future humidity/wind_speed to the model; in a real
# forecast those would themselves have to be forecast.
regressor_cols = ['humidity', 'wind_speed']
regressor_values = pd.concat([df[regressor_cols], df_test[regressor_cols]])
# Keep one row per date in case the two splits share a boundary date.
regressor_values = regressor_values[~regressor_values.index.duplicated(keep='first')]
regressor_values = regressor_values.rename_axis('ds').reset_index()
future_with_regressors = future_data_mulvar.merge(regressor_values, on='ds', how='left')
forecast_data_mulvar = model_mulvar.predict(future_with_regressors)

In [41]:
# Convert the forecast and its interval bounds from the log scale back to temperature units.
# A copy is taken first: plain assignment would only alias `forecast_data_mulvar`, so the
# exp would overwrite the log-scale forecast and re-running the cell would exponentiate twice.
forecast_data_mulvar_orig = forecast_data_mulvar.copy()
yhat_cols_mulvar = ['yhat', 'yhat_lower', 'yhat_upper']
forecast_data_mulvar_orig[yhat_cols_mulvar] = np.exp(forecast_data_mulvar_orig[yhat_cols_mulvar])

In [42]:
# Back-transformed forecast (history plus horizon) with the observed training temperatures overlaid.
fig = px.line(
    forecast_data_mulvar_orig,
    x=forecast_data_mulvar_orig['ds'],
    y=forecast_data_mulvar_orig['yhat'],
    title="Actual vs Predicted",
)
actual_trace = go.Scatter(x=df.index, y=df['meantemp'], mode='lines', name='Actual')
fig.add_trace(actual_trace)
fig.update_layout(xaxis_title='Date', yaxis_title='Temperature')
fig.show()

In [50]:
# Keep only the last 114 forecast rows for comparison with the test split.
# NOTE(review): 114 is one more than the 113-period horizon requested earlier; presumably
# the extra row is the final training date, so the slice lines up with df_test — confirm.
forecast_future_data_mulvar_orig=forecast_data_mulvar_orig.tail(114)

In [51]:
# Forecast over the held-out period with the observed test temperatures overlaid.
fig = px.line(
    forecast_future_data_mulvar_orig,
    x=forecast_future_data_mulvar_orig['ds'],
    y=forecast_future_data_mulvar_orig['yhat'],
    title="Future Actual vs Predicted",
)
actual_trace = go.Scatter(x=df_test.index, y=df_test['meantemp'], mode='lines', name='Actual')
fig.add_trace(actual_trace)
fig.update_layout(xaxis_title='Date', yaxis_title='Temperature')
fig.show()