In [2]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

### Reading data from CSV file

In [24]:
df = pd.read_csv('data3.csv')

In [25]:
df.describe()

Unnamed: 0,tmin,tmax,prcp,snow,snwd,awnd
count,1977.0,1977.0,1977.0,1977.0,1976.0,1973.0
mean,51.088012,72.298938,0.135771,0.012838,0.016953,6.074805
std,15.889527,15.796087,0.370521,0.227433,0.215338,2.954427
min,4.0,27.0,0.0,0.0,0.0,0.0
25%,37.0,60.0,0.0,0.0,0.0,3.8
50%,52.0,74.0,0.0,0.0,0.0,5.8
75%,66.0,86.0,0.06,0.0,0.0,7.8
max,78.0,102.0,4.96,7.0,5.9,20.36


### Converting date column to datetime format

In [4]:
df['date'] = pd.to_datetime(df['date'])

### Extracting year, month, day, and dayofweek as new features

In [5]:
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['dayofweek'] = df['date'].dt.dayofweek

### Preparing input data

In [6]:
X = df[['year', 'month', 'day', 'dayofweek']]
y_tmin = df['tmin']
y_tmax = df['tmax']

### Splitting data into train andtest sets

In [7]:
X_train, X_test, y_tmin_train, y_tmin_test, y_tmax_train, y_tmax_test = train_test_split(X, y_tmin, y_tmax, test_size=0.2, random_state=42)

### Training model for TMin using XGBoost

In [13]:
model_tmin = xgb.XGBRegressor()
model_tmin.fit(X_train, y_tmin_train)

### Training model for TMax using XGBoost

In [9]:
model_tmax = xgb.XGBRegressor()
model_tmax.fit(X_train, y_tmax_train)

### Predicting TMin and TMax for future days

In [30]:
num_days_future = 20
future = pd.DataFrame({'date': pd.date_range(start=df['date'].max(), periods=num_days_future+1)[1:]})
future['year'] = future['date'].dt.year
future['month'] = future['date'].dt.month
future['day'] = future['date'].dt.day
future['dayofweek'] = future['date'].dt.dayofweek
future_x = future[['year', 'month', 'day', 'dayofweek']]

tmin_forecast = model_tmin.predict(future_x)
tmax_forecast = model_tmax.predict(future_x)

### Evaluating the model with test data

In [31]:
tmin_test_forecast = model_tmin.predict(X_test)
tmax_test_forecast = model_tmax.predict(X_test)
tmin_error_percentage = mean_absolute_error(y_tmin_test, tmin_test_forecast) / y_tmin_test.mean() * 100
tmax_error_percentage = mean_absolute_error(y_tmax_test, tmax_test_forecast) / y_tmax_test.mean() * 100

### Preparing new DataFrame to display forecasts along with date and error percentage

In [32]:
forecast_df = pd.DataFrame({'Date': future['date'], 'TMin Forecast': tmin_forecast, 'TMax Forecast': tmax_forecast})
print(forecast_df)

         Date  TMin Forecast  TMax Forecast
0  2022-06-01      60.161125      90.284004
1  2022-06-02      64.368988      89.428185
2  2022-06-03      67.726151      92.548553
3  2022-06-04      66.364250      85.945663
4  2022-06-05      72.669861      86.750427
5  2022-06-06      81.166290      94.770088
6  2022-06-07      74.842346      91.646904
7  2022-06-08      71.074326      73.024239
8  2022-06-09      70.260567      71.323891
9  2022-06-10      66.376930      78.257378
10 2022-06-11      65.020782      83.541702
11 2022-06-12      65.656303      81.825737
12 2022-06-13      70.985214      92.344688
13 2022-06-14      61.839008      93.086639
14 2022-06-15      60.879349      90.241539
15 2022-06-16      72.327080      95.119614
16 2022-06-17      70.772850      94.050819
17 2022-06-18      77.675682      97.704964
18 2022-06-19      79.238945      98.639137
19 2022-06-20      68.340477      87.941071


In [33]:
forecast_df.describe()

Unnamed: 0,Date,TMin Forecast,TMax Forecast
count,20,20.0,20.0
mean,2022-06-10 12:00:00,69.387314,88.423752
min,2022-06-01 00:00:00,60.161125,71.323891
25%,2022-06-05 18:00:00,65.497423,85.344673
50%,2022-06-10 12:00:00,69.300522,90.262772
75%,2022-06-15 06:00:00,72.412775,93.327684
max,2022-06-20 00:00:00,81.16629,98.639137
std,,5.87799,7.570753


### Printing the error percentage

In [29]:
print(f"TMin Error Percentage: {tmin_error_percentage}%")
print(f"TMax Error Percentage: {tmax_error_percentage}%")

TMin Error Percentage: 11.29424034075582%
TMax Error Percentage: 8.810488504599059%
