# Import libraries and functions

In [234]:
import numpy as np
import pandas as pd

from dataset import create_dataframe
from data_collector import API_KEY

df = create_dataframe('Athens', API_KEY)

df

Lat: 37.9839412
Lon: 23.7283052


Unnamed: 0,datetime,temp,feels_like,humidity,weather_main,description,wind_speed,wind_gust
0,2025-08-02 12:00:00,32.25,305.26,37,Clouds,scattered clouds,7.71,7.78
1,2025-08-02 15:00:00,32.32,304.88,34,Clouds,scattered clouds,6.40,7.93
2,2025-08-02 18:00:00,30.64,302.70,32,Clouds,few clouds,2.83,3.06
3,2025-08-02 21:00:00,28.22,300.87,38,Clear,clear sky,1.12,1.19
4,2025-08-03 00:00:00,27.12,299.86,35,Clear,clear sky,2.21,2.01
...,...,...,...,...,...,...,...,...
755,2025-08-07 18:00:00,29.41,302.09,39,Clear,clear sky,6.37,11.63
756,2025-08-07 21:00:00,27.77,300.96,45,Clear,clear sky,5.49,10.83
757,2025-08-08 00:00:00,26.66,299.81,57,Clear,clear sky,7.82,15.61
758,2025-08-08 03:00:00,25.34,298.59,58,Clear,clear sky,8.91,17.05


# Convert 'datetime' column to datetime type

In [235]:
df['datetime'] = pd.to_datetime(df['datetime'])

# Split 'datetime' column to year-month-day-hour

In [236]:
df['year'] = df['datetime'].dt.year
df['month'] = df['datetime'].dt.month
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour

In [237]:
df

Unnamed: 0,datetime,temp,feels_like,humidity,weather_main,description,wind_speed,wind_gust,year,month,day,hour
0,2025-08-02 12:00:00,32.25,305.26,37,Clouds,scattered clouds,7.71,7.78,2025,8,2,12
1,2025-08-02 15:00:00,32.32,304.88,34,Clouds,scattered clouds,6.40,7.93,2025,8,2,15
2,2025-08-02 18:00:00,30.64,302.70,32,Clouds,few clouds,2.83,3.06,2025,8,2,18
3,2025-08-02 21:00:00,28.22,300.87,38,Clear,clear sky,1.12,1.19,2025,8,2,21
4,2025-08-03 00:00:00,27.12,299.86,35,Clear,clear sky,2.21,2.01,2025,8,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...
755,2025-08-07 18:00:00,29.41,302.09,39,Clear,clear sky,6.37,11.63,2025,8,7,18
756,2025-08-07 21:00:00,27.77,300.96,45,Clear,clear sky,5.49,10.83,2025,8,7,21
757,2025-08-08 00:00:00,26.66,299.81,57,Clear,clear sky,7.82,15.61,2025,8,8,0
758,2025-08-08 03:00:00,25.34,298.59,58,Clear,clear sky,8.91,17.05,2025,8,8,3


# Drop 'datetime' column

In [238]:
df.drop('datetime', axis=1, inplace=True)

In [239]:
df

Unnamed: 0,temp,feels_like,humidity,weather_main,description,wind_speed,wind_gust,year,month,day,hour
0,32.25,305.26,37,Clouds,scattered clouds,7.71,7.78,2025,8,2,12
1,32.32,304.88,34,Clouds,scattered clouds,6.40,7.93,2025,8,2,15
2,30.64,302.70,32,Clouds,few clouds,2.83,3.06,2025,8,2,18
3,28.22,300.87,38,Clear,clear sky,1.12,1.19,2025,8,2,21
4,27.12,299.86,35,Clear,clear sky,2.21,2.01,2025,8,3,0
...,...,...,...,...,...,...,...,...,...,...,...
755,29.41,302.09,39,Clear,clear sky,6.37,11.63,2025,8,7,18
756,27.77,300.96,45,Clear,clear sky,5.49,10.83,2025,8,7,21
757,26.66,299.81,57,Clear,clear sky,7.82,15.61,2025,8,8,0
758,25.34,298.59,58,Clear,clear sky,8.91,17.05,2025,8,8,3


# Check for null-nan values

In [240]:
df.isnull().mean() * 100

temp            0.0
feels_like      0.0
humidity        0.0
weather_main    0.0
description     0.0
wind_speed      0.0
wind_gust       0.0
year            0.0
month           0.0
day             0.0
hour            0.0
dtype: float64

# Split set to features and target

In [241]:
X = df.iloc[:, 1:].to_numpy()

y = df.iloc[:, :1].to_numpy()

In [242]:
X

array([[305.26, 37, 'Clouds', ..., 8, 2, 12],
       [304.88, 34, 'Clouds', ..., 8, 2, 15],
       [302.7, 32, 'Clouds', ..., 8, 2, 18],
       ...,
       [299.81, 57, 'Clear', ..., 8, 8, 0],
       [298.59, 58, 'Clear', ..., 8, 8, 3],
       [300.42, 49, 'Clear', ..., 8, 8, 6]], shape=(760, 10), dtype=object)

In [243]:
y

array([[32.25],
       [32.32],
       [30.64],
       [28.22],
       [27.12],
       [25.37],
       [29.4 ],
       [32.65],
       [33.89],
       [33.09],
       [28.91],
       [27.37],
       [26.63],
       [25.97],
       [30.02],
       [33.39],
       [34.09],
       [32.44],
       [29.1 ],
       [27.87],
       [27.11],
       [25.6 ],
       [28.34],
       [32.43],
       [34.27],
       [32.97],
       [29.58],
       [27.48],
       [26.31],
       [25.12],
       [27.68],
       [31.76],
       [33.25],
       [32.35],
       [28.63],
       [27.3 ],
       [24.8 ],
       [24.44],
       [28.92],
       [31.78],
       [32.25],
       [32.32],
       [30.64],
       [28.22],
       [27.12],
       [25.37],
       [29.4 ],
       [32.65],
       [33.89],
       [33.09],
       [28.91],
       [27.37],
       [26.63],
       [25.97],
       [30.02],
       [33.39],
       [34.09],
       [32.44],
       [29.1 ],
       [27.87],
       [27.11],
       [25.6 ],
       [

# Convert object columns to numerical

In [244]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(
    transformers=[(
        'encoder', OneHotEncoder(), [2, 3]
    )], remainder='passthrough'
)

X = np.array(ct.fit_transform(X))

X

array([[0.0, 1.0, 0.0, ..., 8, 2, 12],
       [0.0, 1.0, 0.0, ..., 8, 2, 15],
       [0.0, 1.0, 0.0, ..., 8, 2, 18],
       ...,
       [1.0, 0.0, 0.0, ..., 8, 8, 0],
       [1.0, 0.0, 0.0, ..., 8, 8, 3],
       [1.0, 0.0, 0.0, ..., 8, 8, 6]], shape=(760, 17), dtype=object)

# Split the dataset to Train-Test sets

In [245]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling

In [246]:
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
sc_y = StandardScaler()

X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

y_train = sc_y.fit_transform(y_train.reshape(-1, 1))
y_test = sc_y.transform(y_test.reshape(-1, 1))

In [247]:
y_train = y_train.ravel()

In [248]:
y_test = sc_y.inverse_transform(y_test)

# Model-Training

In [249]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

linear_regressor = LinearRegression()
linear_regressor.fit(X_train, y_train)

forest_regressor = RandomForestRegressor()
forest_regressor.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


# Model-Prediction

In [250]:
linear_predictions = sc_y.inverse_transform(linear_regressor.predict(X_test).reshape(-1, 1))
forest_predictions = sc_y.inverse_transform(forest_regressor.predict(X_test).reshape(-1, 1))

In [274]:
forest_predictions

array([[27.3   ],
       [27.12  ],
       [32.44  ],
       [33.4498],
       [25.12  ],
       [29.    ],
       [32.34  ],
       [33.74  ],
       [27.56  ],
       [26.83  ],
       [28.92  ],
       [25.58  ],
       [25.3407],
       [31.95  ],
       [29.6909],
       [25.3407],
       [29.    ],
       [24.93  ],
       [31.78  ],
       [31.76  ],
       [29.59  ],
       [33.4498],
       [30.02  ],
       [32.87  ],
       [29.6909],
       [31.95  ],
       [26.31  ],
       [29.6909],
       [25.6   ],
       [29.2899],
       [29.6909],
       [32.56  ],
       [31.76  ],
       [27.11  ],
       [26.63  ],
       [25.96  ],
       [29.58  ],
       [27.68  ],
       [25.97  ],
       [28.91  ],
       [29.59  ],
       [33.5669],
       [26.66  ],
       [24.44  ],
       [24.93  ],
       [26.66  ],
       [34.09  ],
       [32.28  ],
       [32.8   ],
       [26.63  ],
       [28.34  ],
       [24.8   ],
       [31.4908],
       [33.8409],
       [31.54  ],
       [30

In [277]:
mean_temp = forest_predictions.mean()
int(mean_temp)

29

In [280]:
far_mean = int((mean_temp * 1.8) + 32)
far_mean

84

# R2-Score

In [252]:
from sklearn.metrics import r2_score

linear_score = r2_score(y_test, linear_predictions)
forest_score = r2_score(y_test, forest_predictions)

print(f'Linear Regression R2 Score: {linear_score}')
print(f'Random Forest Regression R2 Score: {forest_score}')

Linear Regression R2 Score: 0.9932795405042049
Random Forest Regression R2 Score: 0.9998545760258999


In [265]:
df

Unnamed: 0,temp,feels_like,humidity,weather_main,description,wind_speed,wind_gust,year,month,day,hour
0,32.25,305.26,37,Clouds,scattered clouds,7.71,7.78,2025,8,2,12
1,32.32,304.88,34,Clouds,scattered clouds,6.40,7.93,2025,8,2,15
2,30.64,302.70,32,Clouds,few clouds,2.83,3.06,2025,8,2,18
3,28.22,300.87,38,Clear,clear sky,1.12,1.19,2025,8,2,21
4,27.12,299.86,35,Clear,clear sky,2.21,2.01,2025,8,3,0
...,...,...,...,...,...,...,...,...,...,...,...
755,29.41,302.09,39,Clear,clear sky,6.37,11.63,2025,8,7,18
756,27.77,300.96,45,Clear,clear sky,5.49,10.83,2025,8,7,21
757,26.66,299.81,57,Clear,clear sky,7.82,15.61,2025,8,8,0
758,25.34,298.59,58,Clear,clear sky,8.91,17.05,2025,8,8,3


In [273]:
df = df[(df['day'] == 7) & (df['month'] == 8) & (df['year'] == 2025)]
mean_temp = df['temp'].mean()
print(int(mean_temp))

28
