In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('temperature.csv')

df = df.drop(columns=['Date'])

X = df.drop(['Next_Tmax', 'Next_Tmin'], axis=1)  # Features
y_max = df['Next_Tmax']  # Target for Tmax
y_min = df['Next_Tmin']  # Target for Tmin

X_train_max, X_test_max, y_train_max, y_test_max = train_test_split(X, y_max, test_size=0.2, random_state=42)
X_train_min, X_test_min, y_train_min, y_test_min = train_test_split(X, y_min, test_size=0.2, random_state=42)

print("Missing values in X_train_max:")
print(X_train_max.isnull().sum())
print("Missing values in X_test_max:")
print(X_test_max.isnull().sum())
print("Missing values in X_train_min:")
print(X_train_min.isnull().sum())
print("Missing values in X_test_min:")
print(X_test_min.isnull().sum())

imputer = SimpleImputer(strategy='median')

X_train_max = imputer.fit_transform(X_train_max)
X_test_max = imputer.transform(X_test_max)
X_train_min = imputer.fit_transform(X_train_min)
X_test_min = imputer.transform(X_test_min)

print("Infinite values in X_train_max:")
print(np.isinf(X_train_max).sum())
print("Infinite values in X_test_max:")
print(np.isinf(X_test_max).sum())
print("Infinite values in X_train_min:")
print(np.isinf(X_train_min).sum())
print("Infinite values in X_test_min:")
print(np.isinf(X_test_min).sum())

X_train_max = np.where(np.isinf(X_train_max), 1e+10, X_train_max)
X_test_max = np.where(np.isinf(X_test_max), 1e+10, X_test_max)
X_train_min = np.where(np.isinf(X_train_min), 1e+10, X_train_min)
X_test_min = np.where(np.isinf(X_test_min), 1e+10, X_test_min)

scaler = StandardScaler()
X_train_max = scaler.fit_transform(X_train_max)
X_test_max = scaler.transform(X_test_max)
X_train_min = scaler.fit_transform(X_train_min)
X_test_min = scaler.transform(X_test_min)

lr_max = LinearRegression()
lr_max.fit(X_train_max, y_train_max)

y_pred_max = lr_max.predict(X_test_max)
rmse_max = mean_squared_error(y_test_max, y_pred_max, squared=False)
mae_max = mean_absolute_error(y_test_max, y_pred_max)
r2_max = r2_score(y_test_max, y_pred_max)

lr_min = LinearRegression()
lr_min.fit(X_train_min, y_train_min)

y_pred_min = lr_min.predict(X_test_min)
rmse_min = mean_squared_error(y_test_min, y_pred_min, squared=False)
mae_min = mean_absolute_error(y_test_min, y_pred_min)
r2_min = r2_score(y_test_min, y_pred_min)

print(f'RMSE for Tmax: {rmse_max}')
print(f'MAE for Tmax: {mae_max}')
print(f'R² for Tmax: {r2_max}')
print(f'RMSE for Tmin: {rmse_min}')
print(f'MAE for Tmin: {mae_min}')
print(f'R² for Tmin: {r2_min}')


Missing values in X_train_max:
station              2
Present_Tmax        56
Present_Tmin        56
LDAPS_RHmin         50
LDAPS_RHmax         50
LDAPS_Tmax_lapse    50
LDAPS_Tmin_lapse    50
LDAPS_WS            50
LDAPS_LH            50
LDAPS_CC1           50
LDAPS_CC2           50
LDAPS_CC3           50
LDAPS_CC4           50
LDAPS_PPT1          50
LDAPS_PPT2          50
LDAPS_PPT3          50
LDAPS_PPT4          50
lat                  0
lon                  0
DEM                  0
Slope                0
Solar radiation      0
dtype: int64
Missing values in X_test_max:
station              0
Present_Tmax        14
Present_Tmin        14
LDAPS_RHmin         25
LDAPS_RHmax         25
LDAPS_Tmax_lapse    25
LDAPS_Tmin_lapse    25
LDAPS_WS            25
LDAPS_LH            25
LDAPS_CC1           25
LDAPS_CC2           25
LDAPS_CC3           25
LDAPS_CC4           25
LDAPS_PPT1          25
LDAPS_PPT2          25
LDAPS_PPT3          25
LDAPS_PPT4          25
lat                  0
lon   

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').