In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.impute import SimpleImputer

# Load the dataset
url = "https://raw.githubusercontent.com/dsrscientist/Dataset2/main/temperature.csv"
data = pd.read_csv(url)

# Explore the data
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line.
data.info()
print(data.describe())

# Check for missing values (per-column count of nulls)
print(data.isnull().sum())

# Handle missing values
target_cols = ['Next_Tmax', 'Next_Tmin']
data_numeric = data.drop(['Date'], axis=1)  # Exclude Date column

# Rows without a target cannot be used for supervised training or scoring.
# Mean-imputing the targets would fabricate labels and distort the metrics,
# so those rows are dropped instead.
data_numeric = data_numeric.dropna(subset=target_cols).reset_index(drop=True)

# Separate features and target variables
# NOTE: X holds the raw (un-imputed) features; imputation happens after the
# split so that test-set statistics never influence the training data.
X = data_numeric.drop(target_cols, axis=1)
y_max = data_numeric['Next_Tmax']
y_min = data_numeric['Next_Tmin']

# Split data into training and testing sets
X_train, X_test, y_train_max, y_test_max, y_train_min, y_test_min = train_test_split(
    X, y_max, y_min, test_size=0.2, random_state=42
)

# Fit the imputer on the training features only, then apply the learned
# column means to both splits. The original index is kept so the feature
# frames stay aligned with their target Series.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test), columns=X_test.columns, index=X_test.index
)

# Train one random forest per target, sharing a single hyperparameter set
forest_params = {"n_estimators": 100, "random_state": 42}

# Next_Tmax regressor (fit() returns the fitted estimator itself)
model_max = RandomForestRegressor(**forest_params).fit(X_train, y_train_max)

# Next_Tmin regressor
model_min = RandomForestRegressor(**forest_params).fit(X_train, y_train_min)

# Predict both targets on the held-out features
y_pred_max, y_pred_min = (
    fitted.predict(X_test) for fitted in (model_max, model_min)
)

# Score each model on the held-out split.
# RMSE is derived as the square root of the MSE.
mse_max = mean_squared_error(y_test_max, y_pred_max)
mae_max = mean_absolute_error(y_test_max, y_pred_max)
rmse_max = mse_max ** 0.5

mse_min = mean_squared_error(y_test_min, y_pred_min)
mae_min = mean_absolute_error(y_test_min, y_pred_min)
rmse_min = mse_min ** 0.5

# Report MAE and RMSE for each target, Next_Tmax first
for heading, mae, rmse in (
    ("Next_Tmax Model Evaluation:", mae_max, rmse_max),
    ("\nNext_Tmin Model Evaluation:", mae_min, rmse_min),
):
    print(heading)
    print("MAE:", mae)
    print("RMSE:", rmse)


   station        Date  Present_Tmax  Present_Tmin  LDAPS_RHmin  LDAPS_RHmax  \
0      1.0  30-06-2013          28.7          21.4    58.255688    91.116364   
1      2.0  30-06-2013          31.9          21.6    52.263397    90.604721   
2      3.0  30-06-2013          31.6          23.3    48.690479    83.973587   
3      4.0  30-06-2013          32.0          23.4    58.239788    96.483688   
4      5.0  30-06-2013          31.4          21.9    56.174095    90.155128   

   LDAPS_Tmax_lapse  LDAPS_Tmin_lapse  LDAPS_WS    LDAPS_LH  ...  LDAPS_PPT2  \
0         28.074101         23.006936  6.818887   69.451805  ...         0.0   
1         29.850689         24.035009  5.691890   51.937448  ...         0.0   
2         30.091292         24.565633  6.138224   20.573050  ...         0.0   
3         29.704629         23.326177  5.650050   65.727144  ...         0.0   
4         29.113934         23.486480  5.735004  107.965535  ...         0.0   

   LDAPS_PPT3  LDAPS_PPT4      lat    