In [126]:
# https://platform.olimpiada-ai.ro/problems/20

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [127]:
train = pd.read_csv("/kaggle/input/daily-temperature/train.csv")
test = pd.read_csv("/kaggle/input/daily-temperature/test.csv")

len(train), len(test)

(500, 200)

In [128]:
train.head(3)

Unnamed: 0,SampleID,humidity,wind_speed,pressure,rainfall,cloud_cover,solar_radiation,day_of_year,temperature
0,1,52.472407,20.944851,1005.553988,5.190818,26.170568,679.263245,313,5.33
1,2,87.042858,16.082891,1016.257028,4.791819,24.69788,635.169456,328,3.43
2,3,73.919637,9.285828,1026.188375,0.256421,90.625458,501.470266,7,-1.81


In [129]:
def process_df(df):
    df['winter'] = ((df['day_of_year']<=59) & (df['day_of_year']>=365-31+1)).astype(int)
    df['spring'] = ((df['day_of_year']<=151) & (df['day_of_year']>=60)).astype(int)
    df['summer'] = ((df['day_of_year']<=243) & (df['day_of_year']>=152)).astype(int)
    df['autumn'] = ((df['day_of_year']<=365-31) & (df['day_of_year']>=244)).astype(int)
    return df

train = process_df(train)
test = process_df(test)

In [130]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

features = [c for c in train.columns if c not in ['SampleID', 'temperature']]
features_to_scale = ['humidity', 'wind_speed', 'pressure', 'rainfall', 'cloud_cover', 'solar_radiation', 'day_of_year']

X, y = train[features].values, train['temperature'].values
X_test = test[features].values

scaler = StandardScaler()

scaler.fit(X)
X = scaler.transform(X)
X_test = scaler.transform(X_test)

X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=42, test_size=0.2)

In [131]:
from sklearn.neural_network import MLPRegressor

model = MLPRegressor(max_iter=1100, random_state=42)

model.fit(X_train, y_train)



In [132]:
from sklearn.metrics import mean_squared_error as mse

y_pred = model.predict(X_valid).flatten()

score = mse(y_valid, y_pred, squared=False)

print(f"Score: {score:.5f}")

Score: 1.66174


In [133]:
y_pred = model.predict(X_test).flatten()

subm = pd.DataFrame({
    'SampleID': test['SampleID'],
    'temperature': y_pred
})

subm.head()

Unnamed: 0,SampleID,temperature
0,1001,-1.146663
1,1002,6.012697
2,1003,15.687572
3,1004,-1.820351
4,1005,4.56198


In [134]:
subm.to_csv("submission.csv", index=False)