In [20]:
import numpy as np
import polars as pl

In [21]:
def predict_function(test_variable: np.ndarray) -> int:
    """Evaluate the fixed linear formula on a single feature row.

    The prediction is ``dot(features, coefficients) + intercept``, converted
    with ``int()``, i.e. truncated toward zero (not rounded).

    Args:
        test_variable: Array-like holding exactly 22 numeric values, in the
            same order as the coefficients below. Any shape is accepted; the
            values are laid out as a single row before the dot product.

    Returns:
        The truncated prediction as a built-in ``int``.

    Raises:
        ValueError: If ``test_variable`` does not contain exactly 22 values.
    """
    formula_intercept = 0.926
    formula_coefficients = np.array([
         0.001,   0.000,   0.001,  -0.001,  -0.020,
         0.013,  -0.120,  -0.024,   0.145,  -0.210,
         4.640,  -4.480,   0.009,  -0.019,   0.302,
        -0.292,  -0.150,  -0.023,   0.089,  -0.001,
        -0.075,   0.942,
    ]).reshape(-1, 1)

    feature_row = np.asarray(test_variable).reshape(1, -1)
    if feature_row.shape[1] != formula_coefficients.shape[0]:
        raise ValueError(
            f'expected {formula_coefficients.shape[0]} feature values, '
            f'got {feature_row.shape[1]}'
        )

    # The (1, n) @ (n, 1) product is a 1x1 array; .item() extracts the scalar
    # explicitly, because calling float() on an array with ndim > 0 is
    # deprecated in recent NumPy releases.
    weighted_sum = np.dot(feature_row, formula_coefficients).item()
    return int(weighted_sum + formula_intercept)

In [22]:
# Load the training data and keep only site 32 for July 2022: the single
# 23:00 reading on the 13th plus every reading from the 14th through the 18th.
dataset = (
    pl.read_csv('full_dataset_train.csv')
    .filter(
        (pl.col('SiteId') == 32)
        & (pl.col('PublishYear') == 2022)
        & (pl.col('PublishMonth') == 7)
        & (
            ((pl.col('PublishDay') == 13) & (pl.col('PublishHour') == 23))
            | ((pl.col('PublishDay') >= 14) & (pl.col('PublishDay') <= 18))
        )
    )
    # Order rows by day, then hour, so consecutive rows are consecutive readings.
    .sort(['PublishDay', 'PublishHour'])
)
dataset.head()

SiteId,PublishYear,PublishMonth,PublishDay,PublishHour,AQI,Pollutant,Status,SO2,SO2_AVG,CO,CO_8hr,O3,O3_8hr,PM10,PM10_AVG,NO,NO2,NOx,WindDirec,WindSpeed,PM2.5,PM2.5_AVG
i64,i64,i64,i64,i64,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64
32,2022,7,13,23,23,0,0,0.7,1.0,0.2,0.1,19.5,25.3,8.0,13.0,1.0,5.6,6.6,358,0.5,4.0,5.0
32,2022,7,14,0,21,0,0,0.7,1.0,0.1,0.1,21.0,23.5,10.0,13.0,1.1,6.6,7.8,335,0.8,3.0,5.0
32,2022,7,14,1,19,0,0,0.7,1.0,0.1,0.1,20.4,21.7,12.0,12.0,1.1,7.3,8.4,162,0.5,2.0,4.0
32,2022,7,14,2,28,0,0,0.9,1.0,0.2,0.1,4.5,18.6,10.0,11.0,1.5,17.4,19.0,181,1.1,2.0,3.0
32,2022,7,14,3,23,0,0,1.3,1.0,0.2,0.1,6.4,16.2,17.0,12.0,1.5,14.9,16.5,175,1.7,7.0,3.0


In [23]:
# Observed PM2.5 readings, kept as a single-column frame.
dataset_target = dataset.select(pl.col('PM2.5'))
# Every remaining column, in its original order, is used as model input.
dataset_features = dataset.drop(['PM2.5'])

In [24]:
# Compare each reading's observed PM2.5 with the value predicted from the
# PREVIOUS row's features. The first row only seeds the first prediction and
# therefore produces no output row of its own.
#
# Positional indices into a feature row follow the frame's column order:
# 0 = SiteId, 1 = PublishYear, 2 = PublishMonth, 3 = PublishDay, 4 = PublishHour.
feature_rows = dataset_features.rows()
target_rows = dataset_target.rows()

publish_times = []
site_ids = []
observed_values = []
predicted_values = []

# Pair every row (from the second onward) with the row just before it.
for previous, current, target in zip(feature_rows, feature_rows[1:], target_rows[1:]):
    publish_times.append(f'{current[1]}/{current[2]}/{current[3]} {current[4]:02}:00')
    site_ids.append(current[0])
    observed_values.append(int(target[0]))
    predicted_values.append(predict_function(np.array(previous)))

# Build the frame once from whole columns instead of extending it row by row;
# the explicit schema keeps the dtypes fixed even when no rows were produced.
result = pl.DataFrame(
    {
        'PublishTime': publish_times,
        'SiteId': site_ids,
        'PM2.5': observed_values,
        'Predict_PM2.5': predicted_values,
    },
    schema={
        'PublishTime': str,
        'SiteId': pl.Int64,
        'PM2.5': pl.Int64,
        'Predict_PM2.5': pl.Int64,
    },
)
result.head()

PublishTime,SiteId,PM2.5,Predict_PM2.5
str,i64,i64,i64
"""2022/7/14 00:0…",32,3,4
"""2022/7/14 01:0…",32,2,4
"""2022/7/14 02:0…",32,2,4
"""2022/7/14 03:0…",32,7,4
"""2022/7/14 04:0…",32,7,6


In [25]:
# Export the observed-vs-predicted PM2.5 table as CSV into the current working directory.
result.write_csv('./Submission.csv')