In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import statistics

# Display settings: never truncate columns or rows when a frame is rendered,
# and turn off the chained-assignment warning for the rest of the notebook.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.mode.chained_assignment = None

# Inputs: the training data and the sample submission file.
df = pd.read_csv('Train.csv')
submission = pd.read_csv('SampleSubmission.csv')

In [2]:
# Discard rows with no click count and renumber the remaining rows from zero.
df = df.dropna(subset=['clicks']).reset_index(drop=True)

In [3]:
# Keep only the three columns used below, ordered by ID and then by date.
df = (
    df.loc[:, ['ID', 'date', 'clicks']]
    .sort_values(by=['ID', 'date'])
    .reset_index(drop=True)
)

In [4]:
# One row per (date, ID) with the summed values, then parse the date column
# into real datetimes.
grouped_df = (
    df.groupby(['date', 'ID'])
    .sum()
    .reset_index()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [5]:
# Preview the first two rows of the per-(date, ID) aggregate.
grouped_df.head(2)

Unnamed: 0,date,ID,clicks
0,2020-01-01,ID_5da86e71bf5dee4cf5047046,36.0
1,2020-01-02,ID_5da86e71bf5dee4cf5047046,50.0


In [6]:
# Columns that add_dynamic_forecasts produces a forecast for.
forecast_columns = ['clicks']

def dynamic_forecast_moving_average(series, window, forecast_horizon):
    """Forecast future values by recursively extending a trailing moving average.

    Each step is the mean of the last ``window`` known values, where "known"
    includes the forecasts produced by earlier steps. When fewer than
    ``window`` values are available, all of them are averaged.

    Parameters
    ----------
    series : pandas.Series
        Observed values; NaN entries are dropped before forecasting.
    window : int
        Number of trailing values averaged at each step (expected to be a
        positive integer).
    forecast_horizon : int
        Number of steps to forecast.

    Returns
    -------
    list
        Exactly ``forecast_horizon`` forecasts, oldest first. All entries are
        NaN when ``series`` holds no non-NaN value. An empty list is returned
        when ``forecast_horizon`` is zero or negative.
    """
    # Guard first: slicing with [-0:] would hand back the whole history rather
    # than an empty forecast.
    if forecast_horizon <= 0:
        return []

    history = list(series.dropna())
    if not history:
        return [np.nan] * forecast_horizon

    forecasts = []
    for _ in range(forecast_horizon):
        # The slice clamps itself when the history is shorter than the window,
        # so no separate short-history branch is needed.
        next_value = np.mean(history[-window:])
        history.append(next_value)
        forecasts.append(next_value)
    return forecasts

def add_dynamic_forecasts(group, forecast_horizon, window, columns=None):
    """Build the forecast rows that follow one ID's daily history.

    The group is re-indexed to a gap-free daily calendar (missing days are
    forward-filled), then each requested column is forecast with
    ``dynamic_forecast_moving_average``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single ID; must contain a ``date`` column, an ``ID``
        column and every column being forecast.
    forecast_horizon : int
        Number of daily steps to forecast after the last date in ``group``.
    window : int
        Moving-average window passed to ``dynamic_forecast_moving_average``.
    columns : list of str, optional
        Columns to forecast. Defaults to the notebook-level
        ``forecast_columns`` list.

    Returns
    -------
    pandas.DataFrame
        One row per forecast day with columns ``date``, the forecast
        columns, ``ID`` and ``is_forecast`` (always True).
    """
    if columns is None:
        # Fall back to the notebook-level list so three-argument calls keep working.
        columns = forecast_columns

    history = group.set_index('date')
    history.index = pd.to_datetime(history.index)
    # Forward-filling while re-indexing needs a monotonic index, so sort first.
    history = history.sort_index().asfreq('D', method='ffill')

    last_date = history.index.max()
    # Generate horizon + 1 daily stamps starting at the last observed day and
    # drop that first stamp, leaving only the future days.
    forecast_dates = pd.date_range(start=last_date, periods=forecast_horizon + 1, freq='D')[1:]

    forecast_data = {
        col: dynamic_forecast_moving_average(history[col], window, forecast_horizon)
        for col in columns
    }

    forecast_df = pd.DataFrame(forecast_data, index=forecast_dates)
    forecast_df['ID'] = history['ID'].iloc[0]
    forecast_df['is_forecast'] = True

    # The unnamed date index comes out of reset_index as a column called 'index'.
    return forecast_df.reset_index().rename(columns={'index': 'date'})

all_data = []
window_size = 13        # trailing values averaged per forecast step
forecast_horizon = 16   # daily steps forecast beyond each ID's last date

# Group on the plain column label: a one-element list makes pandas yield
# tuple keys (or warn about it, depending on the version).
for name, group in grouped_df.groupby('ID'):
    forecast_df = add_dynamic_forecasts(group, forecast_horizon, window_size)
    # Tag history rows explicitly so the concatenated flag column is a clean
    # boolean; filling NaN on an object column afterwards triggers pandas'
    # downcasting FutureWarning.
    history_df = group.reset_index(drop=True).assign(is_forecast=False)
    all_data.append(pd.concat([history_df, forecast_df]))

# History and forecast rows for every ID, ordered by ID and then by date.
grouped_df = pd.concat(all_data).sort_values(by=['ID', 'date'])

  grouped_df['is_forecast'] = grouped_df['is_forecast'].fillna(False)


In [7]:
# Observed rows form the training set; generated forecast rows form the test set.
train = grouped_df.loc[grouped_df['is_forecast'].eq(False)]
test = grouped_df.loc[grouped_df['is_forecast'].eq(True)]

In [8]:
sub = submission.copy()

# Capture the year, month and day pieces of each submission ID's date stamp
# before the stamp is stripped from the ID below.
sub[['year', 'month', 'day']] = sub['ID'].str.extract(r'_(\d{4})_(\d{2})_(\d{2})')

# Strip the trailing date stamp from the ID and assemble a datetime column
# from the three extracted parts.
sub = sub.assign(
    ID=sub['ID'].str.replace(r'(_\d{4}_\d{2}_\d{2})$', '', regex=True),
    date=pd.to_datetime(sub[['year', 'month', 'day']]),
)
sub.head(5)

Unnamed: 0,ID,clicks,year,month,day,date
0,ID_5da86e71bf5dee4cf5047046,0,2024,1,22,2024-01-22
1,ID_5da86e71bf5dee4cf5047046,0,2024,1,29,2024-01-29
2,ID_5e43c29e6279884e2827d894,0,2024,2,21,2024-02-21
3,ID_5e43c29e6279884e2827d894,0,2024,2,28,2024-02-28
4,ID_5e4e7b480e374330ee151305,0,2023,12,4,2023-12-04


In [9]:
# Keep only the forecast rows whose (ID, date) pair appears in the submission.
filtered_test = test.merge(sub[['ID', 'date']], on=['ID', 'date'], how='inner')

In [10]:
# Attach the forecast rows to the submission rows; the forecast clicks land
# in 'clicks_y' because both frames carry a 'clicks' column.
merge_df = sub.merge(filtered_test, on=['ID', 'date'], how='left')

# Total forecast clicks per (ID, date).
click_sums = (
    merge_df.groupby(['ID', 'date'])['clicks_y']
    .sum()
    .reset_index()
    .rename(columns={'clicks_y': 'sum_clicks'})
)

# Bring the totals back onto the submission rows and overwrite 'clicks'.
sub = sub.merge(click_sums, on=['ID', 'date'], how='left')
sub['clicks'] = sub['sum_clicks']
sub = sub.drop(columns='sum_clicks')

In [11]:
# Round the forecasts to whole clicks and copy them into the submission frame.
sub['clicks'] = sub['clicks'].round(decimals=0)
submission = submission.assign(clicks=sub['clicks'])

In [12]:
# Write the filled-in submission to disk without the index column.
submission.to_csv('submission.csv', index=False)

In [13]:
# Show the first ten rows of the submission frame.
submission.head(10)

Unnamed: 0,ID,clicks
0,ID_5da86e71bf5dee4cf5047046_2024_01_22,115.0
1,ID_5da86e71bf5dee4cf5047046_2024_01_29,115.0
2,ID_5e43c29e6279884e2827d894_2024_02_21,5.0
3,ID_5e43c29e6279884e2827d894_2024_02_28,5.0
4,ID_5e4e7b480e374330ee151305_2023_12_04,3.0
5,ID_5e4e7b480e374330ee151305_2023_12_11,3.0
6,ID_5e8ad56cd4090270567b6c58_2024_02_21,20.0
7,ID_5e8ad56cd4090270567b6c58_2024_02_28,20.0
8,ID_5e8b4f12d8709943490cd775_2023_09_24,9.0
9,ID_5e8b4f12d8709943490cd775_2023_10_01,9.0
