In diesem Notebook habe ich Notebooks von Chris Deotte, Tawara und Oumouhou_h verwendet.

In [1]:
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression, MultiTaskLasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, make_scorer
from statsmodels.tsa.arima_model import ARIMA
# Directory containing the competition CSV files (train.csv / test.csv).
# The trailing slash is required: file names are appended with plain `+`.
path = '../input/godaddy-microbusiness-density-forecasting/'

In [2]:
def _read_with_dates(filename):
    """Read `path + filename` as CSV and parse `first_day_of_month` as datetimes."""
    frame = pd.read_csv(path + filename)
    frame["first_day_of_month"] = pd.to_datetime(frame["first_day_of_month"])
    return frame


# Load both tables with the month column already converted to datetime.
train = _read_with_dates('train.csv')
test = _read_with_dates('test.csv')

In [3]:
# Upper bound on the summed relative deviation from the fitted trend line;
# series scoring above it are treated as non-linear by the forecasting cell.
THRESHOLD = 8
# Counties whose latest `active` value is below this get no trend adjustment.
ACTIVE_THRESHOLD = 600

# Rows per county, derived from the data rather than a hard-coded county
# count so the notebook keeps working if the set of counties changes.
TRAIN_SZ = len(train) // train["cfips"].nunique()  # 39 with the original data
TEST_SZ = len(test) // test["cfips"].nunique()     # 8 with the original data

In [4]:

# Forecast the next microbusiness_density value for every county (cfips).
# Three candidate forecasts are produced per county and averaged at the end:
#   * last_preds     - the last observed value carried forward unchanged
#   * seasonal_preds - the last value nudged by +/-0.3% when the recent,
#                      one-year-old and two-year-old trends all agree
#   * linear_preds   - an ElasticNet trend line extrapolated one step, used
#                      only when the series is close to linear and the county
#                      has enough active businesses; otherwise the last value
# Relies on `train`, `TRAIN_SZ`, `TEST_SZ`, `THRESHOLD` and `ACTIVE_THRESHOLD`
# defined in earlier cells, and on every county having exactly TRAIN_SZ rows.

ids = train.cfips.unique()  # unique county identifiers, in order of appearance

# Time is encoded as a plain step index. x_test starts at the LAST training
# step on purpose: the model's value there is used to re-anchor the
# extrapolated line onto the last observed value.
x_train = np.arange(TRAIN_SZ).reshape((-1, 1))
x_test = np.arange(TRAIN_SZ - 1, TRAIN_SZ + TEST_SZ).reshape((-1, 1))

linear_preds = np.zeros(len(ids))
last_preds = np.zeros(len(ids))
seasonal_preds = np.zeros(len(ids))

# Bookkeeping counters; only lin_trend is updated in this cell.
sn_trend = 0
lin_trend = 0
ct = 0

# Window lengths (in rows) of the medians used by the seasonal rule.
WIDTH1 = 5
WIDTH2 = 7
WIDTH3 = 7

# Order rows chronologically within each county (the code below indexes by
# position) and split the frame once instead of re-scanning it with a
# boolean mask for every county.
by_county = train.sort_values(["cfips", "first_day_of_month"]).groupby("cfips", sort=False)

for i, c in enumerate(ids):
    df = by_county.get_group(c)
    density = df.microbusiness_density.values

    last = density[-1]
    active = df.active.values[-1]

    last_preds[i] = last

    # --- Seasonal forecast -------------------------------------------------
    # Recent trend: last value vs. median of the WIDTH1 values before it.
    x0 = np.median(density[-1 - WIDTH1:-1])
    # One-year-old trend: value 12 steps before the forecast step vs. the
    # median of the WIDTH2 values immediately preceding it.
    x1 = density[-12]
    x2 = np.median(density[-12 - WIDTH2:-12])
    # Two-year-old trend: same comparison 24 steps before the forecast step.
    x3 = density[-24]
    x4 = np.median(density[-24 - WIDTH3:-24])

    seasonal = last
    if active >= ACTIVE_THRESHOLD:
        # Adjust only when all three trends point the same way.
        if (x1 > x2) and (x3 > x4) and (last > x0):
            seasonal *= 1.003
        elif (x1 < x2) and (x3 < x4) and (last < x0):
            seasonal *= 0.997
    seasonal_preds[i] = seasonal

    # --- Linear-trend forecast (ElasticNet on the step index) ---------------
    model = ElasticNet(alpha=0.5)
    model.fit(x_train, density)
    err = model.predict(x_train) - density
    rng = density.max() - density.min()

    # Linearity score: absolute residuals relative to half the value range,
    # summed over the series. A flat series (rng == 0) would divide by zero,
    # and has no trend to extrapolate, so it is scored as non-linear.
    if rng > 0:
        s = np.sum(np.abs(err) / (rng / 2))
    else:
        s = np.inf

    if (s > THRESHOLD) or (active < ACTIVE_THRESHOLD):
        # Not linear enough, or too few active businesses: carry the last
        # value forward.
        linear_preds[i] = last
    else:
        # p2[0] is the fitted value at the last training step and p2[1] the
        # value one step ahead; shift the line so it passes through `last`.
        p2 = model.predict(x_test)
        shift = last - p2[0]
        linear_preds[i] = p2[1] + shift
        lin_trend += 1

# Final prediction: unweighted mean of the three candidate forecasts.
final_preds = (linear_preds + last_preds + seasonal_preds) / 3

In [5]:
# One-column frame of per-county predictions, indexed by cfips for joining.
target = pd.DataFrame(final_preds, index=ids, columns=["microbusiness_density"])

In [6]:

# Attach each county's prediction to every test row and keep only the two
# submission columns. The join is a left join on cfips, so a test row whose
# cfips has no entry in `target` would silently get NaN; check for that
# before replacing `test`.
# NOTE: this rebinds `test` to a frame without the cfips column, so the cell
# can only be run once per load of test.csv.
joined = test.join(target, on="cfips")[["row_id", "microbusiness_density"]]
n_missing = int(joined["microbusiness_density"].isna().sum())
if n_missing:
    raise ValueError(f"{n_missing} test rows have no valid prediction")
test = joined

In [7]:
# Write the submission file; the positional index is not part of the output.
test.to_csv(path_or_buf="submission.csv", index=False)

In [8]:
# Display the submission frame (row_id, microbusiness_density) for a visual check.
test

Unnamed: 0,row_id,microbusiness_density
0,1001_2022-11-01,3.467592
1,1003_2022-11-01,8.359798
2,1005_2022-11-01,1.232074
3,1007_2022-11-01,1.287240
4,1009_2022-11-01,1.833850
...,...,...
25075,56037_2023-06-01,2.823801
25076,56039_2023-06-01,26.273220
25077,56041_2023-06-01,4.009369
25078,56043_2023-06-01,3.126551


In [9]:
# Display the submission frame again (same content as the previous cell).
test

Unnamed: 0,row_id,microbusiness_density
0,1001_2022-11-01,3.467592
1,1003_2022-11-01,8.359798
2,1005_2022-11-01,1.232074
3,1007_2022-11-01,1.287240
4,1009_2022-11-01,1.833850
...,...,...
25075,56037_2023-06-01,2.823801
25076,56039_2023-06-01,26.273220
25077,56041_2023-06-01,4.009369
25078,56043_2023-06-01,3.126551
