In [1]:
import os
from pathlib import Path

from itertools import chain, product

import math
import numpy as np
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar

import time
from datetime import datetime, timedelta
import pymmwr

from matplotlib import cm
import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgb

# https://github.com/reichlab/timeseriesutils
from timeseriesutils import featurize

In [None]:
import sys
# Make the project-local helper modules importable. The entry is a relative
# path, so it is resolved against the notebook's current working directory.
# NOTE(review): the recorded output of the next cell shows these modules being
# loaded from `.../code/`, not `.../code/Forecasts` — confirm this directory is
# the intended one before relying on a fresh-kernel run.
sys.path.append('code/Forecasts')
# Project-local modules (must be imported after the sys.path tweak above).
import loader 
import preprocess_and_plot
import forecast_model


In [4]:
# Sanity check: report which file each project-local module was actually
# imported from (one path per line), so a stale copy on sys.path is easy to spot.
print(
    *(mod.__file__ for mod in (loader, preprocess_and_plot, forecast_model)),
    sep='\n',
)


/Users/dk29776/Dropbox/UTAustin/City-Level-Forecasting-local/epiENGAGE-GBQR/code/loader.py
/Users/dk29776/Dropbox/UTAustin/City-Level-Forecasting-local/epiENGAGE-GBQR/code/preprocess_and_plot.py
/Users/dk29776/Dropbox/UTAustin/City-Level-Forecasting-local/epiENGAGE-GBQR/code/forecast_model.py


In [None]:
# Load the raw input table from a CSV path relative to the working directory.
# NOTE(review): the file name suggests a CDC NSSP snapshot dated 2025-05-23 —
# confirm provenance and record it here.
dat = pd.read_csv('data/cdc_nssp_20250523.csv')
# Bare last expression: render the (pandas-truncated) frame as the cell output.
dat

Unnamed: 0,week_end,state,county,inc,hsa,hsa_counties,hsa_nci_id,fips,trend_source,population_county,...,population_hsa,urban_hsa,population_county_2020_hsa,pct_urban,area_km2_state,area_km2_hsa,density_state,density_hsa,pop_ratio,log_density_hsa
0,2022-10-01,Alabama,Bibb,,"Jefferson (Birmingham), AL - Shelby, AL","Bibb, Blount, Chilton, Cullman, Jefferson, She...",150,1007,HSA,22152.0,...,1270895.0,71118.0,1268497.0,0.056065,133966.74428,15869.019661,37.727669,80.086548,0.251451,4.383108
1,2022-10-01,Alabama,Calhoun,,"Calhoun (Anniston), AL - Cleburne, AL","Calhoun, Cleburne",177,1015,HSA,116141.0,...,131395.0,5294.0,131497.0,0.040259,133966.74428,3040.309648,37.727669,43.217637,0.025997,3.766249
2,2022-10-01,Alabama,Chilton,,"Jefferson (Birmingham), AL - Shelby, AL","Bibb, Blount, Chilton, Cullman, Jefferson, She...",150,1021,HSA,45500.0,...,1270895.0,71118.0,1268497.0,0.056065,133966.74428,15869.019661,37.727669,80.086548,0.251451,4.383108
3,2022-10-01,Alabama,Cleburne,,"Calhoun (Anniston), AL - Cleburne, AL","Calhoun, Cleburne",177,1029,HSA,15254.0,...,131395.0,5294.0,131497.0,0.040259,133966.74428,3040.309648,37.727669,43.217637,0.025997,3.766249
4,2022-10-01,Alabama,Coosa,,"Talladega, AL - Clay, AL","Clay, Coosa, Talladega",241,1037,HSA,10323.0,...,105681.0,2429.0,106772.0,0.022749,133966.74428,5266.861622,37.727669,20.065270,0.020909,2.998990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403317,,Virginia,Suffolk city,,,,853,51800,,96638.0,...,1202837.0,91772.0,1203918.0,0.076228,104751.70306,5444.225907,82.647811,220.938113,0.138936,5.397883
403318,,Virginia,Virginia Beach city,,,,853,51810,,457066.0,...,1202837.0,91772.0,1203918.0,0.076228,104751.70306,5444.225907,82.647811,220.938113,0.138936,5.397883
403319,,Virginia,Waynesboro city,,,,97,51820,,22574.0,...,164927.0,6873.0,164276.0,0.041838,104751.70306,5256.772130,82.647811,31.374196,0.019050,3.445986
403320,,Virginia,Williamsburg city,,,,5,51830,,15564.0,...,548609.0,41319.0,546823.0,0.075562,104751.70306,2031.003524,82.647811,270.117207,0.063368,5.598856


## preprocess_data(dat, state, pop_size)
- `dat`: Input dataset.
- `state`:
  - If not `None`, the data are filtered to include only that state (pass the state abbreviation).
  - If `None`, data from all states are returned.
- `pop_size`: Minimum population; only rows whose population is greater than or equal to `pop_size` are kept.

In [6]:
# Preprocess the raw table with state=None (no state filter) and a population
# threshold of 500,000, following the preprocess_data(dat, state, pop_size)
# description in the markdown cell above.
df_500K = preprocess_and_plot.preprocess_data(dat, None, 500000)
# Display the preprocessed frame as the cell output.
df_500K

Unnamed: 0,wk_end_date,hsa_nci_id,inc,location,population,log_pop,epiweek,year,season_week,season
22,2022-10-01,All,0.17,Arizona_All,7268175.0,15.799016,39,2022,9,2022/23
28,2022-10-01,All,0.30,Arkansas_All,3032651.0,14.924948,39,2022,9,2022/23
55,2022-10-01,754,0.08,Colorado_754,775090.0,13.560734,39,2022,9,2022/23
56,2022-10-01,688,0.11,Colorado_688,2948626.0,14.896850,39,2022,9,2022/23
71,2022-10-01,All,0.11,Connecticut_All,3598348.0,15.095985,39,2022,9,2022/23
...,...,...,...,...,...,...,...,...,...,...
402873,2025-03-29,All,0.33,South Dakota_All,899194.0,13.709254,13,2025,36,2024/25
402891,2025-03-29,All,1.15,Tennessee_All,6986082.0,15.759430,13,2025,36,2024/25
402980,2025-03-29,All,1.37,Utah_All,3331187.0,15.018839,13,2025,36,2024/25
402995,2025-03-29,All,2.53,Virginia_All,8657499.0,15.973936,13,2025,36,2024/25


In [7]:
# Apply the incidence transform to the population-filtered data.
transform_df = preprocess_and_plot.transform_incidence(df_500K)

# Keep only the rows whose hsa_nci_id is 'All', then drop that now-constant
# column (errors='ignore' makes the drop a no-op if the column is absent).
transform_df2 = (
    transform_df
    .loc[transform_df['hsa_nci_id'] == 'All']
    .drop(columns=['hsa_nci_id'], errors='ignore')
)
transform_df2

Unnamed: 0,wk_end_date,inc,location,population,log_pop,epiweek,year,season_week,season,inc_4rt,inc_4rt_scale_factor,inc_4rt_cs,inc_4rt_center_factor
22,2022-10-01,0.17,Arizona_All,7268175.0,15.799016,39,2022,9,2022/23,0.424264,2.528050,-0.327743,0.494905
28,2022-10-01,0.30,Arkansas_All,3032651.0,14.924948,39,2022,9,2022/23,0.556776,2.631680,-0.281826,0.492592
71,2022-10-01,0.11,Connecticut_All,3598348.0,15.095985,39,2022,9,2022/23,0.346410,2.658894,-0.281812,0.411607
73,2022-10-01,0.08,Delaware_All,1005872.0,13.821365,39,2022,9,2022/23,0.300000,2.792479,-0.313088,0.420136
74,2022-10-01,0.10,District of Columbia_All,672079.0,13.418131,39,2022,9,2022/23,0.331662,2.401913,-0.304862,0.442372
...,...,...,...,...,...,...,...,...,...,...,...,...,...
402873,2025-03-29,0.33,South Dakota_All,899194.0,13.709254,13,2025,36,2024/25,0.583095,2.424353,-0.187854,0.427382
402891,2025-03-29,1.15,Tennessee_All,6986082.0,15.759430,13,2025,36,2024/25,1.077033,2.794609,-0.080970,0.464993
402980,2025-03-29,1.37,Utah_All,3331187.0,15.018839,13,2025,36,2024/25,1.174734,2.520761,0.012117,0.452065
402995,2025-03-29,2.53,Virginia_All,8657499.0,15.973936,13,2025,36,2024/25,1.593738,2.790464,0.105581,0.463517


In [8]:
#preprocess_and_plot.plot_by_location(transform_df2)

In [31]:
# --- Forecast configuration ---------------------------------------------------
# Quantile levels requested from the model and the matching string labels; both
# lists are passed unchanged to forecast_model.generate_quantile_forecasts.
q_levels = [0.025, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.975]
q_labels = ['0.025', '0.05', '0.1', '0.25', '0.5', '0.75',  '0.9', '0.95', '0.975']
# Tag embedded in the output directory name.
abbrev = 'state'

# Inclusive window of forecast dates; the loop below steps through it 7 days at a time.
start_date = datetime(2024, 11, 15).date()
end_date   = datetime(2025, 1, 14).date()
# Alternative window (swap in to run the later part of the season):
#start_date = datetime(2025, 1, 15).date()
#end_date   = datetime(2025, 3, 30).date()

# Single source of truth for the output location. Created up front (idempotently)
# so an unwritable path fails before any model fitting is done, and so the CSV
# path below cannot drift from the directory that was created.
output_path = Path(f'GBQR/model_output/US_NSSP_public_{abbrev}_pct')
output_path.mkdir(parents=True, exist_ok=True)

# --- Weekly forecast loop -----------------------------------------------------
current_date = start_date
while current_date <= end_date:
    forecast_date = current_date
    print("Forecast date:", forecast_date)

    # Reference date = the first Saturday strictly after forecast_date.
    # weekday() is Mon=0..Sun=6, so (weekday + 2) % 7 is the number of days since
    # the most recent Saturday; subtracting (that - 7) moves forward to the next
    # Saturday (a Saturday forecast_date therefore maps to the following Saturday).
    ref_date = forecast_date - timedelta((forecast_date.weekday() + 2) % 7 - 7)
    print(f'reference date = {ref_date}')

    # Build the model features as of this reference date.
    df, feat_names = preprocess_and_plot.build_features(transform_df2, featurize, ref_date)

    # The train/test split is computed here only to report the latest week in
    # each part as a sanity check; the forecasting call below takes `df` directly.
    df_train, df_test, x_train, x_test, y_train = forecast_model.prepare_train_test(df, feat_names)
    print(df_train.wk_end_date.max())
    print(df_test.wk_end_date.max())

    # Fit the bagged quantile models (100 bags, bag_frac_samples=0.7) and predict.
    # NOTE(review): no random seed is set in this notebook — if the bagging in
    # forecast_model is randomized, results will differ between runs; confirm.
    preds_df, feat_importances = forecast_model.generate_quantile_forecasts(
        df=df,
        feat_names=feat_names,
        q_levels=q_levels,
        q_labels=q_labels,
        num_bags=100,
        bag_frac_samples=0.7,
        ref_date=ref_date
    )

    # One CSV per reference date: <output_path>/<YYYY-MM-DD>-GBQR.csv
    preds_df.to_csv(output_path / f'{ref_date}-GBQR.csv', index=False)
    current_date += timedelta(days=7)


Forecast date: 2024-11-15
reference date = 2024-11-16


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-11-02 00:00:00
2024-11-09 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-11-09 00:00:00
2024-11-16 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-11-16 00:00:00
2024-11-23 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-11-23 00:00:00
2024-11-30 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-11-30 00:00:00
2024-12-07 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-12-07 00:00:00
2024-12-14 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-12-14 00:00:00
2024-12-21 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-12-21 00:00:00
2024-12-28 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  .apply(taylor_coefs_one_column_grp,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train[col] = pd.to_numeric(x_train[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFra

2024-12-28 00:00:00
2025-01-04 00:00:00
bag number 1
bag number 2
bag number 3
bag number 4
bag number 5
bag number 6
bag number 7
bag number 8
bag number 9
bag number 10
bag number 11
bag number 12
bag number 13
bag number 14
bag number 15
bag number 16
bag number 17
bag number 18
bag number 19
bag number 20
bag number 21
bag number 22
bag number 23
bag number 24
bag number 25
bag number 26
bag number 27
bag number 28
bag number 29
bag number 30
bag number 31
bag number 32
bag number 33
bag number 34
bag number 35
bag number 36
bag number 37
bag number 38
bag number 39
bag number 40
bag number 41
bag number 42
bag number 43
bag number 44
bag number 45
bag number 46
bag number 47
bag number 48
bag number 49
bag number 50
bag number 51
bag number 52
bag number 53
bag number 54
bag number 55
bag number 56
bag number 57
bag number 58
bag number 59
bag number 60
bag number 61
bag number 62
bag number 63
bag number 64
bag number 65
bag number 66
bag number 67
bag number 68
bag number 69
bag