In [1]:
import numpy as np
import pandas as pd
from epiweeks import Week
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from mosqlient import get_predictions, get_prediction_by_id

import seaborn as sns
import dataframe_image as dfi
from itertools import product
import matplotlib.dates as mdates

plt.rcParams.update({'font.size': 14})

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def get_preds(model_id, predict_date = None):
    '''
    Function to fetch the predictions by model id
    '''

    if predict_date is not None: 
        list_of_preds = get_predictions(model_id = model_id, predict_date = predict_date)
    
    else: 
        list_of_preds = get_predictions(model_id = model_id)
    
    return list_of_preds 

In [3]:
%%time 
preds_det = np.empty((0, 7))

for model_id in [21,22,25, 26, 27,28,30,34]:

    print(model_id)
    list_of_preds = get_preds(model_id)
    
    for idx in np.arange(0, len(list_of_preds)): 

        df = list_of_preds[idx].to_dataframe()

        preds_det_ = np.array([[model_id, df.adm_1[0], 
                              f'{list_of_preds[idx].id}',df.date.min()[:4], list_of_preds[idx].predict_date, 
                              df.date.min(), df.date.max()]])

        preds_det = np.concatenate((preds_det, preds_det_), axis=0)

21
22
25
26
27
28
30
34
CPU times: user 968 ms, sys: 118 ms, total: 1.09 s
Wall time: 31.3 s


In [4]:
df_desc = pd.DataFrame(preds_det, columns = ['model_id', 'state', 'pred_id', 'year',
                                  'predict_date', 'min_date', 'max_date'])

df_desc.head()

Unnamed: 0,model_id,state,pred_id,year,predict_date,min_date,max_date
0,21,RJ,828,2024,2024-09-12,2024-10-06,2025-09-28
1,21,MT,827,2024,2024-09-12,2024-10-06,2025-09-28
2,21,MS,826,2024,2024-09-12,2024-10-06,2025-09-28
3,21,MT,825,2023,2024-09-12,2023-10-08,2024-09-29
4,21,MS,824,2023,2024-09-12,2023-10-08,2024-09-29


In [5]:
# Define custom function for selecting preds between the duplicated preds
def custom_filter(group):
    # Replace with custom logic, e.g., keep row with max 'Value'
    return group.loc[group.pred_id == group.pred_id.max()]

# Apply custom logic to handle duplicates based on 'Category' and 'Subcategory'
result = df_desc.groupby(['model_id', 'state',  'year']).apply(custom_filter).reset_index(drop=True)
result.head()

  result = df_desc.groupby(['model_id', 'state',  'year']).apply(custom_filter).reset_index(drop=True)


Unnamed: 0,model_id,state,pred_id,year,predict_date,min_date,max_date
0,21,AC,776,2022,2024-09-12,2022-10-09,2023-10-01
1,21,AC,792,2023,2024-09-12,2023-10-08,2024-09-29
2,21,AC,808,2024,2024-09-12,2024-10-06,2025-09-28
3,21,AL,461,2022,2024-09-02,2022-10-09,2023-10-01
4,21,AL,483,2023,2024-09-02,2023-10-08,2024-09-29


In [6]:
result.model_id.value_counts()

model_id
21    81
22    81
30    81
34    81
27    79
28    74
25    13
26     2
Name: count, dtype: int64

In [7]:
filter_ = pd.DataFrame(result.groupby(['model_id', 'state']).count()['pred_id']).reset_index()

filter_ = filter_.loc[filter_.pred_id < 3]

# Create list of pairs to exclude
exclude_pairs = list(set(zip(filter_.model_id, filter_.state)))

# Filter out rows where (column1, column2) matches any pair in exclude_pairs
filtered_df = result[~result.apply(lambda row: (row['model_id'], row['state']) in exclude_pairs, axis=1)]

filtered_df.head()

Unnamed: 0,model_id,state,pred_id,year,predict_date,min_date,max_date
0,21,AC,776,2022,2024-09-12,2022-10-09,2023-10-01
1,21,AC,792,2023,2024-09-12,2023-10-08,2024-09-29
2,21,AC,808,2024,2024-09-12,2024-10-06,2025-09-28
3,21,AL,461,2022,2024-09-02,2022-10-09,2023-10-01
4,21,AL,483,2023,2024-09-02,2023-10-08,2024-09-29


In [8]:
exclude_pairs

[('26', 'AM'),
 ('28', 'SC'),
 ('28', 'RJ'),
 ('28', 'SP'),
 ('27', 'ES'),
 ('25', 'AM'),
 ('28', 'RS')]

In [11]:
result.loc[(result.state == 'AM') & ( (result.model_id == '25') | (result.model_id == '26'))]

Unnamed: 0,model_id,state,pred_id,year,predict_date,min_date,max_date
162,25,AM,270,2024,2024-08-25,2024-01-07,2025-12-21
175,26,AM,176,2022,2024-08-15,2022-10-09,2023-09-24
176,26,AM,177,2023,2024-08-15,2023-10-08,2024-06-02
