# COVID Modeling 

In [590]:
# data handling
import pandas as pd
import numpy as np

# seaborn and matplotlib for visualization
import seaborn as sns
import matplotlib.pyplot as plt

In [591]:
train = pd.read_csv('../data/training_data.csv', index_col="Unnamed: 0")
train['Date'] = pd.to_datetime(train['Date'])
val = pd.read_csv('../data/validation_data.csv', index_col="Unnamed: 0")
test = pd.read_csv('../data/testing_data.csv', index_col="Unnamed: 0")

In [592]:
train.columns

Index(['Date', 'State', 'Total Pop', 'Day_of_Wk', 'Confirmed',
       'Confirmed_diff', 'Confirmed_rate', 'Confirmed_rate_diff', 'Deaths',
       'Deaths_diff', 'Deaths_rate', 'Deaths_rate_diff', 'Recovered',
       'Recovered_rate', 'Recovered_diff', 'Recovered_rate_diff', 'Active',
       'Active_diff', 'Active_rate_diff', 'Active_rate', 'Case_Fatality_Ratio',
       'Administered', 'Series_Complete_Yes', 'Month', 'Year',
       'Monthly Temp (F)', 'Monthly Avg Temp (F)'],
      dtype='object')

In [593]:
train.head()

Unnamed: 0,Date,State,Total Pop,Day_of_Wk,Confirmed,Confirmed_diff,Confirmed_rate,Confirmed_rate_diff,Deaths,Deaths_diff,...,Active_diff,Active_rate_diff,Active_rate,Case_Fatality_Ratio,Administered,Series_Complete_Yes,Month,Year,Monthly Temp (F),Monthly Avg Temp (F)
0,2020-04-12,Alabama,4903185,Sunday,3667,,0.000748,,93,,...,,,0.000708,2.61016,0.0,0.0,4,2020,61.55,63.096875
1,2020-04-13,Alabama,4903185,Monday,3870,203.0,0.000789,4.1e-05,99,6.0,...,165.0,3.4e-05,0.000741,2.651312,0.0,0.0,4,2020,61.55,63.096875
2,2020-04-14,Alabama,4903185,Tuesday,4041,171.0,0.000824,3.5e-05,114,15.0,...,204.0,4.2e-05,0.000783,2.883886,0.0,0.0,4,2020,61.55,63.096875
3,2020-04-15,Alabama,4903185,Wednesday,4307,266.0,0.000878,5.4e-05,118,4.0,...,118.0,2.4e-05,0.000807,2.895706,0.0,0.0,4,2020,61.55,63.096875
4,2020-04-16,Alabama,4903185,Thursday,4465,158.0,0.000911,3.2e-05,133,15.0,...,255.0,5.2e-05,0.000859,3.06099,0.0,0.0,4,2020,61.55,63.096875


## Modeling Methods and Metrics

Since our modeling will take and predict data at the _state_ level, we want our metrics to also be computed at the state level. In this case, we want to compute the root mean squared error, so we aggregate our real and predicted values by state, compute the RSME within the state, and then average the RSME accross all states. 

In [594]:
def RSME_df(df, col_names): 
    '''
    df has two columns, one with predictions and one with actual values,
    passed as `col_names` (order irrelevant)
    Returns the RSME of the predictions w.r.t the actual values 
    '''
    return np.sqrt(np.mean((df[col_names[0]] - df[col_names[1]])**2))

def compute_RSME_by_state(model, X, Y):
    '''
    Y must have 'State' in the index 
    '''
    Y_pred = pd.DataFrame(data=model.predict(X), index=Y.index, columns=Y.columns)
    combined_data = Y_pred.merge(Y, left_index=True, right_index=True, suffixes=('_pred','_actual'))    
    
    state_pred = combined_data.groupby('State').agg(RSME_df, col_names = combined_data.columns)
    state_pred.columns = ['RSME',"_"]
    state_pred = state_pred[['RSME']]
    return state_pred

def avg_state_RSME(model, X, Y):
    state_RSMEs = compute_RSME_by_state(model, X, Y)
    return state_RSMEs.mean()

In [595]:
metrics = ['Avg RSME']
datasets = ["Training", "Validation"]


def compute_stats(model, X, Y):
    avg_RSME = avg_state_RSME(model, X, Y)
    # more computed values go here
    return (avg_RSME)

def compute_model_stats(model, X_train, Y_train, X_val, Y_val):
    model_stats = {}
    train_metrics = compute_stats(model, X_train, Y_train)
    model_stats['training'] = dict(zip(metrics, train_metrics))
    val_metrics = compute_stats(model, X_val, Y_val)
    model_stats['validation'] =  dict(zip(metrics, val_metrics))
    return model_stats
    
def print_model_stats(model_stats):
    print("Model Statistics:")
    print('                | ',' | '.join(list(model_stats.keys()), ))
    print('-----------------------------------------')
    for var in model_stats['training'].keys():
        print("{var:<15} |   {train:.3f}   |   {val:.3f}".format(var = var,
                                      train = model_stats['training'][var], 
                                      val   = model_stats['validation'][var]))


In [596]:
iterables = [datasets, metrics]
col_idx = pd.MultiIndex.from_product(iterables, names=["", ""])
    
def make_fresh_record():
    record = pd.DataFrame(columns=col_idx)
    record.index.name = "Model"
    return record 

def record_model_stats(record, model_stats, model_name, override=False): 
    model_stats_df = pd.json_normalize(model_stats, sep='_')
    model_stats_df.columns = col_idx
    model_data = model_stats_df.iloc[0]
    model_data.name = model_name
    new_record = record.copy()
    # override or new entry
    if override or model_name not in record.index:
        new_record.loc[model_name,:] = model_data
    #exists and don't overide 
    else:
        print("Warning: A model with the name '{}' already exists in this record.".format(model_name))
        print("         Either change model_name or set 'override=True'.")
        return record
    return new_record

def make_and_record_model(record, processing_fun, train, val, name, override=False):
    model, (X_train, Y_train), (X_val, Y_val) = processing_fun(train, val)
    model_stats = compute_model_stats(model, X_train, Y_train, X_val, Y_val)
    record = record_model_stats(record, model_stats, name, override)
    return (record, model, model_stats, {"train_data": (X_train, Y_train), "val_data":(X_val, Y_val)})

### COVID Cases Modeling



In [597]:
def relabel_timeseries_data(X, Y, W, col_name="input"):
    timeseries_names = [col_name+'_day_'+str(i) for i in range(1-W,1)]

    target_day = Y.name
    Y.name = 'target_'+col_name
    Y = Y.reset_index()
    X = X.set_axis(timeseries_names, axis=1, inplace=False)
    X['Target_day'] = target_day
    Y['Target_day'] = target_day
    X = X.reset_index()
    X = X.set_index(['Target_day','State'])
    Y = Y.set_index(['Target_day','State'])
    return (X, Y)

def create_timeseries(df, col):
    return df.pivot_table(index = 'State', columns='Date',
                   values=col).sort_values(by = 'Date', axis='columns')


def convert_timeseries_to_data(df, W, col_name='input'):
    '''
    df is a dataframe, with columns sorted in increasing order by date
    splits rows into timeseries data with W columns of 'input' associated 
    with the W+1 column of 'output' and combined for all rows 
    '''
    d = df.shape[1]
    X = df.iloc[:, 0:W]
    Y = df.iloc[:,W]
    X, Y = relabel_timeseries_data(X, Y, W, col_name)

    for i in range(1,d-W):#1,3,..., d-W-1
        X_data = df.iloc[:, i:i+W] # i+W-1 = W+1,W+2,... d-1
        Y_data = df.iloc[:,i+W] # i+W = W+2,W+3,..., W+d-W = d
        X_data, Y_data = relabel_timeseries_data(X_data, Y_data, W, col_name)
        X = X.append(X_data)
        Y = Y.append(Y_data)

    return (X, Y)

### Model 1 

Feed in confirmed cases for the previous 14 days (since 2 weeks is a standard COVID incubation period) and predict confirmed cases for the next day. 

In [598]:
from sklearn.linear_model import LinearRegression

def model_1_pipeline(data, test_data=False): 
    window_size = 14
    conf_timeseries_data = create_timeseries(data,'Confirmed_diff')
    X_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_diff")
    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_1_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_1_pipeline(train)
    X_val, Y_val = model_1_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [599]:
model_record = make_fresh_record()

In [600]:
model_record, m1, m1_stats, m1_data = make_and_record_model(model_record, 
                                                  model_1_processing, train, val, 
                                                  "Confirmed_diff only, 14D")
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763


### Model 2 

In [601]:
def model_2_pipeline(data, test_data=False): 
    window_size = 14
    active_timeseries_data = create_timeseries(data,'Active_diff')
    X_active_data, _  = convert_timeseries_to_data(active_timeseries_data, 
                                                window_size, 
                                                col_name="Active_diff")
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    _, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    #X_data = X_active_data.merge(X_conf_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_active_data, Y_data)

def model_2_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_2_pipeline(train)
    X_val, Y_val = model_2_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [602]:
model_record, m2, m2_stats, m2_data = make_and_record_model(model_record, 
                                                  model_2_processing, train, val, 
                                                  "Active_diff only, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719


### Model 3

In [603]:
def model_3_pipeline(data, test_data=False): 
    window_size = 14
    active_timeseries_data = create_timeseries(data,'Active_diff')
    X_active_data, _  = convert_timeseries_to_data(active_timeseries_data, 
                                                window_size, 
                                                col_name="Active_diff")
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_active_data.merge(X_conf_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_3_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_3_pipeline(train)
    X_val, Y_val     = model_3_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [604]:
model_record, m3, m3_stats, m3_data = make_and_record_model(model_record, 
                                                  model_3_processing, train, val, 
                                                  "Active_diff & Conf_diff, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719
"Active_diff & Conf_diff, 14D",594.104388,1479.993138


### Model 4

In [605]:
def model_4_pipeline(data, test_data=False): 
    window_size = 14
    recovered_timeseries_data = create_timeseries(data.fillna(0),'Recovered')
    X_rec_data, _  = convert_timeseries_to_data(recovered_timeseries_data, 
                                                window_size, 
                                                col_name="Recovered")
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_rec_data.merge(X_conf_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_4_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_4_pipeline(train)
    X_val, Y_val     = model_4_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [606]:
model_record, m4, m4_stats, m4_data = make_and_record_model(model_record, 
                                                  model_4_processing, train, val, 
                                                  "Recovered & Conf_diff, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719
"Active_diff & Conf_diff, 14D",594.104388,1479.993138
"Recovered & Conf_diff, 14D",592.739733,1475.218196


### Model 5 

In [607]:
train.head()

Unnamed: 0,Date,State,Total Pop,Day_of_Wk,Confirmed,Confirmed_diff,Confirmed_rate,Confirmed_rate_diff,Deaths,Deaths_diff,...,Active_diff,Active_rate_diff,Active_rate,Case_Fatality_Ratio,Administered,Series_Complete_Yes,Month,Year,Monthly Temp (F),Monthly Avg Temp (F)
0,2020-04-12,Alabama,4903185,Sunday,3667,,0.000748,,93,,...,,,0.000708,2.61016,0.0,0.0,4,2020,61.55,63.096875
1,2020-04-13,Alabama,4903185,Monday,3870,203.0,0.000789,4.1e-05,99,6.0,...,165.0,3.4e-05,0.000741,2.651312,0.0,0.0,4,2020,61.55,63.096875
2,2020-04-14,Alabama,4903185,Tuesday,4041,171.0,0.000824,3.5e-05,114,15.0,...,204.0,4.2e-05,0.000783,2.883886,0.0,0.0,4,2020,61.55,63.096875
3,2020-04-15,Alabama,4903185,Wednesday,4307,266.0,0.000878,5.4e-05,118,4.0,...,118.0,2.4e-05,0.000807,2.895706,0.0,0.0,4,2020,61.55,63.096875
4,2020-04-16,Alabama,4903185,Thursday,4465,158.0,0.000911,3.2e-05,133,15.0,...,255.0,5.2e-05,0.000859,3.06099,0.0,0.0,4,2020,61.55,63.096875


In [608]:
def model_5_pipeline(data, test_data=False): 
    window_size = 14
    series_timeseries_data = create_timeseries(data.fillna(0),'Series_Complete_Yes')
    X_series_data, _  = convert_timeseries_to_data(series_timeseries_data, 
                                                window_size, 
                                                col_name="Series_Complete_Yes")
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_series_data.merge(X_conf_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_5_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_5_pipeline(train)
    X_val, Y_val     = model_5_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [609]:
model_record, m5, m5_stats, m5_data = make_and_record_model(model_record, 
                                                  model_5_processing, train, val, 
                                                  "Series_complete & Conf_diff, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719
"Active_diff & Conf_diff, 14D",594.104388,1479.993138
"Recovered & Conf_diff, 14D",592.739733,1475.218196
"Series_complete & Conf_diff, 14D",593.976453,1432.701763


### Model 6

In [610]:
def model_6_pipeline(data, test_data=False): 
    window_size = 14
    admin_timeseries_data = create_timeseries(data.fillna(0),'Administered')
    X_vax_data, _  = convert_timeseries_to_data(admin_timeseries_data, 
                                                window_size, 
                                                col_name="Administered")
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_vax_data.merge(X_conf_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_6_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_6_pipeline(train)
    X_val, Y_val     = model_6_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [611]:
model_record, m6, m6_stats, m6_data = make_and_record_model(model_record, 
                                                  model_6_processing, train, val, 
                                                  "Administered & Conf_diff, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719
"Active_diff & Conf_diff, 14D",594.104388,1479.993138
"Recovered & Conf_diff, 14D",592.739733,1475.218196
"Series_complete & Conf_diff, 14D",593.976453,1432.701763
"Administered & Conf_diff, 14D",593.976453,1432.701763


In [612]:
train.head()
train[['Administered', 'Series_Complete_Yes']].describe()

Unnamed: 0,Administered,Series_Complete_Yes
count,11417.0,11417.0
mean,0.0,0.0
std,0.0,0.0
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,0.0,0.0


Both of these variables are entirely empty for the time covered by "training" data, so neither changes the model... from just having "Confirmed_diff". 

### Model 7

In [613]:
def model_7_pipeline(data, test_data=False): 
    window_size = 14
    recovered_timeseries_data = create_timeseries(data.fillna(0),'Recovered')
    X_rec_data, _  = convert_timeseries_to_data(recovered_timeseries_data, 
                                                window_size, 
                                                col_name="Recovered")
    
    active_timeseries_data = create_timeseries(data,'Active_diff')
    X_active_data, _  = convert_timeseries_to_data(active_timeseries_data, 
                                                window_size, 
                                                col_name="Active_diff")
    
    timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_conf_data.merge(X_rec_data, left_index=True, right_index=True)
    X_data = X_data.merge(X_active_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_7_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_7_pipeline(train)
    X_val, Y_val     = model_7_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [614]:
model_record, m7, m7_stats, m7_data = make_and_record_model(model_record, 
                                                  model_7_processing, train, val, 
                                                  "Active, Recovered, & Conf_diff, 14D", override=True)
model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763
"Active_diff only, 14D",988.687519,2680.69719
"Active_diff & Conf_diff, 14D",594.104388,1479.993138
"Recovered & Conf_diff, 14D",592.739733,1475.218196
"Series_complete & Conf_diff, 14D",593.976453,1432.701763
"Administered & Conf_diff, 14D",593.976453,1432.701763
"Active, Recovered, & Conf_diff, 14D",590.467725,1502.307988


Doesn't really help to include more, this is likely because of colinearity and not much new data being added. 

## Weather Models

In [615]:
weather_model_record = make_fresh_record()
weather_model_record = record_model_stats(weather_model_record, m1_stats, "Confirmed_diff only, 14D", override=False)

weather_model_record

Unnamed: 0_level_0,Training,Validation
Unnamed: 0_level_1,Avg RSME,Avg RSME
Model,Unnamed: 1_level_2,Unnamed: 2_level_2
"Confirmed_diff only, 14D",593.976453,1432.701763


In [616]:
train.head()

Unnamed: 0,Date,State,Total Pop,Day_of_Wk,Confirmed,Confirmed_diff,Confirmed_rate,Confirmed_rate_diff,Deaths,Deaths_diff,...,Active_diff,Active_rate_diff,Active_rate,Case_Fatality_Ratio,Administered,Series_Complete_Yes,Month,Year,Monthly Temp (F),Monthly Avg Temp (F)
0,2020-04-12,Alabama,4903185,Sunday,3667,,0.000748,,93,,...,,,0.000708,2.61016,0.0,0.0,4,2020,61.55,63.096875
1,2020-04-13,Alabama,4903185,Monday,3870,203.0,0.000789,4.1e-05,99,6.0,...,165.0,3.4e-05,0.000741,2.651312,0.0,0.0,4,2020,61.55,63.096875
2,2020-04-14,Alabama,4903185,Tuesday,4041,171.0,0.000824,3.5e-05,114,15.0,...,204.0,4.2e-05,0.000783,2.883886,0.0,0.0,4,2020,61.55,63.096875
3,2020-04-15,Alabama,4903185,Wednesday,4307,266.0,0.000878,5.4e-05,118,4.0,...,118.0,2.4e-05,0.000807,2.895706,0.0,0.0,4,2020,61.55,63.096875
4,2020-04-16,Alabama,4903185,Thursday,4465,158.0,0.000911,3.2e-05,133,15.0,...,255.0,5.2e-05,0.000859,3.06099,0.0,0.0,4,2020,61.55,63.096875


### Model 8 


In [633]:
def model_8_pipeline(data, test_data=False): 
    window_size = 14
    
    
    X_temp_data = data[['State','Date','Monthly Avg Temp (F)']].set_index(['Date','State'])
    
    conf_timeseries_data = create_timeseries(data,"Confirmed_diff")
    X_conf_data, Y_data  = convert_timeseries_to_data(conf_timeseries_data, 
                                                window_size, 
                                                col_name="Confirmed_Diff")
    
    X_data = X_conf_data.merge(X_temp_data, left_index=True, right_index=True)

    if test_data:
        return X_data
    else: 
        return (X_data, Y_data)

def model_8_processing(train, val):
    model = LinearRegression()
    X_train, Y_train = model_8_pipeline(train)
    X_val, Y_val     = model_8_pipeline(val)
    model.fit(X_train, Y_train)
    return (model, (X_train, Y_train), (X_val, Y_val))

In [644]:
train.set_index(['Date','State']).index.size

11417

In [645]:
# why is the size so big??, ah well we have weather values for all dates, ignoring the windowing!!
X_temp_data.index.size

11417

In [641]:
train['Monthly Avg Temp (F)'].isna().sum()

233

need to fill missing values... with the average monthly temp for that state... 

In [635]:
X_train, Y_train = model_8_pipeline(train)
(X_train.shape, Y_train.shape)

((2488906, 15), (10682, 1))

In [634]:
weather_model_record, m8, m8_stats, m8_data = make_and_record_model(weather_model_record, 
                                                  model_8_processing, train, val, 
                                                  "Avg Mon. Temp & Conf_diff, 14D", override=True)
weather_model_record

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').