Part 1

- Download and unzip the zip file provided to you here.
    - Unzip the files into your private repository.
- Load the provided joblib file and display the keys (`.keys()`) of the dictionary stored in it.
- Assign each of the keys from the dictionary as a separate variable.
- Use the preprocessor to transform the X_train/X_test data into processed dataframes. 
- Replace the column names with the correct feature names extracted from the preprocessor. 

# Imports

In [1]:
## Our standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Preprocessing tools
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

## Models & evaluation metrics
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import joblib
import os

## setting random state for reproducibility
SEED = 321
np.random.seed(SEED)

## set pandas to display more columns
pd.set_option('display.max_columns',50)

## silence warnings to keep notebook output readable
## NOTE: this hides ALL warnings for the session, including deprecation notices
import warnings
warnings.filterwarnings('ignore')

## plot styling
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-<name>' and
# later releases dropped the old names, so use whichever spelling this install has.
_muted_style = (
    'seaborn-v0_8-muted'
    if 'seaborn-v0_8-muted' in plt.style.available
    else 'seaborn-muted'
)
plt.style.use([_muted_style,'fivethirtyeight'])
sns.set_context('talk')

In [2]:
# Make sure the Data/ folder exists
os.makedirs('Data/',exist_ok=True) # exist_ok=True: no error if the folder is already there
# List the folder's contents to confirm it was created
os.listdir("Data/")

['saved_model.joblib']

In [3]:
# List the current working directory to check that the joblib file is present
os.listdir()

['.git',
 '.gitattributes',
 '.ipynb_checkpoints',
 'Data',
 'saved_model.joblib',
 'Viral Spread Analysis.ipynb']

# Loading Joblib File Data

In [4]:
# Load the provided joblib file and display the keys of the dictionary stored in it.
# NOTE: joblib files are pickles and can run code when loaded - only load files
# that come from a trusted source.
try:
    loaded_joblib = joblib.load("saved_model.joblib")
except ModuleNotFoundError as err:
    # Unpickling imports the modules that existed in the environment where the
    # file was saved. A missing pandas submodule (e.g. 'pandas.core.indexes.numeric',
    # which pandas 2.x no longer has) means the installed pandas is incompatible
    # with the file. Anything else is re-raised untouched.
    if not (err.name or "").startswith("pandas."):
        raise
    raise ModuleNotFoundError(
        f"{err} - 'saved_model.joblib' was saved with a pandas version that is "
        f"incompatible with the installed one ({pd.__version__}). Install the "
        "pandas version the file was saved with (pandas<2.0 if the missing "
        "module is 'pandas.core.indexes.numeric') and restart the kernel.",
        name=err.name,
    ) from err
loaded_joblib.keys()

ModuleNotFoundError: No module named 'pandas.core.indexes.numeric'

In [None]:
# Give each object stored in the loaded dictionary its own variable.
# The four data splits are looked up and unpacked together, in the order listed.
X_train, y_train, X_test, y_test = (
    loaded_joblib[split] for split in ('X_train', 'y_train', 'X_test', 'y_test')
)
# The preprocessing object and the grid search are stored under their own keys.
preprocessor = loaded_joblib['preprocessor']
loaded_gridsearch = loaded_joblib['GridSearch']


In [None]:
# Retrieve the best estimator stored on the loaded grid search object
# (nothing is fitted in this cell; presumably the search was already fitted
# before it was saved - TODO confirm)
best_estimator = loaded_gridsearch.best_estimator_
# Evaluate the best model on the train and test splits
# NOTE(review): evaluate_regression is not defined or imported in the cells shown
# here - confirm it is defined before this cell runs
evaluate_regression(best_estimator, X_train, y_train, X_test, y_test)
