# Using a Provided Filepath

### Create a config folder
- Upload the provided filepaths.json file to your config folder

In [1]:
import json
# Open the .json file
with open('config/filepaths.json', 'r') as f:
    FPATHS = json.load(f)

FPATHS

{'data': {'raw': {'full': 'Data/loan_approval.csv'},
  'ml': {'train': 'Data/train.joblib', 'test': 'Data/test.joblib'},
  'nlp': {'nlp_full': 'data/nlp/nlp_full.csv'}},
 'models': {'linear_regression': 'models/logreg_pipe.joblib',
  'random_forest': 'models/rf_pipe.joblib'},
 'images': {'banner': 'Images/money_tree.png'}}

In [2]:
from pprint import pprint
pprint(FPATHS)

{'data': {'ml': {'test': 'Data/test.joblib', 'train': 'Data/train.joblib'},
          'nlp': {'nlp_full': 'data/nlp/nlp_full.csv'},
          'raw': {'full': 'Data/loan_approval.csv'}},
 'images': {'banner': 'Images/money_tree.png'},
 'models': {'linear_regression': 'models/logreg_pipe.joblib',
            'random_forest': 'models/rf_pipe.joblib'}}


In [3]:
# What are the keys?
FPATHS.keys()

dict_keys(['data', 'models', 'images'])

In [4]:
## Create folders

In [5]:
# These are the main folders 
# We can add these programmatically (use the custom function provided) or we can add them manually

import os
def create_directories_from_paths(nested_dict):
    """OpenAI. (2023). ChatGPT [Large language model]. https://chat.openai.com 
    Recursively create directories for file paths in a nested dictionary.
    Parameters:
    nested_dict (dict): The nested dictionary containing file paths.
    """
    for key, value in nested_dict.items():
        if isinstance(value, dict):
            # If the value is a dictionary, recurse into it
            create_directories_from_paths(value)
        elif isinstance(value, str):
            # If the value is a string, treat it as a file path and get the directory path
            directory_path = os.path.dirname(value)
            # If the directory path is not empty and the directory does not exist, create it
            if directory_path and not os.path.exists(directory_path):
                os.makedirs(directory_path)
                print(f"Directory created: {directory_path}")

# Use the function on your FPATHS dictionary
create_directories_from_paths(FPATHS)


## Upload the files to the appropriate folders

In [6]:
FPATHS['data']

{'raw': {'full': 'Data/loan_approval.csv'},
 'ml': {'train': 'Data/train.joblib', 'test': 'Data/test.joblib'},
 'nlp': {'nlp_full': 'data/nlp/nlp_full.csv'}}

In [7]:
FPATHS['data']['raw']

{'full': 'Data/loan_approval.csv'}

In [8]:
FPATHS['data']['raw']['full']

'Data/loan_approval.csv'

In [9]:
import pandas as pd
df = pd.read_csv(FPATHS['data']['raw']['full'])
df.head()

Unnamed: 0,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [10]:
import joblib
# load training data using FPATHS dictionary
X_train, y_train = joblib.load(FPATHS['data']['ml']['train'])
X_train.head()

Unnamed: 0,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value
3977,5,Graduate,Yes,7600000,18000000,12,452,7900000,4300000,23800000,4700000
2425,0,Not Graduate,No,6000000,22900000,16,302,12200000,6500000,17700000,7100000
1545,2,Not Graduate,Yes,8200000,27800000,16,827,11500000,8700000,26000000,10700000
173,2,Not Graduate,Yes,7400000,28200000,8,376,21900000,5000000,17400000,5200000
3405,1,Graduate,Yes,3400000,7000000,4,534,3100000,1600000,13000000,4000000


In [11]:
# Functionalize loading train and test
def load_Xy_data(fpath):
    return joblib.load(fpath)

In [13]:
# Test our function
X_test, y_test = load_Xy_data(fpath = FPATHS['data']['ml']['test'])
X_test.head()

Unnamed: 0,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value
1703,5,Graduate,No,5400000,19700000,20,423,6500000,10000000,15700000,7300000
1173,2,Graduate,No,5900000,14000000,8,599,4700000,9500000,17800000,6700000
308,3,Graduate,No,9600000,19900000,14,452,4200000,16200000,28500000,6600000
1322,2,Graduate,No,6200000,23400000,8,605,10000000,10800000,21800000,9200000
3271,3,Not Graduate,Yes,5800000,14100000,12,738,11700000,4400000,15400000,8400000


In [14]:
# Load a model from FPATHS dict
rf_model = joblib.load(FPATHS['models']['random_forest'])
rf_model

In [15]:
# Define function to load in model from dictionary with joblib
def load_model_ml(fpath):
    return joblib.load(fpath)

In [16]:
# Load model
linreg = load_model_ml(fpath = FPATHS['models']['linear_regression'])
linreg

In [19]:
from PIL import Image
image = Image.open(FPATHS['images']['banner'])
image.show()

FileNotFoundError: [Errno 2] No such file or directory: 'Images/money_tree.png'