# compress loan_model.ipynb

### import all required libraries

In [1]:
import bz2
import pickle
from joblib import load

### Load your existing model

In [2]:
model = load('loan_model.joblib')


In [3]:
def compressed_pickle(title, data):
    """Compress and save data using bzip2"""
    with bz2.BZ2File(title + '.pbz2', 'wb') as f:
        pickle.dump(data, f)


### Compress and save your model (will be ~5-10x smaller)

In [4]:
compressed_pickle('compressed_loan_model', model)

### Step 2: Create a Loading Utility

In [5]:
def load_compressed_model(file_path):
    """Load a bzip2 compressed model"""
    with bz2.BZ2File(file_path, 'rb') as f:
        return pickle.load(f)
    
# Usage example:
model = load_compressed_model('compressed_loan_model.pbz2')

### Step 3: Update Your Streamlit App

In [6]:
import streamlit as st
import bz2
import pickle

@st.cache_resource
def load_model():
    with bz2.BZ2File('compressed_loan_model.pbz2', 'rb') as f:
        return pickle.load(f)

model = load_model()

2025-06-12 20:02:24.618 
  command:

    streamlit run C:\Users\Admin\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


# Additional Optimization Options

### Option 1: Reduce Model Size Before Compression

###### If using RandomForest, reduce number of trees and depth

In [7]:
from sklearn.ensemble import RandomForestClassifier

# If using RandomForest, reduce number of trees and depth
from sklearn.datasets import make_classification

# Create sample data for demonstration
X_train, y_train = make_classification(n_samples=100, n_features=4, n_classes=2, random_state=42)

optimized_model = RandomForestClassifier(
    n_estimators=100,  # Reduced from default 100
    max_depth=10,      # Reduced from default None
    random_state=42
)
optimized_model.fit(X_train, y_train)

# Then compress this smaller model
compressed_pickle('optimized_compressed_model', optimized_model)

### Option 2: Use a More Efficient Model

In [8]:

from xgboost import XGBClassifier

# XGBoost models are typically smaller
xgb_model = XGBClassifier(
    n_estimators=50,
    max_depth=5,
    random_state=42
)
xgb_model.fit(X_train, y_train)

compressed_pickle('xgb_compressed_model', xgb_model)

# Verification steps

### Check the compressed file size:

In [9]:
import os
print(f"Original size: {os.path.getsize('loan_model.joblib')/1e6:.1f} MB")
print(f"Compressed size: {os.path.getsize('compressed_loan_model.pbz2')/1e6:.1f} MB")

Original size: 83.0 MB
Compressed size: 8.0 MB


### Verify the compressed model works:

In [10]:
test_prediction = model.predict(X_test[:1])
print(f"Test prediction: {test_prediction}")

NameError: name 'X_test' is not defined