# Deploying the app and chatbot

In [1]:
!pip install transformers sentence-transformers pandas streamlit

Collecting streamlit
  Downloading streamlit-1.44.0-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata 

In [2]:
import pandas as pd
import joblib
from transformers import pipeline
import re

## Load Models and Data

In [3]:
model_1m = joblib.load("1m_XGBoost_smote.pkl")
model_3m = joblib.load("3m_XGBoost_smote.pkl")
model_6m = joblib.load("6m_XGBoost_smote.pkl")

df = pd.read_csv("Processed_data_latest.csv")

## Define Features Used in the Models

In [4]:
model_features = [
    'distance_km',
    'pickup_day',
    'dependents_qty',
    'age_group_encoded',
    'scheduled_month',
    'scheduled_weekday_encoded'
]

## Getting the requirements for deploying the application and Load LLM for Natural Response

In [5]:
#install streamlit, one time execution
!pip install streamlit joblib



In [6]:
generator = pipeline("text2text-generation", model="google/flan-t5-large")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cpu


# Creating and testing Chatbot

In [7]:
# chatbot.py
import pandas as pd
import joblib
import re

# Default fallback values
default_features = {
    'distance_km': 10,
    'pickup_day': 4,  # Friday
    'dependents_qty': 2,
    'age_group_encoded': 2,
    'scheduled_month': 6,
    'scheduled_weekday_encoded': 4
}

def extract_features(text):
    features = default_features.copy()

    dist = re.search(r'(\d+)\s*km', text)
    dep = re.search(r'(\d+)\s*dependents?', text)
    age = re.search(r'(\d+)\s*-?year[- ]old', text)

    for d, v in {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3,
                 'Friday': 4, 'Saturday': 5, 'Sunday': 6}.items():
        if d.lower() in text.lower():
            features['pickup_day'] = v
            features['scheduled_weekday_encoded'] = v
            break

    if dist:
        features['distance_km'] = int(dist.group(1))
    if dep:
        features['dependents_qty'] = int(dep.group(1))
    if age:
        a = int(age.group(1))
        features['age_group_encoded'] = 0 if a <= 18 else 1 if a <= 30 else 2 if a <= 45 else 3 if a <= 65 else 4

    return pd.DataFrame([features])

def predict(text):
    df = extract_features(text)

    p1 = model_1m.predict(df)[0]
    p3 = model_3m.predict(df)[0]
    p6 = model_6m.predict(df)[0]

    def yesno(p): return "✅ Likely to return" if p == 1 else "❌ Unlikely to return"

    return (
        f"📊 Prediction:\n"
        f"- 1 Month: {yesno(p1)}\n"
        f"- 3 Months: {yesno(p3)}\n"
        f"- 6 Months: {yesno(p6)}\n"
        f"(Some missing values were filled with defaults.)"
    )

# Test
if __name__ == "__main__":
    question = input("Ask a question about the client:\n")
    print(predict(question))


Ask a question about the client:
is client going to return if he lives 10 km away and have 2 dependents
📊 Prediction:
- 1 Month: ❌ Unlikely to return
- 3 Months: ❌ Unlikely to return
- 6 Months: ✅ Likely to return
(Some missing values were filled with defaults.)


In [8]:
print(df.columns.tolist())


['collect_scheduled_date', 'collect_token', 'pickup_date', 'quantity', 'title', 'unique_client', 'dependents_qty', 'household', 'preferred_languages', 'primary_contact', 'Sex', 'status', 'age_group', 'family_name', 'pickup_month', 'pickup_day', 'distance_km', 'days_since_last_visit', 'days_since_last_pickup', 'visit_count', 'visits_last_1m', 'visits_last_3m', 'visits_last_6m', 'client_returned', 'recent_returned_1m', 'recent_returned_3m', 'recent_returned_6m', 'missed_pickup', 'scheduled_but_missed', 'is_future_scheduled', 'schedule_pickup_gap_days', 'scheduled_month', 'scheduled_weekday', 'pickup_weekday']


# Deploying the app on Streamlit

In [9]:
#import streamlit
import streamlit as st

In [10]:
!pip install plotly
!pip install scikit-learn
!pip install xgboost



In [11]:
%%writefile app.py
import streamlit as st
import pandas as pd
import plotly.express as px
import joblib
import shap
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import xgboost as xgb
import re

# Load the dataset
data = pd.read_csv('Processed_data_latest.csv', encoding='latin1')

# Dashboard Page
def dashboard():
    try:
        st.image('logo.jpeg', use_container_width=True)
    except FileNotFoundError:
        st.warning("⚠️ Logo image not found. Please upload 'logo.jpeg' to your project directory.")

    st.subheader("💡 Abstract")
    st.write("""
    Islamic Family and Social Services Association (IFSSA) provides food hampers to individuals and families in need.
    This app predicts client retention and supports proactive outreach and planning using machine learning.
    """)

    st.subheader("👨‍💼 Project Purpose")
    st.write("""
    Our goal is to forecast which clients are likely to return for food hamper pickups. This improves outreach, minimizes waste, and supports better planning.
    """)

# EDA Page
def exploratory_data_analysis():
    st.title("📊 IFSSA Client Data Analysis")
    st.markdown("""
    <iframe width="600" height="450" src="https://lookerstudio.google.com/embed/reporting/f21f2db2-6992-4e62-89e1-1d7ac1b699ac/page/0NzEF" frameborder="0" style="border:0" allowfullscreen sandbox="allow-storage-access-by-user-activation allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox"></iframe>""", unsafe_allow_html=True)

# Prediction Page Template
def predict_page(month_label, model_file):
    st.title(f"🔮 Predict Client Return for {month_label} Month")
    st.write("Enter client details below:")

    distance = st.slider("Distance from IFSSA (km)", 0, 50, 10)
    pickup_day = st.selectbox("Pickup Day", ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
    dependents_qty = st.slider("Number of Dependents", 0, 10, 1)
    age_group = st.selectbox("Age Group", ['0-18', '19-30', '31-45', '46-65', '65+'])
    scheduled_month = st.selectbox("Scheduled Month", list(range(1, 13)))
    scheduled_weekday = st.selectbox("Scheduled Weekday", ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])

    if st.button("Predict"):
        try:
            model = joblib.load(model_file)
            pickup_map = {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6}
            age_group_map = {'0-18': 0, '19-30': 1, '31-45': 2, '46-65': 3, '65+': 4}

            input_df = pd.DataFrame([{
                'distance_km': distance,
                'pickup_day': pickup_map[pickup_day],
                'dependents_qty': dependents_qty,
                'age_group_encoded': age_group_map[age_group],
                'scheduled_month': scheduled_month,
                'scheduled_weekday_encoded': pickup_map[scheduled_weekday]
            }])

            prediction = model.predict(input_df)[0]
            proba = model.predict_proba(input_df)[0]
            family_name = data['family_name'].dropna().sample(1).values[0]

            # Probability bar
            fig = go.Figure(go.Bar(
                x=[proba[0], proba[1]],
                y=['Not Return', 'Will Return'],
                orientation='h',
                marker=dict(color=['crimson', 'green'])
            ))
            fig.update_layout(
                title="Prediction Probabilities",
                xaxis_title="Probability",
                yaxis_title="Class",
                xaxis=dict(range=[0, 1]),
                height=300
            )
            st.plotly_chart(fig)

            # Suggestions
            if prediction == 1:
                st.success("✅ The client is likely to return.")
                st.info(f"Suggestion for {family_name}: Continue routine outreach and record future visits.")
            else:
                st.error("❌ The client is unlikely to return.")
                st.warning(f"Suggestion for {family_name}: Consider a follow-up call, support check-in, or sending a reminder message.")

            # SHAP
            explainer = shap.Explainer(model)
            shap_values = explainer(input_df)
            st.subheader("🧠 SHAP Explanation")
            fig, ax = plt.subplots(figsize=(10, 5))
            shap.plots.waterfall(shap_values[0], max_display=6, show=False)
            plt.tight_layout()
            st.pyplot(fig)

        except Exception as e:
            st.error(f"Prediction failed: {e}")

# Chatbot Page
def chatbot_page():
    st.title("💬 Chat with Prediction Bot")
    st.write("Ask about a hypothetical client in plain language (you can omit details):")

    user_input = st.text_area("Ask here (e.g. A 42-year-old with 2 dependents picks up Friday, lives 12 km away):")

    if st.button("🔍 Predict"):
        try:
            # Default values
            features = {
                'distance_km': 10,
                'pickup_day': 4,
                'dependents_qty': 2,
                'age_group_encoded': 2,
                'scheduled_month': 6,
                'scheduled_weekday_encoded': 4
            }

            dist = re.search(r'(\d+)\s*km', user_input)
            dep = re.search(r'(\d+)\s*dependents?', user_input)
            age = re.search(r'(\d+)\s*-?year[- ]old', user_input)

            for d, v in {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6}.items():
                if d.lower() in user_input.lower():
                    features['pickup_day'] = v
                    features['scheduled_weekday_encoded'] = v
                    break

            if dist:
                features['distance_km'] = int(dist.group(1))
            if dep:
                features['dependents_qty'] = int(dep.group(1))
            if age:
                a = int(age.group(1))
                features['age_group_encoded'] = 0 if a <= 18 else 1 if a <= 30 else 2 if a <= 45 else 3 if a <= 65 else 4

            input_df = pd.DataFrame([features])

            model_1 = joblib.load("1m_XGBoost_smote.pkl")
            model_3 = joblib.load("3m_XGBoost_smote.pkl")
            model_6 = joblib.load("6m_XGBoost_smote.pkl")

            pred_1 = model_1.predict(input_df)[0]
            pred_3 = model_3.predict(input_df)[0]
            pred_6 = model_6.predict(input_df)[0]

            def yesno(p): return "✅ Likely to return" if p == 1 else "❌ Unlikely to return"

            st.markdown("### 📊 Prediction Results")
            st.markdown(f"- 1 Month: {yesno(pred_1)}")
            st.markdown(f"- 3 Months: {yesno(pred_3)}")
            st.markdown(f"- 6 Months: {yesno(pred_6)}")
            st.info("Note: Missing inputs were filled with default values.")

        except Exception as e:
            st.error(f"Prediction failed: {e}")

# Thank You Page
def thank_you_page():
    st.title("🙏 Thank You")
    st.write("We appreciate your interest in our project. For more information about IFSSA:")
    st.markdown("[Visit IFSSA Official Website](https://albertamentors.ca/islamic-family-social-services-association-ifssa/)")
    try:
        st.image("IFFSA_Family_2.png", caption="Islamic Family & Social Services Association")
    except:
        st.warning("IFSSA logo image not found.")

# Main App Logic
def main():
    st.sidebar.title("IFSSA Client Retention Prediction")
    app_page = st.sidebar.radio("Select a Page", [
        "Dashboard",
        "EDA",
        "Predicting Return in 1 Month",
        "Predicting Return in 3 Month",
        "Predicting Return in 6 Month",
        "Chat with Prediction Bot",  # ← added chatbot page
        "Thank You"
    ])

    if app_page == "Dashboard":
        dashboard()
    elif app_page == "EDA":
        exploratory_data_analysis()
    elif app_page == "Predicting Return in 1 Month":
        predict_page("1", "1m_XGBoost_smote.pkl")
    elif app_page == "Predicting Return in 3 Month":
        predict_page("3", "3m_XGBoost_smote.pkl")
    elif app_page == "Predicting Return in 6 Month":
        predict_page("6", "6m_XGBoost_smote.pkl")
    elif app_page == "Chat with Prediction Bot":
        chatbot_page()
    elif app_page == "Thank You":
        thank_you_page()

if __name__ == "__main__":
    main()


Writing app.py


## Install localtunnel

In [12]:
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K
added 22 packages in 4s
[1G[0K⠋[1G[0K
[1G[0K⠋[1G[0K3 packages are looking for funding
[1G[0K⠋[1G[0K  run `npm fund` for details
[1G[0K⠋[1G[0K

## Run streamlit in background

In [13]:
!streamlit run /content/app.py &>/content/logs.txt & curl ipv4.icanhazip.com

146.148.109.67


Copy this IP address.

## Expose the port 8501
Then just click in the `url` showed.

A `log.txt`file will be created.

In [14]:
!npx localtunnel --port 8501

[1G[0K⠙[1G[0Kyour url is: https://quiet-hands-shout.loca.lt
^C


# Generating the Requirements.txt file

In [15]:
!pip install pipreqs

Collecting pipreqs
  Downloading pipreqs-0.5.0-py3-none-any.whl.metadata (7.9 kB)
Collecting docopt==0.6.2 (from pipreqs)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ipython==8.12.3 (from pipreqs)
  Downloading ipython-8.12.3-py3-none-any.whl.metadata (5.7 kB)
Collecting yarg==0.1.9 (from pipreqs)
  Downloading yarg-0.1.9-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting jedi>=0.16 (from ipython==8.12.3->pipreqs)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting stack-data (from ipython==8.12.3->pipreqs)
  Downloading stack_data-0.6.3-py3-none-any.whl.metadata (18 kB)
Collecting executing>=1.2.0 (from stack-data->ipython==8.12.3->pipreqs)
  Downloading executing-2.2.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting asttokens>=2.1.0 (from stack-data->ipython==8.12.3->pipreqs)
  Downloading asttokens-3.0.0-py3-none-any.whl.metadata (4.7 kB)
Collecting pure-eval (from stack-data->ipython==8.12.3->pipr

In [16]:
!pipreqs #(to create a new requirements.txt file) Delete current file
# download scikit too
!pipreqs install scikit-learn
# import plotly too #(to create a new requirements.txt file) Delete current file
#!pipreqs --force #(to replace existing requirements.txt file)

INFO: Not scanning for jupyter notebooks.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
Please, verify manually the final list of requirements.txt to avoid possible dependency confusions.
INFO: Successfully saved requirements file in /content/requirements.txt
Usage:
    pipreqs [options] [<path>]


In [17]:
!pip freeze > requirements .txt