<a href="https://colab.research.google.com/github/abhisheksu38/Task_System1/blob/main/week4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install xgboost



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import joblib


In [3]:
# Load the uploaded dataset (change DATA_PATH if the file lives elsewhere)
DATA_PATH = '/content/Cleaned_marketing_campaign (1).csv'
df = pd.read_csv(DATA_PATH)

# Fill missing incomes with the column mean
df['Income'] = df['Income'].fillna(df['Income'].mean())

# Simulate Task Classification and Priority labels, but only when the dataset
# does not already provide them, so real labels are never overwritten.
# A seeded generator keeps the synthetic labels (and therefore every metric and
# saved model derived from them) identical across Restart & Run All.
# NOTE: simulated labels are drawn independently of the features, so models
# trained on them can only be expected to reach chance-level accuracy.
SEED = 42
rng = np.random.default_rng(SEED)
if 'Task_Category' not in df.columns:
    df['Task_Category'] = rng.choice(['email', 'call', 'survey'], size=len(df))
if 'Priority' not in df.columns:
    df['Priority'] = rng.choice(['low', 'medium', 'high'], size=len(df))


In [4]:
# Label encode target variables (string classes -> integer codes)
le_task = LabelEncoder()
df['Task_Category_encoded'] = le_task.fit_transform(df['Task_Category'])

le_priority = LabelEncoder()
df['Priority_encoded'] = le_priority.fit_transform(df['Priority'])

# Persist the fitted encoders next to the scaler: the saved models predict
# integer codes, and without these files the code -> label mapping is lost
# once this kernel is gone.
joblib.dump(le_task, 'task_label_encoder.pkl')
joblib.dump(le_priority, 'priority_label_encoder.pkl')

# Feature selection (you can adjust this list)
features = ['Age', 'Income', 'Recency', 'Kidhome', 'Teenhome', 'Family_Size']
df = df.dropna(subset=features)  # Drop rows where any required feature is missing
X = df[features]

# Scale features to zero mean / unit variance
# NOTE(review): the scaler is fit on every row, i.e. before the train/test
# split done in later cells, so held-out rows influence the scaling
# statistics. Consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save the scaler (kept as the last expression so the cell echoes its path)
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [6]:
# Target vector for the task-category model
y_task = df.loc[:, 'Task_Category_encoded']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_task_train, X_task_test, y_task_train, y_task_test = train_test_split(
    X_scaled,
    y_task,
    test_size=0.2,
    random_state=42,
)

# Build the random forest and fit it in one step (fit() returns the estimator)
task_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_task_train,
    y_task_train,
)

# Predict on the held-out rows and print the per-class metrics
y_task_pred = task_model.predict(X_task_test)
print(
    "Task Classification Report:\n",
    classification_report(y_true=y_task_test, y_pred=y_task_pred),
)

# Persist the fitted classifier; as the last expression its path list is echoed
joblib.dump(task_model, 'final_task_classifier.pkl')


Task Classification Report:
               precision    recall  f1-score   support

           0       0.35      0.27      0.31       166
           1       0.30      0.33      0.32       144
           2       0.35      0.41      0.38       138

    accuracy                           0.33       448
   macro avg       0.34      0.34      0.33       448
weighted avg       0.34      0.33      0.33       448



['final_task_classifier.pkl']

In [8]:
# Target vector for the priority model
y_priority = df.loc[:, 'Priority_encoded']

# Same 80/20 split settings (and seed) as used for the task classifier
X_pri_train, X_pri_test, y_pri_train, y_pri_test = train_test_split(
    X_scaled,
    y_priority,
    test_size=0.2,
    random_state=42,
)

# Gradient-boosted classifier evaluated with multi-class log loss;
# fit() returns the estimator, so construction and training are chained
priority_model = XGBClassifier(random_state=42, eval_metric='mlogloss').fit(
    X_pri_train,
    y_pri_train,
)

# Predict on the held-out rows and print the per-class metrics
y_pri_pred = priority_model.predict(X_pri_test)
print(
    "Priority Prediction Report:\n",
    classification_report(y_true=y_pri_test, y_pred=y_pri_pred),
)

# Persist the fitted model; as the last expression its path list is echoed
joblib.dump(priority_model, 'final_priority_predictor.pkl')


Priority Prediction Report:
               precision    recall  f1-score   support

           0       0.36      0.42      0.39       148
           1       0.38      0.33      0.35       161
           2       0.37      0.37      0.37       139

    accuracy                           0.37       448
   macro avg       0.37      0.37      0.37       448
weighted avg       0.37      0.37      0.37       448



['final_priority_predictor.pkl']

## Dashboard mockup or output summary

In [9]:
# One row per trained model: display name, algorithm, held-out accuracy
# (rounded to 4 decimals) and the file the model was saved to.
summary = pd.DataFrame(
    [
        (
            'Task Classifier',
            'Random Forest',
            round(task_model.score(X_task_test, y_task_test), 4),
            'final_task_classifier.pkl',
        ),
        (
            'Priority Predictor',
            'XGBoost',
            round(priority_model.score(X_pri_test, y_pri_test), 4),
            'final_priority_predictor.pkl',
        ),
    ],
    columns=['Model', 'Algorithm', 'Accuracy', 'Saved Model File'],
)

# Show the table in the notebook and keep a CSV copy without the index column
print("📊 Model Summary:")
display(summary)
summary.to_csv("model_summary.csv", index=False)


📊 Model Summary:


Unnamed: 0,Model,Algorithm,Accuracy,Saved Model File
0,Task Classifier,Random Forest,0.3348,final_task_classifier.pkl
1,Priority Predictor,XGBoost,0.3705,final_priority_predictor.pkl


In [13]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m77.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m122.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [20]:
# app.py
import streamlit as st
import pandas as pd
import joblib

# Page configuration is issued before any other Streamlit command. Calling it
# after load_models() is unsafe because the cache spinner shown on a cold
# start is itself a Streamlit command.
st.set_page_config(page_title="Task Assignment Dashboard", layout="wide")


# Load models and scaler
@st.cache_resource
def load_models():
    """Load the persisted estimators and the feature scaler.

    Decorated with ``st.cache_resource`` so the files are not re-read on
    every script rerun.

    Returns:
        tuple: ``(task_model, priority_model, scaler)`` as loaded by
        ``joblib.load`` from ``final_task_classifier.pkl``,
        ``final_priority_predictor.pkl`` and ``scaler.pkl`` in the working
        directory.
    """
    task_model = joblib.load("final_task_classifier.pkl")
    priority_model = joblib.load("final_priority_predictor.pkl")
    scaler = joblib.load("scaler.pkl")
    return task_model, priority_model, scaler


task_model, priority_model, scaler = load_models()

st.title("🧠 Task Classification & Priority Prediction")

uploaded_file = st.file_uploader("📤 Upload your task dataset (.csv)", type=["csv"])

# file_uploader returns None until the user provides a file
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # An empty or malformed upload gets a readable message, not a traceback
        st.error(f"❌ Could not read the uploaded CSV: {exc}")
        st.stop()

    st.subheader("🔍 Input Data Preview")
    st.dataframe(df.head())

    # Required input features (same columns, same order, as passed to the scaler)
    required_cols = ['Age', 'Income', 'Recency', 'Kidhome', 'Teenhome', 'Family_Size']
    missing_cols = [col for col in required_cols if col not in df.columns]

    if missing_cols:
        st.error(f"❌ Missing columns: {missing_cols}")
    else:
        # Fill missing values, if any, with each column's mean in the uploaded data
        df[required_cols] = df[required_cols].fillna(df[required_cols].mean())

        # Scale features with the persisted scaler
        X = scaler.transform(df[required_cols])

        # Predictions (values are whatever the saved models emit)
        task_pred = task_model.predict(X)
        priority_pred = priority_model.predict(X)

        # Add to output
        df['Predicted_Task_Category'] = task_pred
        df['Predicted_Priority'] = priority_pred

        st.success("✅ Predictions complete!")
        st.subheader("📊 Prediction Output")
        st.dataframe(df[['Predicted_Task_Category', 'Predicted_Priority']].head())

        # Option to download the full frame, including the prediction columns
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button("⬇️ Download Full Result CSV", data=csv, file_name='predictions_output.csv', mime='text/csv')


