In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import streamlit
import joblib


In [2]:
# Restore the persisted regressor (a random forest for MonthlyCharges, going by
# the file name) that the prediction cells below call.
model_path = "models/best_random_forest_monthlycharges.pkl"
model = joblib.load(model_path)

Future Prediction

In [3]:
# Example: predict MonthlyCharges (the label printed below) for new/future data.
#
# For real use, load your own frame instead of the dummy row, e.g.
#     new_data = pd.read_csv("future_customers.csv")
# It must carry the same feature columns the model was trained on.
# NOTE(review): other cells put fractional values into the *_FE columns and
# decode them through frequency tables, so integer codes such as 2 may lie
# outside what the model saw during training -- confirm.
dummy_customer = {
    'InternetService_FE': 1,
    'StreamingTV_FE': 0,
    'StreamingMovies_FE': 1,
    'DeviceProtection_FE': 1,
    'OnlineSecurity_FE': 0,
    'OnlineBackup_FE': 1,
    'TechSupport_FE': 0,
    'MultipleLines_FE': 1,
    'PhoneService_FE': 1,
    'tenure': 12,
    'PaymentMethod_FE': 2,
    'Contract_FE': 1,
    'PaperlessBilling_FE': 1,
    'Churn': 0,
    'SeniorCitizen': 0,
    'Partner_No': 1,
    'Partner_Yes': 0,
}
# One-row frame; dict insertion order fixes the column order handed to the model.
new_data = pd.DataFrame({feature: [value] for feature, value in dummy_customer.items()})

# Score the row and show the single prediction.
future_pred = model.predict(new_data)
print("Predicted MonthlyCharges:", future_pred[0])

Predicted MonthlyCharges: 89.24028869047623


In [4]:
# Example: predict MonthlyCharges (the label printed below) for a row whose
# *_FE columns hold fractional, frequency-style values.
#
# For real use, load your own frame instead of the dummy row, e.g.
#     new_data = pd.read_csv("future_customers.csv")
# It must carry the same feature columns the model was trained on.
customer_row = {
    'InternetService_FE': 0.3437,
    'StreamingTV_FE': 0.3989,
    'StreamingMovies_FE': 0.3954,
    'DeviceProtection_FE': 0.3438,
    'OnlineSecurity_FE': 0.2866,
    'OnlineBackup_FE': 0.4384,
    'TechSupport_FE': 0.49311,
    'MultipleLines_FE': 0.4813,
    'PhoneService_FE': 0.9031,
    'tenure': 34,
    'PaymentMethod_FE': 0.2288,
    'Contract_FE': 0.2091,
    'PaperlessBilling_FE': 0.4077,
    'Churn': 0,
    'SeniorCitizen': 0,
    'Partner_No': 0,
    'Partner_Yes': 1,
}
# One-row frame; dict insertion order fixes the column order handed to the model.
new_data = pd.DataFrame({feature: [value] for feature, value in customer_row.items()})

# Score the row and show the single prediction.
future_pred = model.predict(new_data)
print("Predicted MonthlyCharges:", future_pred[0])

Predicted MonthlyCharges: 55.36302662147579


In [5]:
######################################################################################

Decode

In [6]:
import joblib

# Restore the frequency-encoder dictionary; it is indexed below by the
# original (un-suffixed) column name.
encoders_path = 'models/frequency_encoders.pkl'
freq_encoders = joblib.load(encoders_path)

def decode_frequency(value, freq_dict):
    """Return the category whose stored frequency is closest to ``value``.

    Decoding is a nearest-neighbour lookup: the absolute distance between
    ``value`` and every frequency in ``freq_dict`` is computed and the
    category with the smallest distance wins.

    Parameters
    ----------
    value : float
        Frequency-encoded value to decode.
    freq_dict : mapping
        ``{category: frequency}`` table for a single column (anything that
        supports ``len()`` and ``.items()``).

    Returns
    -------
    object
        The key of ``freq_dict`` nearest to ``value``. On a tie the first
        such key in iteration order is returned.

    Raises
    ------
    ValueError
        If ``freq_dict`` is empty, or if ``value`` is missing (NaN/None).
    """
    if len(freq_dict) == 0:
        raise ValueError("freq_dict is empty; there is no category to decode to")
    # Every comparison against NaN is False, so without this guard min() would
    # quietly hand back the first category for a missing value.
    if pd.isna(value):
        raise ValueError("cannot decode a missing (NaN) frequency value")
    return min(freq_dict.items(), key=lambda item: abs(item[1] - value))[0]

# Frequency-encoded columns of `new_data` to translate back into labels.
# NOTE(review): 'PhoneService_FE' is also part of the model input but is not
# listed here, so it is never decoded -- confirm whether that is intentional.
fe_columns = [
    'MultipleLines_FE', 'InternetService_FE', 'OnlineSecurity_FE',
    'OnlineBackup_FE', 'DeviceProtection_FE', 'TechSupport_FE', 'StreamingTV_FE',
    'StreamingMovies_FE', 'Contract_FE', 'PaperlessBilling_FE', 'PaymentMethod_FE',
]

# The encoder dictionary is keyed by the original column name, i.e. the
# feature name with '_FE' stripped. Only the first row of `new_data` is decoded.
decoded = {
    fe_name: decode_frequency(
        new_data[fe_name].iloc[0],
        freq_encoders[fe_name.replace('_FE', '')],
    )
    for fe_name in fe_columns
}

print("Decoded frequency encoded values:")
for fe_name, label in decoded.items():
    print(f"{fe_name}: {label}")

Decoded frequency encoded values:
MultipleLines_FE: No
InternetService_FE: DSL
OnlineSecurity_FE: Yes
OnlineBackup_FE: No
DeviceProtection_FE: Yes
TechSupport_FE: No
StreamingTV_FE: No
StreamingMovies_FE: No
Contract_FE: One year
PaperlessBilling_FE: No
PaymentMethod_FE: Mailed check


In [7]:
def decode_onehot(row):
    """Recover the Partner label from a one-hot encoded row.

    Only ``row['Partner_Yes']`` is inspected: a value equal to 1 gives
    "Yes"; any other value gives "No".
    """
    if row['Partner_Yes'] == 1:
        return "Yes"
    return "No"

# Decode the Partner flag from the first row of new_data and show it.
first_row = new_data.iloc[0]
partner = decode_onehot(first_row)
print(f"Partner: {partner}")

Partner: Yes


In [19]:
import pandas as pd
import joblib

def _encode_customer_features(raw, encoders):
    """Turn human-readable customer rows into the numeric columns the model uses.

    Parameters
    ----------
    raw : pandas.DataFrame
        One row per customer. Must contain ``Churn`` ("Yes"/"No"), ``Partner``
        ("Yes"/"No") and every column that has an entry in ``encoders``.
    encoders : mapping
        ``{column name: {category: frequency}}`` as loaded from
        ``models/frequency_encoders.pkl``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``raw`` is not modified) in which ``Churn`` is 0/1, each
        encoded column ``X`` is replaced by ``X_FE``, and ``Partner`` is
        replaced by the ``Partner_Yes`` / ``Partner_No`` dummies. Labels that
        are missing from a mapping come out as NaN.
    """
    encoded = raw.copy()

    # Churn arrives as "Yes"/"No"; the feature matrix needs 0/1.
    encoded['Churn'] = encoded['Churn'].map({'No': 0, 'Yes': 1})

    # Build the Partner dummies from the raw labels for every row, before any
    # column is dropped, so the result does not depend on the frame having
    # exactly one row, a default index, or on 'Partner' surviving the encoding.
    partner_yes = (raw['Partner'] == 'Yes').astype(int)
    partner_no = (raw['Partner'] == 'No').astype(int)

    # Frequency-encode every column that has a table.
    for col, freq_map in encoders.items():
        encoded[col + '_FE'] = encoded[col].map(freq_map)

    # Drop the raw categorical columns; 'Churn' stays because it is a feature.
    consumed = [
        col for col in encoded.columns
        if col == 'Partner' or (col in encoders and col != 'Churn')
    ]
    encoded = encoded.drop(columns=consumed)

    encoded['Partner_Yes'] = partner_yes
    encoded['Partner_No'] = partner_no
    return encoded


# 1. Original (decoded) values (Churn as "Yes"/"No")
raw_customer = pd.DataFrame({
    'InternetService': ['DSL'],
    'StreamingTV': ['No'],
    'StreamingMovies': ['No'],
    'DeviceProtection': ['Yes'],
    'OnlineSecurity': ['Yes'],
    'OnlineBackup': ['No'],
    'TechSupport': ['No'],
    'MultipleLines': ['No'],
    'PhoneService': ['Yes'],
    'tenure': [34],
    'PaymentMethod': ['Mailed check'],
    'Contract': ['One year'],
    'PaperlessBilling': ['No'],
    'Churn': ['No'],           # <-- "Yes" or "No" allowed
    'SeniorCitizen': [0],
    # NOTE(review): the one-hot decode cell above printed "Partner: Yes" for
    # the row the other values appear to come from -- confirm 'No' is intended.
    'Partner': ['No']
})

# 2. Load frequency encoders
freq_encoders = joblib.load('models/frequency_encoders.pkl')

# 3-5. Churn -> 0/1, frequency-encode the categoricals, one-hot encode Partner.
# The encoded frame keeps the historical name `original_data`; the untouched
# input stays available as `raw_customer`.
original_data = _encode_customer_features(raw_customer, freq_encoders)

# 6. Select and order columns as in training (including Churn)
model_features = [
    'InternetService_FE', 'StreamingTV_FE', 'StreamingMovies_FE',
    'DeviceProtection_FE', 'OnlineSecurity_FE', 'OnlineBackup_FE', 'TechSupport_FE',
    'MultipleLines_FE', 'PhoneService_FE', 'tenure', 'PaymentMethod_FE', 'Contract_FE',
    'PaperlessBilling_FE', 'Churn', 'SeniorCitizen', 'Partner_No', 'Partner_Yes'
]
X_new = original_data[model_features]

# Series.map leaves NaN for labels it does not know (typos, unseen categories,
# a Churn value other than "Yes"/"No"). Stop here instead of passing NaN to
# the model.
nan_features = X_new.columns[X_new.isna().any()].tolist()
if nan_features:
    raise ValueError(
        "NaN in model input column(s) " + ", ".join(nan_features)
        + "; check for labels the encoders do not contain"
    )

# 7. Load model and predict
model = joblib.load("models/best_random_forest_monthlycharges.pkl")
predicted_monthlycharges = model.predict(X_new)[0]

# 8. Churn label mapping for output (computed for display; not printed below)
churn_value = original_data['Churn'].iloc[0]
churn_label = "Yes" if churn_value == 1 else "No"

print("Predicted MonthlyCharges:", predicted_monthlycharges)

Predicted MonthlyCharges: 55.36302662147579
