In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Read the raw customer table from disk.
df = pd.read_csv('customer (2).csv')

# One-hot encode every string-typed column except the target, dropping the
# first level of each so the dummies are not redundant, then turn the
# Yes/No target into 1/0 in the same pipeline.
categorical_cols = (
    df.select_dtypes(include='object')
    .columns
    .drop('Churn')
)
df_encoded = pd.get_dummies(
    df,
    columns=categorical_cols,
    drop_first=True,
).assign(Churn=lambda frame: frame['Churn'].map({'Yes': 1, 'No': 0}))

# Target vector, and a feature matrix without the target and the identifier
# (a missing 'CustomerID' column is tolerated via errors='ignore').
y = df_encoded['Churn']
X = df_encoded.drop(columns=['Churn', 'CustomerID'], errors='ignore')

# Hold out 20% of the rows, then fit a 100-tree random forest on the rest.
# Both the split and the forest are seeded with 42 so reruns are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# fit() returns the estimator itself, so `rf` is the trained model.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict churn probabilities for all customers.
# predict_proba returns one column per class in rf.classes_ order; the target
# was encoded as 0/1 above, so column 1 is the probability of churn.
# NOTE(review): X includes the rows the model was fitted on (X_train), so most
# of these scores are in-sample and will look overconfident. Consider
# out-of-fold predictions if the scores drive real decisions.
df['Churn_Probability'] = rf.predict_proba(X)[:, 1]

# Segment customers: High risk if probability > 0.7.
# pd.cut builds right-closed bins, here (0, 0.7] and (0.7, 1]. Without
# include_lowest=True a probability of exactly 0.0 falls outside every bin and
# the customer is labelled NaN instead of 'Low/Medium'.
df['Risk_Segment'] = pd.cut(
    df['Churn_Probability'],
    bins=[0, 0.7, 1],
    labels=['Low/Medium', 'High'],
    include_lowest=True,
)

# Show high-risk customers
high_risk_customers = df[df['Risk_Segment'] == 'High']
print("High-risk customers:\n", high_risk_customers[['CustomerID', 'Churn_Probability', 'Risk_Segment']])

# Retention strategy suggestion
print("\nRetention Strategy: Offer discounts or loyalty benefits to high-risk customers to reduce churn.")

# --- Deployment Example with Streamlit ---
# The triple-quoted string below is NOT executed by this cell; it is the source
# of a standalone app. Save its contents as app.py and run: streamlit run app.py
#
# The app loads 'rf_model.pkl', which the code above does not write. Persist the
# trained model first, e.g.:
#     import pickle
#     with open('rf_model.pkl', 'wb') as f:
#         pickle.dump(rf, f)

"""
import streamlit as st
import pandas as pd
import pickle

# Load model and data
# pickle can execute arbitrary code while loading: only open model files you
# produced yourself.
with open('rf_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
df = pd.read_csv('customer (2).csv')
categorical_cols = df.select_dtypes(include='object').columns.drop('Churn')

st.title("Customer Churn Prediction")

# User input: a dropdown per categorical column, a bounded number box per
# numeric column (identifier and target are skipped).
user_input = {}
for col in df.columns:
    if col in categorical_cols:
        user_input[col] = st.selectbox(col, df[col].unique())
    elif col not in ['CustomerID', 'Churn']:
        user_input[col] = st.number_input(col, float(df[col].min()), float(df[col].max()))

# Preprocess input
input_df = pd.DataFrame([user_input])
# Do NOT pass drop_first=True here: a one-row frame has a single category per
# column, so drop_first would remove every dummy and each categorical feature
# would silently fall back to 0 (the baseline category). Encode all selected
# categories and let reindex keep only the columns the model was trained on;
# a baseline-category dummy is simply discarded, missing dummies become 0.
input_encoded = pd.get_dummies(input_df, columns=categorical_cols)
input_encoded = input_encoded.reindex(columns=model.feature_names_in_, fill_value=0)

# Predict
if st.button('Predict Churn'):
    prob = model.predict_proba(input_encoded)[0, 1]
    st.write(f"Churn Probability: {prob:.2f}")
    if prob > 0.7:
        st.warning("High risk! Recommend retention offer.")
    else:
        st.success("Low/Medium risk.")
"""

High-risk customers:
      CustomerID  Churn_Probability Risk_Segment
0             1               1.00         High
1             2               1.00         High
2             3               1.00         High
3             4               1.00         High
4             5               1.00         High
..          ...                ...          ...
995         996               0.96         High
996         997               1.00         High
997         998               0.99         High
998         999               0.99         High
999        1000               1.00         High

[882 rows x 3 columns]

Retention Strategy: Offer discounts or loyalty benefits to high-risk customers to reduce churn.


