In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict, train_test_split

print("1. Downloading data directly from the internet...")


# The CSV is read straight from its raw GitHub URL, so the notebook does not
# depend on any local file path.
url = "https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv"
df = pd.read_csv(url)

print("   Success! Data downloaded.")

# CLEANING & PREPARATION
# Set the customer IDs aside before the column is dropped; they are looked up
# again by row label when the export table is built.
customer_ids = df['customerID']

df = (
    df.drop(columns=['customerID'])
    # TotalCharges is converted to numbers; entries that cannot be parsed
    # become NaN (errors='coerce') ...
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    # ... and every row containing a NaN in any column is removed. The index
    # is deliberately not reset, so row labels still match `customer_ids`.
    .dropna()
    # Encode the target as 1 (churned) / 0 (stayed).
    .assign(Churn=lambda frame: frame['Churn'].map({'Yes': 1, 'No': 0}))
)

# One-hot encode the remaining text columns; numeric columns pass through.
df_final = pd.get_dummies(df)

# TRAIN MODEL
print("2. Training the Machine Learning Model...")
X = df_final.drop('Churn', axis=1)
y = df_final['Churn']
model = RandomForestClassifier(n_estimators=10, random_state=42)

#  GENERATE RISK SCORES
# Score every customer with a forest that did NOT see that customer during
# training. Calling predict_proba on the rows a random forest was fitted on
# yields heavily overfit probabilities that mostly echo the known Churn label,
# which makes them misleading as a risk score. cross_val_predict fits a fresh
# clone of `model` on 4/5 of the rows and predicts the held-out 1/5, repeating
# until each row has exactly one out-of-fold prediction. The estimator's fixed
# random_state and the unshuffled stratified folds keep the result reproducible.
out_of_fold_proba = cross_val_predict(model, X, y, cv=5, method='predict_proba')
# Columns follow the sorted class labels (0, 1); column 1 is P(Churn = 1).
all_risk_scores = out_of_fold_proba[:, 1]

# cross_val_predict only fits clones, so fit `model` itself on all rows to
# leave a trained estimator available for any later use.
model.fit(X, y)

# CREATE EXPORT FILE
# Build the export table as a new frame: the encoded features plus the risk
# score and the customer ID. The IDs are selected by row label so they line up
# with the rows that survived cleaning.
export_df = df_final.assign(
    Churn_Probability=all_risk_scores,
    CustomerID=customer_ids.loc[df_final.index],
)

# SAVE THE RESULT
print("3. Saving the final file...")
# The relative path means the file is written to the current working directory.
export_df.to_csv(path_or_buf='Ready_For_PowerBI.csv', index=False)

# Tell the user where the file ended up.
for message in (
    "\nSUCCESS! ===============================================",
    "I have saved the file 'Ready_For_PowerBI.csv' here:",
    os.getcwd(),
    "==========================================================",
):
    print(message)

1. Downloading data directly from the internet...
   Success! Data downloaded.
2. Training the Machine Learning Model...
3. Saving the final file...

I have saved the file 'Ready_For_PowerBI.csv' here:
C:\Users\Hey
