In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer

In [35]:
# Read the customer churn CSV into a DataFrame.
# The path is kept in one named constant so it is easy to spot and change.
DATA_PATH = '/content/Customer-Churn.csv'
data = pd.read_csv(DATA_PATH)

In [36]:
# Data preprocessing (modify as per your dataset)
# Drop customerID as it's not relevant for regression.
# errors="ignore" keeps this cell idempotent: re-running it after the column
# has already been removed is a no-op instead of raising KeyError.
data = data.drop(columns=["customerID"], errors="ignore")

In [37]:
# Convert 'TotalCharges' to a numeric column.
# errors="coerce" turns any entry that cannot be parsed as a number into NaN.
total_charges_numeric = pd.to_numeric(data["TotalCharges"], errors="coerce")
data["TotalCharges"] = total_charges_numeric

In [38]:
# Convert the target variable 'Churn' to binary (1 for 'Yes' and 0 for 'No').
# The guard makes the cell safe to re-run: once the column already holds only
# 0/1, mapping it through {'Yes', 'No'} again would turn every value into NaN.
if not data['Churn'].isin([0, 1]).all():
    data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})

In [39]:
# One-hot encode the categorical feature columns listed below.
# drop_first=True omits the first level of each column from the dummy set.
categorical_cols = [
    "gender",
    "Partner",
    "Dependents",
    "PhoneService",
    "MultipleLines",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "Contract",
    "PaperlessBilling",
    "PaymentMethod",
]

data = pd.get_dummies(
    data,
    columns=categorical_cols,
    drop_first=True,
)

In [40]:
# Handle missing values with SimpleImputer (using 'most_frequent' strategy).
#
# Only the feature columns are imputed. Filling a missing target would
# fabricate labels, so rows without a 'Churn' value are dropped instead.
# The original row index is preserved and infer_objects() restores concrete
# dtypes in case the imputer returned an object-typed array.
#
# NOTE(review): the imputer is fitted on every row before the train/test split
# performed in a later cell, so test rows influence the fill values. Fit it on
# the training split only if a strictly leak-free evaluation is required.
data = data.dropna(subset=['Churn'])
feature_cols = data.columns.drop('Churn')

imputer = SimpleImputer(strategy='most_frequent')
features_imputed = pd.DataFrame(
    imputer.fit_transform(data[feature_cols]),
    columns=feature_cols,
    index=data.index,
).infer_objects()

# Re-attach the untouched target and restore the original column order.
data = features_imputed.assign(Churn=data['Churn'])[data.columns.tolist()]

In [41]:
# Separate the feature matrix from the 'Churn' target, then hold out 20% of
# the rows for testing (fixed seed so the split is reproducible).
X, y = data.drop(columns='Churn'), data['Churn']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [42]:
# Build a 100-tree random forest regressor with a fixed seed and fit it on the
# training split.
forest_params = {"n_estimators": 100, "random_state": 42}
rf_model = RandomForestRegressor(**forest_params)
rf_model.fit(X_train, y_train)

In [47]:
# Score the held-out rows with the fitted forest and show the raw predictions
y_pred = rf_model.predict(X=X_test)
print(y_pred)

[0.71 0.18 0.02 ... 0.04 0.   0.42]


In [44]:
# Compare the predictions with the true test targets: mean squared error and R^2
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

In [45]:
# Report both evaluation metrics, one per line
print(
    f"Mean Squared Error: {mse}",
    f"R-squared (R2) Score: {r2}",
    sep="\n",
)

Mean Squared Error: 0.14479682041005426
R-squared (R2) Score: 0.2561036559967371
