In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Data Loading 

In [30]:
# Locations of the raw Bank Marketing CSV files (train and test splits)
train_data_url = "https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Bank%20Marketing/termdeposit_train.csv"
test_data_url = "https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Bank%20Marketing/termdeposit_test.csv"

# Download both files, train first and then test, into DataFrames
train_data, test_data = (
    pd.read_csv(csv_url) for csv_url in (train_data_url, test_data_url)
)


# Data preprocessing

In [31]:
# Data preprocessing
# Features are every training column except the row identifier and the target.
X_train = train_data.drop(columns=["ID", "subscribed"])
y_train = train_data["subscribed"]

# The test file has no target column, but the prediction cell further down
# writes test_data["subscribed"] in place. Drop that column when it is present
# so that re-running this cell after predicting still yields the same feature
# set as X_train. "ID" is dropped strictly, so a missing ID still raises.
X_test = (
    test_data
    .drop(columns=["ID"])
    .drop(columns=["subscribed"], errors="ignore")
)

# Encode categorical variables
# Every object-dtype column is treated as categorical and one-hot encoded;
# handle_unknown="ignore" keeps predict() from failing on categories that were
# not seen during fit. All remaining columns are passed through unchanged.
categorical_cols = X_train.select_dtypes(include=["object"]).columns.tolist()
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    ],
    remainder="passthrough"
)

# Random Forest Classifier 

In [32]:
# Model training
# Chain the encoder and the classifier in one pipeline so the exact same
# preprocessing is applied at fit time and at predict time.
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    # Random forests are stochastic; a fixed seed makes the fitted model,
    # the predictions and the saved CSV reproducible across runs.
    ("classifier", RandomForestClassifier(random_state=42))
])

# Fit the model
model.fit(X_train, y_train)

# Make predictions on test data
test_predictions = model.predict(X_test)

# Save predictions to a file
# NOTE: this adds a "subscribed" column to test_data in place before the
# frame (original columns plus predictions) is written to disk.
test_data["subscribed"] = test_predictions
test_data.to_csv("predicted_test_data.csv", index=False)

# Predictions for Subscriptions

In [33]:
# Pair every test-set ID with the label the model predicted for it
predictions_df = pd.DataFrame(
    {
        "ID": test_data["ID"],
        "Predicted_Subscribed": test_predictions,
    }
)

# Show a heading line followed by the table itself
print("Predictions for test data:", predictions_df, sep="\n")


Predictions for test data:
          ID Predicted_Subscribed
0      38441                   no
1      40403                   no
2       3709                   no
3      37422                   no
4      12527                   no
...      ...                  ...
13559  23465                   no
13560  11743                   no
13561  28292                   no
13562  45163                  yes
13563  34839                   no

[13564 rows x 2 columns]
