In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score



In [2]:
# Reading the train and test datasets
train_data = pd.read_csv('https://raw.githubusercontent.com/dsrscientist/dataset5/main/termdeposit_train.csv')
test_data = pd.read_csv('https://raw.githubusercontent.com/dsrscientist/dataset5/main/termdeposit_test.csv')



In [3]:
# Encoding categorical variables using LabelEncoder
le = LabelEncoder()
for feature in train_data.columns:
    if train_data[feature].dtype == 'object':
        train_data[feature] = le.fit_transform(train_data[feature])
for feature in test_data.columns:
    if test_data[feature].dtype == 'object':
        test_data[feature] = le.fit_transform(test_data[feature])



In [4]:
# Separating features and target variable for training data
X = train_data.drop('subscribed', axis=1)
y = train_data['subscribed']



In [5]:
# Splitting the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:

# Creating the model and fitting the data
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)



In [7]:
# Predicting on the validation set and calculating accuracy
y_pred = model.predict(X_val)
acc = accuracy_score(y_val, y_pred)
print('Validation Accuracy:', acc)



Validation Accuracy: 0.9097946287519747


In [8]:
# Predicting on the test set and saving the results
test_pred = model.predict(test_data)
output = pd.DataFrame({'ID': test_data['ID'], 'subscribed': test_pred})
output.to_csv('termdeposit_pred.csv', index=False)
print('Predictions saved to termdeposit_pred.csv')


Predictions saved to termdeposit_pred.csv
