In [8]:
pip install scikit-learn==1.3.2



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file into a DataFrame
data = pd.read_csv('/content/latest.csv')

# Feature engineering - creating a new column for transaction type
data['transaction_type'] = data.apply(lambda row: 1 if row['credit_amount'] != "0" else 0, axis=1)
label_encoder = LabelEncoder()
data['transaction_type'] = label_encoder.fit_transform(data['transaction_type'])

data['date'] = label_encoder.fit_transform(data['date'])

# Define features and target variable
features = ['account_no', 'credit_amount', 'debit_amount', 'transaction_type']
target = 'result'

# Split the data into training and testing sets
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

print("started fitting model")
# Train a RandomForestClassifier (you can choose other models as well)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))



started fitting model
Accuracy: 0.6394509525499941
              precision    recall  f1-score   support

  Successful       0.70      0.85      0.77     65068
Unsuccessful       0.30      0.15      0.20     27893

    accuracy                           0.64     92961
   macro avg       0.50      0.50      0.48     92961
weighted avg       0.58      0.64      0.60     92961



In [10]:
import json
from sklearn.linear_model import LogisticRegression
import sklearn.externals as extjoblib
import joblib
import pickle

pickle.dump(model, open('model3.pkl', 'wb'))


In [12]:
new_input = pd.DataFrame({
    'account_no': [409000611074],
    'credit_amount': [0],
    'debit_amount': [9],
    'transaction_type': 1
})

probability = model.predict_proba(new_input)
print("Probability of being successful/unsuccessful:", probability)

Probability of being successful/unsuccessful: [[0.25720238 0.74279762]]
