In [13]:
import pandas as pd
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Read the Excel file into a Pandas DataFrame.
df = pd.read_csv("people_wealth_status.csv")

# Convert the categorical features to numeric values.
x_encoder = LabelEncoder()
y_encoder = LabelEncoder()
df["race_encoded"] = x_encoder.fit_transform(df["race"])
df["wealth_status_encoded"] = y_encoder.fit_transform(df["wealth_status"])

# Split the data into features (X) and target variable (y).
X = df[["race_encoded"]]
y = df["wealth_status_encoded"]

# Initialize the LightGBM dataset.
lgb_data = lgb.Dataset(X, label=y)

# Set the parameters for the LightGBM model.
params = {
    "objective": "multiclass",
    "num_classes": len(y_encoder.classes_)
}

# Train the LightGBM model.
model = lgb.train(params, lgb_data, num_boost_round=100)

# Make predictions on the training data.
y_pred = model.predict(X)
y_pred_labels = np.argmax(y_pred, axis=1)  # Obtain the index of the maximum predicted value for each sample

# Decode the labels back to original values.
predicted_wealth_status = y_encoder.inverse_transform(y_pred_labels)

# Evaluate the model's accuracy.
accuracy = (predicted_wealth_status == df["wealth_status"]).mean()
print(f"Accuracy: {accuracy}")


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2
[LightGBM] [Info] Number of data points in the train set: 43, number of used features: 1
[LightGBM] [Info] Start training from score -0.670158
[LightGBM] [Info] Start training from score -0.716678
Accuracy: 1.0


In [14]:
def predict_wealth_status(race):
    # Check if the race value is present in the encoder's classes.

    # Encode the race value.
    race_to_predict_encoded = x_encoder.transform([race])
    print(race_to_predict_encoded)

    # Make a single prediction.
    prediction = model.predict(pd.DataFrame({"race_encoded": race_to_predict_encoded}))
    print(prediction)

    # Decode the predicted label.
    predicted_wealth_status_encoded = prediction.argmax()
    print(predicted_wealth_status_encoded)
    predicted_wealth_status = y_encoder.inverse_transform([predicted_wealth_status_encoded])
    print(predicted_wealth_status)

    return predicted_wealth_status

In [15]:
# Test the predict_wealth_status function.
race = "indian"
wealth_status = predict_wealth_status(race)
print(f"Wealth status for {race}: {wealth_status}")

[1]
[[9.99978582e-01 2.14183119e-05]]
0
['b40']
Wealth status for indian: ['b40']


In [16]:
model.save_model("wealth_status_model.txt")

<lightgbm.basic.Booster at 0x271dd33da50>

In [4]:
new_encoder = LabelEncoder()
new_encoder.fit_transform(["malay", "chinese", "indian", "others"])

array([2, 0, 1, 3], dtype=int64)

In [66]:
new_encoder.transform(['chinese'])

array([0])

In [67]:
new_encoder.inverse_transform([0, 0, 1])

array(['chinese', 'chinese', 'indian'], dtype='<U7')

In [46]:
import lightgbm as lgb

# Create a LightGBM dataset from the training data
train_data = lgb.Dataset(X_train, label=y_train)

# Set the parameters for the LightGBM model
params = {
    'objective': 'binary',  # Use binary classification as the objective
    'metric': 'binary_logloss'  # Evaluation metric
}

# Train the LightGBM model
model = lgb.train(params, train_data)


[LightGBM] [Info] Number of positive: 2, number of negative: 2
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 4, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


In [47]:
from sklearn.metrics import accuracy_score

# Make predictions on the testing set
y_pred = model.predict(X_test)
y_pred = [round(value) for value in y_pred]  # Convert predicted probabilities to binary values

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")




Accuracy: 0.5


In [60]:
print(X_test)

X_pred = pd.DataFrame({'race_encoded': [1]})
Y_out = model.predict(X_pred)
Y_out = [round(value) for value in Y_out]
print(Y_out)


   race_encoded
0             1
1             0
[0]
