In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Load data
data = pd.read_csv("DATA/data_commerce.csv")

# Encode target variable
data['Status'] = data['Status'].map({'Delivered': 1, 'Cancelled': 0, 'Pending': 0})

# Convert dates to datetime
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])
data['RegistrationDate'] = pd.to_datetime(data['RegistrationDate'])

# Feature engineering: Days since registration
data['DaysSinceRegistration'] = (data['InvoiceDate'] - data['RegistrationDate']).dt.days

# Select features and target
features = ['Quantity', 'UnitPrice', 'Age', 'Stock', 'Rating', 'DaysSinceRegistration']
X = data[features]
y = data['Status']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numeric features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluate model
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, y_proba))


Classification Report:
              precision    recall  f1-score   support

           0       0.67      1.00      0.80     72164
           1       0.00      0.00      0.00     36218

    accuracy                           0.67    108382
   macro avg       0.33      0.50      0.40    108382
weighted avg       0.44      0.67      0.53    108382

ROC-AUC Score: 0.49954599909375585


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# Load data
data = pd.read_csv("DATA/data_commerce.csv")

# Encode target variable
data['Status'] = data['Status'].map({'Delivered': 1, 'Cancelled': 0, 'Pending': 0})

# Convert dates to datetime
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])
data['RegistrationDate'] = pd.to_datetime(data['RegistrationDate'])

# Feature engineering: Days since registration
data['DaysSinceRegistration'] = (data['InvoiceDate'] - data['RegistrationDate']).dt.days

# Select features and target
features = ['Quantity', 'UnitPrice', 'Age', 'Stock', 'Rating', 'DaysSinceRegistration']
X = data[features]
y = data['Status']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numeric features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Save the model and scaler
joblib.dump(model, "logistic_model.pkl")
joblib.dump(scaler, "scaler.pkl")

print("Model trained and saved!")


Model trained and saved!


In [8]:
import joblib
import numpy as np

# Load the model and scaler
model = joblib.load("logistic_model.pkl")
scaler = joblib.load("scaler.pkl")

# User input
print("Enter the details below:")
quantity = float(input("Quantity: "))
unit_price = float(input("Unit Price: "))
age = int(input("Customer Age: "))
stock = int(input("Stock Available: "))
rating = float(input("Product Rating: "))
days_since_registration = int(input("Days Since Registration: "))

# Prepare input for prediction
input_data = np.array([[quantity, unit_price, age, stock, rating, days_since_registration]])
input_scaled = scaler.transform(input_data)

# Predict
probability = model.predict_proba(input_scaled)[0][1]  # Probability of purchase
prediction = model.predict(input_scaled)[0]  # Binary prediction (0 or 1)

# Output result
if prediction == 1:
    print(f"The customer is likely to purchase the product with a probability of {probability:.2f}.")
else:
    print(f"The customer is unlikely to purchase the product with a probability of {probability:.2f}.")


Enter the details below:
The customer is unlikely to purchase the product with a probability of 0.32.




###THE THIRD TRY

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

# Load data
data = pd.read_csv("DATA/data_commerce.csv")

# Convert dates to datetime
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])
data['RegistrationDate'] = pd.to_datetime(data['RegistrationDate'])

# Feature engineering: Days since registration
data['DaysSinceRegistration'] = (data['InvoiceDate'] - data['RegistrationDate']).dt.days

# Select features and target (Stock needed in the future)
features = ['Quantity', 'UnitPrice', 'Age', 'Stock', 'Rating', 'DaysSinceRegistration']
target = 'Stock'

X = data[features]
y = data[target]

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numeric features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train regression model
model = RandomForestRegressor()
model.fit(X_train, y_train)

# Save the model and scaler
joblib.dump(model, "stock_prediction_model.pkl")
joblib.dump(scaler, "scaler2.pkl")

# Evaluate the model
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"Model trained! Mean Absolute Error: {mae}")


Model trained! Mean Absolute Error: 0.0


In [2]:
import joblib
import numpy as np

# Load the model and scaler
model = joblib.load("stock_prediction_model.pkl")
scaler = joblib.load("scaler.pkl")

# User input
print("Enter the details below:")
quantity = float(input("Quantity sold: "))
unit_price = float(input("Unit Price: "))
age = int(input("Customer Age: "))
stock = int(input("Current Stock: "))
rating = float(input("Product Rating: "))
days_since_registration = int(input("Days Since Registration: "))

# Prepare input for prediction
input_data = np.array([[quantity, unit_price, age, stock, rating, days_since_registration]])
input_scaled = scaler.transform(input_data)

# Predict
predicted_stock = model.predict(input_scaled)[0]

# Output result
print(f"The predicted stock needed for the future is: {predicted_stock:.2f} units.")


Enter the details below:


The predicted stock needed for the future is: 10.00 units.


