In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Train and evaluate a logistic-regression classifier on the leads dataset.
# Pipeline: load CSV -> integer-encode categorical columns -> split into
# train/test -> scale using training statistics only -> fit -> print metrics.

# Load the dataset
df = pd.read_csv("leads_data.csv")

# Encode categorical variables as integer codes. Each fitted encoder is kept
# in `label_encoders` so codes can later be mapped back to the original
# labels via `label_encoders[col].inverse_transform(...)`.
# NOTE(review): the integer codes are treated by the linear model as ordered
# magnitudes; one-hot encoding may suit the feature columns better — confirm.
label_encoders = {}
categorical_columns = ["email_domain", "country", "job_title", "industry", "purchase_history"]
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Define feature columns and target variable
# Assumes 'purchase_history' is the target variable and that every remaining
# column is numeric after the encoding above — TODO confirm against the CSV.
features = df.drop("purchase_history", axis=1)
y = df["purchase_history"]

# Split the data into training and test sets BEFORE scaling, so that the
# scaler's mean/variance are learned from the training rows only. Fitting the
# scaler on the full dataset would leak test-set statistics into training and
# make the reported metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

# Standardize the data: fit on train, then apply the same transform to test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` as the scaled full feature matrix (ndarray) for any later code
# that reads it; it is scaled with the training-set statistics.
X = scaler.transform(features)

# Initialize and train the model
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = log_reg_model.predict(X_test)

# Evaluate the model: overall accuracy plus per-class precision/recall/F1
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Regressão Logística - Precisão: {accuracy}")
print("Regressão Logística - Relatório de Classificação:")
print(report)
