In [14]:
import numpy as np
import pandas as pd

# Seed for reproducibility
np.random.seed(42)

# Larger dataset (e.g., 1000 samples)
n_samples = 100000

# Features
age = np.random.randint(18, 70, n_samples)
time_on_site = np.random.normal(5, 1.5, n_samples)
income = np.random.randint(20000, 120000, n_samples)

# Improved logic for target generation
clicked_ad = (
    (time_on_site * 0.6) + (age * 0.04) + (income / 50000) + np.random.randn(n_samples) * 0.3
) > 5.5  # clearly adjusted threshold for clearer separation
clicked_ad = clicked_ad.astype(int)

# DataFrame
df = pd.DataFrame({
    'Age': age,
    'DailyTimeOnSite': time_on_site,
    'Income': income,
    'ClickedAd': clicked_ad
})

# Save larger dataset
df.to_csv('ad_clicks_large.csv', index=False)
print("Large synthetic dataset created and saved!")


Large synthetic dataset created and saved!


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load dataset
df = pd.read_csv('ad_clicks.csv')

# Features & target
X = df[['Age', 'DailyTimeOnSite', 'Income']]
y = df['ClickedAd']

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Explicitly set multi_class='ovr'
model = LogisticRegression(max_iter=1000, solver='lbfgs', multi_class='ovr')
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, predictions))

# Save explicitly trained model
joblib.dump(model, 'logistic_model_ovr.pkl')
print("Explicitly trained model saved successfully!")


Accuracy: 0.72
              precision    recall  f1-score   support

           0       0.73      0.76      0.74        21
           1       0.72      0.68      0.70        19

    accuracy                           0.73        40
   macro avg       0.72      0.72      0.72        40
weighted avg       0.72      0.72      0.72        40

Explicitly trained model saved successfully!


In [20]:
import joblib
import coremltools as ct

# Load the saved logistic regression model
model = joblib.load('logistic_model_ovr.pkl')

# Define the input feature names explicitly
input_features = ['Age', 'DailyTimeOnSite', 'Income']

# Clearly define the output feature name and numeric labels
output_feature_name = 'ClickedAd'
class_labels = [0, 1]  # Numeric labels clearly matching trained model

# Convert model to CoreML format
coreml_model = ct.converters.sklearn.convert(
    model,
    input_features=input_features,
    output_feature_names=output_feature_name
)

# Save CoreML model
coreml_model.save('AdClickPredictor.mlmodel')
print("✅ CoreML model created and saved successfully!")


✅ CoreML model created and saved successfully!
