# Importing dependencies

In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,StandardScaler, PowerTransformer

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import precision_score, accuracy_score

## Load the data

In [None]:
# Read the sales records from the CSV (path is relative to the working
# directory) and preview the first three rows to sanity-check the parse.
df = pd.read_csv(filepath_or_buffer='consumer_electronics_sales_data.csv')
df.head(n=3)

### Preprocessing

In [3]:
# Remove the 'ProductID' column before the feature matrix is built.
# errors='ignore' keeps this cell idempotent: re-running it once the column
# is already gone is a no-op instead of raising KeyError.
df = df.drop(columns='ProductID', errors='ignore')

# Train Test Split

In [5]:
# Separate the frame into the label column (y) and the remaining
# predictor columns (x).
target_col = 'PurchaseIntent'
y = df[target_col]
x = df.drop(columns=[target_col])

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
splits = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
x_train, x_test, y_train, y_test = splits

# Column Transformer

In [21]:
# Per-column preprocessing. Columns are selected by positional index into the
# feature frame; any column not listed is passed through unchanged.
# NOTE(review): the positions depend on the column order of `x` — confirm they
# still line up if the CSV schema ever changes.
one_hot_step = (
    'one_hot',
    OneHotEncoder(sparse_output=False, drop='first'),
    [0, 1],
)
scaling_step = ('standardisation', StandardScaler(), [3, 4, 5, 6])
# NOTE(review): Box-Cox only accepts strictly positive inputs — confirm that
# column 2 never contains zeros or negative values.
power_step = (
    'Distribution',
    PowerTransformer(method='box-cox', standardize=True),
    [2],
)

clf = ColumnTransformer(
    transformers=[one_hot_step, scaling_step, power_step],
    remainder='passthrough',
)

# Pipeline

In [69]:
# Chain the column preprocessing with an L2-penalised logistic regression and
# fit the whole pipeline on the training split.
log_reg = LogisticRegression(
    penalty='l2',
    solver='newton-cg',
    max_iter=800,
    random_state=42,
    verbose=2,
)
pipe = Pipeline(steps=[
    ('transformation', clf),
    ('model', log_reg),
])

# fit() returns the pipeline itself, so leaving the call as the last
# expression also renders the fitted pipeline in the cell output.
pipe.fit(x_train, y_train)

## Saving the Pipeline as HTML

In [None]:
from sklearn.utils import estimator_html_repr

# Build the HTML representation of the pipeline and leave it as the cell's
# last expression so the notebook displays the resulting string.
pipeline_html = estimator_html_repr(pipe)
pipeline_html

In [94]:
# Write the pipeline's HTML representation to disk as UTF-8 text.
# NOTE(review): "ppeline.html" looks like a typo for "pipeline.html"; the name
# is kept as-is in case other artefacts already refer to it — confirm.
with open("ppeline.html", mode="w", encoding='utf-8') as html_file:
    rendered = estimator_html_repr(pipe)
    html_file.write(rendered)

In [70]:
# Predict labels for the held-out rows; the pipeline applies the fitted
# preprocessing before the model.
pred = pipe.predict(X=x_test)

# Metrics

In [71]:
# Accuracy of the predictions against the held-out labels.
accuracy_score(y_true=y_test, y_pred=pred)

0.8522222222222222

In [73]:
# Precision of the predictions against the held-out labels.
precision_score(y_true=y_test, y_pred=pred)

0.8559077809798271

# Saving the Model

In [79]:
import pickle

In [82]:
# Persist the DataFrame `df` and the fitted pipeline `pipe` as pickle files.
# Context managers guarantee each file handle is flushed and closed even if
# pickling fails part-way, instead of relying on garbage collection to close
# the anonymous open() handles.
with open('df.pkl', 'wb') as df_file:
    pickle.dump(df, df_file)

with open('pipe.pkl', 'wb') as pipe_file:
    pickle.dump(pipe, pipe_file)