In [9]:
import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.datasets import make_moons, load_iris, load_diabetes

import matplotlib.pyplot as plt

import joblib
import torch
import pickle
import warnings

In [None]:
# Notebook-wide settings: switch off pandas' chained-assignment check and hide
# every FutureWarning.
# NOTE(review): with these off, warnings about writing to DataFrame selections
# (as done by the cleaning cells further down) are never shown.
pd.options.mode.chained_assignment = None
warnings.simplefilter(action='ignore', category=FutureWarning)

# Load the pickled dataset from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising —
# only open this file if it comes from a trusted source.
with open("Amazon_products.pkl", 'rb') as f:
    dt = pickle.load(f)

In [31]:
# Display the object loaded from the pickle (rendered as a table in the recorded output).
dt

Unnamed: 0,asin,title,price,list_price,rating,reviews,sold_past_month,is_bestseller,is_prime,is_amazon_choice,has_sustainability_features,available_offers,amazon_choice_type,brand,free_delivery_date,fastest_delivery_date
0,B0DJK7NW1J,"15.6 Inch Laptops, Windows 11 Laptop Computers...",199.98,679.99,4.4 out of 5 stars,48,100+,False,False,False,False,,,,,
1,B0BS4BP8FB,Acer Aspire 3 A315-24P-R7VH Slim Laptop | 15.6...,279.99,321.99,4.4 out of 5 stars,39243,8K+,False,False,True,False,6,Overall Pick,,,
2,B0DKDY78K3,"Newest Gaming Laptop, Laptop with AMD Ryzen 7 ...",649.99,1699.99,4.8 out of 5 stars,18,100+,False,False,False,False,,,,,
3,B0CPL25J3W,"HP Portable Laptop, Student and Business, 14"" ...",197.35,269.00,4.1 out of 5 stars,1678,1K+,False,False,False,False,25,,,,
4,B0947BJ67M,"HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 6...",176.00,209.99,4.0 out of 5 stars,1861,4K+,False,False,False,False,50,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1237,B0B5GRGCX5,"Lenovo L15 Portable Monitor, 15.6” Display, Fu...",162.00,180.44,4.3 out of 5 stars,46,500+,False,False,False,False,28,,,,
1238,B0CH9XW8RK,ARZOPA Portable Monitor 15.6'' FHD 1080P - Ult...,85.99,,4.4 out of 5 stars,1092,3K+,False,True,False,False,4,,,"Tue, Nov 19",
1239,B0CY896H5G,"Laptop Screen Extender 14"" 1080P FHD IPS, Port...",299.99,,4.4 out of 5 stars,422,1K+,False,True,False,False,3,,,"Tue, Nov 19",
1240,B0C77WJ6F5,Yodoit Portable Monitor for Laptop 1920×1080 1...,49.99,69.99,4.0 out of 5 stars,416,1K+,False,True,False,False,,,,"Tue, Nov 19",


In [11]:
# Keep only the columns considered for modelling. `.copy()` makes `all_dt` an
# independent frame, so the cleaning steps that follow write to it directly
# rather than to a selection of `dt`.
all_dt = dt[
    ["price", "list_price", "rating", "sold_past_month", "reviews", "is_bestseller", "is_prime", "is_amazon_choice"]
].copy()

In [13]:
def str_to_float(s):
    """Parse a number written with comma separators; pass non-strings through.

    Every comma is removed from a string value before it is handed to
    ``float`` (so "1,299.99" becomes 1299.99). A value that is not a ``str``
    is returned unchanged.
    """
    if not isinstance(s, str):
        return s
    return float(s.replace(",", ""))

def stars_to_rating(s):
    """Binarise a star-rating string: 1 when the score is at least 4.5, else 0.

    The score is the first whitespace-separated token of the string (for
    example "4.8" in "4.8 out of 5 stars"). Values that are not strings are
    returned as they are.
    """
    if not isinstance(s, str):
        return s
    score = float(s.split(maxsplit=1)[0])
    return 1 if score >= 4.5 else 0

def spm(s):
    """Convert a "sold in past month" label such as "100+" or "8K+" to an int.

    Handling of string labels:
      * surrounding whitespace, a trailing "+" and thousands commas are removed;
      * a trailing "K"/"k" multiplies the number by 1000;
      * fractional counts are accepted ("1.5K+" -> 1500).

    Unlike a blind "drop the last character" approach, labels without the
    trailing "+" ("50", "1K") are parsed correctly and one-character labels
    do not raise ``IndexError``.

    Args:
        s: The raw label. Non-string values (for example NaN for a missing
            entry) are returned unchanged.

    Returns:
        The count as an ``int`` for string input, otherwise ``s`` itself.

    Raises:
        ValueError: If the string does not contain a parsable number.
    """
    if not isinstance(s, str):
        return s

    label = s.strip().rstrip("+").replace(",", "")
    multiplier = 1
    if label[-1:].upper() == "K":
        multiplier = 1000
        label = label[:-1]
    # round() guards against float artefacts such as 2.3 * 1000 == 2299.999...
    return int(round(float(label) * multiplier))

In [16]:
# Discard rows without a rating (the column used as the target further down).
# Reassigning instead of `inplace=True` avoids mutating a frame that may still
# be a selection of `dt`.
all_dt = all_dt.dropna(subset=['rating'])

In [18]:

def upd_dt(X):
    """Convert the raw text columns of ``X`` in place and return ``X``.

    ``price`` goes through ``str_to_float``, ``rating`` through
    ``stars_to_rating`` and ``sold_past_month`` through ``spm``. The frame
    passed in is modified and the same object is handed back.
    """
    converters = (
        ("price", str_to_float),
        ("rating", stars_to_rating),
        ("sold_past_month", spm),
    )
    for column, convert in converters:
        X[column] = X[column].apply(convert)
    return X

# Run the column conversions and display the converted frame.
all_dt = upd_dt(all_dt)
all_dt

Unnamed: 0,price,rating,sold_past_month
0,199.98,0,100.0
1,279.99,0,8000.0
2,649.99,1,100.0
3,197.35,0,1000.0
4,176.00,0,4000.0
...,...,...,...
1237,162.00,0,500.0
1238,85.99,0,3000.0
1239,299.99,0,1000.0
1240,49.99,0,1000.0


In [20]:
# Display the current state of the working frame.
all_dt

Unnamed: 0,price,rating,sold_past_month
0,199.98,0,100.0
1,279.99,0,8000.0
2,649.99,1,100.0
3,197.35,0,1000.0
4,176.00,0,4000.0
...,...,...,...
1237,162.00,0,500.0
1238,85.99,0,3000.0
1239,299.99,0,1000.0
1240,49.99,0,1000.0


In [19]:
# Impute missing numeric values with the column mean (pandas skips NaN when
# averaging). The result is assigned back to the frame: calling
# `fillna(..., inplace=True)` on a column selection such as `all_dt['price']`
# acts on an intermediate object and, with copy-on-write enabled (the default
# from pandas 3.0), leaves `all_dt` itself unchanged.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split — confirm that is acceptable for the evaluation.
mean_price = all_dt['price'].mean()
mean_spm = all_dt["sold_past_month"].mean()

all_dt = all_dt.fillna({"price": mean_price, "sold_past_month": mean_spm})

In [21]:
# Features: price and monthly sales as a float32 tensor, one row per product.
# NOTE(review): the two columns are fed to the network unscaled; the flat
# training loss recorded further down suggests standardising them — confirm.
X = torch.tensor(all_dt[["price", "sold_past_month"]].to_numpy(), dtype=torch.float32)
# Target: the binarised rating, as float32 because BCEWithLogitsLoss takes float targets.
y = torch.tensor(all_dt["rating"].to_numpy(), dtype=torch.float32)

In [22]:
# Hold out 25% of the rows for evaluation. A fixed `random_state` makes the
# shuffle, and therefore the membership of each split, repeatable across
# kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [27]:
class OurNonLinearNet(torch.nn.Module):
    """Three-layer fully connected binary classifier with a built-in loss and optimizer.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear. ``forward``
    returns raw logits; ``predict`` converts them to hard 0/1 labels.

    Args:
        input_features: Width of each input row.
        out_features: Number of logits produced per row.
        hidden_units: Width of both hidden layers.
        lr: Learning rate of the SGD optimizer created in the constructor
            (defaults to 0.005).
    """

    # Criterion shared by all instances: sigmoid + binary cross-entropy on raw logits.
    loss_fun = torch.nn.BCEWithLogitsLoss()

    def __init__(self, input_features, out_features, hidden_units, lr=0.005):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_features, hidden_units)
        self.relu1 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_units, hidden_units)
        self.relu2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(hidden_units, out_features)
        # Created last so that self.parameters() already covers every layer.
        self.optimizer = torch.optim.SGD(self.parameters(), lr=lr)

    def forward(self, x):
        """Return the raw (pre-sigmoid) logits for ``x``."""
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.fc3(x)
        return x

    def fit(self, X, y, n_epochs=500, print_loss=True):
        """Train on the whole of ``(X, y)`` with one optimizer step per epoch.

        Args:
            X: Input tensor, one row per sample.
            y: Float target tensor whose shape matches the logits after the
                trailing size-1 axis is removed.
            n_epochs: Index of the last epoch. The loop covers epochs
                ``0..n_epochs`` inclusive, i.e. ``n_epochs + 1`` steps.
            print_loss: When true, print the loss every 20 epochs.
        """
        for epoch in range(n_epochs + 1):
            self.train()
            # Remove only the trailing feature axis. A bare squeeze() would also
            # collapse a one-sample batch to a scalar, which no longer matches
            # the shape of `y` and makes the loss raise.
            y_logits = self(X).squeeze(-1)
            loss = self.loss_fun(y_logits, y)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if print_loss and (epoch % 20 == 0):
                print(f"Epoch {epoch}: loss = {loss.item():.5f}")

    def predict(self, x):
        """Return hard 0/1 predictions with the same shape as ``forward(x)``.

        Runs in evaluation mode without gradient tracking and restores the
        previous training/evaluation mode afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self.predict_from_logit(self(x))
        finally:
            self.train(was_training)

    def predict_from_logit(self, x_logit):
        """Map logits to 0/1 labels by rounding their sigmoid.

        ``torch.round`` rounds halves to the nearest even value, so a
        probability of exactly 0.5 becomes 0.
        """
        return torch.round(torch.sigmoid(x_logit))

In [28]:
# Build the classifier: 2 input features, 1 output logit, 800 units in each hidden layer.
net = OurNonLinearNet(input_features=2, out_features=1, hidden_units=800)

In [79]:
def learn_process(model, optimizer, n_epochs, X_train, y_train, loss_fun=None):
    """Train ``model`` on the whole of ``(X_train, y_train)``, one step per epoch.

    The leftover debugging ``print``/``break`` that stopped the loop before
    any update has been removed, and the criterion is now resolved explicitly
    instead of relying on an undefined global name.

    Args:
        model: Module returning logits for ``X_train``.
        optimizer: Optimizer bound to ``model``'s parameters.
        n_epochs: Index of the last epoch. The loop covers epochs
            ``0..n_epochs`` inclusive, i.e. ``n_epochs + 1`` steps.
        X_train: Input tensor, one row per sample.
        y_train: Float target tensor whose shape matches the logits after the
            trailing size-1 axis is removed.
        loss_fun: Criterion called as ``loss_fun(logits, targets)``. When
            omitted, ``model.loss_fun`` is used if the model has one,
            otherwise ``torch.nn.BCEWithLogitsLoss()``.
    """
    if loss_fun is None:
        loss_fun = getattr(model, "loss_fun", None)
    if loss_fun is None:
        loss_fun = torch.nn.BCEWithLogitsLoss()

    for epoch in range(n_epochs + 1):
        model.train()

        # Remove only the trailing feature axis so a one-sample batch keeps
        # its batch dimension and still matches the shape of `y_train`.
        y_logits = model(X_train).squeeze(-1)
        loss = loss_fun(y_logits, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            print(f"Epoch {epoch}: loss = {loss.item():.5f}")

In [29]:
# Train on the training split with the defaults of `fit`
# (n_epochs=500, loss printed every 20 epochs).
net.fit(X_train, y_train)

Epoch 0: loss = 13.83124
Epoch 20: loss = 2.36554
Epoch 40: loss = 2.38020
Epoch 60: loss = 2.37994
Epoch 80: loss = 2.37960
Epoch 100: loss = 2.37927
Epoch 120: loss = 2.37894
Epoch 140: loss = 2.37861
Epoch 160: loss = 2.37828
Epoch 180: loss = 2.37795
Epoch 200: loss = 2.37762
Epoch 220: loss = 2.37729
Epoch 240: loss = 2.37696
Epoch 260: loss = 2.37664
Epoch 280: loss = 2.37631
Epoch 300: loss = 2.37598
Epoch 320: loss = 2.37565
Epoch 340: loss = 2.37532
Epoch 360: loss = 2.37500
Epoch 380: loss = 2.37467
Epoch 400: loss = 2.37434
Epoch 420: loss = 2.37401
Epoch 440: loss = 2.37368
Epoch 460: loss = 2.37336
Epoch 480: loss = 2.37303
Epoch 500: loss = 2.37270


tensor([1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0.,
        1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1.,
        1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1.,
        0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1.,
        1., 0., 0., 1., 0., 0., 0., 1., 0., 1., nan, 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0.,
        0., 0., 1., 0., 1., 0., nan, 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0.,
        1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1.,
        1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0.,
        1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1.,
        1., 0., 1., nan, 1., 0., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 1.,
        1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1.,
        0., 1., 0., 1., 0., 0., 0., 1

In [30]:
# Share of test rows whose rounded prediction equals the true label.
print(
    metrics.accuracy_score(
        y_true=y_test.detach().numpy(),
        y_pred=net.predict(X_test).detach().numpy(),
    )
)

0.45098039215686275
