# **Nesterov Accelerated Gradient (NAG)**

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import tensorflow
import keras
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import accuracy_score, classification_report

In [2]:
df = pd.read_csv('extended_data.csv')
df.head()

Unnamed: 0,model_year,brand,model,type,miles_per_gallon,premium_version,msrp,collection_car
0,2016,Toyota,Land Cruiser Base,SUV,13.0,1,84900.0,0
1,2014,RAM,ProMaster 2500 Window Van High Roof,Van,15.0,0,35000.0,0
2,2002,Ford,Mustang GT,Coupe,16.0,0,26250.0,0
3,2012,BMW,428 Gran Coupe i xDrive,Sedan,27.0,1,45000.0,0
4,2008,Mercedes-Benz,SL-Class SL500 Roadster,Convertible,18.0,1,100000.0,1


In [3]:
df.shape

(28143, 8)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28143 entries, 0 to 28142
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   model_year        28143 non-null  int64  
 1   brand             28143 non-null  object 
 2   model             28143 non-null  object 
 3   type              28143 non-null  object 
 4   miles_per_gallon  28126 non-null  float64
 5   premium_version   28143 non-null  int64  
 6   msrp              28126 non-null  float64
 7   collection_car    28143 non-null  int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 1.7+ MB


In [5]:
df.isnull().sum()

model_year           0
brand                0
model                0
type                 0
miles_per_gallon    17
premium_version      0
msrp                17
collection_car       0
dtype: int64

In [6]:
df['collection_car'].value_counts()

collection_car
0    24279
1     3864
Name: count, dtype: int64

In [7]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['collection_car']),
                                                    df['collection_car'],
                                                    test_size=0.2,
                                                    random_state=42)

In [8]:
numerical_columns = [0, 4, 5, 6]
categorical_columns = [1, 2, 3]

In [9]:
handle_numerical = Pipeline(steps=[
    ('impute', KNNImputer(n_neighbors=11)),
    ('scale', MinMaxScaler())
])

In [10]:
handle_categorical = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(drop='first', handle_unknown='ignore'))
])

In [11]:
preprocessing = ColumnTransformer(transformers=[
    ('numerical', handle_numerical, numerical_columns),
    ('categorical', handle_categorical, categorical_columns)
])

In [12]:
X_train = preprocessing.fit_transform(X_train)
X_test = preprocessing.transform(X_test)

In [13]:
model = Sequential()

model.add(Dense(X_train.shape[1], activation='relu'))
for i in range(1, 21):
    model.add(Dense(int(X_train.shape[1] / i), activation='relu'))

model.add(Dense(1, activation='sigmoid'))

In [15]:
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
model.fit(X_train, y_train, epochs=100, batch_size=X_train.shape[0], validation_data=(X_test, y_test))

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 46s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0000e+00
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0000e+00
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0000e+00
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0000e+00
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0000e+00
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20s/step - accuracy: 0.8633 - loss: 0.0000e+00 - val_accuracy: 0.8602 - val_loss: 0.0

<keras.src.callbacks.history.History at 0x1f8b8f1a420>

In [95]:
y_pred = model.predict(X_test)

[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step


In [96]:
y_pred = (y_pred > 0.5).astype(int)

In [97]:
accuracy_score(y_test, y_pred)

0.8601883105347309

In [98]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92      4842
           1       0.00      0.00      0.00       787

    accuracy                           0.86      5629
   macro avg       0.43      0.50      0.46      5629
weighted avg       0.74      0.86      0.80      5629

