# Milestone 4: Second Model

## Imports

In [52]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report

## Data Loading

In [56]:
# Load the cleaned price guide and show summary statistics for its numeric columns.
price_guide_tcg = pd.read_csv("price_guide_tcg_cleaned.csv", low_memory=False)
# NOTE: a bare `price_guide_tcg.dropna()` used to sit here. Its result was never
# assigned, so it removed no rows (the differing per-column counts in describe()
# confirm that). Missing values are dropped later, on the modelling subset only.
price_guide_tcg.describe()

Unnamed: 0,id,loose-price,graded-price,box-only-price,manual-only-price,bgs-10-price,condition-17-price,condition-18-price,sales-volume,hp
count,30300.0,30132.0,23242.0,23345.0,22502.0,22504.0,22653.0,22507.0,30300.0,25631.0
mean,1815578.0,9.113746,40.4734,50.029936,169.867231,260.75189,144.303368,101.864726,48.108482,96.110569
std,1610918.0,57.19715,203.630556,280.75307,1698.911993,2705.228337,641.144056,1019.301576,102.838685,52.140362
min,25228.0,0.01,0.14,0.99,0.98,0.99,1.0,1.0,1.0,10.0
25%,888708.8,1.34,12.06,13.0,34.91,52.0,44.0,21.0,5.0,60.0
50%,959665.5,1.99,16.065,18.0,42.72,65.0,50.0,26.0,12.0,80.0
75%,2254008.0,4.2925,29.8075,33.0,90.045,137.0,99.99,54.0,38.0,120.0
max,7747251.0,4223.26,17033.54,27550.0,221599.74,332400.0,42316.0,132960.0,1837.0,340.0


In [58]:
# Keep only the columns used for modelling. The .copy() gives an independent frame,
# so columns added to subset_df later do not touch price_guide_tcg.
model_columns = ['rarity', 'types', 'generation', 'bgs-10-price', 'graded-price', 'hp', 'sales-volume']
subset_df = price_guide_tcg.loc[:, model_columns].copy()
print(subset_df.head())

         rarity          types generation  bgs-10-price  graded-price     hp  \
0      Uncommon            NaN      Sixth           NaN           NaN    NaN   
1      Uncommon            NaN      Sixth           NaN           NaN    NaN   
2      Uncommon            NaN      Sixth           NaN           NaN    NaN   
3      Uncommon            NaN      Sixth          58.0         15.11    NaN   
4  Rare Holo EX  ['Lightning']      Sixth          62.0         28.09  170.0   

   sales-volume  
0           5.0  
1           1.0  
2           1.0  
3           7.0  
4          81.0  


In [60]:
# Minimum number of rows a category value needs in order to be kept, per column.
# The filtering loop iterates this dict in insertion order: types, rarity, generation.
thresholds = dict(types=1000, rarity=1000, generation=1500)

In [62]:
# Drop rows whose category value occurs fewer times than the column's threshold.
# Columns are handled one after another, so each column's counts are computed on
# the frame already filtered by the previous columns.
for column_name, min_count in thresholds.items():
    frequency = subset_df[column_name].value_counts()
    frequent_values = frequency.loc[frequency.ge(min_count)].index
    keep_mask = subset_df[column_name].isin(frequent_values)
    subset_df = subset_df[keep_mask]

In [64]:
# (rows, columns) left after the category-frequency filter, before NaN rows are dropped.
subset_df.shape

(18487, 7)

In [66]:
# Remove every row that has a missing value in any of the selected columns.
subset_df = subset_df.dropna(axis=0, how='any')

In [68]:
# (rows, columns) left once rows with missing values have been removed.
subset_df.shape

(12759, 7)

In [70]:
# Preview the cleaned modelling frame; the index keeps the original row labels.
subset_df.head()

Unnamed: 0,rarity,types,generation,bgs-10-price,graded-price,hp,sales-volume
9,Uncommon,['Grass'],Sixth,58.0,15.01,70.0,7.0
12,Common,['Psychic'],Sixth,36.0,9.28,60.0,29.0
13,Common,['Psychic'],Sixth,90.0,16.34,60.0,14.0
16,Uncommon,['Grass'],Sixth,58.0,23.0,120.0,24.0
17,Uncommon,['Grass'],Sixth,58.0,15.01,120.0,12.0


## Model 2

In [73]:
# Collapse rarity into two buckets: 'Common' and 'Uncommon' share one label,
# every other rarity value falls into the second label.
low_rarities = ('Common', 'Uncommon')
subset_df['rarity_grouped'] = [
    'Common/Uncommon' if rarity in low_rarities else 'Rare/Rare Holo'
    for rarity in subset_df['rarity']
]

In [75]:
# Binary target: 0 for 'Common/Uncommon', 1 for 'Rare/Rare Holo'.
rarity_codes = {'Common/Uncommon': 0, 'Rare/Rare Holo': 1}
subset_df['rarity_encoded'] = subset_df['rarity_grouped'].map(rarity_codes)

In [77]:
# One-hot encode each categorical column with its own encoder. Re-fitting a single
# shared encoder on 'generation' would overwrite what it learned from 'types', so
# the fitted 'types' categories could not be looked up or reused afterwards.
types_encoder = OneHotEncoder()
generation_encoder = OneHotEncoder()
types_encoded = types_encoder.fit_transform(subset_df[['types']]).toarray()
generation_encoded = generation_encoder.fit_transform(subset_df[['generation']]).toarray()

# Backward-compatible alias: `encoder` previously ended up fitted on 'generation'.
encoder = generation_encoder

# Continuous columns that get standardised rather than one-hot encoded.
numerical_features = ['bgs-10-price', 'graded-price', 'hp', 'sales-volume']

In [79]:
# Standardise the numeric columns.
# NOTE(review): the scaler is fitted on all rows, including those that the later
# random split assigns to the test set — consider fitting on training rows only.
scaler = StandardScaler().fit(subset_df[numerical_features])
numerical_data = scaler.transform(subset_df[numerical_features])

In [81]:
# Join the feature groups column-wise: types one-hot, generation one-hot, scaled numerics.
feature_groups = [types_encoded, generation_encoded, numerical_data]
final_features = np.concatenate(feature_groups, axis=1)
print(f"Final features shape: {final_features.shape}")

Final features shape: (12759, 21)


In [83]:
# Convert features and target to float32 tensors. The target gets a trailing
# dimension of size 1 so it lines up with the network's single output unit.
y = subset_df['rarity_encoded'].to_numpy()
X_tensor = torch.tensor(final_features, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)[:, None]

In [85]:
# Pair features with targets, then hold out 30% of the rows for testing.
dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size

# Seeded generators make the train/test membership and the training shuffle order
# repeatable from run to run; unseeded, every run scores a different test set.
# Weight initialisation and dropout still draw from the global RNG, which the
# visible cells do not seed.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Evaluation order does not matter, so the test loader is left unshuffled.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [87]:
class NeuralNet(nn.Module):
    """Fully connected binary classifier: input -> 64 -> 32 -> 1.

    Both hidden layers use ReLU followed by dropout (p=0.3); the single output
    unit is passed through a sigmoid.

    Args:
        input_size: number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        wide_units, narrow_units, drop_prob = 64, 32, 0.3
        self.fc1 = nn.Linear(input_size, wide_units)
        self.dropout1 = nn.Dropout(drop_prob)
        self.fc2 = nn.Linear(wide_units, narrow_units)
        self.dropout2 = nn.Dropout(drop_prob)
        self.fc3 = nn.Linear(narrow_units, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for `x`, one value per sample."""
        hidden = self.dropout1(torch.relu(self.fc1(x)))
        hidden = self.dropout2(torch.relu(self.fc2(hidden)))
        return self.sigmoid(self.fc3(hidden))

In [89]:
# Size the network to the number of feature columns.
input_size = final_features.shape[1]
model = NeuralNet(input_size=input_size)

# BCELoss takes probabilities, which is what NeuralNet's final sigmoid produces.
criterion = nn.BCELoss()
# Plain SGD over all model parameters with a learning rate of 0.001.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [91]:
# Train with mini-batch gradient descent for a fixed number of epochs.
epochs = 50
train_loss_history = []  # mean per-batch loss of each epoch, kept for inspection/plotting
for epoch in range(epochs):
    model.train()  # training mode: dropout layers are active
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # The accumulated loss was previously computed and then thrown away; report it
    # so convergence (or the lack of it) is visible. max(..., 1) guards an empty loader.
    epoch_loss = train_loss / max(len(train_loader), 1)
    train_loss_history.append(epoch_loss)
    if epoch == 0 or (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}/{epochs} - mean train loss: {epoch_loss:.4f}")

In [92]:
# Score the held-out set: eval mode turns dropout off, no_grad skips gradient tracking.
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
        predictions = outputs.gt(0.5).float()
        y_true += list(y_batch.numpy())
        y_pred += list(predictions.numpy())

## Model 2 -- Evaluation

In [94]:
# Collapse the collected per-sample entries into flat 1-D label vectors.
y_true = np.array(y_true).reshape(-1)
y_pred = np.array(y_pred).reshape(-1)


In [95]:
# Overall accuracy plus per-class precision / recall / F1 on the test split.
accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.7855276907001045
Classification Report:
               precision    recall  f1-score   support

         0.0       0.79      0.88      0.83      2285
         1.0       0.78      0.65      0.71      1543

    accuracy                           0.79      3828
   macro avg       0.78      0.76      0.77      3828
weighted avg       0.79      0.79      0.78      3828

