In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
import numpy as np

# Training runs on the CPU. NOTE: no accelerator (MPS/CUDA) detection is done
# here; change `device` by hand to run on different hardware.
device = 'cpu'
print(f'Using device: {device}')

df = pd.read_csv('result.csv')

# Add duration in minutes (60000 ms per minute)
df["duration_mins"] = df["duration_ms"] / 60000

# Bucket popularity into three classes using the same edges as before:
#   0 -> [0, 45]    1 -> (45, 60]    2 -> (60, 100]
data = df.copy()
popularity_bins = pd.cut(
    data["popularity"],
    bins=[0, 45, 60, 100],
    labels=[0, 1, 2],
    include_lowest=True,  # keep popularity == 0 in the first bucket
)

# Values that are missing or outside [0, 100] land in no bucket. Fail with a
# clear message instead of an opaque NaN-to-int cast error further down.
unbinned = popularity_bins.isna()
if unbinned.any():
    raise ValueError(
        f"{int(unbinned.sum())} row(s) have a popularity that is missing or outside [0, 100]"
    )
data["popularity_level"] = popularity_bins.astype("int")

# Class balance of the derived target
print(data['popularity_level'].value_counts())


Using device: cpu
popularity_level
0    3356
1    3222
2    2938
Name: count, dtype: int64


In [46]:
# Define features and target
features = ["loudness", "energy", "speechiness", "danceability", "tempo", "key", "liveness", "instrumentalness", "valence", "acousticness", "duration_mins"]
target = "popularity_level"

# Split into training and testing data (80/20, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    data[features], data[target], test_size=0.2, random_state=42
)

# Preprocessing pipeline: min-max scale the continuous columns and one-hot
# encode the categorical `key` column.
preprocessor = ColumnTransformer(
    [
        ('minmax', MinMaxScaler(), [
            'tempo', 'duration_mins', 'loudness',
            'energy', 'speechiness', 'danceability', 'liveness',
            'instrumentalness', 'valence',
            'acousticness'
        ]),
        # handle_unknown='ignore': a `key` value that never occurs in the
        # training split is encoded as all zeros instead of raising when the
        # test split is transformed.
        ('categorical', OneHotEncoder(handle_unknown='ignore'), ['key']),
    ],
    remainder='passthrough',
    # Always return a dense ndarray. The one-hot block is sparse, and the
    # default density heuristic could otherwise hand back a scipy sparse
    # matrix, which the later tensor conversion cannot consume.
    sparse_threshold=0,
)

# Creating a pipeline with make_pipeline
pipeline = make_pipeline(preprocessor)

# Fit on the training split only, then apply the fitted transform to the
# test split so no test statistics leak into the scaler/encoder.
X_train_transformed = pipeline.fit_transform(X_train, y_train)
X_test_transformed = pipeline.transform(X_test)

# Print shape to determine input size
print(X_train_transformed.shape)

class Net(nn.Module):
    """Small fully connected classifier: input -> 64 -> 32 -> 16 -> num_classes.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout. The final
    layer returns raw logits (no softmax), which is the form expected by
    ``nn.CrossEntropyLoss``.

    Args:
        input_size: Number of features per sample.
        num_classes: Width of the output layer. Defaults to 3.
        dropout: Dropout probability applied after every hidden stage.
            Defaults to 0.01.
    """

    def __init__(self, input_size, num_classes=3, dropout=0.01):
        super().__init__()
        # Attribute names and construction order are kept stable so that
        # state_dict keys and seeded weight initialisation do not change.
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.dropout1 = nn.Dropout(dropout)

        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.dropout2 = nn.Dropout(dropout)

        self.fc3 = nn.Linear(32, 16)
        self.bn3 = nn.BatchNorm1d(16)
        self.dropout3 = nn.Dropout(dropout)

        self.fc4 = nn.Linear(16, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input (batch, input_size)."""
        hidden_stages = (
            (self.fc1, self.bn1, self.dropout1),
            (self.fc2, self.bn2, self.dropout2),
            (self.fc3, self.bn3, self.dropout3),
        )
        for linear, batch_norm, drop in hidden_stages:
            x = drop(torch.relu(batch_norm(linear(x))))
        return self.fc4(x)

# Seed PyTorch's global RNG so weight initialisation and dropout masks are the
# same on every Restart & Run All (the train/test split is already seeded).
torch.manual_seed(42)

# Initialize the model with the correct input size
# (number of columns produced by the preprocessing pipeline)
input_size = X_train_transformed.shape[1]
model = Net(input_size).to(device)

# Convert data to PyTorch tensors on the target device.
# Features are float32; labels are int64 class indices for CrossEntropyLoss.
X_train_tensor = torch.tensor(X_train_transformed, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_transformed, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long, device=device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long, device=device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.03)



# Function to calculate accuracy
def calculate_accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        predictions: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of integer class indices.

    Returns:
        A 0-dim float tensor: number of correct predictions / number of samples.
    """
    predicted_classes = predictions.argmax(dim=1)
    hits = predicted_classes.eq(labels).float()
    return hits.sum() / len(hits)

# Full-batch training loop; metrics on the progress bar refresh every 10 epochs
from tqdm import tqdm

epochs = 1000
progress_bar = tqdm(range(epochs), desc='Training')

for epoch in progress_bar:
    # One optimisation step over the entire training set
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Skip evaluation except on every 10th epoch
    if (epoch + 1) % 10 != 0:
        continue

    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)

        # Training metrics reuse `outputs`/`loss` from the train-mode forward
        # pass above (taken before this epoch's weight update), whereas the
        # test metrics come from the eval-mode pass just computed.
        train_accuracy = calculate_accuracy(outputs, y_train_tensor)
        test_accuracy = calculate_accuracy(test_outputs, y_test_tensor)

    metrics = {
        'Training Loss': f'{loss.item():.4f}',
        'Test Loss': f'{test_loss.item():.4f}',
        'Training Accuracy': f'{train_accuracy:.4f}',
        'Test Accuracy': f'{test_accuracy:.4f}'
    }
    progress_bar.set_postfix(metrics)

(7612, 22)


Training: 100%|██████████| 1000/1000 [00:09<00:00, 105.49it/s, Training Loss=0.4974, Test Loss=1.9728, Training Accuracy=0.8119, Test Accuracy=0.4632]
