In [55]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np


In [56]:
# Compute device used for the model and every tensor below. It is pinned to the
# CPU: no MPS/CUDA availability check is performed in this cell.
device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [57]:
# Reference sample: the first five rows of result.csv, restricted to the numeric
# columns. It is kept commented out; the real data is loaded from the CSV below.
# data = {
#     "acousticness": [0.8050, 0.9450, 0.7660, 0.8730, 0.8380],
#     "year": [1947, 1956, 1959, 1959, 1959],
#     "Weeks on Chart": [10, 8, 13, 12, 2],
#     "loudness": [-12.850, -10.064, -10.163, -8.596, -7.576],
#     "energy": [0.354, 0.302, 0.317, 0.626, 0.796],
#     "speechiness": [0.0369, 0.0290, 0.0309, 0.0465, 0.0675],
#     "danceability": [0.838, 0.308, 0.249, 0.467, 0.602],
#     "Average Previous Week Position": [56.0, 33.0, 45.0, 47.0, 4.0],
#     "Week Position": [135.0, 73.0, 118.0, 129.0, 13.0],
#     "duration_ms": [150160, 184933, 177893, 146893, 144442],
#     "tempo": [96.638, 67.086, 90.207, 83.814, 111.646],
#     "key": [1, 3, 9, 4, 9],
#     "liveness": [0.3010, 0.1790, 0.2920, 0.2530, 0.0784],
#     "instrumentalness": [0.000000, 0.023800, 0.000001, 0.000033, 0.002550],
#     "valence": [0.976, 0.214, 0.171, 0.880, 0.558],
#     "popularity": [0, 7, 15, 21, 11]
# }

# Load the full dataset from a CSV file located in the working directory.
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, which
# suggests the CSV was written with its index included; passing index_col=0
# would likely remove it - confirm before changing.
df = pd.read_csv('result.csv')

# Bare last expression: renders the frame as this cell's output.
df

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,popularity,speechiness,tempo,valence,year,Song,Weeks on Chart,Average Previous Week Position,Week Position
0,0.8050,gene autry,0.838,150160,0.354,0.000000,1,0.3010,-12.850,0,0.0369,96.638,0.976,1947,here comes santa claus right down santa claus ...,10,56.0,135.0
1,0.9450,ray price,0.308,184933,0.302,0.023800,3,0.1790,-10.064,7,0.0290,67.086,0.214,1956,danny boy,8,33.0,73.0
2,0.7660,johnny mathis,0.249,177893,0.317,0.000001,9,0.2920,-10.163,15,0.0309,90.207,0.171,1959,someone,13,45.0,118.0
3,0.8730,conway twitty,0.467,146893,0.626,0.000033,4,0.2530,-8.596,21,0.0465,83.814,0.880,1959,mona lisa,12,47.0,129.0
4,0.8380,jimmy reed,0.602,144442,0.796,0.002550,9,0.0784,-7.576,11,0.0675,111.646,0.558,1959,down in virginia,2,4.0,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13654,0.2300,breland,0.883,158329,0.299,0.000008,2,0.1620,-8.382,68,0.1050,136.982,0.550,2020,my truck,2,8.0,5.0
13655,0.7600,giveon,0.640,260776,0.355,0.000070,10,0.1140,-7.757,68,0.0650,119.513,0.437,2020,like i want you,3,2.0,4.0
13656,0.1060,sean paul,0.951,218573,0.600,0.000000,0,0.0712,-4.675,1,0.0686,125.040,0.822,2020,temperature,31,85.0,180.0
13657,0.0236,lil uzi vert,0.775,234627,0.720,0.000000,11,0.1140,-5.353,66,0.1930,155.086,0.490,2020,p2,6,43.0,115.0


In [58]:
# Input columns fed to the network, in the exact order the model sees them.
features = [
    "year",
    "Weeks on Chart",
    "loudness",
    "energy",
    "speechiness",
    "danceability",
    "Average Previous Week Position",
    "Week Position",
    "duration_ms",
    "tempo",
    "key",
    "liveness",
    "instrumentalness",
    "valence",
    "acousticness",
]

# Design matrix and regression target ("popularity") as NumPy arrays.
X = df[features].values
y = df["popularity"].values

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features with statistics learned from the training split only,
# then apply that same fitted transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [59]:
class Net(nn.Module):
    """Feed-forward regressor with three hidden blocks and a single-output head.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout, with widths
    128, 64 and 32. The final layer is a plain Linear(32, 1) with no activation.

    Args:
        in_features: Number of input columns. When omitted, it falls back to
            ``len(features)`` from the notebook's global namespace, so the
            existing ``Net()`` call keeps working unchanged.
        dropout: Drop probability shared by the three dropout layers.
    """

    def __init__(self, in_features=None, dropout=0.3):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the input layer from the
            # notebook-level feature list, resolved at construction time.
            in_features = len(features)

        # Layers are created in the same order as before so that a seeded
        # initialisation yields the same weights.
        self.fc1 = nn.Linear(in_features, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.dropout1 = nn.Dropout(dropout)

        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.dropout2 = nn.Dropout(dropout)

        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.dropout3 = nn.Dropout(dropout)

        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        """Map a ``(batch, in_features)`` float tensor to ``(batch, 1)`` predictions."""
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)

        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)

        # Linear head: raw regression output, no activation.
        x = self.fc4(x)
        return x

# Seed PyTorch's global RNG before any weights are created so that a fresh
# "Restart & Run All" reproduces the same initialisation (and the same sequence
# of dropout masks), in line with the fixed random_state used for the split.
torch.manual_seed(42)

# Instantiate the network and place its parameters on the selected device.
model = Net().to(device)

In [63]:
def _to_tensor(array, as_column=False):
    """Copy `array` into a float32 tensor on `device`, optionally shaped (N, 1)."""
    tensor = torch.tensor(array, dtype=torch.float32)
    if as_column:
        tensor = tensor.view(-1, 1)
    return tensor.to(device)


# Both splits as tensors; targets become a single column so they line up with
# the network's (N, 1) output.
X_train_tensor = _to_tensor(X_train)
y_train_tensor = _to_tensor(y_train, as_column=True)
X_test_tensor = _to_tensor(X_test)
y_test_tensor = _to_tensor(y_test, as_column=True)

# Mean-squared-error objective optimised with AdamW at a constant learning rate.
# A StepLR schedule (step_size=10, gamma=0.1) is not enabled here.
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.01)

# NOTE(review): this cell reuses the existing `model`, so re-running it continues
# training from the current weights instead of starting from scratch.
epochs = 1000
for epoch in range(epochs):
    # One full-batch optimisation step over the entire training split.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Only every 10th epoch is evaluated on the test split and logged.
    if (epoch + 1) % 10 != 0:
        continue

    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
    print(f'Epoch [{epoch+1}/{epochs}], '
          f'Training Loss: {loss.item():.4f}, '
          f'Test Loss: {test_loss.item():.4f}')

Epoch [10/1000], Training Loss: 103.5156, Test Loss: 144.9700
Epoch [20/1000], Training Loss: 104.1945, Test Loss: 146.5417
Epoch [30/1000], Training Loss: 104.2439, Test Loss: 145.9766
Epoch [40/1000], Training Loss: 104.4535, Test Loss: 146.3252
Epoch [50/1000], Training Loss: 103.2500, Test Loss: 146.1093
Epoch [60/1000], Training Loss: 104.3159, Test Loss: 146.8744
Epoch [70/1000], Training Loss: 104.5884, Test Loss: 146.7084
Epoch [80/1000], Training Loss: 102.0526, Test Loss: 146.8077
Epoch [90/1000], Training Loss: 104.1805, Test Loss: 146.5468
Epoch [100/1000], Training Loss: 101.1379, Test Loss: 146.6470
Epoch [110/1000], Training Loss: 102.4756, Test Loss: 146.8343
Epoch [120/1000], Training Loss: 104.1645, Test Loss: 146.8743
Epoch [130/1000], Training Loss: 102.3819, Test Loss: 146.5654
Epoch [140/1000], Training Loss: 101.3460, Test Loss: 146.5269
Epoch [150/1000], Training Loss: 102.5660, Test Loss: 147.1671
Epoch [160/1000], Training Loss: 105.0394, Test Loss: 146.4534
E