In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score

# Load the data
df_spotify = pd.read_csv("SpotifyFeatures.csv")
X = df_spotify[['acousticness', 'danceability', 'duration_ms', 'energy', 'tempo']]
y = df_spotify['popularity']

# Transform the labels into a binary classification problem
y_binary = y.apply(lambda x: 1 if x > 74 else 0)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

In [4]:
# Define the base models
model_1 = RandomForestRegressor(n_estimators=100, random_state=42)
model_2 = LinearRegression()

# Fit the base models to the training data
model_1.fit(X_train, y_train)
model_2.fit(X_train, y_train)

# Make predictions with the base models on the test data
y_pred_1 = model_1.predict(X_test)
y_pred_2 = model_2.predict(X_test)

# Print the accuracy score of the models
rf_mae = mean_absolute_error(y_test, y_pred_1)
print("RF MAE:", rf_mae)
lr_mae = mean_absolute_error(y_test, y_pred_2)
print("LR MAE:", lr_mae)

RF MAE: 0.017286919939610883
LR MAE: 0.03204867679350833


In [3]:
# Concatenate the predictions from the base models and use them as input to the meta-model
X_meta = np.concatenate([y_pred_1.reshape(-1, 1), y_pred_2.reshape(-1, 1)], axis=1)

# Define and fit the meta-model
meta_model = LinearRegression()
meta_model.fit(X_meta, y_test)

# Make final predictions with the meta-model
y_pred = meta_model.predict(X_meta)

# Convert the predictions to binary labels (0 or 1)
y_pred_binary = y_pred.round().astype(int)

In [4]:
# Compute and print the accuracy score
accuracy = accuracy_score(y_test, y_pred_binary)

print("Accuracy:", accuracy)

Accuracy: 0.9954237834353851
