# **Import Libraries**

In [None]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier

## **Data Preparation**

In [None]:
# Read the source CSV (path is relative to the working directory) into a
# DataFrame, then preview its first rows.
file_path = Path("data.csv")
data = pd.read_csv(filepath_or_buffer=file_path)
data.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [None]:
# Keep only songs released after 1960. The comparison is strict, so songs
# from 1960 itself are dropped as well (the earliest remaining year is 1961).
year_limit = 1960
data_1960_df = data.loc[data['year'] > year_limit]
data_1960_df.describe()

Unnamed: 0,valence,year,acousticness,danceability,duration_ms,energy,explicit,instrumentalness,key,liveness,loudness,mode,popularity,speechiness,tempo
count,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0,118900.0
mean,0.538535,1990.49032,0.35155,0.554089,238614.2,0.57305,0.100168,0.110133,5.214449,0.201429,-9.940915,0.707881,42.517502,0.075705,119.842238
std,0.260172,17.291647,0.326737,0.174301,111621.3,0.248501,0.300225,0.258921,3.534181,0.178627,5.075899,0.454739,15.320393,0.090851,30.023464
min,0.0,1961.0,0.0,0.0,14708.0,0.0,0.0,0.0,0.0,0.0,-60.0,0.0,0.0,0.0,0.0
25%,0.331,1976.0,0.0427,0.437,180960.0,0.392,0.0,0.0,2.0,0.0938,-12.557,0.0,31.0,0.0335,96.555
50%,0.546,1990.0,0.25,0.563,222320.0,0.59,0.0,6.2e-05,5.0,0.129,-9.01,1.0,42.0,0.0431,117.8555
75%,0.756,2005.0,0.64,0.68,271667.0,0.777,0.0,0.0145,8.0,0.254,-6.26,1.0,53.0,0.0706,138.51325
max,1.0,2020.0,0.996,0.988,5403500.0,1.0,1.0,1.0,11.0,1.0,3.744,1.0,100.0,0.964,243.507


In [None]:
# Use only essential columns: the numeric audio features plus `popularity`,
# which is listed last and serves as the prediction target.
cleanData = data_1960_df.loc[:, [
    'valence', 'year', 'acousticness', 'danceability', 'duration_ms',
    'energy', 'explicit', 'instrumentalness', 'key', 'liveness',
    'loudness', 'mode', 'speechiness', 'tempo',
    'popularity',
]]

In [None]:
# Split data into features (X) and target (y).
# Columns are selected by label rather than by position, so the split stays
# correct if the column list of cleanData is reordered or extended.
X = cleanData.drop(columns='popularity').values
y = cleanData['popularity'].values

In [None]:
# Hold out a test set; the fixed random_state makes the split repeatable.
# No test_size is passed, so train_test_split's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)

In [None]:
# Fit the standard scaler on the training split only, so the test split does
# not influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_scaler = scaler  # fit() returns the estimator itself, so both names refer to it

In [None]:
# Apply the already-fitted scaler to both splits (train first, then test)
X_train_scaled, X_test_scaled = map(X_scaler.transform, (X_train, X_test))

## **Random Forest**

In [None]:
# Random forest classifier: 100 trees, with a fixed seed for repeatable results
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=78,
)

In [None]:
# Train the random forest on the scaled training split
rf_model = rf_model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Predict popularity for the scaled test split with the random forest
predictions_rf = rf_model.predict(X=X_test_scaled)

In [None]:
# Accuracy of the random forest on the test split.
# NOTE(review): accuracy only credits exact matches, and `popularity` spans
# 0-100 in the describe() output, so low scores are expected here; a
# regression-style metric may be more informative - confirm intent.
acc_score_rf = accuracy_score(y_true=y_test, y_pred=predictions_rf)

In [None]:
# Displaying results: accuracy of the random forest on the test split
print(f"Accuracy Score : {acc_score_rf}")

Accuracy Score : 0.04827586206896552


## **Decision Trees**

In [None]:
# Creating the decision tree classifier instance.
# random_state is pinned (same seed as the train/test split and the random
# forest) so the fitted tree and its accuracy are repeatable across runs.
model = tree.DecisionTreeClassifier(random_state=78)

In [None]:
# Train the decision tree on the scaled training split
model = model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict popularity for the scaled test split with the decision tree
predictions_dt = model.predict(X=X_test_scaled)

In [None]:
# Accuracy of the decision tree on the test split (exact-match rate)
acc_score_dt = accuracy_score(y_true=y_test, y_pred=predictions_dt)

In [None]:
# Displaying results: accuracy of the decision tree on the test split
print(f"Accuracy Score : {acc_score_dt}")

Accuracy Score : 0.03875525651808242


## **Gradient Boosting**

In [None]:
# Create the gradient boosting classifier and train it on the scaled training split
classifier = GradientBoostingClassifier(
    n_estimators=20,
    learning_rate=0.5,
    max_features=5,
    max_depth=3,
    random_state=0,
)
# Last expression in the cell, so the fitted estimator's repr is displayed
classifier.fit(X_train_scaled, y_train.ravel())


GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.5, loss='deviance', max_depth=3,
                           max_features=5, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=20,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=0, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [None]:
# Score the gradient boosting model on the scaled test split
acc_score_gbc = classifier.score(X_test_scaled, y_test)

## **Deep Learning Neural Networks**

In [None]:
# Fully connected network: four ReLU hidden layers narrowing from 64 to 8
# units, followed by a single linear output unit.
nn = Sequential([
    # First hidden layer. The input width is read from the scaled training
    # data instead of being hard-coded, so it follows the feature set.
    Dense(units=64, input_dim=X_train_scaled.shape[1], activation="relu"),
    # Second hidden layer
    Dense(units=32, activation="relu"),
    # Third hidden layer
    Dense(units=16, activation="relu"),
    # Fourth hidden layer
    Dense(units=8, activation="relu"),
    # Output layer
    Dense(units=1, activation="linear"),
])

In [None]:
# Compile the model: Adam optimizer with mean-squared-error loss, which is
# also tracked as a metric.
nn.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mse"],
)

In [None]:
# Fit the model for 10 epochs, passing validation_split=0.3 for validation
# during training. The return value of nn.fit is kept in model_1
# (presumably the Keras training history - confirm).
model_1 = nn.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    validation_split=0.3,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Predict popularity for the scaled test split with the neural network
predictions_deep = nn.predict(X_test_scaled)

In [None]:
# Convert the network outputs to integer popularity values.
# Round to the nearest integer first: a bare astype(int) truncates toward
# zero (e.g. 41.9 -> 41), which biases every positive prediction downward.
predictions_deep = predictions_deep.round().astype(int)

In [None]:
# Accuracy of the neural network on the test split: the share of integer
# predictions that exactly equal the true popularity value.
acc_score_deep = accuracy_score(y_true=y_test, y_pred=predictions_deep)

In [None]:
# Display the neural network's accuracy score
acc_score_deep

0.04063919259882254

## **Compare Models**

In [None]:
# Model accuracy scores, one line per model, for side-by-side comparison
print(
    f"Accuracy Score for Deeplearning Model: {acc_score_deep}",
    f"Accuracy Score for Gradient Boosting Model: {acc_score_gbc}",
    f"Accuracy Score for Decision Tree Model: {acc_score_dt}",
    f"Accuracy Score for Random Forest Model: {acc_score_rf}",
    sep="\n",
)

Accuracy Score for Deeplearning Model: 0.04063919259882254
Accuracy Score for Gradient Boosting Model: 0.008847771236333053
Accuracy Score for Decision Tree Model: 0.03875525651808242
Accuracy Score for Random Forest Model: 0.04827586206896552
