## Input Dataset

In [3]:
import pandas as pd

In [4]:
# Read the three per-mood track tables (sad, happy, workout), in that order.
sad_df, happy_df, workout_df = map(
    pd.read_csv,
    ('../InputData/sad.csv', '../InputData/happy.csv', '../InputData/workout.csv'),
)

### Label encoding in the `mood` column: 0 = workout, 1 = sad, 2 = happy.

In [5]:
# Build one text label per row for each per-mood table.
# NOTE(review): 'workout' is lowercase while 'Sad'/'Happy' are capitalised;
# the values are kept exactly as they were because they end up in the data.
mood_sad = ['Sad'] * len(sad_df)
mood_happy = ['Happy'] * len(happy_df)
mood_workout = ['workout'] * len(workout_df)

# Attach the labels as a new 'Moods' column on each table.
for frame, labels in (
    (sad_df, mood_sad),
    (happy_df, mood_happy),
    (workout_df, mood_workout),
):
    frame['Moods'] = labels

In [6]:
# Stack the labelled tables vertically (sad, then happy, then workout).
# Each table keeps its own row index, so index labels repeat in the result.
mood_frames = [sad_df, happy_df, workout_df]
test_df = pd.concat(mood_frames)

# Quick sanity check: overall shape, then a preview of the first rows.
print(test_df.shape)
test_df.head()

(5331, 19)


Unnamed: 0.1,Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood,Moods
0,0,everything i wanted,everything i wanted,Billie Eilish,2019-11-13,245425,82,0.704,0.902,0.704,0.225,0.657,0.106,-14.454,0.0994,120.006,4,1,Sad
1,1,ghostin,"thank u, next",Ariana Grande,2019-02-08,271466,69,0.287,0.418,0.287,0.364,1.8e-05,0.185,-8.295,0.0306,103.777,4,1,Sad
2,2,Too Good At Goodbyes,The Thrill Of It All (Special Edition),Sam Smith,2017-11-03,201000,74,0.681,0.64,0.681,0.372,0.0,0.169,-8.237,0.0432,91.873,4,1,Sad
3,3,i love you,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",Billie Eilish,2019-03-29,291796,80,0.421,0.952,0.421,0.131,0.00453,0.109,-18.435,0.0382,137.446,4,1,Sad
4,4,I Fall Apart,Stoney (Deluxe),Post Malone,2016-12-09,223346,80,0.556,0.0689,0.556,0.538,0.0,0.196,-5.408,0.0382,143.95,4,1,Sad


In [7]:
# Persist the combined dataset as a comma-separated file.
# NOTE(review): the row index is written too, so re-reading this file yields
# an extra unnamed leading column — confirm downstream readers expect that.
combined_csv_path = "../InputData/input_dataset.csv"
test_df.to_csv(combined_csv_path, sep=',')

## Machine Learning Model

### Initial Work

In [8]:
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [9]:
# Columns excluded from the feature matrix: the saved-index column, text
# metadata, the release date, track length, the repeated danceability column,
# and both label columns (numeric 'mood' and text 'Moods').
non_feature_columns = [
    'Unnamed: 0',
    'name',
    'album',
    'artist',
    'release_date',
    'mood',
    'length',
    'danceability.1',
    'Moods',
]
X = test_df.drop(columns=non_feature_columns)

# Target: the numeric mood code.
y = test_df['mood']

In [10]:
# Hold out a test partition; the fixed seed keeps the shuffle repeatable.
split_parts = train_test_split(X, y, random_state=78)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Standardise the features: the scaler is fitted on the training rows only,
# then the same fitted transform is applied to both partitions.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

### Multinomial Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold

In [13]:
# Rebuild the modelling inputs from test_df in one place: feature matrix,
# target, train/test split and standardised copies of both partitions.

# Everything except the numeric audio/popularity features is dropped.
excluded_columns = [
    'Unnamed: 0', 'name', 'album', 'artist', 'release_date',
    'mood', 'length', 'danceability.1', 'Moods',
]
X = test_df.drop(columns=excluded_columns)
y = test_df['mood']

# Seeded split so the partitions are the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Fit the scaler on the training rows only, then transform both partitions
# with those training statistics.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(partition) for partition in (X_train, X_test)
)

In [14]:
# Multinomial logistic regression on the standardised features.
# `multi_class` is intentionally not passed: the parameter is deprecated in
# recent scikit-learn releases, and with the lbfgs solver a target with more
# than two classes is already fitted with the multinomial loss by default.
model = LogisticRegression(solver='lbfgs')
model.fit(X_train_scaled, y_train)

# Report accuracy on the held-out test partition.
y_pred = model.predict(X_test_scaled)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.767


## Decision Tree

In [15]:
# Decision tree classifier on the standardised features.
# The seed matches the one used for the train/test split and the random
# forest so the tree (and its metrics) is the same on every run; without it
# the reported numbers can change between executions.
model = tree.DecisionTreeClassifier(random_state=78)
model = model.fit(X_train_scaled, y_train)

# Predict the held-out test partition.
predictions = model.predict(X_test_scaled)

# Confusion matrix with rows/columns pinned to the classes the model learned,
# so the labels below always line up with the matrix even if a class happens
# to be absent from the test partition.
class_labels = model.classes_
cm = confusion_matrix(y_test, predictions, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

# Overall accuracy on the test partition.
acc_score = accuracy_score(y_test, predictions)

# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1,Predicted 2
Actual 0,68,27,71
Actual 1,36,781,70
Actual 2,59,62,159


Accuracy Score : 0.7561890472618155
Classification Report
              precision    recall  f1-score   support

           0       0.42      0.41      0.41       166
           1       0.90      0.88      0.89       887
           2       0.53      0.57      0.55       280

    accuracy                           0.76      1333
   macro avg       0.61      0.62      0.62      1333
weighted avg       0.76      0.76      0.76      1333



## Random Forest

In [16]:
from sklearn.ensemble import RandomForestClassifier

In [17]:
# Random forest of 128 trees on the standardised features; seeded so the
# ensemble is the same on every run.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)
rf_model = rf_model.fit(X_train_scaled, y_train)

# Predict the held-out test partition.
predictions = rf_model.predict(X_test_scaled)

# Confusion matrix with rows/columns pinned to the classes the model learned,
# so the labels below always line up with the matrix even if a class happens
# to be absent from the test partition.
class_labels = rf_model.classes_
cm = confusion_matrix(y_test, predictions, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

# Overall accuracy on the test partition.
acc_score = accuracy_score(y_test, predictions)

# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1,Predicted 2
Actual 0,63,36,67
Actual 1,11,823,53
Actual 2,29,63,188


Accuracy Score : 0.805701425356339
Classification Report
              precision    recall  f1-score   support

           0       0.61      0.38      0.47       166
           1       0.89      0.93      0.91       887
           2       0.61      0.67      0.64       280

    accuracy                           0.81      1333
   macro avg       0.70      0.66      0.67      1333
weighted avg       0.80      0.81      0.80      1333

