# Import Data

In [1]:
from sqlalchemy import create_engine
import pandas as pd
import psycopg2
db_password = 'Snakefarm'

#Initialize DB string
db_string = f"postgres://postgres:{db_password}@127.0.0.1:5432/spotify_capstone"

#Create database engine
engine = create_engine(db_string)

# Connection parameters, yours will be different
param_dic = {
    "host"      : "localhost",
    "database"  : "spotify_capstone",
    "user"      : "postgres",
    "password"  : "snakefarm"
}
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn

def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Tranform a SELECT query into a pandas dataframe
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        cursor.close()
        return 1
    
    # Naturally we get a list of tupples
    tupples = cursor.fetchall()
    cursor.close()
    
    # We just need to turn it into a pandas dataframe
    df = pd.DataFrame(tupples, columns=column_names)
    return df


# Connect to the database
conn = connect(param_dic)
column_names = ['track_name', 'artist_name', 'song_and_artist', 'track_id', 'year', 'valence', 'acoustic', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrument', 'key_value', 'liveness', 'loudness', 'mode_value', 'popularity', 'speechiness', 'tempo']
# Execute the "SELECT *" query
spotify_df = postgresql_to_dataframe(conn, "select * from  spotify_values", column_names)


#Read in new billboard DF
conn = connect(param_dic)
column_names_bb = ['track_name', 'album', 'artist_name', 'track_id', 'year', 'duration_ms',
       'popularity', 'danceability', 'acoustic', 'energy', 'instrument',
       'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature',
       'explicit', 'valence', 'key_value', 'mode_value', 'billboard_year',
       'index']
# Execute the "SELECT *" query for BB data
billboard_master_df =postgresql_to_dataframe(conn, "select * from billboard_master", column_names_bb)

#Create list of track_ids from billboard
billboard_master_df_id_list = billboard_master_df['track_id'].tolist()

# Filter out billboard songs in spotify_df
inverse_boolean_series = ~spotify_df.track_id.isin(billboard_master_df_id_list)
spotify_filtered_df = spotify_df[inverse_boolean_series]
spotify_filtered_df.head()

#Join billboard and filtered Spotify DF
joined_df = pd.concat([billboard_master_df,spotify_filtered_df], axis=0, ignore_index=True)

#Add billboard top 100 column to joined df
joined_df['top_100'] = 0

for i, track_id in joined_df.track_id.iteritems():
    if track_id in billboard_master_df.track_id.values:
        joined_df['top_100'][i]= 1
        
joined_df['top_100'].value_counts()

joined_df


Connecting to the PostgreSQL database...
Connection successful
Connecting to the PostgreSQL database...
Connection successful


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_df['top_100'][i]= 1


Unnamed: 0,track_name,album,artist_name,track_id,year,duration_ms,popularity,danceability,acoustic,energy,...,tempo,time_signature,explicit,valence,key_value,mode_value,billboard_year,index,song_and_artist,top_100
0,Goodnight+ Irene,Goodnight+ Irene,The Weavers and Gordon Jenkins Orchestra,0ovbd86qDYXYTythqAmofL,2013,153991,5,0.367,0.991,0.229,...,138.38,4,False,0.617,4,True,1950.0,0.0,,1
1,Mona Lisa,The Nat King Cole Story,Nat King Cole,3k5ycyXX5qsCjLd7R2vphp,1991,207573,42,0.214,0.903,0.194,...,86.198,3,False,0.339,1,True,1950.0,1.0,,1
2,The Third Man Theme,The Third Man Theme And Other Viennese Favorit...,Anton Karas,7rRGujA12UJcRUz7DxUDwQ,2006,265360,16,0.382,0.851,0.348,...,73.265,4,False,0.862,7,True,1950.0,2.0,,1
3,Sam's Song (The Happy Tune),Swinging On A Star,Bing Crosby,0xHbFWoqTXy0dRFWQmMbJm,1999,172133,0,0.751,0.894,0.133,...,117.781,4,False,0.559,5,False,1950.0,3.0,,1
4,"A Simple Melody (From ""Watch your Step"")",Musical Moments to Remember: Bing Crosby Vol. ...,Bing Crosby,1V3Ml3V5bKT7a7DR1ueBbm,2014,171182,26,0.665,0.984,0.126,...,139.152,3,False,0.794,10,True,1950.0,4.0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173556,"""Der Rosenkavalier* Op.59 / Act 3: """"Zur Stell...",,['Richard Strauss'* 'Alfred Poell'* 'Ludwig We...,0yRjRgvO8kR6E9fehn07tE,1954,308600,0,0.424,0.976,0.448,...,82.35,,False,0.578,5,True,,,"""Der Rosenkavalier* Op.59 / Act 3: """"Zur Stell...",0
173557,Jacôk - Mountain Dancde,,['Krosno Ensemble'],0yVOxC0rsuYapJh7NkMgkX,1954,80827,0,0.462,0.985,0.0949,...,72.953,,False,0.96,2,True,,,Jacôk - Mountain Dancde ['Krosno Ensemble'],0
173558,Easter Hymn,,['Girolamo Cavazzoni'* 'Flor Peeters'],0yZj9jxtCYdzkDBX6LGmrL,1954,138427,0,0.138,0.42,0.0161,...,70.063,,False,0.439,0,False,,,Easter Hymn ['Girolamo Cavazzoni'* 'Flor Peete...,0
173559,Jodi Bolo,,['Arijit Singh'],5wS1sJr2rzh9AKYFpkqqnA,2020,272562,0,0.42,0.696,0.682,...,112.009,,False,0.394,11,False,,,Jodi Bolo ['Arijit Singh'],0


# Function Creation

### Random Forest Function for different years

In [128]:
#Random Forest Function for years
def random_forest_func(joined_df, year):
    #import
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from collections import Counter
    from imblearn.combine import SMOTEENN
    
    #Create dataframes based on year
    billboard_filtered_df_func = joined_df[(joined_df['billboard_year'] == year)]
    spotify_year_filter_df_func = joined_df[(joined_df['year'] <= year) & (joined_df['year'] >= (year -3)) & (joined_df['top_100'] == 0)]

    year_joined_df_func = pd.concat([billboard_filtered_df_func,spotify_year_filter_df_func], axis=0, ignore_index=True)
    
    #Create X and Y
    X_year_joined = year_joined_df_func[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

    y = year_joined_df_func['top_100']

    #SCALE DATA
    data_scaler = StandardScaler()
    X_scaled = data_scaler.fit_transform(X_year_joined)
    current_data_df= pd.DataFrame(X_scaled, columns = X_year_joined.columns)

    #SPLIT INTO TRAINING AND TESTING
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
    smote_enn = SMOTEENN(random_state=0)

    #APPLY SMOTEEN SAMPLING
    X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
    #Train the Random Forest model
    # Create a random forest classifier.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
    from imblearn.metrics import classification_report_imbalanced

    rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

    # Fitting the model
    rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

    #Predict
    y_pred = rf_model.predict(X_test)
    balanced_accuracy_score(y_test, y_pred)

    # Calculating the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)

    # Create a DataFrame from the confusion matrix.
    cm_df = pd.DataFrame(
        cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

    # Calculating the accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_pred)

    # Displaying results of SMOTEEN Random Forest
    # return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
    print(f"Confusion Matrix {year}")
    display (cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report_imbalanced(y_test, y_pred))
    print(*X_year_joined.columns, sep =', ')
    print()
    print(f"Data was originally {Counter(y)} and was SMOTEEN sampled to {Counter(y_SMOTEEN)}.")
    print()
    #sort features by their importance.
    display(sorted(zip(rf_model.feature_importances_, X_year_joined.columns), reverse=True))

### Random Forest Function for years (pretty print)

In [129]:
#Random Forest Function for years
def random_forest_func_pprint(joined_df, year):
    #import
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from collections import Counter
    from imblearn.combine import SMOTEENN
    
    #Create dataframes based on year
    billboard_filtered_df_func = joined_df[(joined_df['billboard_year'] == year)]
    spotify_year_filter_df_func = joined_df[(joined_df['year'] <= year) & (joined_df['year'] >= (year -3)) & (joined_df['top_100'] == 0)]

    year_joined_df_func = pd.concat([billboard_filtered_df_func,spotify_year_filter_df_func], axis=0, ignore_index=True)
    
    #Create X and Y
    X_year_joined = year_joined_df_func[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

    y = year_joined_df_func['top_100']

    #SCALE DATA
    data_scaler = StandardScaler()
    X_scaled = data_scaler.fit_transform(X_year_joined)
    current_data_df= pd.DataFrame(X_scaled, columns = X_year_joined.columns)

    #SPLIT INTO TRAINING AND TESTING
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
    smote_enn = SMOTEENN(random_state=0)

    #APPLY SMOTEEN SAMPLING
    X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
    #Train the Random Forest model
    # Create a random forest classifier.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
    from imblearn.metrics import classification_report_imbalanced

    rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

    # Fitting the model
    rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

    #Predict
    y_pred = rf_model.predict(X_test)
    balanced_accuracy_score(y_test, y_pred)

    # Calculating the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)

    # Create a DataFrame from the confusion matrix.
    cm_df = pd.DataFrame(
        cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

    # Calculating the accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_pred)

    # Displaying results of SMOTEEN Random Forest
    # return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
    print ('\033[1m' + 'Results for ' + str(year))
    #print(f"Confusion Matrix {year}")
    display (cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report_imbalanced(y_test, y_pred))
#     print(*X_year_joined.columns, sep =', ')
#     print()
#     print(f"Data was originally {Counter(y)} and was SMOTEEN sampled to {Counter(y_SMOTEEN)}.")
#     print()
    #sort features by their importance.
   # display(sorted(zip(rf_model.feature_importances_, X_year_joined.columns), reverse=True))

# Testing

In [164]:
# Run function for different years
i = 1950
while i <= 2020:
    random_forest_func_pprint(joined_df, i)
    i +=1

[1mResults for 1950


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1878,11
Actual 1,0,5


Accuracy Score : 0.9970884065643197
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1889
          1       0.31      1.00      0.99      0.48      1.00      0.99         5

avg / total       1.00      0.99      1.00      1.00      1.00      0.99      1894

[1mResults for 1951


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1963,13
Actual 1,0,5


Accuracy Score : 0.9967105263157895
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1976
          1       0.28      1.00      0.99      0.43      1.00      0.99         5

avg / total       1.00      0.99      1.00      1.00      1.00      0.99      1981

[1mResults for 1952


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1996,7
Actual 1,0,4


Accuracy Score : 0.9982526210683974
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      1.00      1.00      1.00      1.00      2003
          1       0.36      1.00      1.00      0.53      1.00      1.00         4

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      2007

[1mResults for 1953


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1981,7
Actual 1,0,5


Accuracy Score : 0.9982394366197183
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      1.00      1.00      1.00      1.00      1988
          1       0.42      1.00      1.00      0.59      1.00      1.00         5

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      1993

[1mResults for 1954


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1980,8
Actual 1,0,5


Accuracy Score : 0.9979879275653923
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      1.00      1.00      1.00      1.00      1988
          1       0.38      1.00      1.00      0.56      1.00      1.00         5

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      1993

[1mResults for 1955


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1983,4
Actual 1,0,5


Accuracy Score : 0.9989934574735783
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      1.00      1.00      1.00      1.00      1987
          1       0.56      1.00      1.00      0.71      1.00      1.00         5

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      1992

[1mResults for 1956


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1960,29
Actual 1,0,19


Accuracy Score : 0.9927099044746104
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.98      1989
          1       0.40      1.00      0.99      0.57      0.99      0.99        19

avg / total       0.99      0.99      1.00      0.99      0.99      0.98      2008

[1mResults for 1957


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1961,26
Actual 1,0,20


Accuracy Score : 0.9934574735782586
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1987
          1       0.43      1.00      0.99      0.61      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      2007

[1mResults for 1958


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1951,33
Actual 1,0,22


Accuracy Score : 0.9916834677419355
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1984
          1       0.40      1.00      0.98      0.57      0.99      0.99        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      2006

[1mResults for 1959


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1926,38
Actual 1,0,22


Accuracy Score : 0.9903258655804481
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1964
          1       0.37      1.00      0.98      0.54      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1986

[1mResults for 1960


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1890,35
Actual 1,0,21


Accuracy Score : 0.990909090909091
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1925
          1       0.38      1.00      0.98      0.55      0.99      0.98        21

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1946

[1mResults for 1961


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1881,32
Actual 1,0,22


Accuracy Score : 0.9916361735493988
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1913
          1       0.41      1.00      0.98      0.58      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1935

[1mResults for 1962


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1844,39
Actual 1,0,23


Accuracy Score : 0.989644184811471
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1883
          1       0.37      1.00      0.98      0.54      0.99      0.98        23

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1906

[1mResults for 1963


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1845,51
Actual 1,0,22


Accuracy Score : 0.9865506329113924
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1896
          1       0.30      1.00      0.97      0.46      0.99      0.98        22

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1918

[1mResults for 1964


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1880,39
Actual 1,0,20


Accuracy Score : 0.9898384575299635
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1919
          1       0.34      1.00      0.98      0.51      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1939

[1mResults for 1965


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1885,43
Actual 1,1,18


Accuracy Score : 0.9625327582441581
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.96      0.93      1928
          1       0.30      0.95      0.98      0.45      0.96      0.92        19

avg / total       0.99      0.98      0.95      0.98      0.96      0.93      1947

[1mResults for 1966


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1916,34
Actual 1,0,21


Accuracy Score : 0.9912820512820513
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1950
          1       0.38      1.00      0.98      0.55      0.99      0.98        21

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1971

[1mResults for 1967


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1920,22
Actual 1,0,20


Accuracy Score : 0.994335736354274
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1942
          1       0.48      1.00      0.99      0.65      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1962

[1mResults for 1968


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1914,31
Actual 1,0,21


Accuracy Score : 0.9920308483290489
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1945
          1       0.40      1.00      0.98      0.58      0.99      0.99        21

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1966

[1mResults for 1969


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1921,29
Actual 1,0,21


Accuracy Score : 0.9925641025641025
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.98      1950
          1       0.42      1.00      0.99      0.59      0.99      0.99        21

avg / total       0.99      0.99      1.00      0.99      0.99      0.98      1971

[1mResults for 1970


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1905,39
Actual 1,0,22


Accuracy Score : 0.9899691358024691
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1944
          1       0.36      1.00      0.98      0.53      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1966

[1mResults for 1971


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1910,35
Actual 1,0,20


Accuracy Score : 0.9910025706940875
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1945
          1       0.36      1.00      0.98      0.53      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1965

[1mResults for 1972


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1920,25
Actual 1,0,22


Accuracy Score : 0.993573264781491
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1945
          1       0.47      1.00      0.99      0.64      0.99      0.99        22

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1967

[1mResults for 1973


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1912,26
Actual 1,0,20


Accuracy Score : 0.9932920536635708
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1938
          1       0.43      1.00      0.99      0.61      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1958

[1mResults for 1974


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1907,32
Actual 1,0,19


Accuracy Score : 0.9917483238782878
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1939
          1       0.37      1.00      0.98      0.54      0.99      0.99        19

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1958

[1mResults for 1975


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1901,34
Actual 1,0,22


Accuracy Score : 0.9912144702842377
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1935
          1       0.39      1.00      0.98      0.56      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1957

[1mResults for 1976


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1891,42
Actual 1,0,20


Accuracy Score : 0.9891360579410243
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1933
          1       0.32      1.00      0.98      0.49      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1953

[1mResults for 1977


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1907,29
Actual 1,0,20


Accuracy Score : 0.9925103305785123
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.98      1936
          1       0.41      1.00      0.99      0.58      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.98      1956

[1mResults for 1978


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1899,35
Actual 1,0,20


Accuracy Score : 0.9909513960703206
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1934
          1       0.36      1.00      0.98      0.53      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1954

[1mResults for 1979


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1913,25
Actual 1,0,20


Accuracy Score : 0.9935500515995872
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1938
          1       0.44      1.00      0.99      0.62      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1958

[1mResults for 1980


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1906,23
Actual 1,0,22


Accuracy Score : 0.9940383618455157
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1929
          1       0.49      1.00      0.99      0.66      0.99      0.99        22

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1951

[1mResults for 1981


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1890,37
Actual 1,0,20


Accuracy Score : 0.9903995848469123
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1927
          1       0.35      1.00      0.98      0.52      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1947

[1mResults for 1982


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1892,34
Actual 1,0,19


Accuracy Score : 0.9911734164070612
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1926
          1       0.36      1.00      0.98      0.53      0.99      0.98        19

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1945

[1mResults for 1983


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1867,49
Actual 1,1,21


Accuracy Score : 0.9644856709052951
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      0.95      0.99      0.96      0.93      1916
          1       0.30      0.95      0.97      0.46      0.96      0.93        22

avg / total       0.99      0.97      0.95      0.98      0.96      0.93      1938

[1mResults for 1984


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1885,39
Actual 1,1,19


Accuracy Score : 0.9648648648648648
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.96      0.93      1924
          1       0.33      0.95      0.98      0.49      0.96      0.93        20

avg / total       0.99      0.98      0.95      0.98      0.96      0.93      1944

[1mResults for 1985


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1873,53
Actual 1,0,22


Accuracy Score : 0.9862409138110073
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1926
          1       0.29      1.00      0.97      0.45      0.99      0.98        22

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1948

[1mResults for 1986


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1880,44
Actual 1,0,19


Accuracy Score : 0.9885654885654886
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1924
          1       0.30      1.00      0.98      0.46      0.99      0.98        19

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1943

[1mResults for 1987


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1872,36
Actual 1,0,22


Accuracy Score : 0.9905660377358491
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1908
          1       0.38      1.00      0.98      0.55      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1930

[1mResults for 1988


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1859,39
Actual 1,1,21


Accuracy Score : 0.9669987546699876
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.97      0.94      1898
          1       0.35      0.95      0.98      0.51      0.97      0.93        22

avg / total       0.99      0.98      0.95      0.98      0.97      0.94      1920

[1mResults for 1989


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1883,29
Actual 1,0,20


Accuracy Score : 0.9924163179916319
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1912
          1       0.41      1.00      0.98      0.58      0.99      0.99        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1932

[1mResults for 1990


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1892,30
Actual 1,0,10


Accuracy Score : 0.9921956295525494
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1922
          1       0.25      1.00      0.98      0.40      0.99      0.99        10

avg / total       1.00      0.98      1.00      0.99      0.99      0.98      1932

[1mResults for 1991


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1897,36
Actual 1,0,21


Accuracy Score : 0.9906880496637351
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1933
          1       0.37      1.00      0.98      0.54      0.99      0.98        21

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1954

[1mResults for 1992


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1905,43
Actual 1,0,22


Accuracy Score : 0.9889630390143738
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1948
          1       0.34      1.00      0.98      0.51      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1970

[1mResults for 1993


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1895,45
Actual 1,0,20


Accuracy Score : 0.9884020618556701
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1940
          1       0.31      1.00      0.98      0.47      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1960

[1mResults for 1994


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1899,32
Actual 1,0,22


Accuracy Score : 0.9917141377524599
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1931
          1       0.41      1.00      0.98      0.58      0.99      0.99        22

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1953

[1mResults for 1995


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1901,41
Actual 1,0,22


Accuracy Score : 0.9894438722966015
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1942
          1       0.35      1.00      0.98      0.52      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1964

[1mResults for 1996


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1904,31
Actual 1,0,23


Accuracy Score : 0.9919896640826873
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1935
          1       0.43      1.00      0.98      0.60      0.99      0.99        23

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1958

[1mResults for 1997


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1886,45
Actual 1,0,22


Accuracy Score : 0.9883480062143967
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1931
          1       0.33      1.00      0.98      0.49      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1953

[1mResults for 1998


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1901,30
Actual 1,0,20


Accuracy Score : 0.9922320041429311
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1931
          1       0.40      1.00      0.98      0.57      0.99      0.99        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1951

[1mResults for 1999


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1860,60
Actual 1,0,21


Accuracy Score : 0.984375
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.98      0.98      0.97      1920
          1       0.26      1.00      0.97      0.41      0.98      0.97        21

avg / total       0.99      0.97      1.00      0.98      0.98      0.97      1941

[1mResults for 2000


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1869,50
Actual 1,0,21


Accuracy Score : 0.9869723814486712
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1919
          1       0.30      1.00      0.97      0.46      0.99      0.98        21

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1940

[1mResults for 2001


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1881,44
Actual 1,0,21


Accuracy Score : 0.9885714285714285
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1925
          1       0.32      1.00      0.98      0.49      0.99      0.98        21

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1946

[1mResults for 2002


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1893,40
Actual 1,0,20


Accuracy Score : 0.9896533885152612
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1933
          1       0.33      1.00      0.98      0.50      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1953

[1mResults for 2003


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1898,35
Actual 1,0,20


Accuracy Score : 0.9909467149508535
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1933
          1       0.36      1.00      0.98      0.53      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1953

[1mResults for 2004


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1893,42
Actual 1,1,20


Accuracy Score : 0.9653377630121815
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.97      0.93      1935
          1       0.32      0.95      0.98      0.48      0.97      0.93        21

avg / total       0.99      0.98      0.95      0.98      0.97      0.93      1956

[1mResults for 2005


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1870,49
Actual 1,0,20


Accuracy Score : 0.9872329338196977
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1919
          1       0.29      1.00      0.97      0.45      0.99      0.98        20

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1939

[1mResults for 2006


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1866,42
Actual 1,1,21


Accuracy Score : 0.9662664379645511
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.97      0.94      1908
          1       0.33      0.95      0.98      0.49      0.97      0.93        22

avg / total       0.99      0.98      0.95      0.98      0.97      0.94      1930

[1mResults for 2007


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1873,37
Actual 1,0,20


Accuracy Score : 0.9903141361256544
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1910
          1       0.35      1.00      0.98      0.52      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1930

[1mResults for 2008


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1845,63
Actual 1,0,20


Accuracy Score : 0.9834905660377358
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.98      0.98      0.96      1908
          1       0.24      1.00      0.97      0.39      0.98      0.97        20

avg / total       0.99      0.97      1.00      0.98      0.98      0.96      1928

[1mResults for 2009


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1880,37
Actual 1,1,18


Accuracy Score : 0.9640337149603273
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.96      0.93      1917
          1       0.33      0.95      0.98      0.49      0.96      0.93        19

avg / total       0.99      0.98      0.95      0.99      0.96      0.93      1936

[1mResults for 2010


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1888,47
Actual 1,1,19


Accuracy Score : 0.9628552971576227
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.96      0.93      1935
          1       0.29      0.95      0.98      0.44      0.96      0.92        20

avg / total       0.99      0.98      0.95      0.98      0.96      0.93      1955

[1mResults for 2011


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1898,52
Actual 1,0,21


Accuracy Score : 0.9866666666666667
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1950
          1       0.29      1.00      0.97      0.45      0.99      0.98        21

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1971

[1mResults for 2012


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1887,55
Actual 1,0,20


Accuracy Score : 0.9858393408856849
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1942
          1       0.27      1.00      0.97      0.42      0.99      0.97        20

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1962

[1mResults for 2013


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1895,48
Actual 1,0,22


Accuracy Score : 0.9876479670612455
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1943
          1       0.31      1.00      0.98      0.48      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1965

[1mResults for 2014


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1904,27
Actual 1,1,20


Accuracy Score : 0.9691992799191143
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      0.95      0.99      0.97      0.94      1931
          1       0.43      0.95      0.99      0.59      0.97      0.94        21

avg / total       0.99      0.99      0.95      0.99      0.97      0.94      1952

[1mResults for 2015


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1862,53
Actual 1,0,20


Accuracy Score : 0.9861618798955614
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.99      0.99      0.97      1915
          1       0.27      1.00      0.97      0.43      0.99      0.98        20

avg / total       0.99      0.97      1.00      0.98      0.99      0.97      1935

[1mResults for 2016


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1843,34
Actual 1,0,20


Accuracy Score : 0.9909429941395844
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1877
          1       0.37      1.00      0.98      0.54      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1897

[1mResults for 2017


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1831,41
Actual 1,0,22


Accuracy Score : 0.9890491452991452
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1872
          1       0.35      1.00      0.98      0.52      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.98      1894

[1mResults for 2018


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1845,44
Actual 1,0,22


Accuracy Score : 0.988353626257279
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1889
          1       0.33      1.00      0.98      0.50      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1911

[1mResults for 2019


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1836,44
Actual 1,0,22


Accuracy Score : 0.9882978723404255
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.97      1880
          1       0.33      1.00      0.98      0.50      0.99      0.98        22

avg / total       0.99      0.98      1.00      0.98      0.99      0.97      1902

[1mResults for 2020


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1903,40
Actual 1,1,21


Accuracy Score : 0.9669793664904318
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      0.95      0.99      0.97      0.94      1943
          1       0.34      0.95      0.98      0.51      0.97      0.93        22

avg / total       0.99      0.98      0.95      0.98      0.97      0.94      1965



# Create X and Y

In [85]:
#Remove Popularity, explicit, mode_value and filter billboard values for year
i = 1970
billboard_filtered_df = joined_df[(joined_df['billboard_year'] <= i) & (billboard_filtered_df['billboard_year'] >= (i-3))]
spotify_year_filter_df = joined_df[(joined_df['year'] <=i) & (joined_df['year'] >= (i-3)) & (joined_df['top_100'] == 0)]

year_joined_df = pd.concat([billboard_filtered_df,spotify_year_filter_df], axis=0, ignore_index=True)



X_billboard_filtered_df = year_joined_df[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

y = year_joined_df['top_100']

In [74]:
billboard_filtered_df

Unnamed: 0,track_name,album,artist_name,track_id,year,duration_ms,popularity,danceability,acoustic,energy,...,tempo,time_signature,explicit,valence,key_value,mode_value,billboard_year,index,song_and_artist,top_100
1270,To Sir With Love (From 'To Sir With Love') - 2...,To Sir With Love (The Complete Mickie Most Rec...,Lulu,53zi1BLGIcIgpDUBCG8R71,2011,167666.0,14,0.548,0.3570,0.398,...,97.551,4.0,False,0.280,4,1,1967.0,0.0,,1
1271,The Letter - Mono Single Version,The Letter/Neon Rainbow,The Box Tops,3j4QPgiDGGipjfLgtikzrL,1905,116173.0,33,0.668,0.0613,0.534,...,138.893,4.0,False,0.930,9,0,1967.0,1.0,,1
1272,Ode to Billie Joe,The Most Sampled Jazz Songs,Various Artists,6GG3JVtdpXTpCnzzEQSUXb,2013,384533.0,16,0.681,0.6560,0.167,...,124.169,4.0,False,0.316,0,1,1967.0,2.0,,1
1273,Windy - Remastered Version,Insight Out,The Association,1lHiKWnlJtjzA4vHnjAr39,1905,176600.0,47,0.676,0.3630,0.681,...,132.953,4.0,False,0.901,5,1,1967.0,3.0,,1
1274,I'm a Believer - 2006 Remaster,More of The Monkees (Deluxe Edition),The Monkees,3G7tRC24Uh09Hmp1KZ7LQ2,1967,167373.0,73,0.526,0.7070,0.775,...,80.106,4.0,False,0.962,0,1,1967.0,4.0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1661,Look What They've Done To My Song Ma,The New Seekers - Their Very Best,The New Seekers,39xD0YKVFAxYxq0Pf4v1Cb,2008,192413.0,21,0.542,0.6860,0.414,...,140.824,4.0,False,0.438,9,1,1970.0,95.0,,1
1662,Walk A Mile In My Shoes,Don't It Make You Want To Go Home,Joe South,0UI5wNYgWqgIuKHh5zwl2i,1968,223373.0,14,0.549,0.5800,0.430,...,113.847,4.0,False,0.715,5,1,1970.0,96.0,,1
1663,The Thrill Is Gone,Completely Well,B.B. King,4NQfrmGs9iQXVQI9IpRhjM,1969,324733.0,62,0.547,0.3440,0.460,...,90.768,4.0,False,0.543,11,0,1970.0,97.0,,1
1664,It's Only Make Believe,Glen Campbell Goodtime Album,Glen Campbell,2eot2orcEQn5wR11DSVcbO,1970,147480.0,42,0.448,0.1910,0.558,...,112.742,3.0,False,0.537,0,1,1970.0,98.0,,1


In [82]:
spotify_year_filter_df

Unnamed: 0,track_name,album,artist_name,track_id,year,duration_ms,popularity,danceability,acoustic,energy,...,tempo,time_signature,explicit,valence,key_value,mode_value,billboard_year,index,song_and_artist,top_100
6776,Here Comes The Sun - Remastered 2009,,['The Beatles'],6dGnYIeXmHdcikdzNNDMm2,1969,185733.0,82,0.557,0.03390,0.540,...,129.171,,False,0.394,9,1,,,Here Comes The Sun - Remastered 2009 ['The Bea...,0
6848,Fortunate Son,,['Creedence Clearwater Revival'],4BP3uh0hFLFRb5cjsgLqDh,1969,140773.0,81,0.640,0.20100,0.663,...,132.770,,False,0.663,0,1,,,Fortunate Son ['Creedence Clearwater Revival'],0
7186,All Along the Watchtower,,['Jimi Hendrix'],2aoo2jlRnM3A0NyLQqMN2f,1968,240800.0,78,0.438,0.00255,0.805,...,113.253,,False,0.564,8,1,,,All Along the Watchtower ['Jimi Hendrix'],0
7187,Sweet Caroline,,['Neil Diamond'],62AuGbAkt8Ox2IrFFb8GKV,1969,203573.0,78,0.529,0.61100,0.127,...,63.050,,False,0.578,11,1,,,Sweet Caroline ['Neil Diamond'],0
7369,Whole Lotta Love - 1990 Remaster,,['Led Zeppelin'],0hCB0YR03f6AmQaHbwWDe8,1969,333893.0,77,0.412,0.04840,0.902,...,89.740,,False,0.422,9,1,,,Whole Lotta Love - 1990 Remaster ['Led Zeppelin'],0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128208,Silas Stingy,,['The Who'],0PUnUGPUbzr15lz8BwuNTi,1967,187240.0,14,0.613,0.23600,0.434,...,129.445,,False,0.288,2,0,,,Silas Stingy ['The Who'],0
128209,Laundromat Blues,,['Albert King'],23mic51yKD9gVlAqvlcCMA,1967,201467.0,14,0.447,0.19000,0.420,...,202.570,,False,0.657,3,1,,,Laundromat Blues ['Albert King'],0
129455,You're A Big Boy Now,,"""[""""The Lovin' Spoonful""""]""",0JrDSbL4lnB0p2C6h2crVN,1967,152347.0,13,0.650,0.51700,0.623,...,133.677,,False,0.724,3,0,,,"""You're A Big Boy Now [""""The Lovin' Spoonful""""]""",0
129456,Apartment #9,,['Tammy Wynette'],1JzSRo8ZsUgNzrb6qtySG8,1967,175253.0,13,0.591,0.78200,0.176,...,78.786,,False,0.681,5,1,,,Apartment #9 ['Tammy Wynette'],0


In [91]:
random_forest_func(joined_df, 1970)

UnboundLocalError: local variable 'billboard_filtered_df' referenced before assignment

In [221]:
###Testing to print confusion matrix

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from collections import Counter
from imblearn.combine import SMOTEENN

year = 1999
#Create dataframes based on year
billboard_filtered_df_func = joined_df[(joined_df['billboard_year'] == year)]
spotify_year_filter_df_func = joined_df[(joined_df['year'] <= year) & (joined_df['year'] >= (year -3)) & (joined_df['top_100'] == 0)]

year_joined_df_func = pd.concat([billboard_filtered_df_func,spotify_year_filter_df_func], axis=0, ignore_index=True)

#Create X and Y
X_year_joined = year_joined_df_func[['valence',
   'acoustic', 'danceability', 'duration_ms', 'energy',
   'instrument', 'key_value', 'liveness', 'loudness',
   'speechiness', 'tempo']]

y = year_joined_df_func['top_100']

#SCALE DATA
data_scaler = StandardScaler()
X_scaled = data_scaler.fit_transform(X_year_joined)
current_data_df= pd.DataFrame(X_scaled, columns = X_year_joined.columns)

#SPLIT INTO TRAINING AND TESTING
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
smote_enn = SMOTEENN(random_state=0)

#APPLY SMOTEEN SAMPLING
X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
#Train the Random Forest model
# Create a random forest classifier.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
from imblearn.metrics import classification_report_imbalanced

rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

# Fitting the model
rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

#Predict
y_pred = rf_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

# Calculating the confusion matrix.
cm = confusion_matrix(y_test, y_pred)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

# Calculating the accuracy score.
acc_score = balanced_accuracy_score(y_test, y_pred)

# Displaying results of SMOTEEN Random Forest
# return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
print ('\033[1m' + 'Results for ' + str(year))
#print(f"Confusion Matrix {year}")
display (cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report_imbalanced(y_test, y_pred))




[1mResults for 1999


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1860,60
Actual 1,0,21


Accuracy Score : 0.984375
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.97      1.00      0.98      0.98      0.97      1920
          1       0.26      1.00      0.97      0.41      0.98      0.97        21

avg / total       0.99      0.97      1.00      0.98      0.98      0.97      1941



# Single Input Testing

In [254]:
single_test = X_test[1827]
single_test_reshape = single_test.reshape(1,-1)
df = rf_model.predict(single_test_reshape)
#pd.DataFrame(y_test, columns = ['y'])

df = pd.DataFrame(df, columns = {'X'})

df.loc[df['X']]


Unnamed: 0,X
0,0


In [223]:
xy = df.loc[df[] == 1]
pd.DataFrame(X_test)

SyntaxError: invalid syntax (<ipython-input-223-324925dfb0c7>, line 1)

In [224]:
df

Unnamed: 0,X
0,1


In [233]:
X_test[1826]

single_test = X_test[1827]
single_test_reshape = single_test.reshape(1,-1)

In [235]:
single_test_reshape

array([[ 0.65464846,  0.19177695, -0.56840791,  0.58633489,  0.73038939,
        -0.40747258, -0.93086039, -0.19221485,  0.31160685, -0.44752916,
         0.90937981]])

In [251]:
import numpy as np
sample_array =  np.array([ 0.1,  0.19177695, -0.56840791,  0.58633489,  0.73038939,
        -0.40747258, -0.93086039, -0.23,  0.31160685, -0.44752916,
         0.90937981])
sample_array_r = sample_array.reshape(1, -1)