# Import Data

In [26]:
from sqlalchemy import create_engine
import pandas as pd
import psycopg2
db_password = 'Snakefarm'

#Initialize DB string
db_string = f"postgres://postgres:{db_password}@127.0.0.1:5432/spotify_capstone"

#Create database engine
engine = create_engine(db_string)

# Connection parameters, yours will be different
param_dic = {
    "host"      : "localhost",
    "database"  : "spotify_capstone",
    "user"      : "postgres",
    "password"  : "snakefarm"
}
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn

def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Tranform a SELECT query into a pandas dataframe
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        cursor.close()
        return 1
    
    # Naturally we get a list of tupples
    tupples = cursor.fetchall()
    cursor.close()
    
    # We just need to turn it into a pandas dataframe
    df = pd.DataFrame(tupples, columns=column_names)
    return df


# Connect to the database
conn = connect(param_dic)
column_names = ['track_name', 'artist_name', 'song_and_artist', 'track_id', 'year', 'valence', 'acoustic', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrument', 'key_value', 'liveness', 'loudness', 'mode_value', 'popularity', 'speechiness', 'tempo']
# Execute the "SELECT *" query
spotify_df = postgresql_to_dataframe(conn, "select * from  spotify_values", column_names)


#Read in new pitchfork DF
conn = connect(param_dic)
column_names_bb = ['track_name', 'album', 'artist_name', 'track_id', 'year', 'duration_ms',
       'popularity', 'danceability', 'acoustic', 'energy', 'instrument',
       'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature',
       'explicit', 'valence', 'key_value', 'mode_value', 'billboard_year',
       'index']
# Execute the "SELECT *" query for pitchfork data

billboard_master_df =postgresql_to_dataframe(conn, "select * from pitchfork_master", column_names_bb)

#Create list of track_ids from billboard
billboard_master_df_id_list = billboard_master_df['track_id'].tolist()

# Filter out billboard songs in spotify_df
inverse_boolean_series = ~spotify_df.track_id.isin(billboard_master_df_id_list)
spotify_filtered_df = spotify_df[inverse_boolean_series]
spotify_filtered_df.head()

#Join billboard and filtered Spotify DF
joined_df = pd.concat([billboard_master_df,spotify_filtered_df], axis=0, ignore_index=True)

#Add billboard top 100 column to joined df
joined_df['top_100'] = 0

for i, track_id in joined_df.track_id.iteritems():
    if track_id in billboard_master_df.track_id.values:
        joined_df['top_100'][i]= 1
        
joined_df['top_100'].value_counts()

joined_df


Connecting to the PostgreSQL database...
Connection successful
Connecting to the PostgreSQL database...
Connection successful


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_df['top_100'][i]= 1


Unnamed: 0,track_name,album,artist_name,track_id,year,duration_ms,popularity,danceability,acoustic,energy,...,tempo,time_signature,explicit,valence,key_value,mode_value,billboard_year,index,song_and_artist,top_100
0,Hey Ya!,NOW #1's,Various Artists,5dBdCRTTkibsoj93j1PLvi,2006,233453,0,0.735,0.0752,0.969,...,79.522,4,False,0.966,0,True,2003.0,0.0,,1
1,Crazy In Love (feat. Jay-Z),Dangerously In Love,Beyoncé,5IVuqXILoxVWvWEPm82Jxr,2003,236133,78,0.646,0.00249,0.77,...,99.165,4,False,0.681,2,False,2003.0,1.0,,1
2,Cry Me a River,Justified,Justin Timberlake,7Lf7oSEVdzZqTA0kEDSlS5,2002,288333,76,0.624,0.575,0.653,...,73.884,4,False,0.565,8,False,2003.0,2.0,,1
3,House Of Jealous Lovers,House Of Jealous Lovers,The Rapture,7lsMHdLHRRkGElzVPn91ZW,2002,306120,25,0.733,0.000428,0.693,...,130.038,4,False,0.827,1,True,2003.0,3.0,,1
4,Move Your Feet,D-D-Don't Don't Stop The Beat,Junior Senior,2XhMPOTay1bIxYoWYQ5QzF,2002,181826,0,0.747,0.046,0.904,...,118.877,4,False,0.846,9,True,2003.0,4.0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171916,"""Der Rosenkavalier* Op.59 / Act 3: """"Zur Stell...",,['Richard Strauss'* 'Alfred Poell'* 'Ludwig We...,0yRjRgvO8kR6E9fehn07tE,1954,308600,0,0.424,0.976,0.448,...,82.35,,False,0.578,5,True,,,"""Der Rosenkavalier* Op.59 / Act 3: """"Zur Stell...",0
171917,Jacôk - Mountain Dancde,,['Krosno Ensemble'],0yVOxC0rsuYapJh7NkMgkX,1954,80827,0,0.462,0.985,0.0949,...,72.953,,False,0.96,2,True,,,Jacôk - Mountain Dancde ['Krosno Ensemble'],0
171918,Easter Hymn,,['Girolamo Cavazzoni'* 'Flor Peeters'],0yZj9jxtCYdzkDBX6LGmrL,1954,138427,0,0.138,0.42,0.0161,...,70.063,,False,0.439,0,False,,,Easter Hymn ['Girolamo Cavazzoni'* 'Flor Peete...,0
171919,Jodi Bolo,,['Arijit Singh'],5wS1sJr2rzh9AKYFpkqqnA,2020,272562,0,0.42,0.696,0.682,...,112.009,,False,0.394,11,False,,,Jodi Bolo ['Arijit Singh'],0


# Function Creation


### Random Forest Function for different decades

In [23]:
#Random Forest Function for decades
import pandas as pd
accuracy_results_df_decade = pd.DataFrame()
def random_forest_func_pprint(joined_df, year):
    #import
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from collections import Counter
    from imblearn.combine import SMOTEENN
    from sklearn.metrics import classification_report
    import pandas as pd
    
    #Create dataframes based on year
    billboard_filtered_df_func = joined_df[(joined_df['billboard_year'] <= (year + 9)) & (joined_df['billboard_year']>= year)]
    spotify_year_filter_df_func = joined_df[(joined_df['year'] <= (year + 9)) & (joined_df['year'] >= (year -3)) & (joined_df['top_100'] == 0)]

    year_joined_df_func = pd.concat([billboard_filtered_df_func,spotify_year_filter_df_func], axis=0, ignore_index=True)
    
    #Create X and Y
    X_year_joined = year_joined_df_func[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

    y = year_joined_df_func['top_100']

    #SCALE DATA
    data_scaler = StandardScaler()
    X_scaled = data_scaler.fit_transform(X_year_joined)
    current_data_df= pd.DataFrame(X_scaled, columns = X_year_joined.columns)

    #SPLIT INTO TRAINING AND TESTING
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
    smote_enn = SMOTEENN(random_state=0)

    #APPLY SMOTEEN SAMPLING
    X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
    #Train the Random Forest model
    # Create a random forest classifier.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
    from imblearn.metrics import classification_report_imbalanced

    rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

    # Fitting the model
    rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

    #Predict
    y_pred = rf_model.predict(X_test)
    balanced_accuracy_score(y_test, y_pred)

    # Calculating the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)

    # Create a DataFrame from the confusion matrix.
    cm_df = pd.DataFrame(
        cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

    # Calculating the accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_pred)

    # Displaying results of SMOTEEN Random Forest
    # return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
    print ('\033[1m' + 'Results for ' + str(year) + 's')
    #print(f"Confusion Matrix {year}")
    display (cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report_imbalanced(y_test, y_pred))
#     print(*X_year_joined.columns, sep =', ')
#     print()
#     print(f"Data was originally {Counter(y)} and was SMOTEEN sampled to {Counter(y_SMOTEEN)}.")
#     print()
    #sort features by their importance.
   # display(sorted(zip(rf_model.feature_importances_, X_year_joined.columns), reverse=True))

    # Put results into dataframe
    report_dict = classification_report(y_test, y_pred, output_dict=True)
    results_df = pd.DataFrame(report_dict).transpose()
    results_df['Decade'] = year
    
    global accuracy_results_df_decade
    accuracy_results_df_decade = accuracy_results_df_decade.append(results_df)

### Random Forest Function all years

In [22]:
#Random Forest Function
import pandas as pd
accuracy_results_df_all_years = pd.DataFrame()
def random_forest_func_all_years(data_to_scale):
    #import
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from collections import Counter
    from imblearn.combine import SMOTEENN
    from sklearn.metrics import classification_report
    import pandas as pd

    #SCALE DATA
    data_scaler = StandardScaler()
    X_scaled = data_scaler.fit_transform(data_to_scale)
    current_data_df= pd.DataFrame(X_scaled, columns = data_to_scale.columns)

    #SPLIT INTO TRAINING AND TESTING
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
    smote_enn = SMOTEENN(random_state=0)

    #APPLY SMOTEEN SAMPLING
    X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
    #Train the Random Forest model
    # Create a random forest classifier.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
    from imblearn.metrics import classification_report_imbalanced

    rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

    # Fitting the model
    rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

    #Predict
    y_pred = rf_model.predict(X_test)
    balanced_accuracy_score(y_test, y_pred)

    # Calculating the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)

    # Create a DataFrame from the confusion matrix.
    cm_df = pd.DataFrame(
        cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

    # Calculating the accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_pred)

    # Displaying results of SMOTEEN Random Forest
    # return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
    print("Confusion Matrix")
    display (cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report_imbalanced(y_test, y_pred))
    print(*data_to_scale.columns, sep =', ')
    print()
    print(f"Data was originally {Counter(y)} and was SMOTEEN sampled to {Counter(y_SMOTEEN)}.")
    print()
    #sort features by their importance.
    display(sorted(zip(rf_model.feature_importances_, data_to_scale.columns), reverse=True))
    
    ## Put results into dataframe
    report_dict = classification_report(y_test, y_pred, output_dict=True)
    results_df = pd.DataFrame(report_dict).transpose()
    results_df['Year'] = '1950-2020'
    
    global accuracy_results_df_all_years
    
    accuracy_results_df_all_years = accuracy_results_df_all_years.append(results_df)

In [20]:
#Random Forest Function for single year
import pandas as pd
accuracy_results_df_single_year = pd.DataFrame()
def random_forest_func_single_year(joined_df, year):
    #import
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from collections import Counter
    from imblearn.combine import SMOTEENN
    from sklearn.metrics import classification_report
    import pandas as pd
    
    #Create dataframes based on year
    billboard_filtered_df_func = joined_df[(joined_df['billboard_year'] == year)]
    spotify_year_filter_df_func = joined_df[(joined_df['year'] <= year) & (joined_df['year'] >= (year -3)) & (joined_df['top_100'] == 0)]

    year_joined_df_func = pd.concat([billboard_filtered_df_func,spotify_year_filter_df_func], axis=0, ignore_index=True)
    
    #Create X and Y
    X_year_joined = year_joined_df_func[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

    y = year_joined_df_func['top_100']

    #SCALE DATA
    data_scaler = StandardScaler()
    X_scaled = data_scaler.fit_transform(X_year_joined)
    current_data_df= pd.DataFrame(X_scaled, columns = X_year_joined.columns)

    #SPLIT INTO TRAINING AND TESTING
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, random_state=1)
    smote_enn = SMOTEENN(random_state=0)

    #APPLY SMOTEEN SAMPLING
    X_SMOTEEN, y_SMOTEEN = smote_enn.fit_resample(X_scaled, y)
    #Train the Random Forest model
    # Create a random forest classifier.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
    from imblearn.metrics import classification_report_imbalanced

    rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

    # Fitting the model
    rf_model = rf_model.fit(X_SMOTEEN, y_SMOTEEN)

    #Predict
    y_pred = rf_model.predict(X_test)
    balanced_accuracy_score(y_test, y_pred)

    # Calculating the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)

    # Create a DataFrame from the confusion matrix.
    cm_df = pd.DataFrame(
        cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

    # Calculating the accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_pred)

    # Displaying results of SMOTEEN Random Forest
    # return (f"Confusion Matrix {display(cm_df)} Accuracy Score : {acc_score} Classification Report {classification_report_imbalanced(y_test, y_pred)}" )
    print ('\033[1m' + 'Results for ' + str(year))
    #print(f"Confusion Matrix {year}")
    display (cm_df)
    print(f"Accuracy Score : {acc_score}")
    print("Classification Report")
    print(classification_report_imbalanced(y_test, y_pred))
    
#     print(*X_year_joined.columns, sep =', ')
#     print()
#     print(f"Data was originally {Counter(y)} and was SMOTEEN sampled to {Counter(y_SMOTEEN)}.")
#     print()
    #sort features by their importance.
   # display(sorted(zip(rf_model.feature_importances_, X_year_joined.columns), reverse=True))
    
    # Put results into dataframe
    report_dict = classification_report(y_test, y_pred, output_dict=True)
    results_df = pd.DataFrame(report_dict).transpose()
    results_df['Year'] = year
    
    global accuracy_results_df_single_year
    accuracy_results_df_single_year = accuracy_results_df_single_year.append(results_df)

# ML Results Model

In [None]:
# Run function for different decades
import pandas as pd
accuracy_results_df_decade = pd.DataFrame()
i = 1950
while i <= 2020:
    random_forest_func_pprint(joined_df, i)
    i +=10

In [24]:
# Run function for all years
import pandas as pd
accuracy_results_df_all_years = pd.DataFrame()

#Remove Popularity, explicit, mode_value
X_minus_pop_mode_explicit = joined_df[['valence',
       'acoustic', 'danceability', 'duration_ms', 'energy',
       'instrument', 'key_value', 'liveness', 'loudness',
       'speechiness', 'tempo']]

y = joined_df['top_100']

#Run function
random_forest_func_all_years(X_minus_pop_mode_explicit)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,42162,439
Actual 1,4,376


Accuracy Score : 0.9895843808954323
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      0.99      0.99      0.99      0.98     42601
          1       0.46      0.99      0.99      0.63      0.99      0.98       380

avg / total       1.00      0.99      0.99      0.99      0.99      0.98     42981

valence, acoustic, danceability, duration_ms, energy, instrument, key_value, liveness, loudness, speechiness, tempo

Data was originally Counter({0: 170394, 1: 1527}) and was SMOTEEN sampled to Counter({1: 169771, 0: 157093}).



[(0.18605986345008454, 'loudness'),
 (0.13444325493533255, 'acoustic'),
 (0.09900828087723701, 'instrument'),
 (0.09842017842090524, 'energy'),
 (0.09839180727720796, 'duration_ms'),
 (0.09446979622082731, 'valence'),
 (0.06752717845677988, 'speechiness'),
 (0.057540400857782364, 'danceability'),
 (0.057258806472415685, 'key_value'),
 (0.05613133967585932, 'liveness'),
 (0.05074909335556818, 'tempo')]

In [15]:
# Run function for individual years
import pandas as pd
accuracy_results_df_single_year = pd.DataFrame()
i = 2003
while i <= 2020:
    random_forest_func_single_year(joined_df, i)
    i +=1

[1mResults for 2003


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1965,14
Actual 1,0,10


Accuracy Score : 0.9964628600303184
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1979
          1       0.42      1.00      0.99      0.59      1.00      0.99        10

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      1989

[1mResults for 2004


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1970,10
Actual 1,0,9


Accuracy Score : 0.9974747474747474
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1980
          1       0.47      1.00      0.99      0.64      1.00      1.00         9

avg / total       1.00      0.99      1.00      1.00      1.00      0.99      1989

[1mResults for 2005


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1955,10
Actual 1,0,10


Accuracy Score : 0.9974554707379135
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1965
          1       0.50      1.00      0.99      0.67      1.00      1.00        10

avg / total       1.00      0.99      1.00      1.00      1.00      0.99      1975

[1mResults for 2006


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1938,17
Actual 1,0,19


Accuracy Score : 0.9956521739130435
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1955
          1       0.53      1.00      0.99      0.69      1.00      0.99        19

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      1974

[1mResults for 2007


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1928,24
Actual 1,1,17


Accuracy Score : 0.9660746812386156
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      0.94      0.99      0.97      0.94      1952
          1       0.41      0.94      0.99      0.58      0.97      0.93        18

avg / total       0.99      0.99      0.94      0.99      0.97      0.94      1970

[1mResults for 2008


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1914,37
Actual 1,0,20


Accuracy Score : 0.9905176832393645
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1951
          1       0.35      1.00      0.98      0.52      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1971

[1mResults for 2009


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1918,34
Actual 1,0,20


Accuracy Score : 0.9912909836065573
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1952
          1       0.37      1.00      0.98      0.54      0.99      0.98        20

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1972

[1mResults for 2010


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1940,27
Actual 1,0,20


Accuracy Score : 0.9931367564819522
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.98      1967
          1       0.43      1.00      0.99      0.60      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.98      1987

[1mResults for 2011


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1963,20
Actual 1,0,17


Accuracy Score : 0.9949571356530509
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1983
          1       0.46      1.00      0.99      0.63      0.99      0.99        17

avg / total       1.00      0.99      1.00      0.99      0.99      0.99      2000

[1mResults for 2012


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1947,31
Actual 1,0,19


Accuracy Score : 0.9921638018200203
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.98      1.00      0.99      0.99      0.98      1978
          1       0.38      1.00      0.98      0.55      0.99      0.99        19

avg / total       0.99      0.98      1.00      0.99      0.99      0.98      1997

[1mResults for 2013


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1968,12
Actual 1,0,18


Accuracy Score : 0.996969696969697
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1980
          1       0.60      1.00      0.99      0.75      1.00      0.99        18

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      1998

[1mResults for 2014


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1949,25
Actual 1,0,18


Accuracy Score : 0.9936676798378926
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1974
          1       0.42      1.00      0.99      0.59      0.99      0.99        18

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1992

[1mResults for 2015


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1936,27
Actual 1,0,19


Accuracy Score : 0.9931227712684667
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.98      1963
          1       0.41      1.00      0.99      0.58      0.99      0.99        19

avg / total       0.99      0.99      1.00      0.99      0.99      0.98      1982

[1mResults for 2016


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1900,22
Actual 1,1,17


Accuracy Score : 0.9664990172274252
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      0.94      0.99      0.97      0.94      1922
          1       0.44      0.94      0.99      0.60      0.97      0.93        18

avg / total       0.99      0.99      0.94      0.99      0.97      0.94      1940

[1mResults for 2017


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1907,15
Actual 1,0,20


Accuracy Score : 0.9960978147762747
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1922
          1       0.57      1.00      0.99      0.73      1.00      0.99        20

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      1942

[1mResults for 2018


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1921,24
Actual 1,0,20


Accuracy Score : 0.9938303341902314
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      0.99      0.99      0.99      1945
          1       0.45      1.00      0.99      0.62      0.99      0.99        20

avg / total       0.99      0.99      1.00      0.99      0.99      0.99      1965

[1mResults for 2019


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1921,16
Actual 1,0,20


Accuracy Score : 0.9958699019101704
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1937
          1       0.56      1.00      0.99      0.71      1.00      0.99        20

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      1957

[1mResults for 2020


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,1986,13
Actual 1,0,20


Accuracy Score : 0.9967483741870935
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.99      1.00      1.00      1.00      0.99      1999
          1       0.61      1.00      0.99      0.75      1.00      0.99        20

avg / total       1.00      0.99      1.00      0.99      1.00      0.99      2019



## Put results DFs into CSVs


In [16]:
#individual years
accuracy_results_df_single_year.to_csv('classification_report_single_year_pitchfork.csv')


In [25]:
#all years
accuracy_results_df_all_years.to_csv('classification_report_all_years_pitchfork.csv')

In [None]:
#by decade
accuracy_results_df_decade.to_csv('classification_report_decades_pitchfork.csv')

In [18]:
accuracy_results_df_all_years