In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
from dotenv import load_dotenv
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


In [2]:
# Load the data into a Pandas DataFrame .
artist_df = pd.read_csv("Resources/artist_df.csv") 

# Display sample data
artist_df.head(10)

Unnamed: 0,id,name,image_url,isni,code2,hometown_city,verified,current_city,sp_followers,sp_popularity,...,popularity,rank,change,velocity,streak,created_at,peak_date,peak_rank,time_on_chart,rankStats
0,2762,Taylor Swift,https://share.chartmetric.com/artists/299/172/...,0000000078519858,US,,True,,114465831.0,100.0,...,100,1,0.0,0.0,78,2024-07-01 08:58:49.864,2024-07-01,1,2273,"[{'date': '2024-06-18', 'rank': 1}, {'date': '..."
1,5596,Billie Eilish,https://i.scdn.co/image/ab67616100005174d8b998...,000000046748058X,US,Los Angeles,False,,95726393.0,94.0,...,94,2,0.0,0.0,78,2024-07-01 08:58:49.864,2019-06-28,1,2271,"[{'date': '2024-06-18', 'rank': 2}, {'date': '..."
2,3380,Drake,https://i.scdn.co/image/ab67616100005174429338...,000000012032246X,CA,Toronto,False,,90019984.0,94.0,...,94,3,0.0,0.0,78,2024-07-01 08:58:49.864,2021-11-16,1,2277,"[{'date': '2024-06-18', 'rank': 3}, {'date': '..."
3,3852,The Weeknd,https://i.scdn.co/image/ab6761610000e5eb214f3c...,0000000382556637,CA,,False,,85489663.0,93.0,...,93,4,0.0,0.0,78,2024-07-01 08:58:49.864,2023-03-22,1,2265,"[{'date': '2024-06-18', 'rank': 5}, {'date': '..."
4,214945,Bad Bunny,https://i.scdn.co/image/ab6761610000e5eb9ad50e...,0000000466373221,PR,,True,,82987624.0,94.0,...,93,5,0.0,0.0,78,2024-07-01 08:58:49.864,2023-03-20,1,2267,"[{'date': '2024-06-18', 'rank': 4}, {'date': '..."
5,3768,Kendrick Lamar,https://i.scdn.co/image/ab6761610000e5eb437b9e...,0000000362907030,US,Compton,False,,30374768.0,92.0,...,92,6,0.0,0.0,78,2024-07-01 08:58:49.864,2018-04-15,2,2261,"[{'date': '2024-06-18', 'rank': 6}, {'date': '..."
6,4215,Travis Scott,https://i.scdn.co/image/ab6761610000e5eb19c279...,0000000384091917,US,Houston,False,Houston,30348985.0,92.0,...,92,7,0.0,-0.142857,78,2024-07-01 08:58:49.864,2018-09-20,2,2273,"[{'date': '2024-06-18', 'rank': 7}, {'date': '..."
7,236,Eminem,https://i.scdn.co/image/ab6761610000e5eba00b11...,0000000368611919,US,St. Joseph,False,Detroit,86054535.0,91.0,...,91,8,0.0,0.142857,78,2024-07-01 08:58:49.864,2020-02-09,1,2274,"[{'date': '2024-06-18', 'rank': 9}, {'date': '..."
8,3963,Ariana Grande,https://i.scdn.co/image/ab6761610000e5eb40b5c0...,0000000059110272,US,,False,Boca Raton,97308371.0,91.0,...,91,9,-2.0,-0.142857,78,2024-07-01 08:58:49.864,2020-12-02,1,2267,"[{'date': '2024-06-18', 'rank': 10}, {'date': ..."
9,135326,Post Malone,https://i.scdn.co/image/ab6761610000e5ebe17c0a...,0000000463129709,US,,False,,44358034.0,90.0,...,90,10,1.0,0.142857,78,2024-07-01 08:58:49.864,2020-01-14,1,2269,"[{'date': '2024-06-18', 'rank': 8}, {'date': '..."


In [3]:
# Check for NaN values in the entire dataframe
nan_check = artist_df.isna().any().any()

if nan_check:
    print("There are NaN values in the dataframe.")
else:
    print("There are no NaN values in the dataframe.")

There are NaN values in the dataframe.


In [4]:
# Drop rows with null values
artist_df_without_na = artist_df.dropna().copy()


In [5]:
# Check for NaN values in the entire dataframe
nan_check = artist_df_without_na.isna().any().any()

if nan_check:
    print("There are NaN values in the dataframe.")
else:
    print("There are no NaN values in the dataframe.")

There are no NaN values in the dataframe.


In [6]:
artist_df_without_na.columns

Index(['id', 'name', 'image_url', 'isni', 'code2', 'hometown_city', 'verified',
       'current_city', 'sp_followers', 'sp_popularity', 'sp_monthly_listeners',
       'deezer_fans', 'cm_artist_rank', 'cm_artist_score', 'tags',
       'spotify_artist_ids', 'itunes_artist_ids', 'deezer_artist_ids',
       'amazon_artist_ids', 'timestp', 'popularity', 'rank', 'change',
       'velocity', 'streak', 'created_at', 'peak_date', 'peak_rank',
       'time_on_chart', 'rankStats'],
      dtype='object')

In [7]:
# Convert categorical columns to dummy variables
artist_df_without_na_dum = \
    pd.get_dummies(artist_df_without_na[['name', \
    'verified', 'tags', 'rankStats', \
        'timestp', 'created_at', 'peak_date']], drop_first=True, dtype=int)

artist_df_without_na_dum

Unnamed: 0,verified,name_Anitta,name_Arctic Monkeys,name_Aventura,name_Beyoncé,name_Britney Spears,name_Bruno Mars,name_Charli xcx,name_Chris Stapleton,name_Coldplay,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
6,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
11,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,False,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
16,False,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
23,True,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
27,False,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
38,False,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
43,False,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [None]:
# Drop the original columns from artist_df_without_na
artist_df_without_na.drop(columns= ['name', 'verified', 'tags', 'rankStats', 'timestp', 'created_at', 'peak_date'], inplace=True)

# Concatenate the dummy variables DataFrame with the modified original DataFrame
artist_df_with_dummies = pd.concat([artist_df_without_na, artist_df_without_na_dum], axis=1)

In [9]:
# Check the resulting DataFrame
artist_df_with_dummies.head(10)

Unnamed: 0,id,image_url,isni,code2,hometown_city,current_city,sp_followers,sp_popularity,sp_monthly_listeners,deezer_fans,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
6,4215,https://i.scdn.co/image/ab6761610000e5eb19c279...,0000000384091917,US,Houston,Houston,30348985.0,92.0,66674476.0,2473569.0,...,0,0,0,0,0,0,0,0,0,0
7,236,https://i.scdn.co/image/ab6761610000e5eba00b11...,0000000368611919,US,St. Joseph,Detroit,86054535.0,91.0,84566034.0,17361566.0,...,0,0,0,0,0,0,0,0,0,0
10,4550,https://i.scdn.co/image/ab6761610000e5ebe053b8...,0000000440350523,US,Los Angeles,Los Angeles,9788760.0,90.0,84128121.0,521.0,...,0,0,0,0,0,0,0,0,0,1
11,1845,https://i.scdn.co/image/ab6761610000e5eb6e835a...,0000000116557565,US,Atlanta,Atlanta,25824406.0,90.0,65903173.0,4173943.0,...,0,0,0,0,0,0,0,0,0,0
15,212715,https://i.scdn.co/image/ab6761610000e5ebcdb969...,0000000467316966,US,Knoxville,Nashville,9820711.0,90.0,44708353.0,151815.0,...,0,0,0,0,0,1,0,0,0,0
16,152776,https://i.scdn.co/image/ab6761610000e5ebe82c62...,0000000467116622,CO,Medellin,Medellin,11994451.0,90.0,43359471.0,720243.0,...,0,1,0,0,0,0,0,0,0,0
23,439,https://i.scdn.co/image/ab6761610000e5eb989ed0...,000000011551394X,GB,London,London,51165510.0,88.0,82248589.0,16955455.0,...,0,0,0,1,0,0,0,0,0,0
27,3501,https://i.scdn.co/image/ab6761610000e5ebc36dd9...,0000000117226071,US,Los Angeles,Los Angeles,56447529.0,88.0,74274248.0,11124510.0,...,0,0,0,1,0,0,0,0,0,0
38,2838,https://i.scdn.co/image/ab6761610000e5eb247f44...,0000000114914936,US,Houston,Houston,38040317.0,86.0,60782690.0,12070853.0,...,0,0,0,0,0,0,0,1,0,0
43,127583,https://i.scdn.co/image/ab6761610000e5ebcb6926...,0000000063128221,IN,Kolkata,Mumbai,33562867.0,86.0,36977121.0,56389.0,...,0,0,1,0,0,0,0,0,0,0


In [10]:
artist_df_with_dummies.columns

Index(['id', 'image_url', 'isni', 'code2', 'hometown_city', 'current_city',
       'sp_followers', 'sp_popularity', 'sp_monthly_listeners', 'deezer_fans',
       ...
       'peak_date_2022-05-22', 'peak_date_2023-01-07', 'peak_date_2023-01-08',
       'peak_date_2023-01-09', 'peak_date_2023-04-07', 'peak_date_2023-04-20',
       'peak_date_2023-04-30', 'peak_date_2024-04-07', 'peak_date_2024-06-22',
       'peak_date_2024-06-30'],
      dtype='object', length=145)

In [21]:
artist_df_to_use_for_model=artist_df_with_dummies.select_dtypes(include='number')
artist_df_to_use_for_model

Unnamed: 0,id,sp_followers,sp_popularity,sp_monthly_listeners,deezer_fans,cm_artist_rank,cm_artist_score,popularity,rank,change,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
6,4215,30348985.0,92.0,66674476.0,2473569.0,33.0,297372.0,92,7,0.0,...,0,0,0,0,0,0,0,0,0,0
7,236,86054535.0,91.0,84566034.0,17361566.0,3.0,551377.0,91,8,0.0,...,0,0,0,0,0,0,0,0,0,0
10,4550,9788760.0,90.0,84128121.0,521.0,20.0,337919.0,90,11,1.0,...,0,0,0,0,0,0,0,0,0,1
11,1845,25824406.0,90.0,65903173.0,4173943.0,23.0,318463.0,90,12,0.0,...,0,0,0,0,0,0,0,0,0,0
15,212715,9820711.0,90.0,44708353.0,151815.0,106.0,177489.0,90,16,1.0,...,0,0,0,0,0,1,0,0,0,0
16,152776,11994451.0,90.0,43359471.0,720243.0,58.0,234694.0,90,17,1.0,...,0,1,0,0,0,0,0,0,0,0
23,439,51165510.0,88.0,82248589.0,16955455.0,13.0,375207.0,88,24,-7.0,...,0,0,0,1,0,0,0,0,0,0
27,3501,56447529.0,88.0,74274248.0,11124510.0,19.0,355038.0,88,28,3.0,...,0,0,0,1,0,0,0,0,0,0
38,2838,38040317.0,86.0,60782690.0,12070853.0,15.0,367228.0,86,39,3.0,...,0,0,0,0,0,0,0,1,0,0
43,127583,33562867.0,86.0,36977121.0,56389.0,126.0,164531.0,86,44,3.0,...,0,0,1,0,0,0,0,0,0,0


In [22]:
# This is to get information on the newly preprocessed dataframe
artist_df_to_use_for_model.info()

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 6 to 198
Columns: 135 entries, id to peak_date_2024-06-30
dtypes: float64(8), int32(121), int64(6)
memory usage: 22.4 KB


In [23]:
# Setup X and y variables
X = artist_df_to_use_for_model.drop(columns=['sp_popularity', 'sp_followers', 'rank'])
y1 = artist_df_with_dummies['sp_popularity']
y2 = artist_df_with_dummies['sp_followers']
y3 = artist_df_with_dummies['rank']

In [24]:
X.head()

Unnamed: 0,id,sp_monthly_listeners,deezer_fans,cm_artist_rank,cm_artist_score,popularity,change,velocity,streak,peak_rank,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
6,4215,66674476.0,2473569.0,33.0,297372.0,92,0.0,-0.142857,78,2,...,0,0,0,0,0,0,0,0,0,0
7,236,84566034.0,17361566.0,3.0,551377.0,91,0.0,0.142857,78,1,...,0,0,0,0,0,0,0,0,0,0
10,4550,84128121.0,521.0,20.0,337919.0,90,1.0,-0.714286,78,10,...,0,0,0,0,0,0,0,0,0,1
11,1845,65903173.0,4173943.0,23.0,318463.0,90,0.0,0.142857,78,2,...,0,0,0,0,0,0,0,0,0,0
15,212715,44708353.0,151815.0,106.0,177489.0,90,1.0,0.428571,78,6,...,0,0,0,0,0,1,0,0,0,0


In [25]:
y1.head()

6     92.0
7     91.0
10    90.0
11    90.0
15    90.0
Name: sp_popularity, dtype: float64

In [26]:
y2.head()

6     30348985.0
7     86054535.0
10     9788760.0
11    25824406.0
15     9820711.0
Name: sp_followers, dtype: float64

In [27]:
y3.head()

6      7
7      8
10    11
11    12
15    16
Name: rank, dtype: int64

In [28]:
# Verify the info we now have after the changes affected above, with the info method
artist_df_to_use_for_model.info()

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 6 to 198
Columns: 135 entries, id to peak_date_2024-06-30
dtypes: float64(8), int32(121), int64(6)
memory usage: 22.4 KB


In [29]:
artist_df_to_use_for_model.describe()

Unnamed: 0,id,sp_followers,sp_popularity,sp_monthly_listeners,deezer_fans,cm_artist_rank,cm_artist_score,popularity,rank,change,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
count,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,...,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0,38.0
mean,49284.526316,21574870.0,84.052632,44126240.0,4870781.0,102.578947,218191.631579,83.947368,92.631579,2.710526,...,0.026316,0.078947,0.052632,0.131579,0.026316,0.026316,0.026316,0.026316,0.026316,0.052632
std,131703.044548,16430200.0,3.719542,17815160.0,4442484.0,76.191661,92739.691511,3.755697,61.475735,2.179857,...,0.162221,0.273276,0.226294,0.34257,0.162221,0.162221,0.162221,0.162221,0.162221,0.226294
min,22.0,3353182.0,79.0,16099210.0,1.0,3.0,94580.0,79.0,7.0,-7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,794.5,10855990.0,81.0,33674830.0,1341354.0,40.75,150975.5,81.0,44.75,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2906.0,15749770.0,84.0,38267940.0,4346604.0,90.5,194076.5,83.5,85.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,4542.5,27204600.0,86.0,54082900.0,6928186.0,146.75,267686.5,86.0,149.25,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,731530.0,86054540.0,92.0,84566030.0,17361570.0,327.0,551377.0,92.0,199.0,5.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [30]:
artist_df_to_use_for_model.dtypes

id                        int64
sp_followers            float64
sp_popularity           float64
sp_monthly_listeners    float64
deezer_fans             float64
                         ...   
peak_date_2023-04-20      int32
peak_date_2023-04-30      int32
peak_date_2024-04-07      int32
peak_date_2024-06-22      int32
peak_date_2024-06-30      int32
Length: 135, dtype: object

In [31]:
# Use train_test_split to separate the data
X_train, X_test, y1_train, y1_test = train_test_split(X, y1)
X_train, X_test, y2_train, y2_test = train_test_split(X, y2)
X_train, X_test, y3_train, y3_test = train_test_split(X, y3)

Model and Fit the Data to a Logistic Regression

In [32]:
# Create a `LogisticRegression` function and assign it 
# to a variable named `logistic_regression_model`.
logistic_regression_model = LogisticRegression()

In [33]:
X_train.head(10)

Unnamed: 0,id,sp_monthly_listeners,deezer_fans,cm_artist_rank,cm_artist_score,popularity,change,velocity,streak,peak_rank,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
7,236,84566034.0,17361566.0,3.0,551377.0,91,0.0,0.142857,78,1,...,0,0,0,0,0,0,0,0,0,0
38,2838,60782690.0,12070853.0,15.0,367228.0,86,3.0,0.428571,78,9,...,0,0,0,0,0,0,0,1,0,0
151,4397,16099211.0,1708221.0,225.0,117902.0,81,5.0,1.714286,78,72,...,0,0,0,0,0,0,1,0,0,0
198,152627,34739657.0,300484.0,174.0,137668.0,79,3.0,1.571429,78,14,...,1,0,0,0,0,0,0,0,0,0
51,3883,56701904.0,1.0,38.0,273410.0,85,3.0,0.857143,78,1,...,0,0,0,0,0,0,0,0,0,0
170,2974,33468573.0,6762895.0,78.0,208561.0,80,4.0,2.142857,78,20,...,0,0,0,0,0,0,0,0,0,0
179,22,25625183.0,3561514.0,274.0,106118.0,80,4.0,2.428571,78,74,...,0,0,0,1,0,0,0,0,0,0
23,439,82248589.0,16955455.0,13.0,375207.0,88,-7.0,-0.857143,78,4,...,0,0,0,1,0,0,0,0,0,0
11,1845,65903173.0,4173943.0,23.0,318463.0,90,0.0,0.142857,78,2,...,0,0,0,0,0,0,0,0,0,0
184,1464,20259985.0,1983348.0,247.0,112650.0,80,4.0,2.428571,78,118,...,0,0,0,0,0,0,0,0,0,0


In [34]:
X_test.head(10)

Unnamed: 0,id,sp_monthly_listeners,deezer_fans,cm_artist_rank,cm_artist_score,popularity,change,velocity,streak,peak_rank,...,peak_date_2022-05-22,peak_date_2023-01-07,peak_date_2023-01-08,peak_date_2023-01-09,peak_date_2023-04-07,peak_date_2023-04-20,peak_date_2023-04-30,peak_date_2024-04-07,peak_date_2024-06-22,peak_date_2024-06-30
59,731530,40742863.0,1246837.0,82.0,203894.0,85,3.0,1.285714,5,7,...,0,0,1,0,0,0,0,0,0,0
27,3501,74274248.0,11124510.0,19.0,355038.0,88,3.0,-0.428571,78,7,...,0,0,0,1,0,0,0,0,0,0
6,4215,66674476.0,2473569.0,33.0,297372.0,92,0.0,-0.142857,78,2,...,0,0,0,0,0,0,0,0,0,0
91,4018,32284127.0,4620913.0,164.0,141744.0,83,2.0,1.428571,78,36,...,0,0,0,0,0,0,0,0,0,0
54,1802,52311491.0,6919428.0,46.0,259891.0,85,3.0,-1.0,78,2,...,0,0,0,0,0,0,0,0,0,0
46,207759,27813833.0,254284.0,157.0,146366.0,86,3.0,0.428571,78,40,...,0,0,0,0,0,0,0,0,1,0
166,1450,38507518.0,6479336.0,65.0,224229.0,80,5.0,2.142857,78,13,...,0,0,0,0,0,0,0,0,0,0
52,217671,54673364.0,2120523.0,37.0,274265.0,85,3.0,0.714286,78,3,...,0,0,0,0,0,0,0,0,0,0
70,2289,49602842.0,4425179.0,151.0,148780.0,84,2.0,1.142857,78,10,...,0,0,0,1,0,0,0,0,0,0
173,4520,31720969.0,7102250.0,140.0,154598.0,80,4.0,2.142857,78,57,...,0,0,0,0,0,0,0,0,0,0


In [35]:
# This is to create the StandardScaler instance
scaler = StandardScaler()

In [36]:
# Fit the Standard Scaler with the training data
X_train_scaled_n_fitted=scaler.fit(X_train)
X_test_scaled_n_fitted=scaler.fit(X_test)

In [37]:
# Scale the training data with 'transform'
X_train_scaled_n_transformed = scaler.transform(X_train)
X_test_scaled_n_transformed = scaler.transform(X_test)

In [38]:
# Train a Logistic Regression model and print the model score based on the first target 'y1'
logistic_regression_model = LogisticRegression(random_state=1)
logistic_regression_model.fit(X_train, y1_train)
print(f"Training Data Score: {logistic_regression_model.score(X_train_scaled_n_transformed, y1_train)}")

Training Data Score: 0.10714285714285714


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [39]:
# Train a Logistic Regression model and print the model score based on the second target 'y2'
logistic_regression_model = LogisticRegression(random_state=3)
logistic_regression_model.fit(X_train, y2_train)
logistic_regression_model.fit(X_train, y2_train)
print(f"Training Data Score: {logistic_regression_model.score(X_train_scaled_n_transformed, y2_train)}")

Training Data Score: 0.07142857142857142


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [40]:
# Train a Logistic Regression model and print the model score based on the third target 'y3'
logistic_regression_model = LogisticRegression(random_state=13)
logistic_regression_model.fit(X_train, y3_train)
logistic_regression_model.fit(X_train, y3_train)
print(f"Training Data Score: {logistic_regression_model.score(X_train_scaled_n_transformed, y3_train)}")

Training Data Score: 0.07142857142857142


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [41]:
# Make and save testing predictions with the saved logistic regression model using the test data
testing_predictions = logistic_regression_model.predict(X_test_scaled_n_transformed)
# Review the predictions
testing_predictions



array([11, 58, 11, 58, 58, 11, 58, 11, 11, 58], dtype=int64)

In [42]:
# Calculate the accuracy score by evaluating `y1_test` vs. `testing_predictions`.
accuracy = accuracy_score(y1_test, testing_predictions)
print("Accuracy Score:", accuracy)

Accuracy Score: 0.0


In [43]:
# Calculate the accuracy score by evaluating `y2_test` vs. `testing_predictions`.
accuracy = accuracy_score(y2_test, testing_predictions)
print("Accuracy Score:", accuracy)

Accuracy Score: 0.0


In [44]:
# Calculate the accuracy score by evaluating `y3_test` vs. `testing_predictions`.
accuracy = accuracy_score(y3_test, testing_predictions)
print("Accuracy Score:", accuracy)

Accuracy Score: 0.0


Random Forest Classifier Model

In [None]:
# Load the data into a Pandas DataFrame .
artist_tracks_df = pd.read_csv("../Team_9/Team_9//Resources/artist_tracks_df.csv") #index_col="coin_id")

# Display sample data
artist_tracks_df.head(10)

In [None]:
# Load the data into a Pandas DataFrame .
tracks_df = pd.read_csv("../Team_9/Team_9//Resources/tracks_df.csv") #index_col="coin_id")

# Display sample data
tracks_df.head(10)

In [None]:
# Load the data into a Pandas DataFrame .
youtube_df = pd.read_csv("../Team_9/Team_9//Resources/youtube_df.csv") #index_col="coin_id")

# Display sample data
youtube_df.head(10)