<a href="https://www.kaggle.com/code/skwsampath/code-alpha?scriptVersionId=174879376" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings

# Suppress convergence and feature names warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Step 1: Data Collection
data = pd.read_csv("/kaggle/input/spotify-eda-recommendation-system/data_by_genres_o.csv")

# Step 2: Preprocessing
data.dropna(inplace=True)

# Step 3: Convert popularity into categories (discrete classes)
data['popularity_category'] = pd.cut(data['popularity'], bins=3, labels=['low', 'medium', 'high'])

# Step 4: Define features and target variable
features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
target = 'popularity_category'  # Adjusted target variable

# Step 5: Train-test split
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Step 6: Model Training
model = LogisticRegression(max_iter=100)
model.fit(X_train, y_train)

# Step 7: Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Step 8: Personalized Song Recommendation
def get_user_features(user_id):
    # Implement based on user data retrieval mechanism
    # For demonstration, return default user features
    return [0.6, 0.7, -8.0, 0.1, 0.3, 0.0, 0.2, 0.5, 120.0]

def recommend_songs(predicted_likelihood, genres_column):
    # Check if lengths match
    if len(predicted_likelihood) != len(genres_column):
        raise ValueError("Length of predicted_likelihood does not match length of genres_column")

    # Combine predicted_likelihood and genres_column into a DataFrame
    recommendation_df = pd.DataFrame({
        'likelihood': predicted_likelihood,
        'genres': genres_column
    })

    # Sort the DataFrame by likelihood in descending order
    recommendation_df = recommendation_df.sort_values(by='likelihood', ascending=False)

    # Return the top song from each genre as recommendations
    return recommendation_df['genres'].tolist()

# Step 9: Make personalized song recommendation
# Make personalized song recommendation
user_features = get_user_features("user123")  # Assuming user ID is known
predicted_likelihoods = model.predict_proba([user_features])[0]

# Recommend songs
recommended_songs = recommend_songs(predicted_likelihoods, data['genres'].head(len(predicted_likelihoods)))
print("Recommended Songs by Genre:", recommended_songs)
print("Shape of user_features:", len(user_features))


Accuracy: 0.6504201680672269
Recommended Songs by Genre: ['8-bit', '21st century classical', '432hz']
Shape of user_features: 9
