## 1. Initial Adjustments

### 1.1 Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import statsmodels.api as sm
import networkx as nx
from textblob import TextBlob

### 1.2 Loading Data

In [None]:
import pandas as pd

# Load the CSV file, decoding it explicitly as ISO-8859-1 (Latin-1)
# NOTE(review): presumably the file is not valid UTF-8 — confirm before changing the encoding
df_spotify = pd.read_csv('/kaggle/input/top-spotify-songs-2023/spotify-2023.csv', encoding='ISO-8859-1')

# View the first few rows of the DataFrame
df_spotify.head()

### 1.3 Initial Definitions

In [None]:
# Shared palette passed as `color=` to the bar charts in this notebook
colors = [
    'skyblue',
    'lightcoral',
    'lightgreen',
    'gold',
    'lightsalmon',
    'lightseagreen',
    'lightsteelblue',
    'palevioletred',
    'lightcyan',
    'lightpink',
]

## 2. Initial Analysis

In [None]:
# Statistical summary restricted to the numeric columns (include='number')
numeric_summary = df_spotify.describe(include='number')

# Bare expression so the notebook renders the summary table as the cell output
numeric_summary

In [None]:
# Print the DataFrame overview: column names, non-null counts and dtypes
df_spotify.info()

In [None]:
# True when at least one row is flagged as a duplicate (all columns compared)
are_there_duplicates = df_spotify.duplicated().any()

# Report the outcome
if not are_there_duplicates:
    print("There are no duplicated rows in the DataFrame.")
else:
    print("There are duplicated rows in the DataFrame.")

## 3. Artist 

### 3.1 Artists who have released solo music

Identify and visualize top artists who have released music as solo artists

In [None]:
# Count the songs credited to each solo artist (rows where artist_count is 1).
# The name is deliberately NOT split on commas: a solo row holds a single
# artist, so splitting could only break up a name that itself contains a
# comma. Surrounding whitespace is stripped so the keys compare cleanly.
solo_artist_counts = (
    df_spotify.loc[df_spotify['artist_count'] == 1, 'artist(s)_name']
    .str.strip()
    .value_counts()
)

# value_counts() already orders by count, descending, so the first 25 entries
# are the top 25 and no re-sort is needed.
# (The variable keeps its historical "top_10" name; it holds 25 artists.)
top_10_solo_artists = solo_artist_counts.head(25)

# Plot a bar chart for the top 25 solo artists using the custom palette
plt.figure(figsize=(12, 6))
top_10_solo_artists.plot(kind='bar', color=colors)
plt.xlabel('Artists')
plt.ylabel('Number of Songs')
plt.title('Top 25 Solo Artists with the Most Songs')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
## Check the Artist

# Define the artist name you want to search for
artist_name_to_search = "Taylor Swift" # Change the Artist Name

# Keep solo songs (artist_count == 1) whose artist(s)_name contains the search
# text, ignoring case. regex=False makes the match literal, so names containing
# regex metacharacters (e.g. "$", "(", "+") are searched exactly as typed.
matching_songs = df_spotify[(df_spotify['artist(s)_name'].str.contains(artist_name_to_search, case=False, na=False, regex=False)) &
                            (df_spotify['artist_count'] == 1)]

# Sort the matching songs by track name in alphabetical order
matching_songs = matching_songs.sort_values(by='track_name')

# Display the number of matching songs
print(f"{artist_name_to_search} has song(s): {len(matching_songs)}")

# Display the names of the matching songs in alphabetical order
print("\nSong Names:")
for song_name in matching_songs['track_name']:
    print(song_name)

### 3.2 Artist Collaboration

Determine which artists have collaborated the most with other artists and analyze how these collaborations impact the popularity of songs. Explore the diversity of collaborations and whether certain artist pairs are particularly successful.

In [None]:
# Filter rows where artist_count is greater than 1
collaborative_songs = df_spotify[df_spotify['artist_count'] > 1].copy()  # Make a copy to avoid SettingWithCopyWarning

# Split the 'artist(s)_name' column into a list of individual artists.
# The separator pattern swallows the whitespace around each comma, and the
# leading strip() trims the ends, so "A, B" yields ["A", "B"] rather than
# ["A", " B"]. Without this, one artist is counted under two different keys.
# NOTE: a name that itself contains a comma is still split; the column offers
# no way to tell such a name apart from two artists.
collaborative_songs['artists'] = (
    collaborative_songs['artist(s)_name'].str.strip().str.split(r'\s*,\s*')
)

# Create a flat Series with one entry per (song, collaborating artist)
all_collaborating_artists = collaborative_songs['artists'].explode()

# Count how many times each artist has collaborated
collaboration_counts = all_collaborating_artists.value_counts()

# Take the 25 most frequent collaborators (value_counts is already sorted in
# descending order). The variable keeps its historical "top_10" name.
top_10_collaborators = collaboration_counts.head(25)

# Create a bar chart to show the top 25
plt.figure(figsize=(12, 6))
top_10_collaborators.plot(kind='bar', color=colors)
plt.xlabel('Artists')
plt.ylabel('Number of Collaborations')
plt.title('Top 25 Collaborating Artists')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
## Check the Artist

# Define the artist name you want to search for
artist_name_to_search = "Drake"  # Change the Artist Name

# Keep collaborations (artist_count > 1) whose artist(s)_name contains the
# search text, ignoring case. regex=False makes the match literal, so names
# containing regex metacharacters (e.g. "$", "(", "+") are searched as typed.
# This is a substring match, so it also matches longer names containing the text.
matching_songs = df_spotify[(df_spotify['artist(s)_name'].str.contains(artist_name_to_search, case=False, na=False, regex=False)) &
                            (df_spotify['artist_count'] > 1)]

# Sort the matching songs by track name in alphabetical order
matching_songs = matching_songs.sort_values(by='track_name')

# Display the number of matching songs
print(f"{artist_name_to_search} has song(s): {len(matching_songs)}")

# Display the names of the matching songs in alphabetical order
print("\nSong Names:")
for song_name in matching_songs['track_name']:
    print(song_name)

In [None]:
# Keep only the collaborations, i.e. rows credited to more than one artist
collaborative = df_spotify.loc[df_spotify['artist_count'].gt(1)].copy()

# Tally how often each exact 'artist(s)_name' string (the full line-up) occurs
duplicate_artist_counts = collaborative['artist(s)_name'].value_counts()

# The ten most frequent line-ups (value_counts is already in descending order)
top_10_duplicate_artists = duplicate_artist_counts.iloc[:10]

# Bar chart of those ten line-ups
plt.figure(figsize=(12, 6))
top_10_duplicate_artists.plot.bar(color=colors)
plt.title('Top 10 Featured')
plt.xlabel('Artists')
plt.ylabel('Songs')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
## Check the Featured

# Define the artist line-up you want to search for
artist_name_to_search = "Drake, 21 Savage" # Change the Artists Name

# Keep collaborations (artist_count > 1) whose artist(s)_name contains the
# search text, ignoring case. regex=False makes the match literal, so names
# containing regex metacharacters (e.g. "$", "(", "+") are searched as typed.
# This is a substring match: a longer line-up that contains this text also matches.
matching_songs = df_spotify[(df_spotify['artist(s)_name'].str.contains(artist_name_to_search, case=False, na=False, regex=False)) &
                            (df_spotify['artist_count'] > 1)]

# Sort the matching songs by track name in alphabetical order
matching_songs = matching_songs.sort_values(by='track_name')

# Display the number of matching songs
print(f"{artist_name_to_search} has song(s): {len(matching_songs)}")

# Display the names of the matching songs in alphabetical order
print("\nSong Names:")
for song_name in matching_songs['track_name']:
    print(song_name)

### 3.3 Most Prolific Artists
Identify the artists who have the most songs in the dataset, considering both their solo songs and their featured appearances in other songs. This can give insights into which artists are the most active in the music industry.

In [None]:
# Normalise the collaboration keys before combining: names produced by a plain
# comma split can carry a leading space (" B" vs "B"), which would stop them
# from aligning with the same artist's solo entry. Grouping by the stripped
# label merges such variants and sums their counts.
normalized_collaboration_counts = collaboration_counts.groupby(str.strip).sum()

# Combine solo and collaboration counts for each artist.
# fill_value=0 keeps artists present in only one of the two Series; the result
# therefore has no missing values and can be cast back to whole numbers.
total_counts = solo_artist_counts.add(normalized_collaboration_counts, fill_value=0).astype(int)

# Take the 25 artists with the highest combined count.
# (The variable keeps its historical "top_10" name; it holds 25 artists.)
top_10_influential = total_counts.sort_values(ascending=False).head(25)

# Create a bar chart to show the top 25 influential artists
plt.figure(figsize=(12, 6))
top_10_influential.plot(kind='bar', color=colors)
plt.xlabel('Artists')
plt.ylabel('Total Influence (Solo + Collaborations)')
plt.title('Top 25 Influential Artists')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()