## <center>Artists</center>

In [None]:
# Importing modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Spotify streaming-history export into a DataFrame,
# write a CSV copy next to the notebook, then display the frame.
df = pd.read_json(path_or_buf='AccountData/StreamingHistory0.json')
df.to_csv(path_or_buf='SpotifyStreamData.csv')
df

In [None]:
# Derive calendar fields and listening durations from the raw export columns.
# NOTE: this cell consumes 'endTime' and 'msPlayed' (both are dropped at the end),
# so the load cell has to be re-run before this cell can be run a second time.
df['playTime'] = pd.to_datetime(df['endTime'])

# Calendar components via the vectorised .dt accessor.
df['year'] = df['playTime'].dt.year
df['month'] = df['playTime'].dt.month
df['day'] = df['playTime'].dt.day
df['weekday'] = df['playTime'].dt.weekday  # pandas convention: Monday=0 ... Sunday=6
df['time'] = df['playTime'].dt.time
df['hours'] = df['playTime'].dt.hour
df['day-name'] = df['playTime'].dt.day_name()

# Per-row constant of 1: summing 'artists_count' in a groupby yields the number of plays.
# 'songs_count' presumably serves the same purpose for per-track aggregations.
df['artists_count'] = 1
df['songs_count'] = 1

# Total play duration in seconds. total_seconds() covers the full duration, whereas
# Timedelta.seconds drops the days component and the sub-second part.
played_seconds = pd.to_timedelta(df['msPlayed'], unit='ms').dt.total_seconds()
df['ListenTimeHRS'] = (played_seconds / 3600).round(3)
# Total minutes per play. The previous "% 60" wrapped plays of an hour or longer
# (75 min -> 15 min), which under-counted them in every sum/sort on this column.
df['ListenTimeMNTS'] = (played_seconds / 60).round(3)

df = df.drop(columns=['endTime', 'msPlayed'])

In [None]:
# Display the frame to inspect the columns derived in the cleaning step
df

In [None]:
# Keep only plays from 1 Jan 2023 (inclusive) up to, but not including, 1 Dec 2023.
# The lower bound is midnight of 1 Jan compared with >=; the previous
# "> 2022-12-31 00:00" still let through every play made during 31 Dec 2022.
# NOTE(review): the upper bound excludes all of December 2023 - confirm this is intended.
df['playTime'] = pd.to_datetime(df['playTime'])  # no-op when the column is already datetime
lower_bound = pd.to_datetime('2023-01-01')
upper_bound = pd.to_datetime('2023-12-01')
in_window = (df['playTime'] >= lower_bound) & (df['playTime'] < upper_bound)
artists_df = df[in_window].reset_index(drop=True)
artists_df

In [None]:
# Number of distinct values in each column of the date-filtered frame
artists_df.nunique()

In [None]:
# Number of plays versus number of distinct artists.
# 'total_artists' counts rows with a non-null artistName (i.e. plays), not distinct artists;
# the name is kept because later cells refer to it.
# Series.count() already returns a scalar, so no further .sum() is needed.
total_artists = artists_df['artistName'].count()
unique_artist = artists_df['artistName'].nunique()
total_artists, unique_artist

In [None]:
# Donut chart: distinct artists versus the remainder (total_artists - unique_artist)
uniqueArtistList = np.array([unique_artist, total_artists - unique_artist])
unique_artistLab = ['Unique Artists', 'Non-Unique Artists']
colors = ['#ADFF2F', '#FFA500']
explode = (0.05, 0.05)

# Work on the current figure/axes explicitly instead of through the pyplot state machine
fig = plt.gcf()
pie_ax = fig.gca()
pie_ax.pie(
    uniqueArtistList,
    explode=explode,
    labels=unique_artistLab,
    colors=colors,
    autopct='%1.1f%%',
    pctdistance=0.85,
    startangle=180,
)

# A white disc drawn over the centre turns the pie into a donut
centre_circle = plt.Circle((0, 0), 0.7, fc='white')
pie_ax.add_artist(centre_circle)

pie_ax.set_title('Percentage of Unique Artists')
fig.savefig('unique_artists.png')
plt.show()

In [None]:
# Sum listening time and play count per artist and keep the 10 most-listened artists.
# Ranking uses ListenTimeHRS, the same metric the bar chart plots. Sorting on
# ListenTimeMNTS could mis-rank artists when that column wraps at 60 minutes per play.
top10ArtistsTimeDF = (
    artists_df
    .groupby('artistName')[['ListenTimeHRS', 'ListenTimeMNTS', 'artists_count']]
    .sum()
    .sort_values(by='ListenTimeHRS', ascending=False)
    .head(10)
    .reset_index()
)
top10ArtistsTimeDF

In [None]:
# Bar chart of total listening hours for the top 10 artists
sns.set(style="whitegrid")

# Create a bar chart
plt.figure(figsize=(10, 6))
bar_chart = sns.barplot(x='artistName', y='ListenTimeHRS', data=top10ArtistsTimeDF, palette='viridis')
# Restyle the existing tick labels in place rather than re-setting their text,
# which avoids fixing label strings onto the axis a second time.
plt.setp(bar_chart.get_xticklabels(), rotation=45, horizontalalignment='right')
bar_chart.set_title('Listen Time in Hours by Artist')
bar_chart.set_xlabel('Artist Name')
bar_chart.set_ylabel('Listen Time (Hours)')

# Fit the rotated labels inside the figure before saving: savefig does not adjust
# the margins on its own, so long artist names can be clipped in the PNG.
plt.tight_layout()

# Save the graph to a PNG file
plt.savefig('listen_time_bar_chart.png')

# Display the chart
plt.show()