<a href="https://colab.research.google.com/github/darapanenichandana/my-app/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode
import seaborn as sns
import datetime as dt
import warnings
import plotly.io as pio
import os
# List the working directory first, so a missing input CSV is obvious
# before any of the analysis runs.
workdir_entries = os.listdir()
print(workdir_entries)

# Session-wide configuration.
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')
# Never truncate the column list when a DataFrame is printed.
pd.options.display.max_columns = None
init_notebook_mode(connected=True)
# NOTE(review): keep this assignment after init_notebook_mode(); that call
# appears to set its own default renderer -- confirm against the plotly docs.
pio.renderers.default = "colab"

# Load the singers/songs table from the Colab upload location.
df = pd.read_csv('/content/singerstable.csv')

# First look at the raw data. A bare `df.head()` in the middle of a cell is
# evaluated and thrown away (only a cell's last expression is rendered), so
# it is printed explicitly like the other summaries below.
print(df.head())
df.info()
print(df.isnull().sum())
print(df.duplicated().value_counts())

# Basic cleaning:
#  * rows that are empty in *every* column carry no information (the file
#    appears to contain at least one such row -- e.g. a stray ",,,," line),
#    and would otherwise leak into sorts and plots as NaN entries;
#  * exact duplicate rows are removed.
# Reassigning instead of mutating in place keeps the steps chainable.
df = df.dropna(how='all').drop_duplicates()
print(df.shape)
print(df.describe())


# Pairwise correlation heatmap over the numeric columns only
# (correlation is undefined for the text columns).
numeric_df = df.select_dtypes(include=['number'])
corr_matrix = numeric_df.corr()
fig = px.imshow(
    corr_matrix,
    title='<b>Pairwise correlation of columns</b>',
    color_continuous_scale=px.colors.sequential.Greens,
    text_auto=True,  # print each coefficient inside its cell
    aspect='auto',
    width=800,
    height=800,
)
fig.update_layout(title_x=0.5)  # centre the title
fig.write_image("correlation_heatmap.png")
fig.show()


# Number of songs per year, in chronological order.
songs_per_year = df.groupby('year', as_index=False)['song'].count()
songs_per_year = songs_per_year.sort_values(by='year')
fig = px.area(
    songs_per_year,
    x='year',
    y='song',
    title='<b>Year by Year Songs Collection</b>',
    labels={'song': 'Total Songs'},
    color_discrete_sequence=['green'],
)
fig.update_layout(hovermode='x', title_x=0.5)
fig.write_image("yearly_songs.png")
fig.show()

# Song count per genre, most common genre first. count() yields the
# non-null count of every column; the 'song' column is the one plotted.
genre_counts = df.groupby('genre', as_index=False).count()
genre_counts = genre_counts.sort_values(by='song', ascending=False)
fig = px.histogram(
    genre_counts,
    x='genre',
    y='song',
    title='<b>Total Songs Based on Genres</b>',
    template='plotly_dark',
    marginal='box',
    color_discrete_sequence=['green'],
)
fig.update_layout(title_x=0.5)
fig.write_image("genre_songs.png")
fig.show()


# Total popularity per genre, highest first.
# Only 'popularity' is aggregated. Summing the whole grouped frame would
# also try to "sum" the text columns (artist, song, ...): depending on the
# pandas version that concatenates strings, raises, or silently drops
# columns -- and none of those columns are used by the chart anyway.
genre_popularity = (
    df.groupby('genre', as_index=False)['popularity']
    .sum()
    .sort_values(by='popularity', ascending=False)
)
fig = px.histogram(genre_popularity,
                   x='genre', y='popularity', color_discrete_sequence=['lightgreen'],
                   template='plotly_dark', marginal='box',
                   title='<b>Popular Genres Based on Popularity</b>')
fig.update_layout(title_x=0.5)
fig.write_image("popular_genres.png")
fig.show()


# The 50 artists with the most songs in the table.
artist_counts = df.groupby('artist', as_index=False).count()
most_recorded = artist_counts.sort_values(by='song', ascending=False).head(50)
fig = px.bar(
    most_recorded,
    x='artist',
    y='song',
    text='song',  # print the count on each bar
    title='<b>List of Songs Recorded by Each Singer</b>',
    labels={'song': 'Total Songs'},
    color_discrete_sequence=['green'],
    width=1000,
)
fig.write_image("songs_by_singer.png")
fig.show()


# The 30 artists with the highest summed popularity.
# Only 'popularity' is aggregated. Summing the whole grouped frame would
# also try to "sum" the text columns (song, genre, ...): depending on the
# pandas version that concatenates strings, raises, or silently drops
# columns -- and none of those columns are used by the chart anyway.
artist_popularity = (
    df.groupby('artist', as_index=False)['popularity']
    .sum()
    .sort_values(by='popularity', ascending=False)
    .head(30)
)
fig = px.bar(artist_popularity,
             x='artist', y='popularity', color_discrete_sequence=['lightgreen'],
             template='plotly_dark', text='popularity',
             title='<b>Top 30 Popular Singers</b>')
fig.write_image("top_singers.png")
fig.show()


# The 25 highest-popularity rows, plotted in descending order.
ranked_songs = df.sort_values(by='popularity', ascending=False)
top_25_songs = ranked_songs.head(25)
fig = px.line(
    top_25_songs,
    x='song',
    y='popularity',
    title='<b>Top 25 Songs on Spotify</b>',
    hover_data=['artist'],  # show the artist in the hover tooltip
    markers=True,
    color_discrete_sequence=['green'],
)
fig.write_image("top_songs.png")
fig.show()


# Treemap of artist -> genre -> song, sized by popularity.
# Every level of the path and the size value must be present, so rows
# missing any of them are removed first.
treemap_columns = ['artist', 'genre', 'song', 'popularity']
df_clean = df.dropna(subset=treemap_columns)
# Keep only rows whose popularity is an int/float value.
has_numeric_popularity = df_clean['popularity'].apply(
    lambda value: isinstance(value, (int, float))
)
df_clean = df_clean[has_numeric_popularity]
fig = px.treemap(
    df_clean,
    path=['artist', 'genre', 'song'],
    values='popularity',
    title='<b>TreeMap of Singers Playlist</b>',
)
fig.update_traces(root_color='lightgreen')
fig.update_layout(title_x=0.5)
fig.write_image("treemap_singers.png")
fig.show()

# Share of songs per value of the 'explicit' flag, as a donut chart.
explicit_counts = df.groupby('explicit', as_index=False).count()
explicit_counts = explicit_counts.sort_values(by='song', ascending=False)
fig = px.pie(
    explicit_counts,
    names='explicit',
    values='song',
    title='<b>Songs Having Explicit Content</b>',
    labels={'song': 'Total Songs'},
    template='plotly_dark',
    color_discrete_sequence=['green', 'crimson'],
    hole=.6,  # donut rather than a full pie
)
fig.update_layout(title_x=0.5)
fig.write_image("explicit_content_pie.png")
fig.show()


# Explicit songs per year, in chronological order.
# The comparison against True is kept explicit on purpose; 'explicit' looks
# like an object-dtype column, so it is not used directly as a boolean mask.
is_explicit = df['explicit'] == True
explicit_per_year = df[is_explicit].groupby('year', as_index=False).count()
explicit_per_year = explicit_per_year.sort_values(by='year')
fig = px.area(
    explicit_per_year,
    x='year',
    y='song',
    title='<b>Yearwise Explicit Content Songs</b>',
    labels={'song': 'Total Songs'},
    template='plotly_dark',
    color_discrete_sequence=['red'],
    markers=True,
)
fig.update_layout(hovermode='x')
fig.write_image("explicit_content_yearly.png")
fig.show()


# Scatter of tempo against popularity; points are coloured by tempo.
fig = px.scatter(
    df,
    x='tempo',
    y='popularity',
    color='tempo',
    title='<b>Tempo Versus Popularity</b>',
    template='plotly_dark',
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.write_image("tempo_vs_popularity.png")
fig.show()


# Scatter of speechiness against popularity; points are coloured by
# speechiness.
fig = px.scatter(
    df,
    x='speechiness',
    y='popularity',
    color='speechiness',
    title='<b>Speechiness Versus Popularity</b>',
    template='plotly_dark',
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.write_image("speechiness_vs_popularity.png")
fig.show()


# Scatter of energy against danceability; points are coloured by
# danceability.
fig = px.scatter(
    df,
    x='energy',
    y='danceability',
    color='danceability',
    title='<b>Energy Versus Danceability</b>',
    template='plotly_dark',
    color_continuous_scale=px.colors.sequential.Plotly3,
)
fig.write_image("energy_vs_danceability.png")
fig.show()

# Scatter of energy against loudness in a single flat colour.
fig = px.scatter(
    df,
    x='energy',
    y='loudness',
    title='<b>Energy Versus Loudness Correlation</b>',
    template='plotly_dark',
    color_discrete_sequence=['lightgreen'],
)
fig.write_image("energy_vs_loudness.png")
fig.show()

['.config', 'energy_vs_loudness.png', 'top_singers.png', 'singerstable.csv', 'energy_vs_danceability.png', 'explicit_content_pie.png', 'songs_by_singer.png', 'yearly_songs.png', 'treemap_singers.png', 'correlation_heatmap.png', 'genre_songs.png', 'top_songs.png', 'explicit_content_yearly.png', 'tempo_vs_popularity.png', 'speechiness_vs_popularity.png', 'popular_genres.png', 'sample_data']


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        5 non-null      float64
 1   artist            5 non-null      object 
 2   song              5 non-null      object 
 3   duration_ms       5 non-null      float64
 4   explicit          5 non-null      object 
 5   year              5 non-null      float64
 6   popularity        5 non-null      float64
 7   danceability      5 non-null      float64
 8   energy            5 non-null      float64
 9   key               5 non-null      float64
 10  loudness          5 non-null      float64
 11  mode              5 non-null      float64
 12  speechiness       5 non-null      float64
 13  acousticness      5 non-null      float64
 14  instrumentalness  5 non-null      float64
 15  liveness          5 non-null      float64
 16  valence           5 non-null      float64
 17  t