### Loading and inspecting the data.

In [3]:
#Importing pandas as pd
import pandas as pd

In [11]:
#Reading csv file
filepath = "DataFiles/songs.csv"
df = pd.read_csv(filepath)

In [13]:
#Inspecting data
df.head()

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration,Popularity
0,Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71
1,Manage west energy.,Eric Miller,Raise get.,Jazz,2011-08-20,187,37
2,Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58
3,Section turn hour.,James Smith,Full.,Hip-Hop,2014-10-12,272,59
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34


In [15]:
df.shape

(50000, 7)

In [19]:
df = df.rename(columns={"Duration":"Duration in Minutes"})

In [21]:
df

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
0,Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71
1,Manage west energy.,Eric Miller,Raise get.,Jazz,2011-08-20,187,37
2,Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58
3,Section turn hour.,James Smith,Full.,Hip-Hop,2014-10-12,272,59
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34
...,...,...,...,...,...,...,...
49995,Thing.,Brandon Thomas,Lawyer sign.,Rock,2008-06-07,300,95
49996,Option.,Barry White,Cold.,Pop,2016-09-06,220,23
49997,Ability ever.,Leah Obrien,Trade.,Blues,2013-10-16,257,79
49998,Occur claim who.,Ashley James,Explain moment.,Jazz,2018-03-06,262,22


### Cleaning and preparing the data.

In [25]:
df.isnull().sum()

Title                  0
Artist                 0
Album                  0
Genre                  0
Release Date           0
Duration in Minutes    0
Popularity             0
dtype: int64

In [31]:
df[df.duplicated()]

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity


In [37]:
q3 = df["Duration in Minutes"].quantile(0.75)
q1 = df["Duration in Minutes"].quantile(0.25)

iqr = q3-q1

upper_bound=q3+1.5*iqr
lower_bound = q1-1.5*iqr

print(upper_bound)
print(lower_bound)

391.5
27.5


In [43]:
df=df[(df['Duration in Minutes'] >= lower_bound) & (df['Duration in Minutes'] <= upper_bound)]

In [45]:
df.shape

(50000, 7)

In [47]:
q3 = df["Popularity"].quantile(0.75)
q1 = df["Popularity"].quantile(0.25)

iqr = q3-q1

upper_bound=q3+1.5*iqr
lower_bound = q1-1.5*iqr

print(upper_bound)
print(lower_bound)

152.5
-51.5


In [51]:
df=df[(df['Popularity'] >= lower_bound) & (df['Popularity'] <= upper_bound)]

In [53]:
df.shape

(50000, 7)

### Performing data manipulation.

In [56]:
df.head()

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
0,Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71
1,Manage west energy.,Eric Miller,Raise get.,Jazz,2011-08-20,187,37
2,Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58
3,Section turn hour.,James Smith,Full.,Hip-Hop,2014-10-12,272,59
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34


In [110]:
df.groupby("Genre")["Album"].count()

Genre
Blues         5101
Classical     4972
Country       5176
Electronic    4968
Hip-Hop       5033
Jazz          4939
Pop           5026
R&B           4973
Reggae        4946
Rock          4866
Name: Album, dtype: int64

In [148]:
df['Release Date'] = pd.to_datetime(df['Release Date'])

In [166]:
# Define the date you want to filter by
date_filter = pd.to_datetime("2010-12-31")

# Filter the DataFrame
First_Decade = df[df['Release Date'] <= date_filter]
First_Decade

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
0,Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71
2,Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34
5,Turn child.,Jessica Martin,Cold according.,R&B,2006-09-16,207,58
6,Old.,Cheyenne Powell,Oil.,Country,2010-04-23,163,72
...,...,...,...,...,...,...,...
49986,Fly live.,Katie Rodriguez,Guess sometimes.,Rock,2007-01-17,299,4
49990,Land recent bank.,Christopher Silva,Long.,Pop,2009-08-06,163,2
49993,By population message.,Madeline Dickson,Mean.,Classical,2005-02-13,142,59
49995,Thing.,Brandon Thomas,Lawyer sign.,Rock,2008-06-07,300,95


In [168]:
# Which genre had the highest count between 2000 to 2010(first_decade)

First_Decade.groupby("Genre")["Release Date"].count()

Genre
Blues         2762
Classical     2578
Country       2706
Electronic    2600
Hip-Hop       2660
Jazz          2564
Pop           2632
R&B           2608
Reggae        2519
Rock          2580
Name: Release Date, dtype: int64

In [184]:
#Total count for the first_decade
First_Decade_Count=First_Decade["Genre"].count()

In [172]:
# Define the date you want to filter by
date_filter = pd.to_datetime("2020-12-31")

# Filter the DataFrame
Second_Decade = df[df['Release Date'] <= date_filter]
Second_Decade

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
0,Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71
1,Manage west energy.,Eric Miller,Raise get.,Jazz,2011-08-20,187,37
2,Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58
3,Section turn hour.,James Smith,Full.,Hip-Hop,2014-10-12,272,59
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34
...,...,...,...,...,...,...,...
49995,Thing.,Brandon Thomas,Lawyer sign.,Rock,2008-06-07,300,95
49996,Option.,Barry White,Cold.,Pop,2016-09-06,220,23
49997,Ability ever.,Leah Obrien,Trade.,Blues,2013-10-16,257,79
49998,Occur claim who.,Ashley James,Explain moment.,Jazz,2018-03-06,262,22


In [176]:
# Which genre had the highest count between 2011 to 2020(second_decade)

Second_Decade.groupby("Genre")["Release Date"].count()

Genre
Blues         5101
Classical     4972
Country       5176
Electronic    4968
Hip-Hop       5033
Jazz          4939
Pop           5026
R&B           4973
Reggae        4946
Rock          4866
Name: Release Date, dtype: int64

In [182]:
#Total count for the second_decade
Second_Decade_Count=Second_Decade["Genre"].count()

In [190]:
Percentage_Increase_between_decades= ((Second_Decade_Count-First_Decade_Count)/First_Decade_Count)*100
print(First_Decade_Count)
print(Second_Decade_Count)
print(Percentage_Increase_between_decades)

26209
50000
90.77416154756


### Summary for the first and second decade

In [199]:
First_Decade_Group=First_Decade.groupby("Genre")["Popularity"].sum()

Genre
Blues         136816
Classical     130989
Country       134882
Electronic    129253
Hip-Hop       132678
Jazz          129983
Pop           133671
R&B           132582
Reggae        128671
Rock          129652
Name: Popularity, dtype: int64

In [116]:
Blues

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
4,Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34
9,Face become we.,Raymond White,Probably camera.,Blues,2011-11-07,177,55
12,Business research.,Michael Glass,Speak.,Blues,2019-10-12,145,92
42,Election paper.,Jon Powers,Hold offer.,Blues,2012-01-11,281,80
55,Participant join tend.,Dalton Wyatt,Property serve.,Blues,2009-12-04,239,92
...,...,...,...,...,...,...,...
49921,Herself western.,Jessica Livingston,Trial.,Blues,2012-11-11,230,43
49943,Firm name back.,Joy Murillo,Strong.,Blues,2012-11-08,207,6
49957,Foot public break.,Kimberly Duran,Third administration.,Blues,2008-01-08,278,14
49982,Move.,Jamie Sims,What.,Blues,2001-11-12,138,74


In [128]:
Blues_Popularity=Blues.sort_values(by = "Popularity" ,ascending=False)

In [136]:
Blues_Popularity.head(20)

Unnamed: 0,Title,Artist,Album,Genre,Release Date,Duration in Minutes,Popularity
4312,Force finally religious.,John Campos,Center.,Blues,2006-04-27,295,100
30570,Manager do town.,Kevin Blevins,Series sort.,Blues,2013-01-05,229,100
5777,After thing wrong.,Raymond Wilkins,Especially.,Blues,2003-08-09,148,100
49107,Effort finally common food.,Angela Murphy,Left offer.,Blues,2013-10-20,291,100
15759,Water service.,Reginald Anderson,Positive.,Blues,2020-09-06,247,100
30358,Receive.,Robert Dominguez,Class.,Blues,2020-12-26,285,100
23997,Key seven safe.,Stephanie Robinson,Doctor million.,Blues,2010-07-22,271,100
13941,Tell my.,Jacqueline Long,Film teach.,Blues,2014-09-29,177,100
24747,Various opportunity cost.,Jon Michael,Card show.,Blues,2020-02-05,201,100
34519,Woman still.,Christopher Figueroa,Million already.,Blues,2013-11-11,159,100


In [140]:
df["Release Date"].max()

'2020-12-30'