# Pandas training

### imports

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# Common URL prefix for the data files; the read_csv / read_json cells below
# append a file name to it with an f-string.
base_url = "https://gist.githubusercontent.com/alti-tude/710cb9d4dfc7ebcd0afb9cf93d8f6a8d/raw/574730ba009e69b81d6c79f2fef2c3dd5145db0a/"

### dataframe basics

In [5]:
# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    apples=[3, 2, 0, 1],
    oranges=[0, 3, 7, 2],
)

# No index is given, so the rows get the default integer index 0..3.
purchases = pd.DataFrame(data=data)

purchases

Unnamed: 0,apples,oranges
0,3,0
1,2,3
2,0,7
3,1,2


In [6]:
# Same data as before, but each row is now labelled with a customer name
# instead of the default integer index.
customer_names = ["June", "Robert", "Lily", "David"]
purchases = pd.DataFrame(data, index=customer_names)
purchases

Unnamed: 0,apples,oranges
June,3,0
Robert,2,3
Lily,0,7
David,1,2


In [7]:
# .loc selects by index label: this returns the row labelled "June" as a Series
# whose entries are the frame's columns.
purchases.loc["June"]

apples     3
oranges    0
Name: June, dtype: int64

### reading files (or urls)

In [10]:
# index_col=0 turns the first CSV column into the row index
# instead of a regular data column.
purchases_csv_url = f"{base_url}purchases.csv"
df = pd.read_csv(purchases_csv_url, index_col=0)

df

Unnamed: 0,apples,oranges
June,3,0
Robert,2,3
Lily,0,7
David,1,2


In [11]:
# The same purchases table, this time loaded from its JSON version.
purchases_json_url = f"{base_url}purchases.json"
df = pd.read_json(purchases_json_url)

df

Unnamed: 0,apples,oranges
June,3,0
Robert,2,3
Lily,0,7
David,1,2


In [15]:
# Use the "Title" column as the row index so movies can be looked up by name.
movies_csv_url = f"{base_url}IMDB-Movie-Data.csv"
movies_df = pd.read_csv(movies_csv_url, index_col="Title")

movies_df.head()

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0
Suicide Squad,5,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0


In [17]:
# Show the last 10 rows of the frame.
movies_df.tail(10)

Unnamed: 0_level_0,Rank,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Underworld: Rise of the Lycans,991,"Action,Adventure,Fantasy",An origins story centered on the centuries-old...,Patrick Tatopoulos,"Rhona Mitra, Michael Sheen, Bill Nighy, Steven...",2009,92,6.6,129708,45.8,44.0
Taare Zameen Par,992,"Drama,Family,Music",An eight-year-old boy is thought to be a lazy ...,Aamir Khan,"Darsheel Safary, Aamir Khan, Tanay Chheda, Sac...",2007,165,8.5,102697,1.2,42.0
Take Me Home Tonight,993,"Comedy,Drama,Romance","Four years after graduation, an awkward high s...",Michael Dowse,"Topher Grace, Anna Faris, Dan Fogler, Teresa P...",2011,97,6.3,45419,6.92,
Resident Evil: Afterlife,994,"Action,Adventure,Horror",While still out to destroy the evil Umbrella C...,Paul W.S. Anderson,"Milla Jovovich, Ali Larter, Wentworth Miller,K...",2010,97,5.9,140900,60.13,37.0
Project X,995,Comedy,3 high school seniors throw a birthday party t...,Nima Nourizadeh,"Thomas Mann, Oliver Cooper, Jonathan Daniel Br...",2012,88,6.7,164088,54.72,48.0
Secret in Their Eyes,996,"Crime,Drama,Mystery","A tight-knit team of rising investigators, alo...",Billy Ray,"Chiwetel Ejiofor, Nicole Kidman, Julia Roberts...",2015,111,6.2,27585,,45.0
Hostel: Part II,997,Horror,Three American college students studying abroa...,Eli Roth,"Lauren German, Heather Matarazzo, Bijou Philli...",2007,94,5.5,73152,17.54,46.0
Step Up 2: The Streets,998,"Drama,Music,Romance",Romantic sparks occur between two dance studen...,Jon M. Chu,"Robert Hoffman, Briana Evigan, Cassie Ventura,...",2008,98,6.2,70699,58.01,50.0
Search Party,999,"Adventure,Comedy",A pair of friends embark on a mission to reuni...,Scot Armstrong,"Adam Pally, T.J. Miller, Thomas Middleditch,Sh...",2014,93,5.6,4881,,22.0
Nine Lives,1000,"Comedy,Family,Fantasy",A stuffy businessman finds himself trapped ins...,Barry Sonnenfeld,"Kevin Spacey, Jennifer Garner, Robbie Amell,Ch...",2016,87,5.3,12435,19.64,11.0


In [18]:
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage. Columns with fewer non-null entries than rows
# contain missing values.
movies_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, Guardians of the Galaxy to Nine Lives
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rank                1000 non-null   int64  
 1   Genre               1000 non-null   object 
 2   Description         1000 non-null   object 
 3   Director            1000 non-null   object 
 4   Actors              1000 non-null   object 
 5   Year                1000 non-null   int64  
 6   Runtime (Minutes)   1000 non-null   int64  
 7   Rating              1000 non-null   float64
 8   Votes               1000 non-null   int64  
 9   Revenue (Millions)  872 non-null    float64
 10  Metascore           936 non-null    float64
dtypes: float64(3), int64(4), object(4)
memory usage: 133.8+ KB


In [19]:
# (number of rows, number of columns)
movies_df.shape

(1000, 11)

### actions on dataframe

In [20]:
# Stack the frame on top of itself to manufacture duplicate rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps the original index labels.
tmp_df = pd.concat([movies_df, movies_df])

tmp_df.shape

(2000, 11)

In [21]:
# Remove repeated rows; keep="first" (the default) retains the first copy of
# each duplicated row.
tmp_df = tmp_df.drop_duplicates(keep="first")

tmp_df.shape

(1000, 11)

In [23]:
# Shorten the two column names that carry a unit suffix.
# rename() returns a new frame, so the result is bound back to movies_df
# rather than mutating the existing object with inplace=True.
movies_df = movies_df.rename(
    columns={
        "Runtime (Minutes)": "Runtime",
        "Revenue (Millions)": "Revenue",
    }
)

movies_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, Guardians of the Galaxy to Nine Lives
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Rank         1000 non-null   int64  
 1   Genre        1000 non-null   object 
 2   Description  1000 non-null   object 
 3   Director     1000 non-null   object 
 4   Actors       1000 non-null   object 
 5   Year         1000 non-null   int64  
 6   Runtime      1000 non-null   int64  
 7   Rating       1000 non-null   float64
 8   Votes        1000 non-null   int64  
 9   Revenue      872 non-null    float64
 10  Metascore    936 non-null    float64
dtypes: float64(3), int64(4), object(4)
memory usage: 93.8+ KB


In [25]:
# Replace all column labels at once with lowercase names. The list must have
# exactly one entry per existing column, in the existing column order.
lowercase_names = [
    "rank",
    "genre",
    "description",
    "director",
    "actors",
    "year",
    "runtime",
    "rating",
    "votes",
    "revenue",
    "metascore",
]
movies_df.columns = lowercase_names

movies_df.columns

Index(['rank', 'genre', 'description', 'director', 'actors', 'year', 'runtime',
       'rating', 'votes', 'revenue', 'metascore'],
      dtype='object')

### managing nulls

In [31]:
# dropna() returns a copy without any row that has a missing value, so the
# isnull() mask of that copy is False everywhere. movies_df itself is not
# modified because the result is not assigned back.
movies_df.dropna().isnull()

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Guardians of the Galaxy,False,False,False,False,False,False,False,False,False,False,False
Prometheus,False,False,False,False,False,False,False,False,False,False,False
Split,False,False,False,False,False,False,False,False,False,False,False
Sing,False,False,False,False,False,False,False,False,False,False,False
Suicide Squad,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
Resident Evil: Afterlife,False,False,False,False,False,False,False,False,False,False,False
Project X,False,False,False,False,False,False,False,False,False,False,False
Hostel: Part II,False,False,False,False,False,False,False,False,False,False,False
Step Up 2: The Streets,False,False,False,False,False,False,False,False,False,False,False


In [33]:
# Impute missing revenue values with the column mean.
# fillna() is used in its non-mutating form: filling in place on a Series
# pulled out of the frame depends on whether that Series is a view or a copy,
# so the filled result is built explicitly and then assigned back.
mean_revenue = movies_df["revenue"].mean()
revenue = movies_df["revenue"].fillna(mean_revenue)

movies_df["revenue"] = revenue

### selecting rows

In [36]:
# The same row fetched two ways: by index label and by integer position.
by_label = movies_df.loc["Prometheus"]
by_position = movies_df.iloc[1]

by_label, by_position

(rank                                                           2
 genre                                   Adventure,Mystery,Sci-Fi
 description    Following clues to the origin of mankind, a te...
 director                                            Ridley Scott
 actors         Noomi Rapace, Logan Marshall-Green, Michael Fa...
 year                                                        2012
 runtime                                                      124
 rating                                                         7
 votes                                                     485820
 revenue                                                   126.46
 metascore                                                     65
 Name: Prometheus, dtype: object,
 rank                                                           2
 genre                                   Adventure,Mystery,Sci-Fi
 description    Following clues to the origin of mankind, a te...
 director                                 

In [38]:
# Label-based slice: with .loc both endpoints ("Prometheus" and "Sing") are
# included in the result.
movies_df.loc["Prometheus":"Sing"]

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0


In [40]:
# Position-based slice: .iloc[1:4] takes positions 1, 2 and 3; the stop
# position 4 is excluded, as with ordinary Python slicing.
movies_df.iloc[1:4]

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0


In [41]:
# Boolean mask: True for every row whose director is Ridley Scott.
directed_by_scott = movies_df["director"] == "Ridley Scott"
movies_df[directed_by_scott]

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
The Martian,103,"Adventure,Drama,Sci-Fi",An astronaut becomes stranded on Mars after hi...,Ridley Scott,"Matt Damon, Jessica Chastain, Kristen Wiig, Ka...",2015,144,8.0,556097,228.43,80.0
Robin Hood,388,"Action,Adventure,Drama","In 12th century England, Robin and his band of...",Ridley Scott,"Russell Crowe, Cate Blanchett, Matthew Macfady...",2010,140,6.7,221117,105.22,53.0
American Gangster,471,"Biography,Crime,Drama","In 1970s America, a detective works to bring d...",Ridley Scott,"Denzel Washington, Russell Crowe, Chiwetel Eji...",2007,157,7.8,337835,130.13,76.0
Exodus: Gods and Kings,517,"Action,Adventure,Drama",The defiant leader Moses rises up against the ...,Ridley Scott,"Christian Bale, Joel Edgerton, Ben Kingsley, S...",2014,150,6.0,137299,65.01,52.0
The Counselor,522,"Crime,Drama,Thriller",A lawyer finds himself in over his head when h...,Ridley Scott,"Michael Fassbender, Penélope Cruz, Cameron Dia...",2013,117,5.3,84927,16.97,48.0
A Good Year,531,"Comedy,Drama,Romance",A British investment broker inherits his uncle...,Ridley Scott,"Russell Crowe, Abbie Cornish, Albert Finney, M...",2006,117,6.9,74674,7.46,47.0
Body of Lies,738,"Action,Drama,Romance",A CIA agent on the ground in Jordan hunts down...,Ridley Scott,"Leonardo DiCaprio, Russell Crowe, Mark Strong,...",2008,128,7.1,182305,39.38,57.0


In [42]:
# Keep rows rated 8.6 or higher, then show only the first three of them.
highly_rated = movies_df["rating"] >= 8.6
movies_df[highly_rated].head(3)

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Interstellar,37,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...,Christopher Nolan,"Matthew McConaughey, Anne Hathaway, Jessica Ch...",2014,169,8.6,1047747,187.99,74.0
The Dark Knight,55,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0
Inception,81,"Action,Adventure,Sci-Fi","A thief, who steals corporate secrets through ...",Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen...",2010,148,8.8,1583625,292.57,74.0


In [46]:
# Combine two boolean masks with | (element-wise OR): a row is kept when it is
# rated 8.6 or higher, or when it was directed by Christopher Nolan.
highly_rated = movies_df["rating"] >= 8.6
directed_by_nolan = movies_df["director"] == "Christopher Nolan"

movies_df[highly_rated | directed_by_nolan]

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Interstellar,37,"Adventure,Drama,Sci-Fi",A team of explorers travel through a wormhole ...,Christopher Nolan,"Matthew McConaughey, Anne Hathaway, Jessica Ch...",2014,169,8.6,1047747,187.99,74.0
The Dark Knight,55,"Action,Crime,Drama",When the menace known as the Joker wreaks havo...,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart,Mi...",2008,152,9.0,1791916,533.32,82.0
The Prestige,65,"Drama,Mystery,Sci-Fi",Two stage magicians engage in competitive one-...,Christopher Nolan,"Christian Bale, Hugh Jackman, Scarlett Johanss...",2006,130,8.5,913152,53.08,66.0
Inception,81,"Action,Adventure,Sci-Fi","A thief, who steals corporate secrets through ...",Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen...",2010,148,8.8,1583625,292.57,74.0
Kimi no na wa,97,"Animation,Drama,Fantasy",Two strangers find themselves linked in a biza...,Makoto Shinkai,"Ryûnosuke Kamiki, Mone Kamishiraishi, Ryô Nari...",2016,106,8.6,34110,4.68,79.0
Dangal,118,"Action,Biography,Drama",Former wrestler Mahavir Singh Phogat and his t...,Nitesh Tiwari,"Aamir Khan, Sakshi Tanwar, Fatima Sana Shaikh,...",2016,161,8.8,48969,11.15,
The Dark Knight Rises,125,"Action,Thriller",Eight years after the Joker's reign of anarchy...,Christopher Nolan,"Christian Bale, Tom Hardy, Anne Hathaway,Gary ...",2012,164,8.5,1222645,448.13,78.0
The Intouchables,250,"Biography,Comedy,Drama",After he becomes a quadriplegic from a paragli...,Olivier Nakache,"François Cluzet, Omar Sy, Anne Le Ny, Audrey F...",2011,112,8.6,557965,13.18,57.0


### applying functions

In [48]:
def label(x):
    """Classify a rating: "good" when it is 8.0 or higher, otherwise "bad"."""
    return "good" if x >= 8.0 else "bad"


# apply() calls label once per rating and collects the results in a new column.
movies_df["rating_category"] = movies_df["rating"].apply(label)

movies_df.head()

Unnamed: 0_level_0,rank,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore,rating_category
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Guardians of the Galaxy,1,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0,good
Prometheus,2,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0,bad
Split,3,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0,bad
Sing,4,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0,bad
Suicide Squad,5,"Action,Adventure,Fantasy",A secret government agency recruits some of th...,David Ayer,"Will Smith, Jared Leto, Margot Robbie, Viola D...",2016,123,6.2,393727,325.02,40.0,bad


## Task

In [72]:
# Load the music data set; the first CSV column becomes the row index.
music_csv_url = f"{base_url}MusicTop50.csv"
music_df = pd.read_csv(music_csv_url, index_col=0)

music_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 1 to 50
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Track.Name          50 non-null     object 
 1   Artist.Name         50 non-null     object 
 2   Genre               50 non-null     object 
 3   Beats.Per.Minute    48 non-null     float64
 4   Energy              49 non-null     float64
 5   Danceability..db..  50 non-null     int64  
 6   Loudness..          50 non-null     int64  
 7   Liveness.           48 non-null     float64
 8   Valence ()          49 non-null     float64
 9   Length              47 non-null     float64
 10  Acousticness        50 non-null     int64  
 11  Speechiness         50 non-null     int64  
 12  Popularity          49 non-null     float64
dtypes: float64(6), int64(4), object(3)
memory usage: 5.5+ KB


In [73]:
# Preview the first five rows (the default for head()).
music_df.head()

Unnamed: 0,Track.Name,Artist.Name,Genre,Beats.Per.Minute,Energy,Danceability..db..,Loudness..,Liveness.,Valence (),Length,Acousticness,Speechiness,Popularity
1,Se�orita,Shawn Mendes,canadian pop,117.0,55.0,76,-6,8.0,75.0,191.0,4,3,79.0
2,China,Anuel AA,reggaeton flow,105.0,81.0,79,-4,8.0,61.0,302.0,8,9,92.0
3,boyfriend (with Social House),Ariana Grande,dance pop,190.0,80.0,40,-4,16.0,70.0,186.0,12,46,85.0
4,Beautiful People (feat. Khalid),Ed Sheeran,pop,93.0,65.0,64,-8,8.0,55.0,198.0,12,19,86.0
5,Goodbyes (Feat. Young Thug),Post Malone,dfw rap,,65.0,58,-4,11.0,18.0,175.0,45,7,94.0


In [74]:
# Preview the last five rows (the default for tail()).
music_df.tail()

Unnamed: 0,Track.Name,Artist.Name,Genre,Beats.Per.Minute,Energy,Danceability..db..,Loudness..,Liveness.,Valence (),Length,Acousticness,Speechiness,Popularity
46,One Thing Right,Marshmello,brostep,88.0,62.0,66,-2,,44.0,182.0,7,5,88.0
47,Te Robar�,Nicky Jam,latin,176.0,75.0,67,-4,8.0,80.0,202.0,24,6,88.0
48,Happier,Marshmello,brostep,100.0,79.0,69,-3,17.0,67.0,214.0,19,5,88.0
49,Call You Mine,The Chainsmokers,edm,104.0,70.0,59,-6,41.0,50.0,218.0,23,3,88.0
50,Cross Me (feat. Chance the Rapper & PnB Rock),Ed Sheeran,pop,95.0,79.0,75,-6,7.0,61.0,206.0,21,12,82.0


In [75]:
# Replace the raw CSV headers with short lowercase names, one per existing
# column and in the existing column order.
short_names = [
    "name",
    "artist",
    "genre",
    "bpm",
    "energy",
    "dance",
    "loudness",
    "liveness",
    "valence",
    "length",
    "acousticness",
    "speechiness",
    "popularity",
]
music_df.columns = short_names

In [77]:
# Strategy 1 for missing data: discard every row (axis=0) that has at least
# one missing value (how="any"). music_df itself is left untouched.
music_df_dropna = music_df.dropna(axis=0, how="any")

music_df_dropna.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 1 to 50
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          41 non-null     object 
 1   artist        41 non-null     object 
 2   genre         41 non-null     object 
 3   bpm           41 non-null     float64
 4   energy        41 non-null     float64
 5   dance         41 non-null     int64  
 6   loudness      41 non-null     int64  
 7   liveness      41 non-null     float64
 8   valence       41 non-null     float64
 9   length        41 non-null     float64
 10  acousticness  41 non-null     int64  
 11  speechiness   41 non-null     int64  
 12  popularity    41 non-null     float64
dtypes: float64(6), int64(4), object(3)
memory usage: 4.5+ KB


In [78]:
# Strategy 2 for missing data: fill each numeric column's gaps with that
# column's mean.
# numeric_only=True restricts the mean to numeric columns; name, artist and
# genre hold strings, and pandas 2.0+ raises TypeError when asked to average
# them. Columns absent from the means are simply left unfilled by fillna().
column_means = music_df.mean(numeric_only=True)
music_df_fill_mean = music_df.fillna(column_means)

music_df_fill_mean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 1 to 50
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          50 non-null     object 
 1   artist        50 non-null     object 
 2   genre         50 non-null     object 
 3   bpm           50 non-null     float64
 4   energy        50 non-null     float64
 5   dance         50 non-null     int64  
 6   loudness      50 non-null     int64  
 7   liveness      50 non-null     float64
 8   valence       50 non-null     float64
 9   length        50 non-null     float64
 10  acousticness  50 non-null     int64  
 11  speechiness   50 non-null     int64  
 12  popularity    50 non-null     float64
dtypes: float64(6), int64(4), object(3)
memory usage: 5.5+ KB


In [79]:
# Strategy 3 for missing data: fill each numeric column's gaps with that
# column's median.
# numeric_only=True restricts the median to numeric columns; name, artist and
# genre hold strings, and pandas 2.0+ raises TypeError when asked for their
# median. Columns absent from the medians are left unfilled by fillna().
column_medians = music_df.median(numeric_only=True)
music_df_fill_median = music_df.fillna(column_medians)

music_df_fill_median.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 1 to 50
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          50 non-null     object 
 1   artist        50 non-null     object 
 2   genre         50 non-null     object 
 3   bpm           50 non-null     float64
 4   energy        50 non-null     float64
 5   dance         50 non-null     int64  
 6   loudness      50 non-null     int64  
 7   liveness      50 non-null     float64
 8   valence       50 non-null     float64
 9   length        50 non-null     float64
 10  acousticness  50 non-null     int64  
 11  speechiness   50 non-null     int64  
 12  popularity    50 non-null     float64
dtypes: float64(6), int64(4), object(3)
memory usage: 5.5+ KB


In [113]:
# Find the rows whose popularity equals the column minimum and take the first
# of them (there may be ties).
popularity = music_df_fill_mean.popularity
is_least_popular = popularity == popularity.min()
least_popular = music_df_fill_mean[is_least_popular].iloc[0]

least_popular.artist

'Shawn Mendes'

In [114]:
# All tracks by the artist of the least popular track.
by_same_artist = music_df_fill_mean.artist == least_popular.artist
artist_songs = music_df_fill_mean[by_same_artist]

# Among those, keep the track(s) with the highest loudness value.
max_loudness = artist_songs.loudness.max()
loudest = artist_songs[artist_songs.loudness == max_loudness]

loudest

Unnamed: 0,name,artist,genre,bpm,energy,dance,loudness,liveness,valence,length,acousticness,speechiness,popularity
26,If I Can't Have You,Shawn Mendes,canadian pop,119.354167,82.0,69,-4,13.791667,87.0,191.0,49,6,70.0


## Plotting

In [115]:
# Short alias used by the plotting cells below. This is a second name for the
# same frame, not a copy, and it rebinds df, which earlier held the purchases
# table.
df = music_df_fill_mean

In [116]:
fig = go.Figure()

# Sort by loudness so the x values of both traces run left to right.
loud_sort = df.sort_values(by="loudness")

fig.add_trace(go.Scatter(x=loud_sort.loudness, y=loud_sort.energy, name="energy"))
fig.add_trace(go.Scatter(x=loud_sort.loudness, y=loud_sort.bpm, name="bpm"))

# Title and axis labels so the figure can be read on its own.
# update_layout returns the figure, so the cell still displays it.
fig.update_layout(
    title="Energy and BPM by loudness",
    xaxis_title="loudness",
    yaxis_title="value",
)

In [134]:
# Count the tracks per artist with pandas' built-in value_counts instead of
# assembling the table by hand from np.unique, zip and a dict.
# sort_index() orders the artists alphabetically (value_counts alone orders by
# descending count); rename_axis / reset_index(name=...) produce the two
# columns "name" and "counts".
df2 = (
    df["artist"]
    .value_counts()
    .sort_index()
    .rename_axis("name")
    .reset_index(name="counts")
)
df2.head()

Unnamed: 0,name,counts
0,Ali Gatie,1
1,Anuel AA,1
2,Ariana Grande,2
3,Bad Bunny,1
4,Billie Eilish,2


In [136]:
fig = go.Figure()

# Bracket access is used for the columns: attribute access such as df2.name
# only works while no DataFrame attribute of the same name exists.
fig.add_trace(go.Bar(x=df2["name"], y=df2["counts"]))

# Title and axis labels so the figure can be read on its own.
# update_layout returns the figure, so the cell still displays it.
fig.update_layout(
    title="Number of tracks per artist",
    xaxis_title="artist",
    yaxis_title="tracks",
)