In [3]:
from pathlib import Path

import pandas as pd

# Load the best-selling-albums table from the Official Charts article.
# pd.read_html returns a list with one DataFrame per table found on the page;
# header=0 takes each table's first row as its column labels.
url = 'https://www.officialcharts.com/chart-news/the-best-selling-albums-of-all-time-on-the-official-uk-chart__15551/'
data = pd.read_html(url, header=0)
# Work on a copy of the first table so that in-place edits to `df`
# (such as assigning to df.columns) never alter the raw table kept in `data`.
df = data[0].copy()
df

Unnamed: 0,POS,TITLE,ARTIST,YEAR,HIGH POSN
0,1,GREATEST HITS,QUEEN,1981,1
1,2,GOLD - GREATEST HITS,ABBA,1992,1
2,3,SGT PEPPER'S LONELY HEARTS CLUB BAND,BEATLES,1967,1
3,4,21,ADELE,2011,1
4,5,WHAT'S THE STORY MORNING GLORY,OASIS,1995,1
5,6,THRILLER,MICHAEL JACKSON,1982,1
6,7,THE DARK SIDE OF THE MOON,PINK FLOYD,1973,2
7,8,BROTHERS IN ARMS,DIRE STRAITS,1985,1
8,9,BAD,MICHAEL JACKSON,1987,1
9,10,GREATEST HITS II,QUEEN,1991,1


In [5]:
# Translate the scraped English column labels to Polish.
# An explicit old -> new mapping replaces the positional list of names, so a
# changed column order on the source page cannot silently mislabel the data.
# rename() skips labels that are not present, which keeps this cell safe to re-run.
POLISH_COLUMN_NAMES = {
    'POS': 'POZ',
    'TITLE': 'TYTUŁ',
    'ARTIST': 'ARTYSTA',
    'YEAR': 'ROK',
    'HIGH POSN': 'MAX POZ',
}
df = df.rename(columns=POLISH_COLUMN_NAMES)
# Fail right here (not in a later cell) if the table layout is not the expected one.
assert set(POLISH_COLUMN_NAMES.values()) <= set(df.columns), (
    f'unexpected table columns: {list(df.columns)}'
)
df



Unnamed: 0,POZ,TYTUŁ,ARTYSTA,ROK,MAX POZ
0,1,GREATEST HITS,QUEEN,1981,1
1,2,GOLD - GREATEST HITS,ABBA,1992,1
2,3,SGT PEPPER'S LONELY HEARTS CLUB BAND,BEATLES,1967,1
3,4,21,ADELE,2011,1
4,5,WHAT'S THE STORY MORNING GLORY,OASIS,1995,1
5,6,THRILLER,MICHAEL JACKSON,1982,1
6,7,THE DARK SIDE OF THE MOON,PINK FLOYD,1973,2
7,8,BROTHERS IN ARMS,DIRE STRAITS,1985,1
8,9,BAD,MICHAEL JACKSON,1987,1
9,10,GREATEST HITS II,QUEEN,1991,1


In [10]:
# Count the distinct artists on the list; missing values are not counted.
num_unique_artists = df.loc[:, 'ARTYSTA'].nunique(dropna=True)
num_unique_artists



47

In [9]:
# Tally the number of albums per artist (value_counts lists the most frequent first),
# then keep only the artists that appear on the list more than once.
# NOTE(review): the counts include catch-all credits such as 'ORIGINAL SOUNDTRACK',
# which are not bands — confirm whether they should be excluded.
artist_counts = df.loc[:, 'ARTYSTA'].value_counts()
most_frequent_bands = artist_counts.loc[artist_counts.gt(1)]
artist_counts



ARTYSTA
COLDPLAY                    3
TAKE THAT                   3
FLEETWOOD MAC               2
ABBA                        2
DIDO                        2
ORIGINAL SOUNDTRACK         2
QUEEN                       2
ROBBIE WILLIAMS             2
BEATLES                     2
MICHAEL JACKSON             2
ADELE                       2
SNOW PATROL                 1
KEANE                       1
U2                          1
JEFF WAYNE                  1
SCISSOR SISTERS             1
PHIL COLLINS                1
GEORGE MICHAEL              1
EMINEM                      1
MIKE OLDFIELD               1
TRAVIS                      1
TRACY CHAPMAN               1
ED SHEERAN                  1
NORAH JONES                 1
PAUL SIMON                  1
ORIGINAL CAST RECORDING     1
ALANIS MORISSETTE           1
CORRS                       1
KINGS OF LEON               1
BOB MARLEY & THE WAILERS    1
OASIS                       1
PINK FLOYD                  1
DIRE STRAITS                1
MA

In [11]:
# Capitalize the column labels: first character upper-case, the rest lower-case
# (e.g. 'MAX POZ' -> 'Max poz').
# The labels are derived from the current ones instead of being assigned from a
# hard-coded positional list, so columns cannot be mislabelled and re-running
# the cell gives the same result.
df.columns = df.columns.str.capitalize()
df

Unnamed: 0,Poz,Tytuł,Artysta,Rok,Max poz
0,1,GREATEST HITS,QUEEN,1981,1
1,2,GOLD - GREATEST HITS,ABBA,1992,1
2,3,SGT PEPPER'S LONELY HEARTS CLUB BAND,BEATLES,1967,1
3,4,21,ADELE,2011,1
4,5,WHAT'S THE STORY MORNING GLORY,OASIS,1995,1
5,6,THRILLER,MICHAEL JACKSON,1982,1
6,7,THE DARK SIDE OF THE MOON,PINK FLOYD,1973,2
7,8,BROTHERS IN ARMS,DIRE STRAITS,1985,1
8,9,BAD,MICHAEL JACKSON,1987,1
9,10,GREATEST HITS II,QUEEN,1991,1


In [12]:
# Drop the 'Max poz' column.
# `df` is rebound to the result, so on a second run the column is already gone;
# errors='ignore' lets the cell be re-run without raising KeyError.
df = df.drop(columns=['Max poz'], errors='ignore')
df

Unnamed: 0,Poz,Tytuł,Artysta,Rok
0,1,GREATEST HITS,QUEEN,1981
1,2,GOLD - GREATEST HITS,ABBA,1992
2,3,SGT PEPPER'S LONELY HEARTS CLUB BAND,BEATLES,1967
3,4,21,ADELE,2011
4,5,WHAT'S THE STORY MORNING GLORY,OASIS,1995
5,6,THRILLER,MICHAEL JACKSON,1982
6,7,THE DARK SIDE OF THE MOON,PINK FLOYD,1973
7,8,BROTHERS IN ARMS,DIRE STRAITS,1985
8,9,BAD,MICHAEL JACKSON,1987
9,10,GREATEST HITS II,QUEEN,1991


In [13]:
# Find the release year that occurs most often on the list.
albums_per_year = df['Rok'].value_counts()
most_common_year = albums_per_year.idxmax()
most_common_year

2000

In [14]:
# Count the albums whose release year lies in 1960-1990, both ends included.
# The boolean mask is summed (True counts as 1) and converted to a plain int.
albums_1960_1990 = int(df['Rok'].between(1960, 1990, inclusive='both').sum())
albums_1960_1990

22

In [15]:
# Latest release year among the albums on the list.
youngest_album_year = df.loc[:, 'Rok'].max(skipna=True)
youngest_album_year


2015

In [16]:
# For every artist keep only their earliest-released album, listed by artist name.
# Rows are ordered by release year and then by 'Poz', so when an artist has two
# albums from the same year the one with the smaller 'Poz' value is kept.
# A single-key sort_values uses a non-stable algorithm by default, which would
# leave that choice unspecified.
# NOTE(review): catch-all credits such as 'ORIGINAL SOUNDTRACK' are treated as a
# single artist here, so only one of their albums survives — confirm this is intended.
earliest_albums = (
    df.sort_values(['Rok', 'Poz'])
    .drop_duplicates(subset='Artysta', keep='first')
    .sort_values('Artysta')
)
earliest_albums

Unnamed: 0,Poz,Tytuł,Artysta,Rok
45,46,GREATEST HITS,ABBA,1975
3,4,21,ADELE,2011
40,41,JAGGED LITTLE PILL,ALANIS MORISSETTE,1995
12,13,BACK TO BLACK,AMY WINEHOUSE,2006
2,3,SGT PEPPER'S LONELY HEARTS CLUB BAND,BEATLES,1967
15,16,LEGEND,BOB MARLEY & THE WAILERS,1984
44,45,PARACHUTES,COLDPLAY,2000
27,28,TALK ON CORNERS,CORRS,1997
25,26,WHITE LADDER,DAVID GRAY,1998
24,25,NO ANGEL,DIDO,2000


In [22]:
# Save the per-artist earliest albums to a CSV file (row index omitted).
# The target folder is created first because to_csv does not create missing
# directories and would fail if the folder is absent.
# NOTE(review): this is a machine-specific absolute Windows path — consider a
# path relative to the notebook so the cell also works on other machines.
output_csv = Path(r'C:\Kodilla\albumy_wszechczasow\albumy_wszechczasow.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
earliest_albums.to_csv(output_csv, index=False)

# Summary of the computed answers: unique artist count, artists with more than
# one album, most common year, number of albums from 1960-1990, latest year.
(num_unique_artists, most_frequent_bands.index.tolist(), most_common_year, albums_1960_1990, youngest_album_year)

(47,
 ['COLDPLAY',
  'TAKE THAT',
  'FLEETWOOD MAC',
  'ABBA',
  'DIDO',
  'ORIGINAL SOUNDTRACK',
  'QUEEN',
  'ROBBIE WILLIAMS',
  'BEATLES',
  'MICHAEL JACKSON',
  'ADELE'],
 2000,
 22,
 2015)