# DB connection 

In [1]:
from collections import Counter
from itertools import islice

import matplotlib.pyplot as plt
from pymongo import MongoClient

# No arguments: connect to the MongoDB server on localhost at the default port.
client = MongoClient()
db = client.get_database('spotify')

# Get Collections

In [2]:
# NOTE: this name shadows the built-in all() for the rest of the notebook;
# it is kept because the aggregation cells below refer to it.
all = db.get_collection('all')

In [3]:
 # Print the collection handle to confirm the database and collection it points to.
 print(all)

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'spotify'), 'all')


In [4]:
def display_results(res, limit=None):
    """Print the entries of ``res``, one per line.

    Args:
        res: Any iterable of printable entries, e.g. the cursor returned by
            an aggregation or a plain list of documents.
        limit: Maximum number of entries to print. ``None`` (the default)
            prints every entry, exactly as before this parameter existed.

    Raises:
        ValueError: If ``limit`` is negative (raised by ``itertools.islice``).
    """
    # islice stops pulling from `res` once `limit` entries have been printed,
    # so a large cursor is not drained just to preview a few documents.
    for entry in islice(res, limit):
        print(entry)

# Query 11
### Average track length per genre

Unfortunately, the "genre" field (`genre_0`) is attached to the artist only, not to the individual album or track, which would be more accurate.

(By the way, how many genres do we have?)

In [5]:
# Stage 1: group on `genre_0` so that each distinct value yields exactly one document.
# NOTE: documents lacking `genre_0` (if any) end up in a group of their own and
# are therefore counted as one more "genre".
one_document_per_genre = {'$group': {'_id': "$genre_0"}}

# Stage 2: fold those documents into a single group (constant `_id`) and add 1
# for each of them, which gives the number of distinct `genre_0` values.
count_documents = {'$group': {'_id': True, 'uniqueGenres': {'$sum': 1}}}

uniqueGenresQuery = [one_document_per_genre, count_documents]

result_uniqueGenresQuery = list(all.aggregate(uniqueGenresQuery))

In [6]:
# Display the aggregation result: a single document holding the distinct-genre count.
result_uniqueGenresQuery

[{'_id': True, 'uniqueGenres': 3009}]

In [7]:
# Stage 1: one document per `genre_0` value, carrying the mean `duration_ms`
# of the documents that share it.
average_per_genre = {
    '$group': {
        '_id': "$genre_0",
        'averageTrackLengthPerGenre': {'$avg': "$duration_ms"},
    }
}

# Stage 2: expose the grouping key under the name `genre` and drop `_id`.
rename_key_to_genre = {
    '$project': {
        '_id': 0,
        'genre': "$_id",
        'averageTrackLengthPerGenre': 1,
    }
}

# Stage 3: rank the same input twice inside one `$facet` stage. Sort direction 1
# (ascending) gives the shortest averages, -1 (descending) the longest; each
# ranking keeps its first 100 genres.
both_rankings = {
    '$facet': {
        facet_name: [
            {'$sort': {"averageTrackLengthPerGenre": direction}},
            {'$limit': 100},
        ]
        for facet_name, direction in (("shortest100", 1), ("longest100", -1))
    }
}

query11 = [average_per_genre, rename_key_to_genre, both_rankings]

result_query11 = list(all.aggregate(query11))

In [8]:
# Display the raw facet output (both 100-entry rankings), so the rendering is long.
result_query11

[{'shortest100': [{'averageTrackLengthPerGenre': 7882.4,
    'genre': 'sound effects'},
   {'averageTrackLengthPerGenre': 29568.317073170732, 'genre': 'ringtone'},
   {'averageTrackLengthPerGenre': 59256.0, 'genre': 'cruise'},
   {'averageTrackLengthPerGenre': 61558.42857142857, 'genre': 'birthday'},
   {'averageTrackLengthPerGenre': 66968.65384615384, 'genre': 'hauntology'},
   {'averageTrackLengthPerGenre': 68000.0, 'genre': 'deep dance pop'},
   {'averageTrackLengthPerGenre': 69782.04761904762,
    'genre': 'nordic soundtrack'},
   {'averageTrackLengthPerGenre': 74288.0, 'genre': 'russian viral rap'},
   {'averageTrackLengthPerGenre': 75696.0, 'genre': 'japanoise'},
   {'averageTrackLengthPerGenre': 79004.0, 'genre': 'hypnagogic pop'},
   {'averageTrackLengthPerGenre': 80184.6, 'genre': 'plena uruguaya'},
   {'averageTrackLengthPerGenre': 82726.1, 'genre': 'bachchon ke geet'},
   {'averageTrackLengthPerGenre': 83465.11627906977,
    'genre': 'german romanticism'},
   {'averageTrackL

Funnily enough, genres such as "sound effects" and "ringtone" actually exist.
At first sight, the shortest tracks seem to come from genres like punk or from genres tied to "virality", while the longest ones come from classical music.
Let's dig deeper!

In [9]:
# `$facet` wraps both rankings in a single result document, so take that
# document directly instead of building a one-element list and indexing it.
facet_document = result_query11[0]
shortest100 = facet_document['shortest100']
longest100 = facet_document['longest100']

# Keep only the genre names, in the order the pipeline returned them.
shortest_genres = [entry['genre'] for entry in shortest100]
longest_genres = [entry['genre'] for entry in longest100]

In [None]:
# Genre names of the "shortest100" ranking (sorted by ascending average track length).
shortest_genres

In [None]:
# Genre names of the "longest100" ranking (sorted by descending average track length).
longest_genres

In [10]:
# A word must occur at least this many times across a ranking to be reported.
MIN_WORD_FREQUENCY = 3


def count_genre_words(genres):
    """Count how often each lower-cased word occurs across genre names.

    Args:
        genres: Iterable of genre names. Falsy entries (e.g. ``None`` for
            documents without a genre) are skipped instead of breaking the join.

    Returns:
        A ``collections.Counter`` mapping each word to its number of occurrences.
    """
    combined_text = ' '.join(genre for genre in genres if genre)
    return Counter(combined_text.lower().split())


def print_frequent_words(word_frequency, min_frequency=MIN_WORD_FREQUENCY):
    """Print the words of a counter that occur at least ``min_frequency`` times.

    Words are printed from the most to the least frequent.

    Args:
        word_frequency: A ``collections.Counter`` of words.
        min_frequency: Smallest count a word needs in order to be printed.
    """
    for word, frequency in word_frequency.most_common():
        if frequency < min_frequency:
            # most_common() is sorted by descending count, so nothing that
            # follows can reach the threshold either.
            break
        print(f'The word "{word}" appears {frequency} times.')


shortest_genres_word_frequency = count_genre_words(shortest_genres)
longest_genres_word_frequency = count_genre_words(longest_genres)

print_frequent_words(shortest_genres_word_frequency)
print('')
print_frequent_words(longest_genres_word_frequency)

# To look up one specific word, index the counter directly, e.g.
# shortest_genres_word_frequency['hardcore'] (words never seen count as 0).

The word "indie" appears 8 times.
The word "rap" appears 7 times.
The word "pop" appears 5 times.
The word "hip" appears 5 times.
The word "hop" appears 5 times.
The word "russian" appears 4 times.
The word "hardcore" appears 4 times.
The word "musica" appears 4 times.
The word "music" appears 4 times.
The word "german" appears 3 times.
The word "beats" appears 3 times.
The word "japanese" appears 3 times.
The word "phonk" appears 3 times.
The word "children's" appears 3 times.
The word "kazakh" appears 3 times.
The word "punk" appears 3 times.

The word "house" appears 9 times.
The word "classical" appears 8 times.
The word "progressive" appears 7 times.
The word "techno" appears 6 times.
The word "trance" appears 6 times.
The word "pop" appears 5 times.
The word "jazz" appears 5 times.
The word "industrial" appears 3 times.
The word "contemporary" appears 3 times.
The word "swedish" appears 3 times.
The word "rock" appears 3 times.
The word "electronic" appears 3 times.
The word "fol

One of the genres we expected among the shortest tracks is indeed hardcore punk, but at the opposite end of the spectrum we also find music for children! The presence of more common genres (indie/rap/pop/beats/phonk) is possibly due to the emergence of artists producing tracks meant to be used in viral TikTok content.
On the other hand, the longest tracks come from subgenres of the (relatively) new world of electronic music (house/techno/trance/industrial) and from the (relatively) old worlds of jazz and classical music.